#!/usr/bin/perl

use v5.36;
use utf8;

no warnings qw( experimental::builtin experimental::for_list );
use builtin qw( true false indexed );

use Text::Treesitter;
use Text::Treesitter::QueryCursor;
use Text::Treesitter::QueryMatch;

use Convert::Color;
use File::Slurper qw( read_text );
use Getopt::Long;
use String::Tagged;
use String::Tagged::Terminal;

GetOptions(
   'language|l=s'   => \( my $LANGUAGE = "c" ),
   'unrecognised|u' => \( my $PRINT_UNRECOGNISED ),
   'directory|d=s'  => \( my $LANGUAGE_DIR ),
   'use-theme|U'    => \( my $USE_THEME ),
   'folding|F'      => \( my $FOLDING ),
   'injections|J'   => \( my $INJECTIONS ),
) or exit 1;

STDOUT->binmode( ':encoding(UTF-8)' );

my %FORMATS = (
   # Names stolen from tree-sitter's highlight theme
   attribute  => { fg => "vga:cyan", italic => 1 },
   comment    => { fg => "xterm:15", bg => "xterm:54", italic => 1 },
   decorator  => { fg => "xterm:140", italic => 1 },
   function   => { fg => "xterm:147", },
   keyword    => { fg => "vga:yellow", bold => 1 },
   module     => { fg => "vga:green", bold => 1 },
   number     => { fg => "vga:magenta" },
   operator   => { fg => "vga:yellow" },
   string     => { fg => "vga:magenta" },
   type       => { fg => "vga:green" },
   variable   => { fg => "vga:cyan" },

   'string.special' => { fg => "vga:red" },
   'function.builtin' => { fg => "xterm:147", bold => 1 },

   # For tree-sitter-perl
   'variable.scalar' => { fg => "xterm:50" },
   'variable.array'  => { fg => "xterm:43" },
   'variable.hash'   => { fg => "xterm:81" },

   # For markup languages; e.g. used by tree-sitter-pod
   'text.emphasis' => { italic => 1 },
   'text.literal'  => { monospace => 1 },
   'text.quote'    => { italic => 1, bg => "xterm:236", },
   'text.strong'   => { bold => 1 },
   'text.title'    => { fg => "vga:yellow", bold => 1, under => 1 },
   'text.uri'      => { fg => "vga:blue", under => 1 },

   # Extra names
   label      => { fg => "xterm:140", under => 1 },
   preproc    => { fg => "xterm:140", bold => 1 },
   verbatim   => { fg => "xterm:251", monospace => 1 },
);

if( $USE_THEME and my $config = Text::Treesitter->treesitter_config ) {
   my %theme = $config->{theme}->%*;
   foreach my $key ( sort keys %theme ) {
      my %format = ( ref $theme{$key} ) ? $theme{$key}->%* : ( color => $theme{$key} );

      $format{fg} = "xterm:" . delete $format{color} if defined $format{color};

      $FORMATS{$key} = \%format;
   }
}

foreach ( values %FORMATS ) {
   $_->{fg} and
      $_->{fg} = Convert::Color->new( $_->{fg} )->as_xterm;
   $_->{bg} and
      $_->{bg} = Convert::Color->new( $_->{bg} )->as_xterm;
}

my $str = String::Tagged->new( read_text $ARGV[0] // "/dev/stdin" );

my @fold_regions;
my $fold_deepest = 0;

my %UNRECOGNISED_CAPTURES;

my %TS_FOR_LANGUAGE;

sub apply_language_highlights( $language, %opts )
{
   my $ts = $TS_FOR_LANGUAGE{ $language } //= eval { Text::Treesitter->new(
      lang_name => $language,
      lang_dir  => $LANGUAGE_DIR,
   ) };
   defined $ts or return;

   my $query_highlight = $ts->load_query_file( "highlights.scm" );

   my $query_folding;
   if( $FOLDING and -f ( my $folding_path = $ts->query_file_path( "fold.scm" ) ) ) {
      $query_folding = $ts->load_query_file( $folding_path );
   }

   my $query_injections;
   if( $INJECTIONS and -f ( my $injections_path = $ts->query_file_path( "injections.scm" ) ) ) {
      $query_injections = $ts->load_query_file( $injections_path );
   }

   my $tree;
   if( defined $opts{start_byte} ) {
      $tree = $ts->parse_string_range( $str, %opts{qw( start_byte end_byte )} );
   }
   else {
      $tree = $ts->parse_string( $str );
   }
   my $root = $tree->root_node;

   my $qc = Text::Treesitter::QueryCursor->new;

   # For ease of code management, line numbers are 0-indexed
   if( $FOLDING and $query_folding ) {
      $qc->exec( $query_folding, $root );

      my @regions_applied;

      while( my $captures = $qc->next_match_captures( multi => 1 ) ) {
         my @nodes = $captures->{fold}->@*;

         # Some fold patterns capture multiple toplevel nodes.
         my $startline = ( $nodes[ 0]->start_point )[0];
         my $endline   = ( $nodes[-1]->end_point )[0];

         die "TODO: This fold region starts earlier than the previous one"
            if @fold_regions and $startline < $fold_regions[-1][0];

         next if $startline == $endline;

         push @fold_regions, [ $startline, $endline ];

         pop @regions_applied while @regions_applied and $regions_applied[-1][1] < $startline;
         push @regions_applied, $fold_regions[-1];

         $fold_deepest = scalar @regions_applied if @regions_applied > $fold_deepest;
      }
   }

   if( $INJECTIONS and $query_injections ) {
      $qc->exec( $query_injections, $root );

      while( my $captures = $qc->next_match_captures ) {
         my $sublanguage;
         my $content;
         if( defined $captures->{language} ) {
            $sublanguage = $captures->{language}->text;
            $content     = $captures->{content};
         }
         elsif( keys $captures->%* > 1 ) {
            warn "This injection capture yielded more than one name key\n";
            next;
         }
         else {
            $sublanguage = ( keys $captures->%* )[0];
            $content     = $captures->{$sublanguage};
         }

         apply_language_highlights( $sublanguage,
            start_byte => $content->start_byte,
            end_byte   => $content->end_byte,
         );
      }
   }

   $qc->exec( $query_highlight, $root );

   while( my $captures = $qc->next_match_captures ) {
      CAPTURE: foreach my $capturename ( sort keys $captures->%* ) {
         my $node = $captures->{$capturename};

         my $start = $tree->byte_to_char( $node->start_byte );
         my $len   = $tree->byte_to_char( $node->end_byte ) - $start;

         my @nameparts = split m/\./, $capturename;
         while( @nameparts ) {
            if( my $format = $FORMATS{ join ".", @nameparts } ) {
               $str->apply_tag( $start, $len, $_, $format->{$_} ) for keys %$format;
               next CAPTURE;
            }

            pop @nameparts;
         }

         $UNRECOGNISED_CAPTURES{ $capturename }++;
      }
   }
}

apply_language_highlights( $LANGUAGE );

foreach my ( $lnum, $line ) ( indexed $str->split( qr/\n/ ) ) {
   if( $FOLDING ) {
      my @regions = grep { $_->[0] <= $lnum and $lnum <= $_->[1] } @fold_regions;
      my $final_here;
      my $markers = join "", map {
         $_->[0] == $lnum ? ( $final_here = true, "┌" )[1] :
         $_->[1] == $lnum ? ( $final_here = true, "└" )[1] :
                            "│";
      } @regions;

      $markers .= ( $final_here ? "─" : " " ) while length $markers < $fold_deepest;

      print $markers . " ";
   }
   String::Tagged::Terminal->new_from_formatting( $line )
      ->say_to_terminal;
}

if( $PRINT_UNRECOGNISED and keys %UNRECOGNISED_CAPTURES ) {
   print STDERR "-------\nUnrecognised:\n";
   foreach ( sort keys %UNRECOGNISED_CAPTURES ) {
      print STDERR "  $_\n";
   }
}