1414
1515use FindBin;
1616use lib " $FindBin::RealBin /../../tools/" ;
17+ use GenerateSparseArray;
1718
1819my $output_path = ' .' ;
1920
@@ -465,11 +466,11 @@ sub get_hash_key
465466
466467print $OT " \n };\n " ;
467468
469+ my $case_table_name = " case_map" ;
468470my @codepoints = keys %simple ;
469- my $range = make_ranges(\@codepoints , 500);
470- my @case_map_lines = range_tables($range );
471- my $case_map_length = scalar @case_map_lines ;
472- my $case_map_table = join " \n " , @case_map_lines ;
471+ my $range = GenerateSparseArray::make(\@codepoints , 500);
472+ my $case_map_table = GenerateSparseArray::tables($range , $case_table_name ,
473+ sub { $simple { $_ [0] }{Index } || 0 });
473474
474475print $OT <<"EOS" ;
475476
@@ -478,10 +479,7 @@ sub get_hash_key
478479 * of the following arrays: case_map_lower, case_map_title, case_map_upper,
479480 * case_map_fold.
480481 */
481- static const uint16 case_map[$case_map_length ] =
482- {
483482$case_map_table
484- };
485483
486484
487485EOS
@@ -506,12 +504,13 @@ sub get_hash_key
506504 /* Fast path for codepoints < $fastpath_limit */
507505 if (cp < $fastpath_limit )
508506 {
509- return case_map [cp];
507+ return $case_table_name \ [ cp];
510508 }
511509
512510EOS
513511
514- print $OT join (" \n " , @{ branch($range , 0, $# $range , 1) });
512+ print $OT GenerateSparseArray::branch_as_text($range , 0, $# $range , 1,
513+ $case_table_name );
515514
516515print $OT <<"EOS" ;
517516
@@ -521,146 +520,3 @@ sub get_hash_key
521520EOS
522521
523522close $OT ;
524-
# Generate C code consisting of nested if/else-if conditions that perform a
# binary search over the ranges and return the matching table entry.
#
# Arguments:
#   $range      - array reference of hashes with the keys Start, End
#                 (exclusive upper bound) and Offset (position of the range's
#                 first element within the flattened table)
#   $from, $to  - inclusive indexes into @$range delimiting the part to search
#   $indent     - number of tab characters to prefix each emitted line with
#   $table_name - optional name of the C array to index; defaults to
#                 'case_map', so existing four-argument callers are unaffected
#
# Returns a reference to an array of C source lines (some elements contain an
# embedded newline separating the condition from its opening brace).
sub branch
{
	my ($range, $from, $to, $indent, $table_name) = @_;
	$table_name = 'case_map' unless defined $table_name;

	my @result;

	# Probe the middle of the current slice; nothing to emit if it is empty.
	my $idx = $from + int(($to - $from) / 2);
	return \@result unless exists $range->[$idx];

	my $space = "\t" x $indent;
	my $entry = $range->[$idx];

	# IF state
	if ($idx == $from)
	{
		# Leftmost range of this slice: emit the lookup directly.  Only the
		# very first range needs an explicit lower-bound check.
		if ($idx == 0)
		{
			push @result,
			  sprintf("%sif (cp >= 0x%04X && cp < 0x%04X)\n%s{",
				$space, $entry->{Start}, $entry->{End}, $space);
		}
		else
		{
			push @result,
			  sprintf("%sif (cp < 0x%04X)\n%s{",
				$space, $entry->{End}, $space);
		}

		push @result,
		  sprintf("%s\treturn %s[cp - 0x%04X + %d];",
			$space, $table_name, $entry->{Start}, $entry->{Offset});
	}
	else
	{
		# Everything below this range's End is handled by the left half.
		push @result,
		  sprintf("%sif (cp < 0x%04X)\n%s{", $space, $entry->{End}, $space);
		push @result,
		  @{ branch($range, $from, $idx - 1, $indent + 1, $table_name) };
	}

	push @result, $space . "}";

	# return now if it's the last range
	return \@result if $idx == $#{$range};

	# ELSE looks ahead to the next range to avoid adding an
	# unnecessary level of branching.
	$entry = $range->[ $idx + 1 ];

	# ELSE state
	push @result,
	  sprintf("%selse if (cp >= 0x%04X)\n%s{",
		$space, $entry->{Start}, $space);

	if ($idx == $to)
	{
		push @result,
		  sprintf("%s\treturn %s[cp - 0x%04X + %d];",
			$space, $table_name, $entry->{Start}, $entry->{Offset});
	}
	else
	{
		push @result,
		  @{ branch($range, $idx + 1, $to, $indent + 1, $table_name) };
	}

	push @result, $space . "}";

	return \@result;
}
595-
# Split a set of numbers into ranges of "close" values: after sorting, a new
# range begins whenever the gap between two neighboring numbers is greater
# than $limit.
#
# For example, with the numbers 0, 1, 2, 5, 6 and limit = 1, the gap between
# 2 and 5 is 3, which is greater than 1, so the ranges are 0-2 and 5-6.
#
# Each returned range is a hash with the keys Start (first number), End (last
# number plus one) and Offset (sum of the lengths of all preceding ranges).
# Dies unless there are at least two numbers and the smallest one is 0.
# Returns a reference to the array of ranges.
sub make_ranges
{
	my ($nums, $limit) = @_;

	my @ordered = sort { $a <=> $b } @$nums;

	die "expecting at least 2 codepoints" if (scalar @ordered < 2);
	die "expecting first codepoint to start at 0" unless $ordered[0] == 0;

	my @ranges;
	my $first = $ordered[0];
	my $offset = 0;

	foreach my $i (0 .. $#ordered)
	{
		my $cp = $ordered[$i];
		my $is_last = ($i == $#ordered);

		# Keep extending the current range while the next number is near.
		next unless $is_last || ($ordered[ $i + 1 ] - $cp > $limit);

		my $end = $cp + 1;
		push @ranges,
		  {
			Start => $first,
			End => $end,
			Offset => $offset
		  };
		$offset += $end - $first;
		$first = $ordered[ $i + 1 ] unless $is_last;
	}

	return \@ranges;
}
643-
# Build the body lines of the case_map table by walking every codepoint of
# every range, in order.  Ranges may include codepoints without a case mapping
# at all, in which case the entry emitted for them is zero.
#
# Arguments:
#   $range - array reference of hashes with the keys Start and End
#            (End is one past the last codepoint of the range)
#
# Reads the file-level %simple hash (codepoint => { Index => ... }).
# Returns the list of formatted table lines, one per codepoint.
sub range_tables
{
	my ($range) = @_;
	my @lines;

	foreach my $entry (@$range)
	{
		my $start = $entry->{Start};
		my $end = $entry->{End} - 1;

		foreach my $cp ($start .. $end)
		{
			# Check for the key first: reading $simple{$cp}{Index} directly
			# would autovivify an empty entry in %simple for every gap
			# codepoint.
			my $index = exists $simple{$cp} ? ($simple{$cp}{Index} || 0) : 0;

			my $idx = sprintf("%d,", $index);
			$idx .= "\t" if length($idx) < 4;
			push @lines, sprintf("\t%s\t\t\t\t\t\t/* U+%06X */", $idx, $cp);
		}
	}

	return @lines;
}
0 commit comments