[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
branch master updated: Use pre-set keys for index sorting
From: Patrice Dumas
Subject: branch master updated: Use pre-set keys for index sorting
Date: Mon, 13 Feb 2023 11:50:57 -0500
This is an automated email from the git hooks/post-receive script.
pertusus pushed a commit to branch master
in repository texinfo.
The following commit(s) were added to refs/heads/master by this push:
new 434dcffe6a Use pre-set keys for index sorting
434dcffe6a is described below
commit 434dcffe6a531f2d25200b2059ddfd784ab58c6f
Author: Patrice Dumas <pertusus@free.fr>
AuthorDate: Mon Feb 13 17:50:37 2023 +0100
Use pre-set keys for index sorting
* tp/Texinfo/Structuring.pm (_sort_string, _sort_index_entries),
(_collator_sort_index_entries, index_entry_sort_string, sort_indices),
tp/Texinfo/Convert/LaTeX.pm (_index_entry): add the possibility to
preset the sort keys using getSortKey. Distinguish sort string, which
can be the preset binary string, from entry string which is a text
representation of the sorted string. Idea from Gavin.
---
ChangeLog | 11 ++++
tp/Texinfo/Convert/LaTeX.pm | 4 +-
tp/Texinfo/Structuring.pm | 119 +++++++++++++++++++++++++++++++++++++-------
3 files changed, 113 insertions(+), 21 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index f67384c090..8341aa3fb9 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+2023-02-12 Patrice Dumas <pertusus@free.fr>
+
+ Use pre-set keys for index sorting
+
+ * tp/Texinfo/Structuring.pm (_sort_string, _sort_index_entries),
+ (_collator_sort_index_entries, index_entry_sort_string, sort_indices),
+ tp/Texinfo/Convert/LaTeX.pm (_index_entry): add the possibility to
+ preset the sort keys using getSortKey. Distinguish sort string, which
+ can be the preset binary string, from entry string which is a text
+ representation of the sorted string. Idea from Gavin.
+
2023-02-12 Gavin Smith <gavinsmith0123@gmail.com>
Non-XS optimizations
diff --git a/tp/Texinfo/Convert/LaTeX.pm b/tp/Texinfo/Convert/LaTeX.pm
index 91370f0ba8..7c706a1ffe 100644
--- a/tp/Texinfo/Convert/LaTeX.pm
+++ b/tp/Texinfo/Convert/LaTeX.pm
@@ -2437,7 +2437,6 @@ sub _index_entry($$)
$self->{'formatting_context'}->[-1]->{'index'} = 1;
my @result;
foreach my $subentry_entry_and_sortas (@$subentries) {
- my $sortas;
my ($subentry, $subentry_sortas) = @$subentry_entry_and_sortas;
if ($in_code) {
push @{$self->{'formatting_context'}->[-1]->{'code'}}, 1;
@@ -2447,7 +2446,8 @@ sub _index_entry($$)
pop @{$self->{'formatting_context'}->[-1]->{'code'}};
}
# always setup a string to sort with as we may use commands
- $sortas = Texinfo::Structuring::index_entry_sort_string($entry,
+ my ($sortas, $sort_string)
+ = Texinfo::Structuring::index_entry_sort_string($entry,
$subentry, $subentry_sortas, $options);
my $result = '';
if (defined($sortas)) {
diff --git a/tp/Texinfo/Structuring.pm b/tp/Texinfo/Structuring.pm
index cd6fad0b66..6955789ad2 100644
--- a/tp/Texinfo/Structuring.pm
+++ b/tp/Texinfo/Structuring.pm
@@ -1736,6 +1736,48 @@ sub new_complete_node_menu
return $new_menu;
}
+sub _sort_string($$)
+{
+ my $a = shift;
+ my $b = shift;
+ return (($a =~ /^[[:alpha:]]/ and $b =~ /^[[:alpha:]]/)
+ or ($a !~ /^[[:alpha:]]/ and $b !~ /^[[:alpha:]]/))
+ ? ($a cmp $b)
+ : (($a =~ /^[[:alpha:]]/ && 1) || -1);
+}
+
+sub _sort_index_entries($$)
+{
+ my $key1 = shift;
+ my $key2 = shift;
+
+ my $key_index = 0;
+ # the keys array corresponds to th emain entry and subentries
+ foreach my $key1_str (@{$key1->{'keys'}}) {
+ my $res = _sort_string($key1_str,
+ $key2->{'keys'}->[$key_index]);
+ if ($res != 0) {
+ return $res;
+ }
+ $key_index ++;
+ if (scalar(@{$key2->{'keys'}}) <= $key_index) {
+ last;
+ }
+ }
+ my $res = (scalar(@{$key1->{'keys'}}) <=> scalar(@{$key2->{'keys'}}));
+ if ($res == 0) {
+ $res = ($key1->{'number'} <=> $key2->{'number'});
+ }
+ # This may happen if 2 indices are merged as the number is per
+ # index name. The @-command should be different though, for
+ # index names to be different.
+ if ($res == 0) {
+ $res = ($key1->{'index_at_command'} cmp $key2->{'index_at_command'});
+ }
+ return $res;
+}
+
+# This is a duplicate of the functions above, for efficiency
sub _collator_sort_string($$$)
{
my $a = shift;
@@ -1747,7 +1789,7 @@ sub _collator_sort_string($$$)
: (($a =~ /^[[:alpha:]]/ && 1) || -1);
}
-sub _sort_index_entries($$$)
+sub _collator_sort_index_entries($$$)
{
my $key1 = shift;
my $key2 = shift;
@@ -1756,8 +1798,8 @@ sub _sort_index_entries($$$)
my $key_index = 0;
# the keys array corresponds to th emain entry and subentries
foreach my $key1_str (@{$key1->{'keys'}}) {
- my $res = _collator_sort_string(uc($key1_str),
- uc($key2->{'keys'}->[$key_index]),
+ my $res = _collator_sort_string($key1_str,
+ $key2->{'keys'}->[$key_index],
$collator);
if ($res != 0) {
return $res;
@@ -1796,12 +1838,13 @@ sub setup_index_entry_keys_formatting($)
}
# can be used for subentries
-sub index_entry_sort_string($$$$)
+sub index_entry_sort_string($$$$;$)
{
my $main_entry = shift;
my $entry_tree_element = shift;
my $sortas = shift;
my $options = shift;
+ my $collator = shift;
my $convert_to_text_options = {%$options};
$convert_to_text_options->{'code'} = $main_entry->{'in_code'};
@@ -1825,18 +1868,30 @@ sub index_entry_sort_string($$$$)
# represented internally in UTF-8. See "the Unicode bug" in the
# "perlunicode" man page.
utf8::upgrade($entry_key);
+ my $sort_entry_key;
+ if ($collator) {
+ $sort_entry_key = $collator->getSortKey(uc($entry_key));
+ } else {
+ $sort_entry_key = uc($entry_key);
+ }
- return $entry_key;
+ return ($entry_key, $sort_entry_key);
}
+# if true pre-set collating keys
+#my $default_preset_keys = 0;
+my $default_preset_keys = 1;
+
# the structure returned depends on $SORT_BY_LETTER being set
# or not. It is described in the pod documentation.
-sub sort_indices($$$;$)
+sub sort_indices($$$;$$)
{
my $registrar = shift;
my $customization_information = shift;
my $index_entries = shift;
my $sort_by_letter = shift;
+ my $preset_keys = shift;
+ $preset_keys = $default_preset_keys if (!defined($preset_keys));
my $options = setup_index_entry_keys_formatting($customization_information);
# TODO Unicode::Collate has been in perl core long enough, but
@@ -1871,6 +1926,8 @@ sub sort_indices($$$;$)
#my $collator = Unicode::Collate->new('variable' => 'Non-Ignorable',
# 'UCA_Version' => 9,
# 'table' => 'allkeys-3.1.1.txt');
+ my $entries_collator;
+ $entries_collator = $collator if $preset_keys;
my $sorted_index_entries;
my $index_entries_sort_strings = {};
return $sorted_index_entries, $index_entries_sort_strings
@@ -1882,10 +1939,12 @@ sub sort_indices($$$;$)
# used if $sort_by_letter
my $index_letter_hash = {};
foreach my $entry (@{$index_entries->{$index_name}}) {
- my $entry_key = index_entry_sort_string($entry,
+ my ($entry_key, $sort_entry_key)
+ = index_entry_sort_string($entry,
{'contents' => $entry->{'entry_content'}},
- $entry->{'sortas'}, $options);
+ $entry->{'sortas'}, $options, $entries_collator);
my @entry_keys;
+ my @sort_entry_keys;
my $letter = '';
if ($entry_key !~ /\S/) {
$registrar->line_warn($customization_information,
@@ -1893,8 +1952,10 @@ sub sort_indices($$$;$)
$entry->{'index_at_command'}),
$entry->{'entry_element'}->{'source_info'});
push @entry_keys, '';
+ push @sort_entry_keys, '';
} else {
push @entry_keys, $entry_key;
+ push @sort_entry_keys, $sort_entry_key;
if ($sort_by_letter) {
# the following line leads to each accented letter being separate
# $letter = uc(substr($entry_key, 0, 1));
@@ -1915,9 +1976,10 @@ sub sort_indices($$$;$)
while ($subentry->{'extra'} and $subentry->{'extra'}->{'subentry'}) {
$subentry_nr ++;
$subentry = $subentry->{'extra'}->{'subentry'};
- my $subentry_key = index_entry_sort_string($entry,
+ my ($subentry_key, $sort_subentry_key)
+ = index_entry_sort_string($entry,
{'contents' => $subentry->{'args'}->[0]->{'contents'}},
- $subentry->{'extra'}->{'sortas'}, $options);
+ $subentry->{'extra'}->{'sortas'}, $options, $entries_collator);
if ($subentry_key !~ /\S/) {
$registrar->line_warn($customization_information,
sprintf(__("empty index sub entry %d key in \@%s"),
@@ -1925,13 +1987,15 @@ sub sort_indices($$$;$)
$entry->{'index_at_command'}),
$entry->{'entry_element'}->{'source_info'});
push @entry_keys, '';
+ push @sort_entry_keys, '';
} else {
push @entry_keys, $subentry_key;
+ push @sort_entry_keys, $sort_subentry_key;
}
}
- foreach my $sub_entry_key (@entry_keys) {
+ foreach my $sub_entry_key (@sort_entry_keys) {
if ($sub_entry_key ne '') {
- my $sortable_entry = {'entry' => $entry, 'keys' => \@entry_keys,
+ my $sortable_entry = {'entry' => $entry, 'keys' => \@sort_entry_keys,
'number' => $entry->{'entry_number'},
'index_at_command' => $entry->{'index_at_command'}};
if ($sort_by_letter) {
@@ -1945,19 +2009,36 @@ sub sort_indices($$$;$)
$index_entries_sort_strings->{$entry} = join(', ', @entry_keys);
}
if ($sort_by_letter) {
- foreach my $letter (sort {_collator_sort_string($a, $b, $collator)}
- (keys %$index_letter_hash)) {
- my @sorted_letter_entries
- = map {$_->{'entry'}} sort {_sort_index_entries($a, $b, $collator)}
+ # need to use directly the collator here as there is no
+ # separate sort keys.
+ my @sorted_letters = sort {_collator_sort_string($a, $b, $collator)}
+ (keys %$index_letter_hash);
+ foreach my $letter (@sorted_letters) {
+ my @sorted_letter_entries;
+ if ($preset_keys) {
+ @sorted_letter_entries
+ = map {$_->{'entry'}} sort {_sort_index_entries($a, $b)}
@{$index_letter_hash->{$letter}};
+ } else {
+ @sorted_letter_entries
+ = map {$_->{'entry'}} sort {_collator_sort_index_entries($a, $b, $collator)}
+ @{$index_letter_hash->{$letter}};
+ }
push @{$sorted_index_entries->{$index_name}},
{ 'letter' => $letter, 'entries' => \@sorted_letter_entries };
}
} else {
- $sorted_index_entries->{$index_name} = [
- map {$_->{'entry'}} sort {_sort_index_entries($a, $b, $collator)}
+ if ($preset_keys) {
+ $sorted_index_entries->{$index_name} = [
+ map {$_->{'entry'}} sort {_sort_index_entries($a, $b)}
@{$sortable_index_entries}
- ];
+ ];
+ } else {
+ $sorted_index_entries->{$index_name} = [
+ map {$_->{'entry'}} sort {_collator_sort_index_entries($a, $b, $collator)}
+ @{$sortable_index_entries}
+ ];
+ }
}
}
return $sorted_index_entries, $index_entries_sort_strings;
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- branch master updated: Use pre-set keys for index sorting,
Patrice Dumas <=
- Prev by Date:
branch master updated: * tp/Texinfo/ParserNonXS.pm (_close_ignored_block_conditional) (_close_current, _process_remaining_on_line), tp/Texinfo/XS/parsetexi/close.c (close_ignored_block_conditional), (close_current), tp/Texinfo/XS/parsetexi/parser.c (process_remaining_on_line): add close_ignored_block_conditional to associated a source mark to a closed ignored conditional block, to do it in close_current in addition to process_remaining_on_line.
- Next by Date:
branch master updated: Add missing test results.
- Previous by thread:
branch master updated: * tp/Texinfo/ParserNonXS.pm (_close_ignored_block_conditional) (_close_current, _process_remaining_on_line), tp/Texinfo/XS/parsetexi/close.c (close_ignored_block_conditional), (close_current), tp/Texinfo/XS/parsetexi/parser.c (process_remaining_on_line): add close_ignored_block_conditional to associated a source mark to a closed ignored conditional block, to do it in close_current in addition to process_remaining_on_line.
- Next by thread:
branch master updated: Add missing test results.
- Index(es):