Skip to content

Commit

Permalink
[VUFIND-1710] Add configurable separators for topics in alphabrowse (v…
Browse files Browse the repository at this point in the history
…ufind-org#4011)

Currently, subjects are shown differently between the record view in
VuFind and the topic browse.  In the record view you see delimiters
like this:

    Morris, William, > 1834-1896 > Criticism and interpretation.

But in the topic browse, the fields are just joined with spaces:

    Morris, William, 1834-1896 Criticism and interpretation.

This commit:

  * Adds a UTF-8 separator to delimit terms within topics in the Solr
    index

  * Adjusts the topic_browse definition to use a new TopicNormalizer,
    which strips out these new delimiters when building the sort key.

  * Changes the frontend of the AlphaBrowse to insert a
    user-configurable delimiter, shown when browsing topics.

Note: since Normalizer classes currently live in the
vufind-browse-handler, there will be a corresponding commit there to
add TopicNormalizer.
  • Loading branch information
marktriggs authored Oct 21, 2024
1 parent a788da2 commit 951f74b
Show file tree
Hide file tree
Showing 11 changed files with 75 additions and 8 deletions.
9 changes: 9 additions & 0 deletions config/vufind/config.ini
Original file line number Diff line number Diff line change
Expand Up @@ -2196,11 +2196,18 @@ includeSchemaOrgMetadata = true
[AlphaBrowse]
; This setting controls how many headings are displayed on each page of results:
page_size = 20

; How many headings to show before the match (or the spot where the match
; would have been found). Default is 0 for backwards compatibility.
rows_before = 0

; For topic browse, controls the separator to place between terms when
; displaying topic headings. Defaults to " > " if not set.
;topic_browse_separator = " > "

; Highlight the matching row (or the spot where the match would have been)?
; Default is false.
highlighting = false

; AlphaBrowse results are not subject to dynamic filtering. If you have default
; filters defined in searches.ini, you most likely will want to disable them when
; users navigate from browse results to search results, to ensure that the result
Expand All @@ -2209,12 +2216,14 @@ highlighting = false
; rather than NARROW search results, you will likely want to change this setting
; to false to avoid inconsistencies.
bypass_default_filters = true

; SEE ALSO: the General/includeAlphaBrowse setting in searchbox.ini, for including
; alphabrowse options in the main search drop-down options.

; This section controls the order and content of the browse type menu in the
; Alphabetic Browse module. The key is the browse index to use, the value is the
; string to display to the user (subject to translation).

[AlphaBrowse_Types]
topic = "By Topic"
author = "By Author"
Expand Down
Binary file modified import/browse-indexing.jar
Binary file not shown.
33 changes: 33 additions & 0 deletions import/index_java/src/org/vufind/index/FieldSpecTools.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
import java.util.Set;

/**
Expand Down Expand Up @@ -137,4 +139,35 @@ protected static final String getFieldData(DataField dataField, String subfieldC

return result.length() > 0 ? result.toString() : null;
}


/**
 * Matches one Unicode escape written as a backslash, the letter u (either case) and
 * exactly four hex digits (the \\uXXXX form); group 1 captures the hex digits.
 */
private static final Pattern unicodeEscape = Pattern.compile("(?i)\\\\u([0-9a-f]{4})");

/**
 * Retrieves all subfields matching a field specification, joined by a delimiter that
 * may be written using Unicode escape sequences.
 *
 * The separator is decoded first (each \\uXXXX escape becomes the character it names)
 * and the decoded string is then passed, together with the record and field
 * specification, to SolrIndexer's getAllSubfields, whose result is returned unchanged.
 *
 * @param record the MARC record from which to extract subfields.
 * @param fieldSpec a string specifying the fields and subfields to retrieve.
 * @param separatorWithEscapes a string representing the delimiter to use between subfields,
 *                             which can contain Unicode escape sequences in the format \\uXXXX.
 * @return the set of strings produced by SolrIndexer's getAllSubfields for the decoded delimiter.
 */
public static Set<String> getAllSubfieldsUTF8Delimited(final Record record, String fieldSpec, String separatorWithEscapes)
{
    String separator = decodeUnicodeEscapes(separatorWithEscapes);
    return org.solrmarc.index.SolrIndexer.instance().getAllSubfields(record, fieldSpec, separator);
}

/**
 * Replaces every \\uXXXX escape in the given text with the UTF-16 code unit it names.
 *
 * The text is scanned once from left to right, so characters produced by decoding an
 * escape are never re-examined. (Restarting the scan after each replacement would let
 * a decoded backslash combine with the characters that follow it and be decoded a
 * second time.)
 *
 * @param text the text that may contain Unicode escapes.
 * @return the text with all escapes decoded; text without escapes is returned as-is.
 */
private static String decodeUnicodeEscapes(String text)
{
    Matcher m = unicodeEscape.matcher(text);
    StringBuilder decoded = new StringBuilder(text.length());
    int copiedUpTo = 0;

    while (m.find()) {
        // Copy the literal text between the previous escape and this one.
        decoded.append(text, copiedUpTo, m.start());
        // Four hex digits always fit in a single char (0x0000-0xFFFF).
        decoded.append((char) Integer.parseInt(m.group(1), 16));
        copiedUpTo = m.end();
    }

    // Copy whatever follows the last escape (or the whole text if none matched).
    decoded.append(text, copiedUpTo, text.length());
    return decoded.toString();
}
}
1 change: 1 addition & 0 deletions import/marc.properties
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ callnumber-sort = custom, getLCSortable(099ab:090ab:050ab)
callnumber-raw = 099ab:090ab:050ab

topic = custom, getAllSubfields(600:610:611:630:650:653:656, " ")
topic_browse = custom, getAllSubfieldsUTF8Delimited(600:610:611:630:650:653:656, "\u2002")
genre = custom, getAllSubfields(655, " ")
geographic = custom, getAllSubfields(651, " ")
era = custom, getAllSubfields(648, " ")
Expand Down
2 changes: 1 addition & 1 deletion index-alphabetic-browse.bat
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ mkdir "%index_dir%"
rem These parameters should match the ones in solr/vufind/biblio/conf/solrconfig.xml - BrowseRequestHandler
call %VUFIND_HOME%\index-alphabetic-browse.bat build_browse hierarchy hierarchy_browse
call %VUFIND_HOME%\index-alphabetic-browse.bat build_browse title title_fullStr 1 "-Dbib_field_iterator=org.vufind.solr.indexing.StoredFieldIterator -Dsortfield=title_sort -Dvaluefield=title_fullStr -Dbrowse.normalizer=org.vufind.util.TitleNormalizer"
call %VUFIND_HOME%\index-alphabetic-browse.bat build_browse topic topic_browse
call %VUFIND_HOME%\index-alphabetic-browse.bat build_browse topic topic_browse 0 "-Dbrowse.normalizer=org.vufind.util.TopicNormalizer"
call %VUFIND_HOME%\index-alphabetic-browse.bat build_browse author author_browse
call %VUFIND_HOME%\index-alphabetic-browse.bat build_browse lcc callnumber-raw 1 "-Dbrowse.normalizer=org.vufind.util.LCCallNormalizer"
call %VUFIND_HOME%\index-alphabetic-browse.bat build_browse dewey dewey-raw 1 "-Dbrowse.normalizer=org.vufind.util.DeweyCallNormalizer"
Expand Down
2 changes: 1 addition & 1 deletion index-alphabetic-browse.sh
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ function build_browse
# These parameters should match the ones in solr/vufind/biblio/conf/solrconfig.xml - BrowseRequestHandler
build_browse "hierarchy" "hierarchy_browse"
build_browse "title" "title_fullStr" 1 "-Dbib_field_iterator=org.vufind.solr.indexing.StoredFieldIterator -Dsortfield=title_sort -Dvaluefield=title_fullStr -Dbrowse.normalizer=org.vufind.util.TitleNormalizer"
build_browse "topic" "topic_browse"
build_browse "topic" "topic_browse" 0 "-Dbrowse.normalizer=org.vufind.util.TopicNormalizer"
build_browse "author" "author_browse"
build_browse "lcc" "callnumber-raw" 1 "-Dbrowse.normalizer=org.vufind.util.LCCallNormalizer"
build_browse "dewey" "dewey-raw" 1 "-Dbrowse.normalizer=org.vufind.util.DeweyCallNormalizer"
25 changes: 25 additions & 0 deletions module/VuFind/src/VuFind/Controller/AlphabrowseController.php
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,11 @@ protected function addResultsToView(
$view->prevpage = $page - 1;
}
}

if ($view->source === 'topic') {
$this->applyTopicDelimiters($result);
}

$view->result = $result;

// set up highlighting: page 0 contains match location
Expand All @@ -185,6 +190,26 @@ protected function addResultsToView(
}
}

/**
 * Applies topic delimiters to the 'heading' field of each item in the browse results.
 *
 * Every U+2002 character found in a heading is replaced with the value of the
 * AlphaBrowse/topic_browse_separator configuration setting, or with ' > ' when
 * that setting is not defined. The result array is modified in place.
 *
 * @param array $result The result array containing 'Browse' items to be modified.
 *
 * @return void
 */
protected function applyTopicDelimiters(&$result): void
{
    // Nothing to rewrite when the result carries no list of browse items; bailing
    // out here also avoids iterating (by reference) over a missing key.
    if (!is_array($result['Browse']['items'] ?? null)) {
        return;
    }

    // The separator is identical for every heading, so resolve it once up front.
    $config = $this->getConfig();
    $separator = $config->AlphaBrowse->topic_browse_separator ?? ' > ';

    foreach ($result['Browse']['items'] as &$item) {
        $item['heading'] = str_replace("\u{2002}", $separator, $item['heading']);
    }
    // Break the reference so a later write to $item cannot alter the last element.
    unset($item);
}

/**
* Apply highlighting settings to the view based on the result set.
*
Expand Down
2 changes: 1 addition & 1 deletion module/VuFind/src/VuFind/View/Helper/Root/AlphaBrowse.php
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ public function getUrl($source, $item)

$query = [
'type' => ucwords($source) . 'Browse',
'lookfor' => $this->escapeForSolr($item['heading']),
'lookfor' => $this->escapeForSolr($item['sort_key']),
];
if ($this->options['bypass_default_filters'] ?? true) {
$query['dfApplied'] = 1;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ public function testGetUrlWithMultipleRecordsAndDefaultSettings(): void
]
);
$helper = $this->getHelper($url);
$item = ['heading' => 'xyzzy', 'count' => 2];
$item = ['heading' => 'xyzzy', 'sort_key' => 'xyzzy', 'count' => 2];
$this->assertEquals('foo', $helper->getUrl('title', $item));
}

Expand All @@ -112,7 +112,7 @@ public function testGetUrlWithSingleRecordAndDefaultSettings(): void
]
);
$helper = $this->getHelper($url);
$item = ['heading' => 'xyzzy', 'count' => 1];
$item = ['heading' => 'xyzzy', 'sort_key' => 'xyzzy', 'count' => 1];
$this->assertEquals('foo', $helper->getUrl('title', $item));
}

Expand All @@ -131,7 +131,7 @@ public function testGetUrlEscapesQuotes(): void
]
);
$helper = $this->getHelper($url);
$item = ['heading' => '"xyzzy"', 'count' => 100];
$item = ['heading' => '"xyzzy"', 'sort_key' => '"xyzzy"', 'count' => 100];
$this->assertEquals('foo', $helper->getUrl('title', $item));
}

Expand All @@ -150,7 +150,7 @@ public function testGetUrlAppliesFilterBypassSetting(): void
]
);
$helper = $this->getHelper($url, ['bypass_default_filters' => false]);
$item = ['heading' => 'xyzzy', 'count' => 100];
$item = ['heading' => 'xyzzy', 'sort_key' => 'xyzzy', 'count' => 100];
$this->assertEquals('foo', $helper->getUrl('title', $item));
}
}
1 change: 0 additions & 1 deletion solr/vufind/biblio/conf/schema.xml
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,6 @@
<copyField source="allfields" dest="allfields_unstemmed"/>
<copyField source="fulltext" dest="fulltext_unstemmed"/>
<!-- CopyFields for Alphabetic Browse -->
<copyField source="topic" dest="topic_browse"/>
<copyField source="author" dest="author_browse"/>
<copyField source="author2" dest="author_browse"/>
<copyField source="author_corporate" dest="author_browse"/>
Expand Down
Binary file modified solr/vufind/jars/browse-handler.jar
Binary file not shown.

0 comments on commit 951f74b

Please sign in to comment.