From 12410e4600025c7c45c60ce39178ba8b5fe81bc8 Mon Sep 17 00:00:00 2001 From: Drini Cami Date: Thu, 20 Nov 2025 20:04:12 -0500 Subject: [PATCH] Tune solr caching performance --- conf/solr/conf/solrconfig.xml | 27 +++++++-- .../worksearch/tests/test_worksearch.py | 59 +++++++++++++++++++ 2 files changed, 81 insertions(+), 5 deletions(-) diff --git a/conf/solr/conf/solrconfig.xml b/conf/solr/conf/solrconfig.xml index 12ef5e358d8..64e49bbae3a 100644 --- a/conf/solr/conf/solrconfig.xml +++ b/conf/solr/conf/solrconfig.xml @@ -388,9 +388,9 @@ to occupy. Note that when this option is specified, the size and initialSize parameters are ignored. --> - + - harry potter - ({!edismax q.op="AND" qf="text alternative_title^20 author_name^20" bf="min(100,edition_count)" v=$userWorkQuery}) type:work + 0 20 + json + true + 3 + want_to_read_count,author_name,ebook_access,ratings_average,key,first_publish_year,public_scan_b,id_wikisource,id_project_runeberg,editions:[subquery],cover_edition_key,cover_i,ratings_count,language,author_key,lending_identifier_s,id_openstax,id_cita_press,id_project_gutenberg,id_librivox,lending_edition_s,subtitle,ia_collection_s,edition_count,has_fulltext,title,id_standard_ebooks,ia + harry potter + type:edition + harry potter + harry potter + ({!edismax bq="language:eng^40 ebook_access:public^10 ebook_access:borrowable^8 ebook_access:printdisabled^2 cover_i:*^2" v=$userEdQuery qf="text alternative_title^4 author_name^4"}) + +({!edismax q.op="AND" qf="text alternative_title^10 author_name^10" pf="alternative_title^10 author_name^10" bf="min(100,edition_count) min(100,def(readinglog_count,0))" v=$userWorkQuery}) +(_query_:"{!parent which=type:work v=$fullEdQuery filters=$editions.fq}" OR edition_count:0) + true + subject + harry potter + 10 + ({!terms f=_root_ v=$row.key}) AND ({!edismax bq="language:eng^40 ebook_access:public^10 ebook_access:borrowable^8 ebook_access:printdisabled^2 cover_i:*^2" v=$userEdQuery qf="text alternative_title^4 author_name^4"}) + 
1 + want_to_read_count,ebook_access,ratings_average,key,first_publish_year,id_project_runeberg,cover_edition_key,ia,cover_i,ratings_count,language,lending_identifier_s,id_openstax,id_cita_press,id_project_gutenberg,id_librivox,lending_edition_s,subtitle,ia_collection_s,edition_count,has_fulltext,title,id_standard_ebooks,public_scan_b,id_wikisource true author_facet first_publish_year has_fulltext + public_scan_b language person_facet place_facet diff --git a/openlibrary/plugins/worksearch/tests/test_worksearch.py b/openlibrary/plugins/worksearch/tests/test_worksearch.py index 722135b8fd2..bba8c1c69a7 100644 --- a/openlibrary/plugins/worksearch/tests/test_worksearch.py +++ b/openlibrary/plugins/worksearch/tests/test_worksearch.py @@ -1,3 +1,5 @@ +from pathlib import Path + import web from openlibrary.plugins.worksearch.code import ( @@ -91,3 +93,60 @@ def test_prepare_solr_query_params_first_publish_year_string(): # Check that the fq param for first_publish_year is correctly added fq_params = [p for p in params if p[0] == 'fq'] assert ('fq', 'first_publish_year:"1997"') in fq_params + + +def test_solr_config_contains_realistic_search(mock_site): + import xml.etree.ElementTree as ET + + SOLR_CONFIG_PATH = ( + Path(__file__).parent.parent.parent.parent.parent + / "conf" + / "solr" + / "conf" + / "solrconfig.xml" + ) + parsed_solr_config = ET.parse(SOLR_CONFIG_PATH) + root = parsed_solr_config.getroot() + # Find listener[event=newSearcher] > arr[name=queries] > lst:firstchild + first_query = root.find( + ".//listener[@event='newSearcher']/arr[@name='queries']/lst" + ) + assert first_query is not None + # `<lst>` has a child `<str name="...">...</str>`; convert to a list of (name, text) tuples + new_searcher_query_params = [ + (child.attrib['name'], str(child.text)) for child in first_query.findall('str') + ] + + expected_params, _ = _prepare_solr_query_params( + scheme=WorkSearchScheme(), + param={'q': 'harry potter'}, + spellcheck_count=3, + facet=True, + highlight=True, + fields=list( + 
WorkSearchScheme.default_fetched_fields + | { + 'editions', + 'providers', + 'ratings_average', + 'ratings_count', + 'want_to_read_count', + } + ), + rows=20, + ) + + def normalize_params(params: list[tuple[str, str]]): + ignored_fields = {'ol.label', 'editions.ol.label'} + sorted_fields = {'fl', 'editions.fl'} + params = [(k, str(v)) for k, v in params if k not in ignored_fields] + params = [ + (k, ','.join(sorted(v.split(','))) if k in sorted_fields else v) + for k, v in params + ] + return params + + new_searcher_query_params = normalize_params(new_searcher_query_params) + expected_params = normalize_params(expected_params) + + assert set(new_searcher_query_params) == set(expected_params)