From 12410e4600025c7c45c60ce39178ba8b5fe81bc8 Mon Sep 17 00:00:00 2001
From: Drini Cami <cdrini@gmail.com>
Date: Thu, 20 Nov 2025 20:04:12 -0500
Subject: [PATCH] Tune solr caching performance

---
 conf/solr/conf/solrconfig.xml                 | 27 +++++++--
 .../worksearch/tests/test_worksearch.py       | 59 +++++++++++++++++++
 2 files changed, 81 insertions(+), 5 deletions(-)
diff --git a/conf/solr/conf/solrconfig.xml b/conf/solr/conf/solrconfig.xml
index 12ef5e358d8..64e49bbae3a 100644
--- a/conf/solr/conf/solrconfig.xml
+++ b/conf/solr/conf/solrconfig.xml
@@ -388,9 +388,9 @@
                       to occupy. Note that when this option is specified, the size
                       and initialSize parameters are ignored.
       -->
-    <filterCache size="512"
-                 initialSize="512"
-                 autowarmCount="128"/>
+    <filterCache size="2048"
+                 initialSize="2048"
+                 autowarmCount="1024"/>
 
     <!-- Query Result Cache
 
@@ -537,14 +537,31 @@
       <arr name="queries">
         <!-- Work search -->
         <lst>
-          <str name="userWorkQuery">harry potter</str>
-          <str name="q">({!edismax q.op="AND" qf="text alternative_title^20 author_name^20" bf="min(100,edition_count)" v=$userWorkQuery})</str>
           <str name="fq">type:work</str>
+          <str name="start">0</str>
           <str name="rows">20</str>
+          <str name="wt">json</str>
+          <str name="spellcheck">true</str>
+          <str name="spellcheck.count">3</str>
+          <str name="fl">want_to_read_count,author_name,ebook_access,ratings_average,key,first_publish_year,public_scan_b,id_wikisource,id_project_runeberg,editions:[subquery],cover_edition_key,cover_i,ratings_count,language,author_key,lending_identifier_s,id_openstax,id_cita_press,id_project_gutenberg,id_librivox,lending_edition_s,subtitle,ia_collection_s,edition_count,has_fulltext,title,id_standard_ebooks,ia</str>
+          <str name="userWorkQuery">harry potter</str>
+          <str name="editions.fq">type:edition</str>
+          <str name="userEdQuery">harry potter</str>
+          <str name="editions.userEdQuery">harry potter</str>
+          <str name="fullEdQuery">({!edismax bq="language:eng^40 ebook_access:public^10 ebook_access:borrowable^8 ebook_access:printdisabled^2 cover_i:*^2" v=$userEdQuery qf="text alternative_title^4 author_name^4"})</str>
+          <str name="q">+({!edismax q.op="AND" qf="text alternative_title^10 author_name^10" pf="alternative_title^10 author_name^10" bf="min(100,edition_count) min(100,def(readinglog_count,0))" v=$userWorkQuery}) +(_query_:"{!parent which=type:work v=$fullEdQuery filters=$editions.fq}" OR edition_count:0)</str>
+          <str name="hl">true</str>
+          <str name="hl.fl">subject</str>
+          <str name="hl.q">harry potter</str>
+          <str name="hl.snippets">10</str>
+          <str name="editions.q">({!terms f=_root_ v=$row.key}) AND ({!edismax bq="language:eng^40 ebook_access:public^10 ebook_access:borrowable^8 ebook_access:printdisabled^2 cover_i:*^2" v=$userEdQuery qf="text alternative_title^4 author_name^4"})</str>
+          <str name="editions.rows">1</str>
+          <str name="editions.fl">want_to_read_count,ebook_access,ratings_average,key,first_publish_year,id_project_runeberg,cover_edition_key,ia,cover_i,ratings_count,language,lending_identifier_s,id_openstax,id_cita_press,id_project_gutenberg,id_librivox,lending_edition_s,subtitle,ia_collection_s,edition_count,has_fulltext,title,id_standard_ebooks,public_scan_b,id_wikisource</str>
           <str name="facet">true</str>
           <str name="facet.field">author_facet</str>
           <str name="facet.field">first_publish_year</str>
           <str name="facet.field">has_fulltext</str>
+          <str name="facet.field">public_scan_b</str>
           <str name="facet.field">language</str>
           <str name="facet.field">person_facet</str>
           <str name="facet.field">place_facet</str>
diff --git a/openlibrary/plugins/worksearch/tests/test_worksearch.py b/openlibrary/plugins/worksearch/tests/test_worksearch.py
index 722135b8fd2..bba8c1c69a7 100644
--- a/openlibrary/plugins/worksearch/tests/test_worksearch.py
+++ b/openlibrary/plugins/worksearch/tests/test_worksearch.py
@@ -1,3 +1,5 @@
+from pathlib import Path
+
 import web
 
 from openlibrary.plugins.worksearch.code import (
@@ -91,3 +93,60 @@ def test_prepare_solr_query_params_first_publish_year_string():
     # Check that the fq param for first_publish_year is correctly added
     fq_params = [p for p in params if p[0] == 'fq']
     assert ('fq', 'first_publish_year:"1997"') in fq_params
+
+
+def test_solr_config_contains_realistic_search(mock_site):
+    """Compare the first newSearcher query in solrconfig.xml to a realistic search.
+
+    The expected parameters are the ones `_prepare_solr_query_params` builds
+    for a 'harry potter' work search with facets and highlighting enabled.
+    """
+    import xml.etree.ElementTree as ET
+
+    # Five directory levels above this test file is the repository root.
+    solr_config_path = (
+        Path(__file__).parents[4] / "conf" / "solr" / "conf" / "solrconfig.xml"
+    )
+    config_root = ET.parse(solr_config_path).getroot()
+    # First <lst> under listener[event=newSearcher] > arr[name=queries]
+    warming_query = config_root.find(
+        ".//listener[@event='newSearcher']/arr[@name='queries']/lst"
+    )
+    assert warming_query is not None
+    # Each `<str name="q">...</str>` child becomes a (name, text) tuple
+    solr_params = [
+        (node.attrib['name'], str(node.text)) for node in warming_query.findall('str')
+    ]
+
+    extra_fields = {
+        'editions',
+        'providers',
+        'ratings_average',
+        'ratings_count',
+        'want_to_read_count',
+    }
+    expected_params, _ = _prepare_solr_query_params(
+        scheme=WorkSearchScheme(),
+        param={'q': 'harry potter'},
+        spellcheck_count=3,
+        facet=True,
+        highlight=True,
+        fields=list(WorkSearchScheme.default_fetched_fields | extra_fields),
+        rows=20,
+    )
+
+    def normalize_params(params: list[tuple[str, str]]):
+        """Drop ignored params, stringify values, and sort field lists."""
+        ignored_fields = {'ol.label', 'editions.ol.label'}
+        sorted_fields = {'fl', 'editions.fl'}
+        normalized = []
+        for key, value in params:
+            if key in ignored_fields:
+                continue
+            value = str(value)
+            if key in sorted_fields:
+                value = ','.join(sorted(value.split(',')))
+            normalized.append((key, value))
+        return normalized
+
+    assert set(normalize_params(solr_params)) == set(normalize_params(expected_params))