
Add rate limits for LLMs and Embedding Models #520

Merged: 27 commits, Oct 4, 2024

Commits
5d2fa3a
added some rate limit checks into llm wrapper class
mskarlin Sep 23, 2024
f98394b
added rate limit tests
mskarlin Oct 2, 2024
6eb4a2c
update type annotations
mskarlin Oct 2, 2024
dfc9e2d
merge with main branch
mskarlin Oct 2, 2024
1bd1776
remove CHARACTERS_PER_TOKEN as a classvar
mskarlin Oct 2, 2024
83b9f68
need to await achat in _achat
mskarlin Oct 2, 2024
cbc4454
remove un-needed fixtures
mskarlin Oct 2, 2024
d6d5c43
merge with main
mskarlin Oct 2, 2024
0c0936c
rename CHARACTERS_PER_TOKEN->CHARACTERS_PER_TOKEN_ASSUMPTION, add typ…
mskarlin Oct 3, 2024
8331f9b
refurb update to remove defaults
mskarlin Oct 3, 2024
6b60dc8
add passthrough from settings into llm rate limits, add test for pass…
mskarlin Oct 3, 2024
d7c77ed
add rate limits to readme and refurb non-default fix
mskarlin Oct 3, 2024
d5a85ee
Merge branch 'main' into rate-limits
mskarlin Oct 3, 2024
e1c24f5
add google style docstring to rate limiter
mskarlin Oct 3, 2024
ff1b7ea
make imports reference the package
mskarlin Oct 3, 2024
1efd7aa
modify readme, rename timeout->acquire_timeout, remove defaults from …
mskarlin Oct 3, 2024
e2c240f
Merge branch 'main' into rate-limits
mskarlin Oct 3, 2024
d3785b4
Update README.md
mskarlin Oct 3, 2024
5230302
Update README.md
mskarlin Oct 3, 2024
033c033
update tests to support default use of json, change debug and fast mo…
mskarlin Oct 4, 2024
3452018
re-recorded vcr cassette with new settings + added docs for dockey no…
mskarlin Oct 4, 2024
b62ae7b
merged with main
mskarlin Oct 4, 2024
45fe0f8
move debug.json settings under parsing heading
mskarlin Oct 4, 2024
9d271e3
Merge branch 'main' into rate-limits
mskarlin Oct 4, 2024
5f8948e
ensure test_gather_evidence_rejects_empty_docs uses the paper stub di…
mskarlin Oct 4, 2024
b7286d9
Merge branch 'rate-limits' of github.com:whitead/paper-qa into rate-l…
mskarlin Oct 4, 2024
546e6d2
ensure test_gather_evidence_rejects_empty_docs uses the paper stub di…
mskarlin Oct 4, 2024
34 changes: 34 additions & 0 deletions README.md
@@ -20,6 +20,7 @@ question answering, summarization, and contradiction detection.
- [Installation](#installation)
- [CLI Usage](#cli-usage)
- [Bundled Settings](#bundled-settings)
- [Rate Limits](#rate-limits)
- [Library Usage](#library-usage)
- [`ask` manually](#ask-manually)
- [Adding Documents Manually](#adding-documents-manually)
@@ -250,6 +251,39 @@ Inside [`paperqa/configs`](paperqa/configs) we bundle known useful settings:
| wikicrow | Setting to emulate the Wikipedia article writing used in our WikiCrow publication. |
| contracrow | Setting to find contradictions in papers; your query should be a claim that needs to be flagged as a contradiction (or not). |
| debug | Setting useful solely for debugging, not intended for any real application. |
| tier1_limits | Settings that match OpenAI rate limits for each tier; use `tier<1-5>_limits` to specify the tier. |

### Rate Limits

If you are hitting rate limits, say on the OpenAI Tier 1 plan, you can add them to PaperQA2.
For each OpenAI tier, a pre-built setting exists to limit usage.

```bash
pqa --settings 'tier1_limits' ask 'Are there nm scale features in thermoelectric materials?'
```

This will limit your system to the rate limits defined in [tier1_limits](paperqa/configs/tier1_limits.json),
and slow down your queries to accommodate them.

You can also specify them manually with any rate limit string that matches the specification in the [limits](https://limits.readthedocs.io/en/stable/quickstart.html#rate-limit-string-notation) module:

```bash
pqa --summary_llm_config '{"rate_limit": {"gpt-4o-2024-08-06": "30000 per 1 minute"}}' ask 'Are there nm scale features in thermoelectric materials?'
```
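The rate-limit string notation can be illustrated with a small stdlib-only parser. This is a sketch: the helper name `parse_rate_limit` and the seconds mapping are illustrative only, and the real parsing is done by the `limits` package, not this code:

```python
import re

# Window lengths for the granularities accepted by limits-style
# rate-limit strings; this mapping is part of the sketch, not paperqa.
GRANULARITY_SECONDS = {"second": 1, "minute": 60, "hour": 3600, "day": 86400}


def parse_rate_limit(spec: str) -> tuple[int, int]:
    """Return (count, window_seconds) for a "<count> per <N> <unit>" string."""
    match = re.fullmatch(
        r"(\d+)\s+per\s+(\d+)\s+(second|minute|hour|day)s?", spec.strip()
    )
    if match is None:
        raise ValueError(f"Unrecognized rate limit string: {spec!r}")
    count, multiple, unit = match.groups()
    return int(count), int(multiple) * GRANULARITY_SECONDS[unit]


print(parse_rate_limit("30000 per 1 minute"))  # (30000, 60)
```

So `"30000 per 1 minute"` reads as 30,000 tokens allowed in any 60-second window, which matches the OpenAI TPM figures used throughout the bundled tier configs.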

Or by adding into a `Settings` object, if calling imperatively:

```python
from paperqa import Settings, ask

answer = ask(
    "What manufacturing challenges are unique to bispecific antibodies?",
    settings=Settings(
        llm_config={"rate_limit": {"gpt-4o-2024-08-06": "30000 per 1 minute"}},
        summary_llm_config={"rate_limit": {"gpt-4o-2024-08-06": "30000 per 1 minute"}},
    ),
)
```

## Library Usage

3 changes: 3 additions & 0 deletions paperqa/configs/debug.json
@@ -12,5 +12,8 @@
  "parsing": {
    "use_doc_details": false,
    "defer_embedding": true
  },
  "prompts": {
    "use_json": false
  }
}
6 changes: 6 additions & 0 deletions paperqa/configs/fast.json
@@ -9,5 +9,11 @@
  },
  "parsing": {
    "use_doc_details": false
  },
  "prompts": {
    "use_json": false
  },
  "agent": {
    "agent_type": "fake"
  }
}
6 changes: 0 additions & 6 deletions paperqa/configs/high_quality.json
@@ -8,11 +8,5 @@
    "use_doc_details": true,
    "chunk_size": 7000,
    "overlap": 250
  },
  "prompts": {
    "use_json": true
  },
  "agent": {
    "agent_type": "ToolSelector"
  }
}
53 changes: 53 additions & 0 deletions paperqa/configs/tier1_limits.json
@@ -0,0 +1,53 @@
{
  "answer": {
    "evidence_k": 5,
    "evidence_detailed_citations": false,
    "evidence_summary_length": "25 to 50 words",
    "answer_max_sources": 3,
    "answer_length": "50 to 100 words",
    "max_concurrent_requests": 5
  },
  "parsing": {
    "use_doc_details": false
  },
  "prompts": {
    "use_json": true
  },
  "llm_config": {
    "rate_limit": {
      "gpt-4o": "30000 per 1 minute",
      "gpt-4o-2024-08-06": "30000 per 1 minute",
      "gpt-4o-2024-05-13": "30000 per 1 minute",
      "gpt-4o-mini": "200000 per 1 minute",
      "gpt-4o-mini-2024-07-18": "200000 per 1 minute",
      "gpt-4-turbo": "30000 per 1 minute",
      "gpt-4-turbo-2024-04-09": "30000 per 1 minute",
      "gpt-4-0613": "10000 per 1 minute",
      "gpt-4-0314": "10000 per 1 minute",
      "gpt-4": "10000 per 1 minute",
      "gpt-3.5-turbo-0125": "200000 per 1 minute",
      "gpt-3.5-turbo": "200000 per 1 minute",
      "gpt-3.5-turbo-1106": "200000 per 1 minute"
    }
  },
  "summary_llm_config": {
    "rate_limit": {
      "gpt-4o": "30000 per 1 minute",
      "gpt-4o-2024-08-06": "30000 per 1 minute",
      "gpt-4o-2024-05-13": "30000 per 1 minute",
      "gpt-4o-mini": "200000 per 1 minute",
      "gpt-4o-mini-2024-07-18": "200000 per 1 minute",
      "gpt-4-turbo": "30000 per 1 minute",
      "gpt-4-turbo-2024-04-09": "30000 per 1 minute",
      "gpt-4-0613": "10000 per 1 minute",
      "gpt-4-0314": "10000 per 1 minute",
      "gpt-4": "10000 per 1 minute",
      "gpt-3.5-turbo-0125": "200000 per 1 minute",
      "gpt-3.5-turbo": "200000 per 1 minute",
      "gpt-3.5-turbo-1106": "200000 per 1 minute"
    }
  },
  "embedding_config": {
    "rate_limit": "1000000 per 1 minute"
  }
}
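The per-model mapping in tier configs like the one above can be read with plain stdlib tooling. This sketch inlines a subset of the file for illustration rather than loading paperqa's actual config path:

```python
import json

# Subset of tier1_limits.json from the diff above, inlined for illustration.
tier1 = json.loads("""
{
  "llm_config": {
    "rate_limit": {
      "gpt-4o": "30000 per 1 minute",
      "gpt-4o-mini": "200000 per 1 minute"
    }
  },
  "embedding_config": {"rate_limit": "1000000 per 1 minute"}
}
""")

# LLM limits are keyed per model name; the embedding limit is a single string.
llm_limits = tier1["llm_config"]["rate_limit"]
print(llm_limits["gpt-4o"])                     # 30000 per 1 minute
print(tier1["embedding_config"]["rate_limit"])  # 1000000 per 1 minute
```

Note the structural difference: `llm_config` and `summary_llm_config` map model names to rate-limit strings, while `embedding_config` takes one rate-limit string for all embedding traffic.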
52 changes: 52 additions & 0 deletions paperqa/configs/tier2_limits.json
@@ -0,0 +1,52 @@
{
  "answer": {
    "evidence_k": 8,
    "answer_max_sources": 3,
    "max_concurrent_requests": 8
  },
  "parsing": {
    "use_doc_details": true,
    "chunk_size": 7000,
    "overlap": 250
  },
  "prompts": {
    "use_json": true
  },
  "llm_config": {
    "rate_limit": {
      "gpt-4o": "450000 per 1 minute",
      "gpt-4o-2024-08-06": "450000 per 1 minute",
      "gpt-4o-2024-05-13": "450000 per 1 minute",
      "gpt-4o-mini": "2000000 per 1 minute",
      "gpt-4o-mini-2024-07-18": "2000000 per 1 minute",
      "gpt-4-turbo": "450000 per 1 minute",
      "gpt-4-turbo-2024-04-09": "450000 per 1 minute",
      "gpt-4-0613": "40000 per 1 minute",
      "gpt-4-0314": "40000 per 1 minute",
      "gpt-4": "40000 per 1 minute",
      "gpt-3.5-turbo-0125": "2000000 per 1 minute",
      "gpt-3.5-turbo": "2000000 per 1 minute",
      "gpt-3.5-turbo-1106": "2000000 per 1 minute"
    }
  },
  "summary_llm_config": {
    "rate_limit": {
      "gpt-4o": "450000 per 1 minute",
      "gpt-4o-2024-08-06": "450000 per 1 minute",
      "gpt-4o-2024-05-13": "450000 per 1 minute",
      "gpt-4o-mini": "2000000 per 1 minute",
      "gpt-4o-mini-2024-07-18": "2000000 per 1 minute",
      "gpt-4-turbo": "450000 per 1 minute",
      "gpt-4-turbo-2024-04-09": "450000 per 1 minute",
      "gpt-4-0613": "40000 per 1 minute",
      "gpt-4-0314": "40000 per 1 minute",
      "gpt-4": "40000 per 1 minute",
      "gpt-3.5-turbo-0125": "2000000 per 1 minute",
      "gpt-3.5-turbo": "2000000 per 1 minute",
      "gpt-3.5-turbo-1106": "2000000 per 1 minute"
    }
  },
  "embedding_config": {
    "rate_limit": "1000000 per 1 minute"
  }
}
52 changes: 52 additions & 0 deletions paperqa/configs/tier3_limits.json
@@ -0,0 +1,52 @@
{
  "answer": {
    "evidence_k": 8,
    "answer_max_sources": 3,
    "max_concurrent_requests": 8
  },
  "parsing": {
    "use_doc_details": true,
    "chunk_size": 7000,
    "overlap": 250
  },
  "prompts": {
    "use_json": true
  },
  "llm_config": {
    "rate_limit": {
      "gpt-4o": "800000 per 1 minute",
      "gpt-4o-2024-08-06": "800000 per 1 minute",
      "gpt-4o-2024-05-13": "800000 per 1 minute",
      "gpt-4o-mini": "4000000 per 1 minute",
      "gpt-4o-mini-2024-07-18": "4000000 per 1 minute",
      "gpt-4-turbo": "600000 per 1 minute",
      "gpt-4-turbo-2024-04-09": "600000 per 1 minute",
      "gpt-4-0613": "80000 per 1 minute",
      "gpt-4-0314": "80000 per 1 minute",
      "gpt-4": "80000 per 1 minute",
      "gpt-3.5-turbo-0125": "4000000 per 1 minute",
      "gpt-3.5-turbo": "4000000 per 1 minute",
      "gpt-3.5-turbo-1106": "4000000 per 1 minute"
    }
  },
  "summary_llm_config": {
    "rate_limit": {
      "gpt-4o": "800000 per 1 minute",
      "gpt-4o-2024-08-06": "800000 per 1 minute",
      "gpt-4o-2024-05-13": "800000 per 1 minute",
      "gpt-4o-mini": "4000000 per 1 minute",
      "gpt-4o-mini-2024-07-18": "4000000 per 1 minute",
      "gpt-4-turbo": "600000 per 1 minute",
      "gpt-4-turbo-2024-04-09": "600000 per 1 minute",
      "gpt-4-0613": "80000 per 1 minute",
      "gpt-4-0314": "80000 per 1 minute",
      "gpt-4": "80000 per 1 minute",
      "gpt-3.5-turbo-0125": "4000000 per 1 minute",
      "gpt-3.5-turbo": "4000000 per 1 minute",
      "gpt-3.5-turbo-1106": "4000000 per 1 minute"
    }
  },
  "embedding_config": {
    "rate_limit": "5000000 per 1 minute"
  }
}
52 changes: 52 additions & 0 deletions paperqa/configs/tier4_limits.json
@@ -0,0 +1,52 @@
{
  "answer": {
    "evidence_k": 10,
    "answer_max_sources": 5,
    "max_concurrent_requests": 8
  },
  "parsing": {
    "use_doc_details": true,
    "chunk_size": 7000,
    "overlap": 250
  },
  "prompts": {
    "use_json": true
  },
  "llm_config": {
    "rate_limit": {
      "gpt-4o": "2000000 per 1 minute",
      "gpt-4o-2024-08-06": "2000000 per 1 minute",
      "gpt-4o-2024-05-13": "2000000 per 1 minute",
      "gpt-4o-mini": "10000000 per 1 minute",
      "gpt-4o-mini-2024-07-18": "10000000 per 1 minute",
      "gpt-4-turbo": "800000 per 1 minute",
      "gpt-4-turbo-2024-04-09": "800000 per 1 minute",
      "gpt-4-0613": "300000 per 1 minute",
      "gpt-4-0314": "300000 per 1 minute",
      "gpt-4": "300000 per 1 minute",
      "gpt-3.5-turbo-0125": "10000000 per 1 minute",
      "gpt-3.5-turbo": "10000000 per 1 minute",
      "gpt-3.5-turbo-1106": "10000000 per 1 minute"
    }
  },
  "summary_llm_config": {
    "rate_limit": {
      "gpt-4o": "2000000 per 1 minute",
      "gpt-4o-2024-08-06": "2000000 per 1 minute",
      "gpt-4o-2024-05-13": "2000000 per 1 minute",
      "gpt-4o-mini": "10000000 per 1 minute",
      "gpt-4o-mini-2024-07-18": "10000000 per 1 minute",
      "gpt-4-turbo": "800000 per 1 minute",
      "gpt-4-turbo-2024-04-09": "800000 per 1 minute",
      "gpt-4-0613": "300000 per 1 minute",
      "gpt-4-0314": "300000 per 1 minute",
      "gpt-4": "300000 per 1 minute",
      "gpt-3.5-turbo-0125": "10000000 per 1 minute",
      "gpt-3.5-turbo": "10000000 per 1 minute",
      "gpt-3.5-turbo-1106": "10000000 per 1 minute"
    }
  },
  "embedding_config": {
    "rate_limit": "5000000 per 1 minute"
  }
}
52 changes: 52 additions & 0 deletions paperqa/configs/tier5_limits.json
@@ -0,0 +1,52 @@
{
  "answer": {
    "evidence_k": 15,
    "answer_max_sources": 5,
    "max_concurrent_requests": 8
  },
  "parsing": {
    "use_doc_details": true,
    "chunk_size": 7000,
    "overlap": 250
  },
  "prompts": {
    "use_json": true
  },
  "llm_config": {
    "rate_limit": {
      "gpt-4o": "30000000 per 1 minute",
      "gpt-4o-2024-08-06": "30000000 per 1 minute",
      "gpt-4o-2024-05-13": "30000000 per 1 minute",
      "gpt-4o-mini": "150000000 per 1 minute",
      "gpt-4o-mini-2024-07-18": "150000000 per 1 minute",
      "gpt-4-turbo": "2000000 per 1 minute",
      "gpt-4-turbo-2024-04-09": "2000000 per 1 minute",
      "gpt-4-0613": "1000000 per 1 minute",
      "gpt-4-0314": "1000000 per 1 minute",
      "gpt-4": "1000000 per 1 minute",
      "gpt-3.5-turbo-0125": "50000000 per 1 minute",
      "gpt-3.5-turbo": "50000000 per 1 minute",
      "gpt-3.5-turbo-1106": "50000000 per 1 minute"
    }
  },
  "summary_llm_config": {
    "rate_limit": {
      "gpt-4o": "30000000 per 1 minute",
      "gpt-4o-2024-08-06": "30000000 per 1 minute",
      "gpt-4o-2024-05-13": "30000000 per 1 minute",
      "gpt-4o-mini": "150000000 per 1 minute",
      "gpt-4o-mini-2024-07-18": "150000000 per 1 minute",
      "gpt-4-turbo": "2000000 per 1 minute",
      "gpt-4-turbo-2024-04-09": "2000000 per 1 minute",
      "gpt-4-0613": "1000000 per 1 minute",
      "gpt-4-0314": "1000000 per 1 minute",
      "gpt-4": "1000000 per 1 minute",
      "gpt-3.5-turbo-0125": "50000000 per 1 minute",
      "gpt-3.5-turbo": "50000000 per 1 minute",
      "gpt-3.5-turbo-1106": "50000000 per 1 minute"
    }
  },
  "embedding_config": {
    "rate_limit": "10000000 per 1 minute"
  }
}
11 changes: 9 additions & 2 deletions paperqa/core.py
@@ -28,8 +28,15 @@ def replace_newlines(match: re.Match) -> str:
    # https://regex101.com/r/VFcDmB/1
    pattern = r'"(?:[^"\\]|\\.)*"'
    text = re.sub(pattern, replace_newlines, text)

    return json.loads(text)
    try:
        return json.loads(text)
    except json.JSONDecodeError as e:
        raise ValueError(
            "Failed to parse JSON. Your model may not "
            "be capable of supporting JSON output. Try "
            "a different model or with "
            "`Settings(prompts={'use_json': False})`"
        ) from e
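Read in isolation, the guarded parse in this hunk behaves as sketched below. The function name `parse_llm_json` is a stand-in for this sketch; only the regex, the newline replacement, and the error message come from the diff:

```python
import json
import re


def parse_llm_json(text: str) -> dict:
    # Escape raw newlines that LLMs sometimes emit inside JSON string
    # literals, then parse; mirrors the flow shown in the hunk above.
    def replace_newlines(match: re.Match) -> str:
        return match.group(0).replace("\n", "\\n")

    text = re.sub(r'"(?:[^"\\]|\\.)*"', replace_newlines, text)
    try:
        return json.loads(text)
    except json.JSONDecodeError as e:
        raise ValueError(
            "Failed to parse JSON. Your model may not be capable of "
            "supporting JSON output. Try a different model or with "
            "`Settings(prompts={'use_json': False})`"
        ) from e


# A literal newline inside a JSON string would normally break json.loads;
# the regex pass escapes it first, so this parses cleanly.
print(parse_llm_json('{"summary": "line one\nline two"}'))
```

The character class `[^"\\]` matches newlines (unlike `.`), so each quoted string is captured whole and its raw newlines escaped before `json.loads` runs.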


async def map_fxn_summary(