Commit 682ddec

authored
Merge branch 'deepset-ai:main' into ci-deepset-ai#5931-isort
2 parents 675df3c + 4e921c6

33 files changed, +722 −146 lines

.github/workflows/benchmarks.yml

Lines changed: 3 additions & 3 deletions
@@ -21,10 +21,10 @@ jobs:
     steps:
       - uses: actions/checkout@v4

-      - uses: iterative/setup-cml@v1
+      - uses: iterative/setup-cml@v2

       - name: AWS authentication
-        uses: aws-actions/configure-aws-credentials@8c3f20df09ac63af7b3ae3d7c91f105f857d8497
+        uses: aws-actions/configure-aws-credentials@010d0da01d0b5a38af31e9c3470dbfdabdecca3a
         with:
           aws-region: ${{ env.AWS_REGION }}
           role-to-assume: ${{ secrets.AWS_CI_ROLE_ARN }}
@@ -237,7 +237,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
      - name: AWS authentication
-       uses: aws-actions/configure-aws-credentials@8c3f20df09ac63af7b3ae3d7c91f105f857d8497
+       uses: aws-actions/configure-aws-credentials@010d0da01d0b5a38af31e9c3470dbfdabdecca3a
        with:
         aws-region: ${{ env.AWS_REGION }}
         role-to-assume: ${{ secrets.AWS_CI_ROLE_ARN }}
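
Both hunks move `aws-actions/configure-aws-credentials` from one full-length commit SHA to another, and `iterative/setup-cml` from v1 to v2. Pinning an action to a 40-character SHA freezes its code even if the upstream tag moves. As a hedged illustration only (a hypothetical lint script, not part of this commit), a check along these lines can list `uses:` references that are not SHA-pinned:

```python
# Hypothetical lint sketch (not part of this commit): list workflow `uses:`
# references that point at a tag or branch instead of a full 40-char commit SHA.
import re
import sys
from pathlib import Path

USES = re.compile(r"uses:\s*([\w./-]+)@([\w.-]+)")
FULL_SHA = re.compile(r"^[0-9a-f]{40}$")

def unpinned_actions(path: Path) -> list[str]:
    """Return action references whose @ref is not a full commit SHA."""
    refs = USES.findall(path.read_text())
    return [f"{name}@{ref}" for name, ref in refs if not FULL_SHA.match(ref)]

if __name__ == "__main__":
    workflow = Path(sys.argv[1] if len(sys.argv) > 1 else ".github/workflows/benchmarks.yml")
    for ref in unpinned_actions(workflow):
        print("not SHA-pinned:", ref)  # e.g. actions/checkout@v4
```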

.github/workflows/release_notes.yml

Lines changed: 8 additions & 3 deletions
@@ -22,9 +22,9 @@ jobs:
       - name: Checkout
         uses: actions/checkout@v4
         with:
-          # With the default value of 1, there are corner cases where tj-actions/changed-files
-          # fails with a `no merge base` error
-          fetch-depth: 0
+          # With the default value of 1, there are corner cases where tj-actions/changed-files
+          # fails with a `no merge base` error
+          fetch-depth: 0

       - name: Get release note files
         id: changed-files
@@ -35,5 +35,10 @@ jobs:
       - name: Check release notes
         if: steps.changed-files.outputs.any_changed == 'false' && !contains( github.event.pull_request.labels.*.name, 'ignore-for-release-notes')
         run: |
+          # Check if any of the commit messages contain tags ci/docs/test
+          if git log --pretty=%s origin/main..HEAD | grep -E '^(ci:|docs:|test:)' > /dev/null; then
+            echo "Skipping release note check for commits with 'ci:', 'docs:', or 'test:' tags."
+          else
            echo "::error::The release notes file is missing, please add one or attach the label 'ignore-for-release-notes' to this PR."
            exit 1
+          fi
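
The new `run` step waives the release-note requirement when any commit subject between `origin/main` and `HEAD` starts with `ci:`, `docs:`, or `test:` (that is exactly what the `grep -E` over `git log --pretty=%s` tests). A minimal Python re-creation of the same check, for illustration only; in CI it runs as shell:

```python
# Illustrative re-creation of the workflow's skip logic (the real check runs
# as shell in CI). Skips the release-note requirement if ANY commit subject
# between the base branch and HEAD starts with ci:, docs:, or test:.
import re
import subprocess
import sys

def has_skippable_commit(base: str = "origin/main") -> bool:
    subjects = subprocess.run(
        ["git", "log", "--pretty=%s", f"{base}..HEAD"],
        capture_output=True, text=True, check=True,
    ).stdout.splitlines()
    return any(re.match(r"^(ci:|docs:|test:)", s) for s in subjects)

if __name__ == "__main__":
    if has_skippable_commit():
        print("Skipping release note check for commits with 'ci:', 'docs:', or 'test:' tags.")
    else:
        print("::error::The release notes file is missing, please add one or attach the label 'ignore-for-release-notes' to this PR.")
        sys.exit(1)
```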

.github/workflows/tests_preview.yml

Lines changed: 6 additions & 11 deletions
@@ -29,7 +29,7 @@ jobs:
   black:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4

       - uses: actions/setup-python@v4
         with:
@@ -109,7 +109,7 @@ jobs:
         - macos-latest
     runs-on: ${{ matrix.os }}
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4

       - uses: actions/setup-python@v4
         with:
@@ -156,19 +156,14 @@ jobs:
   integration-tests-linux:
     name: Integration / ubuntu-latest
     needs: unit-tests
-    strategy:
-      fail-fast: false
-      matrix:
-        os:
-          - ubuntu-latest
-    runs-on: ${{ matrix.os }}
+    runs-on: ubuntu-latest
     services:
       tika:
         image: apache/tika:2.9.0.0
         ports:
           - 9998:9998
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4

       - uses: actions/setup-python@v4
         with:
@@ -222,7 +217,7 @@ jobs:
     needs: unit-tests
     runs-on: macos-latest
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4

       - uses: actions/setup-python@v4
         with:
@@ -280,7 +275,7 @@ jobs:
     needs: unit-tests
     runs-on: windows-latest
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4

       - uses: actions/setup-python@v4
         with:

README.md

Lines changed: 1 addition & 1 deletion
@@ -106,7 +106,7 @@ Then move into the cloned folder and install the project with `pip`, including t
 cd haystack && pip install -e '.[dev]'
 ```

-If you want to contribute to the Haystack repo, check our [Contributor Guidelines](#💙-contributing) first.
+If you want to contribute to the Haystack repo, check our [Contributor Guidelines](https://github.com/deepset-ai/haystack/blob/main/CONTRIBUTING.md) first.

 See the list of [dependencies](https://github.com/deepset-ai/haystack/blob/main/pyproject.toml) to check which ones you want to install (for example, `[all]`, `[dev]`, or other).

haystack/nodes/answer_generator/openai.py

Lines changed: 11 additions & 0 deletions
@@ -1,6 +1,7 @@
 import logging
 import os
 from typing import List, Optional, Tuple, Union
+import warnings

 from haystack import Document
 from haystack.environment import HAYSTACK_REMOTE_API_TIMEOUT_SEC
@@ -21,6 +22,10 @@

 class OpenAIAnswerGenerator(BaseGenerator):
     """
+    This component is now deprecated and will be removed in future versions.
+    Use `PromptNode` instead of `OpenAIAnswerGenerator`,
+    as explained in https://haystack.deepset.ai/tutorials/22_pipeline_with_promptnode.
+
     Uses the GPT-3 models from the OpenAI API to generate Answers based on the Documents it receives.
     The Documents can come from a Retriever or you can supply them manually.

@@ -109,6 +114,12 @@ def __init__(
         :param openai_organization: The OpenAI-Organization ID, defaults to `None`. For more details, see the OpenAI
         [documentation](https://platform.openai.com/docs/api-reference/requesting-organization).
         """
+        warnings.warn(
+            "The `OpenAIAnswerGenerator` component is deprecated and will be removed in future versions. "
+            "Use `PromptNode` instead of `OpenAIAnswerGenerator`.",
+            category=DeprecationWarning,
+        )
+
         super().__init__(progress_bar=progress_bar)
         if (examples is None and examples_context is not None) or (examples is not None and examples_context is None):
             logger.warning(
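
Since Python hides `DeprecationWarning` by default in most contexts, callers may never see the new message unless they opt in with a warnings filter. A minimal sketch of how the warning surfaces at construction time (the API key is a placeholder and shown for illustration only; constructing the generator may still require the usual environment, e.g. tokenizer setup):

```python
# Minimal sketch: surface the new DeprecationWarning at construction time.
# The API key below is a placeholder; DeprecationWarning is hidden by default
# in most contexts, so we enable it explicitly with a warnings filter.
import warnings

from haystack.nodes.answer_generator.openai import OpenAIAnswerGenerator

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always", DeprecationWarning)
    OpenAIAnswerGenerator(api_key="sk-...")  # placeholder key

for w in caught:
    print(f"{w.category.__name__}: {w.message}")
# DeprecationWarning: The `OpenAIAnswerGenerator` component is deprecated ...
```

The class docstring points to the PromptNode tutorial as the migration path.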

haystack/nodes/prompt/invocation_layer/chatgpt.py

Lines changed: 115 additions & 41 deletions
@@ -1,10 +1,17 @@
 import logging
-from typing import Optional, List, Dict, Union, Any
+from typing import Any, Dict, List, Optional, Union

 from haystack.nodes.prompt.invocation_layer.handlers import DefaultTokenStreamingHandler, TokenStreamingHandler
 from haystack.nodes.prompt.invocation_layer.open_ai import OpenAIInvocationLayer
 from haystack.nodes.prompt.invocation_layer.utils import has_azure_parameters
-from haystack.utils.openai_utils import openai_request, _check_openai_finish_reason, count_openai_tokens_messages
+from haystack.utils.openai_utils import (
+    _check_openai_finish_reason,
+    check_openai_async_policy_violation,
+    check_openai_policy_violation,
+    count_openai_tokens_messages,
+    openai_async_request,
+    openai_request,
+)

 logger = logging.getLogger(__name__)

@@ -43,45 +50,6 @@ def __init__(
         """
         super().__init__(api_key, model_name_or_path, max_length, api_base=api_base, **kwargs)

-    def _execute_openai_request(
-        self, prompt: Union[str, List[Dict]], base_payload: Dict, kwargs_with_defaults: Dict, stream: bool
-    ):
-        """
-        For more details, see [OpenAI ChatGPT API reference](https://platform.openai.com/docs/api-reference/chat).
-        """
-        if isinstance(prompt, str):
-            messages = [{"role": "user", "content": prompt}]
-        elif isinstance(prompt, list) and len(prompt) > 0 and isinstance(prompt[0], dict):
-            messages = prompt
-        else:
-            raise ValueError(
-                f"The prompt format is different than what the model expects. "
-                f"The model {self.model_name_or_path} requires either a string or messages in the ChatML format. "
-                f"For more details, see this [GitHub discussion](https://github.com/openai/openai-python/blob/main/chatml.md)."
-            )
-        extra_payload = {"messages": messages}
-        payload = {**base_payload, **extra_payload}
-        if not stream:
-            response = openai_request(url=self.url, headers=self.headers, payload=payload)
-            _check_openai_finish_reason(result=response, payload=payload)
-            assistant_response = [choice["message"]["content"].strip() for choice in response["choices"]]
-        else:
-            response = openai_request(
-                url=self.url, headers=self.headers, payload=payload, read_response=False, stream=True
-            )
-            handler: TokenStreamingHandler = kwargs_with_defaults.pop("stream_handler", DefaultTokenStreamingHandler())
-            assistant_response = self._process_streaming_response(response=response, stream_handler=handler)
-
-        # Although ChatGPT generates text until stop words are encountered, unfortunately it includes the stop word
-        # We want to exclude it to be consistent with other invocation layers
-        if "stop" in kwargs_with_defaults and kwargs_with_defaults["stop"] is not None:
-            stop_words = kwargs_with_defaults["stop"]
-            for idx, _ in enumerate(assistant_response):
-                for stop_word in stop_words:
-                    assistant_response[idx] = assistant_response[idx].replace(stop_word, "").strip()
-
-        return assistant_response
-
     def _extract_token(self, event_data: Dict[str, Any]):
         delta = event_data["choices"][0]["delta"]
         if "content" in delta:
@@ -141,3 +109,109 @@ def supports(cls, model_name_or_path: str, **kwargs) -> bool:
             and not "gpt-3.5-turbo-instruct" in model_name_or_path
         )
         return valid_model and not has_azure_parameters(**kwargs)
+
+    async def ainvoke(self, *args, **kwargs):
+        """
+        Invokes a prompt on the model. Based on the model, it takes in either a prompt or a list of messages
+        and returns a list of responses using a REST invocation.
+
+        :return: The responses from the model.
+
+        Note: Only kwargs relevant to OpenAI are passed to the OpenAI REST API. Other kwargs are ignored.
+        For more details, see the OpenAI [documentation](https://platform.openai.com/docs/api-reference/completions/create).
+        """
+        prompt, base_payload, kwargs_with_defaults, stream, moderation = self._prepare_invoke(*args, **kwargs)
+
+        if moderation and await check_openai_async_policy_violation(input=prompt, headers=self.headers):
+            logger.info("Prompt '%s' will not be sent to OpenAI due to potential policy violation.", prompt)
+            return []
+
+        if isinstance(prompt, str):
+            messages = [{"role": "user", "content": prompt}]
+        elif isinstance(prompt, list) and len(prompt) > 0 and isinstance(prompt[0], dict):
+            messages = prompt
+        else:
+            raise ValueError(
+                f"The prompt format is different than what the model expects. "
+                f"The model {self.model_name_or_path} requires either a string or messages in the ChatML format. "
+                f"For more details, see this [GitHub discussion](https://github.com/openai/openai-python/blob/main/chatml.md)."
+            )
+        extra_payload = {"messages": messages}
+        payload = {**base_payload, **extra_payload}
+        if not stream:
+            response = await openai_async_request(url=self.url, headers=self.headers, payload=payload)
+            _check_openai_finish_reason(result=response, payload=payload)
+            assistant_response = [choice["message"]["content"].strip() for choice in response["choices"]]
+        else:
+            response = await openai_async_request(
+                url=self.url, headers=self.headers, payload=payload, read_response=False, stream=True
+            )
+            handler: TokenStreamingHandler = kwargs_with_defaults.pop("stream_handler", DefaultTokenStreamingHandler())
+            assistant_response = self._process_streaming_response(response=response, stream_handler=handler)
+
+        # Although ChatGPT generates text until stop words are encountered, unfortunately it includes the stop word
+        # We want to exclude it to be consistent with other invocation layers
+        if "stop" in kwargs_with_defaults and kwargs_with_defaults["stop"] is not None:
+            stop_words = kwargs_with_defaults["stop"]
+            for idx, _ in enumerate(assistant_response):
+                for stop_word in stop_words:
+                    assistant_response[idx] = assistant_response[idx].replace(stop_word, "").strip()
+
+        if moderation and await check_openai_async_policy_violation(input=assistant_response, headers=self.headers):
+            logger.info("Response '%s' will not be returned due to potential policy violation.", assistant_response)
+            return []
+
+        return assistant_response
+
+    def invoke(self, *args, **kwargs):
+        """
+        Invokes a prompt on the model. Based on the model, it takes in either a prompt or a list of messages
+        and returns a list of responses using a REST invocation.
+
+        :return: The responses from the model.
+
+        Note: Only kwargs relevant to OpenAI are passed to the OpenAI REST API. Other kwargs are ignored.
+        For more details, see the OpenAI [documentation](https://platform.openai.com/docs/api-reference/completions/create).
+        """
+        prompt, base_payload, kwargs_with_defaults, stream, moderation = self._prepare_invoke(*args, **kwargs)

+
+        if moderation and check_openai_policy_violation(input=prompt, headers=self.headers):
+            logger.info("Prompt '%s' will not be sent to OpenAI due to potential policy violation.", prompt)
+            return []
+
+        if isinstance(prompt, str):
+            messages = [{"role": "user", "content": prompt}]
+        elif isinstance(prompt, list) and len(prompt) > 0 and isinstance(prompt[0], dict):
+            messages = prompt
+        else:
+            raise ValueError(
+                f"The prompt format is different than what the model expects. "
+                f"The model {self.model_name_or_path} requires either a string or messages in the ChatML format. "
+                f"For more details, see this [GitHub discussion](https://github.com/openai/openai-python/blob/main/chatml.md)."
+            )
+        extra_payload = {"messages": messages}
+        payload = {**base_payload, **extra_payload}
+        if not stream:
+            response = openai_request(url=self.url, headers=self.headers, payload=payload)
+            _check_openai_finish_reason(result=response, payload=payload)
+            assistant_response = [choice["message"]["content"].strip() for choice in response["choices"]]
+        else:
+            response = openai_request(
+                url=self.url, headers=self.headers, payload=payload, read_response=False, stream=True
+            )
+            handler: TokenStreamingHandler = kwargs_with_defaults.pop("stream_handler", DefaultTokenStreamingHandler())
+            assistant_response = self._process_streaming_response(response=response, stream_handler=handler)
+
+        # Although ChatGPT generates text until stop words are encountered, unfortunately it includes the stop word
+        # We want to exclude it to be consistent with other invocation layers
+        if "stop" in kwargs_with_defaults and kwargs_with_defaults["stop"] is not None:
+            stop_words = kwargs_with_defaults["stop"]
+            for idx, _ in enumerate(assistant_response):
+                for stop_word in stop_words:
+                    assistant_response[idx] = assistant_response[idx].replace(stop_word, "").strip()
+
+        if moderation and check_openai_policy_violation(input=assistant_response, headers=self.headers):
+            logger.info("Response '%s' will not be returned due to potential policy violation.", assistant_response)
+            return []
+
+        return assistant_response
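
Putting the new paths together: `invoke` and `ainvoke` share the same ChatML handling (a plain string becomes one `user` message; a list of role/content dicts passes through unchanged), differ only in `openai_request` versus `openai_async_request`, and both short-circuit to `[]` when moderation flags the prompt or the response. A hedged usage sketch, assuming a valid `OPENAI_API_KEY` and that `_prepare_invoke` reads `prompt` and `moderation` from kwargs as this diff implies:

```python
# Hedged usage sketch for the new invoke/ainvoke code paths. Assumes a valid
# OPENAI_API_KEY; the `moderation` and `stop` kwargs are illustrative and are
# assumed to be read via _prepare_invoke, as the diff above implies.
import asyncio
import os

from haystack.nodes.prompt.invocation_layer.chatgpt import ChatGPTInvocationLayer

layer = ChatGPTInvocationLayer(api_key=os.environ["OPENAI_API_KEY"], model_name_or_path="gpt-3.5-turbo")

# Sync path: a plain string is wrapped as a single ChatML user message.
print(layer.invoke(prompt="Reply with one word: hello?", stop=["\n"], moderation=True))

# Async path: ChatML messages pass through unchanged.
messages = [{"role": "user", "content": "Reply with one word: hello?"}]
print(asyncio.run(layer.ainvoke(prompt=messages, moderation=True)))
```

Note the trade-off in this refactor: the removed `_execute_openai_request` helper is effectively inlined twice, duplicating the ChatML handling and stop-word stripping, so that the async variant can `await` its requests and moderation checks.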
