
Commit

Merge branch 'deepset-ai:main' into ci-deepset-ai#5931-isort
mjspeck authored Oct 7, 2023
2 parents 675df3c + 4e921c6 commit 682ddec
Showing 33 changed files with 722 additions and 146 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/benchmarks.yml
@@ -21,10 +21,10 @@ jobs:
steps:
- uses: actions/checkout@v4

- uses: iterative/setup-cml@v1
- uses: iterative/setup-cml@v2

- name: AWS authentication
uses: aws-actions/configure-aws-credentials@8c3f20df09ac63af7b3ae3d7c91f105f857d8497
uses: aws-actions/configure-aws-credentials@010d0da01d0b5a38af31e9c3470dbfdabdecca3a
with:
aws-region: ${{ env.AWS_REGION }}
role-to-assume: ${{ secrets.AWS_CI_ROLE_ARN }}
@@ -237,7 +237,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: AWS authentication
uses: aws-actions/configure-aws-credentials@8c3f20df09ac63af7b3ae3d7c91f105f857d8497
uses: aws-actions/configure-aws-credentials@010d0da01d0b5a38af31e9c3470dbfdabdecca3a
with:
aws-region: ${{ env.AWS_REGION }}
role-to-assume: ${{ secrets.AWS_CI_ROLE_ARN }}
11 changes: 8 additions & 3 deletions .github/workflows/release_notes.yml
@@ -22,9 +22,9 @@ jobs:
- name: Checkout
uses: actions/checkout@v4
with:
# With the default value of 1, there are corner cases where tj-actions/changed-files
# fails with a `no merge base` error
fetch-depth: 0
# With the default value of 1, there are corner cases where tj-actions/changed-files
# fails with a `no merge base` error
fetch-depth: 0

- name: Get release note files
id: changed-files
@@ -35,5 +35,10 @@ jobs:
- name: Check release notes
if: steps.changed-files.outputs.any_changed == 'false' && !contains( github.event.pull_request.labels.*.name, 'ignore-for-release-notes')
run: |
# Check if any of the commit messages contain tags ci/docs/test
if git log --pretty=%s origin/main..HEAD | grep -E '^(ci:|docs:|test:)' > /dev/null; then
echo "Skipping release note check for commits with 'ci:', 'docs:', or 'test:' tags."
else
echo "::error::The release notes file is missing, please add one or attach the label 'ignore-for-release-notes' to this PR."
exit 1
fi
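
For clarity, here is a minimal Python sketch of the subject-line rule the new step applies (illustrative only, not part of the workflow; the example subjects are invented):

```python
import re

# Subject lines as `git log --pretty=%s origin/main..HEAD` would print them
# (hypothetical examples).
subjects = [
    "ci: pin setup-cml to v2",
    "docs: fix contributor guidelines link",
]

# Mirrors `grep -E '^(ci:|docs:|test:)'`: the release-note requirement is
# skipped if ANY subject starts with one of the tags.
skip_check = any(re.match(r"^(ci:|docs:|test:)", s) for s in subjects)
print("skip release note check" if skip_check else "release notes required")
```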
17 changes: 6 additions & 11 deletions .github/workflows/tests_preview.yml
@@ -29,7 +29,7 @@ jobs:
black:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- uses: actions/setup-python@v4
with:
@@ -109,7 +109,7 @@ jobs:
- macos-latest
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- uses: actions/setup-python@v4
with:
@@ -156,19 +156,14 @@ jobs:
integration-tests-linux:
name: Integration / ubuntu-latest
needs: unit-tests
strategy:
fail-fast: false
matrix:
os:
- ubuntu-latest
runs-on: ${{ matrix.os }}
runs-on: ubuntu-latest
services:
tika:
image: apache/tika:2.9.0.0
ports:
- 9998:9998
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- uses: actions/setup-python@v4
with:
@@ -222,7 +217,7 @@ jobs:
needs: unit-tests
runs-on: macos-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- uses: actions/setup-python@v4
with:
@@ -280,7 +275,7 @@ jobs:
needs: unit-tests
runs-on: windows-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- uses: actions/setup-python@v4
with:
2 changes: 1 addition & 1 deletion README.md
@@ -106,7 +106,7 @@ Then move into the cloned folder and install the project with `pip`, including t
cd haystack && pip install -e '.[dev]'
```

If you want to contribute to the Haystack repo, check our [Contributor Guidelines](#💙-contributing) first.
If you want to contribute to the Haystack repo, check our [Contributor Guidelines](https://github.com/deepset-ai/haystack/blob/main/CONTRIBUTING.md) first.

See the list of [dependencies](https://github.com/deepset-ai/haystack/blob/main/pyproject.toml) to check which ones you want to install (for example, `[all]`, `[dev]`, or other).

11 changes: 11 additions & 0 deletions haystack/nodes/answer_generator/openai.py
@@ -1,6 +1,7 @@
import logging
import os
from typing import List, Optional, Tuple, Union
import warnings

from haystack import Document
from haystack.environment import HAYSTACK_REMOTE_API_TIMEOUT_SEC
@@ -21,6 +22,10 @@

class OpenAIAnswerGenerator(BaseGenerator):
"""
This component is now deprecated and will be removed in future versions.
Use `PromptNode` instead of `OpenAIAnswerGenerator`,
as explained in https://haystack.deepset.ai/tutorials/22_pipeline_with_promptnode.
Uses the GPT-3 models from the OpenAI API to generate Answers based on the Documents it receives.
The Documents can come from a Retriever or you can supply them manually.
@@ -109,6 +114,12 @@ def __init__(
:param openai_organization: The OpenAI-Organization ID, defaults to `None`. For more details, see OpenAI
[documentation](https://platform.openai.com/docs/api-reference/requesting-organization).
"""
warnings.warn(
"The `OpenAIAnswerGenerator` component is deprecated and will be removed in future versions. Use `PromptNode` "
"instead of `OpenAIAnswerGenerator`.",
category=DeprecationWarning,
)

super().__init__(progress_bar=progress_bar)
if (examples is None and examples_context is not None) or (examples is not None and examples_context is None):
logger.warning(
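
For anyone hit by the new deprecation warning, a minimal migration sketch, assuming the `PromptNode` API from the tutorial linked in the docstring (the model and template names below are illustrative, not taken from this diff):

```python
from haystack.nodes import PromptNode

# Deprecated path:
#   generator = OpenAIAnswerGenerator(api_key="sk-...")
#   result = generator.predict(query="...", documents=[...])

# Suggested replacement: PromptNode with an OpenAI chat model.
node = PromptNode(
    model_name_or_path="gpt-3.5-turbo",  # illustrative model choice
    api_key="sk-...",  # replace with a real OpenAI key
    default_prompt_template="deepset/question-answering",  # assumed template name
)

# Calling the node uses the default template; query/documents fill its variables.
result = node(query="What does Haystack do?", documents=[])
```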
156 changes: 115 additions & 41 deletions haystack/nodes/prompt/invocation_layer/chatgpt.py
@@ -1,10 +1,17 @@
import logging
from typing import Optional, List, Dict, Union, Any
from typing import Any, Dict, List, Optional, Union

from haystack.nodes.prompt.invocation_layer.handlers import DefaultTokenStreamingHandler, TokenStreamingHandler
from haystack.nodes.prompt.invocation_layer.open_ai import OpenAIInvocationLayer
from haystack.nodes.prompt.invocation_layer.utils import has_azure_parameters
from haystack.utils.openai_utils import openai_request, _check_openai_finish_reason, count_openai_tokens_messages
from haystack.utils.openai_utils import (
_check_openai_finish_reason,
check_openai_async_policy_violation,
check_openai_policy_violation,
count_openai_tokens_messages,
openai_async_request,
openai_request,
)

logger = logging.getLogger(__name__)

@@ -43,45 +50,6 @@ def __init__(
"""
super().__init__(api_key, model_name_or_path, max_length, api_base=api_base, **kwargs)

def _execute_openai_request(
self, prompt: Union[str, List[Dict]], base_payload: Dict, kwargs_with_defaults: Dict, stream: bool
):
"""
For more details, see [OpenAI ChatGPT API reference](https://platform.openai.com/docs/api-reference/chat).
"""
if isinstance(prompt, str):
messages = [{"role": "user", "content": prompt}]
elif isinstance(prompt, list) and len(prompt) > 0 and isinstance(prompt[0], dict):
messages = prompt
else:
raise ValueError(
f"The prompt format is different than what the model expects. "
f"The model {self.model_name_or_path} requires either a string or messages in the ChatML format. "
f"For more details, see this [GitHub discussion](https://github.com/openai/openai-python/blob/main/chatml.md)."
)
extra_payload = {"messages": messages}
payload = {**base_payload, **extra_payload}
if not stream:
response = openai_request(url=self.url, headers=self.headers, payload=payload)
_check_openai_finish_reason(result=response, payload=payload)
assistant_response = [choice["message"]["content"].strip() for choice in response["choices"]]
else:
response = openai_request(
url=self.url, headers=self.headers, payload=payload, read_response=False, stream=True
)
handler: TokenStreamingHandler = kwargs_with_defaults.pop("stream_handler", DefaultTokenStreamingHandler())
assistant_response = self._process_streaming_response(response=response, stream_handler=handler)

# Although ChatGPT generates text until stop words are encountered, unfortunately it includes the stop word
# We want to exclude it to be consistent with other invocation layers
if "stop" in kwargs_with_defaults and kwargs_with_defaults["stop"] is not None:
stop_words = kwargs_with_defaults["stop"]
for idx, _ in enumerate(assistant_response):
for stop_word in stop_words:
assistant_response[idx] = assistant_response[idx].replace(stop_word, "").strip()

return assistant_response

def _extract_token(self, event_data: Dict[str, Any]):
delta = event_data["choices"][0]["delta"]
if "content" in delta:
@@ -141,3 +109,109 @@ def supports(cls, model_name_or_path: str, **kwargs) -> bool:
and "gpt-3.5-turbo-instruct" not in model_name_or_path
)
return valid_model and not has_azure_parameters(**kwargs)

async def ainvoke(self, *args, **kwargs):
"""
Invokes a prompt on the model. Depending on the model, it takes in either a prompt string or a list of
ChatML messages, and returns a list of responses using a REST invocation.
:return: A list of generated responses.
Note: Only kwargs relevant to OpenAI are passed to the OpenAI REST API; all other kwargs are ignored.
For more details, see the OpenAI [documentation](https://platform.openai.com/docs/api-reference/completions/create).
"""
prompt, base_payload, kwargs_with_defaults, stream, moderation = self._prepare_invoke(*args, **kwargs)

if moderation and await check_openai_async_policy_violation(input=prompt, headers=self.headers):
logger.info("Prompt '%s' will not be sent to OpenAI due to potential policy violation.", prompt)
return []

if isinstance(prompt, str):
messages = [{"role": "user", "content": prompt}]
elif isinstance(prompt, list) and len(prompt) > 0 and isinstance(prompt[0], dict):
messages = prompt
else:
raise ValueError(
f"The prompt format is different than what the model expects. "
f"The model {self.model_name_or_path} requires either a string or messages in the ChatML format. "
f"For more details, see this [GitHub discussion](https://github.com/openai/openai-python/blob/main/chatml.md)."
)
extra_payload = {"messages": messages}
payload = {**base_payload, **extra_payload}
if not stream:
response = await openai_async_request(url=self.url, headers=self.headers, payload=payload)
_check_openai_finish_reason(result=response, payload=payload)
assistant_response = [choice["message"]["content"].strip() for choice in response["choices"]]
else:
response = await openai_async_request(
url=self.url, headers=self.headers, payload=payload, read_response=False, stream=True
)
handler: TokenStreamingHandler = kwargs_with_defaults.pop("stream_handler", DefaultTokenStreamingHandler())
assistant_response = self._process_streaming_response(response=response, stream_handler=handler)

# Although ChatGPT generates text until stop words are encountered, unfortunately it includes the stop word
# We want to exclude it to be consistent with other invocation layers
if "stop" in kwargs_with_defaults and kwargs_with_defaults["stop"] is not None:
stop_words = kwargs_with_defaults["stop"]
for idx, _ in enumerate(assistant_response):
for stop_word in stop_words:
assistant_response[idx] = assistant_response[idx].replace(stop_word, "").strip()

if moderation and await check_openai_async_policy_violation(input=assistant_response, headers=self.headers):
logger.info("Response '%s' will not be returned due to potential policy violation.", assistant_response)
return []

return assistant_response

def invoke(self, *args, **kwargs):
"""
Invokes a prompt on the model. Depending on the model, it takes in either a prompt string or a list of
ChatML messages, and returns a list of responses using a REST invocation.
:return: A list of generated responses.
Note: Only kwargs relevant to OpenAI are passed to the OpenAI REST API; all other kwargs are ignored.
For more details, see the OpenAI [documentation](https://platform.openai.com/docs/api-reference/completions/create).
"""
prompt, base_payload, kwargs_with_defaults, stream, moderation = self._prepare_invoke(*args, **kwargs)

if moderation and check_openai_policy_violation(input=prompt, headers=self.headers):
logger.info("Prompt '%s' will not be sent to OpenAI due to potential policy violation.", prompt)
return []

if isinstance(prompt, str):
messages = [{"role": "user", "content": prompt}]
elif isinstance(prompt, list) and len(prompt) > 0 and isinstance(prompt[0], dict):
messages = prompt
else:
raise ValueError(
f"The prompt format is different than what the model expects. "
f"The model {self.model_name_or_path} requires either a string or messages in the ChatML format. "
f"For more details, see this [GitHub discussion](https://github.com/openai/openai-python/blob/main/chatml.md)."
)
extra_payload = {"messages": messages}
payload = {**base_payload, **extra_payload}
if not stream:
response = openai_request(url=self.url, headers=self.headers, payload=payload)
_check_openai_finish_reason(result=response, payload=payload)
assistant_response = [choice["message"]["content"].strip() for choice in response["choices"]]
else:
response = openai_request(
url=self.url, headers=self.headers, payload=payload, read_response=False, stream=True
)
handler: TokenStreamingHandler = kwargs_with_defaults.pop("stream_handler", DefaultTokenStreamingHandler())
assistant_response = self._process_streaming_response(response=response, stream_handler=handler)

# Although ChatGPT generates text until stop words are encountered, unfortunately it includes the stop word
# We want to exclude it to be consistent with other invocation layers
if "stop" in kwargs_with_defaults and kwargs_with_defaults["stop"] is not None:
stop_words = kwargs_with_defaults["stop"]
for idx, _ in enumerate(assistant_response):
for stop_word in stop_words:
assistant_response[idx] = assistant_response[idx].replace(stop_word, "").strip()

if moderation and check_openai_policy_violation(input=assistant_response, headers=self.headers):
logger.info("Response '%s' will not be returned due to potential policy violation.", assistant_response)
return []

return assistant_response
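
A short usage sketch of the two new call paths, assuming the constructor arguments shown above (`api_key`, `model_name_or_path`) and that the prompt is passed as a keyword argument; the key and prompts are placeholders:

```python
import asyncio

from haystack.nodes.prompt.invocation_layer.chatgpt import ChatGPTInvocationLayer

layer = ChatGPTInvocationLayer(api_key="sk-...", model_name_or_path="gpt-3.5-turbo")

# Synchronous path: a plain string is wrapped into a single user message.
print(layer.invoke(prompt="Summarize Haystack in one sentence."))

# Asynchronous path: same pre/post-processing, but the HTTP call goes through
# openai_async_request and moderation checks (if enabled) are awaited.
async def main():
    messages = [{"role": "user", "content": "Say hello."}]  # ChatML-style input
    print(await layer.ainvoke(prompt=messages))

asyncio.run(main())
```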
