From be8c1afbcc05573181f7063cf257bb0ba3cece99 Mon Sep 17 00:00:00 2001 From: qingzhong1 <137043369+qingzhong1@users.noreply.github.com> Date: Wed, 24 Jan 2024 21:15:39 +0800 Subject: [PATCH 1/4] modify readme (#312) * modify readme * modify readme --- .../erniebot_researcher/README.md | 53 +++++++++++++++---- .../erniebot_researcher/research_agent.py | 1 - 2 files changed, 43 insertions(+), 11 deletions(-) diff --git a/erniebot-agent/applications/erniebot_researcher/README.md b/erniebot-agent/applications/erniebot_researcher/README.md index bdfeae2d..e912dac7 100644 --- a/erniebot-agent/applications/erniebot_researcher/README.md +++ b/erniebot-agent/applications/erniebot_researcher/README.md @@ -66,29 +66,52 @@ wget https://paddlenlp.bj.bcebos.com/pipelines/fonts/SimSun.ttf > 第四步:创建索引 -下载实例数据 +**数据准备** + +我们支持docx、pdf、txt等格式的文件,用户可以把这些文件放到同一个文件夹下,然后运行下面的命令创建索引,后续会根据这些文件写报告。 + +为了方便测试,我们提供了样例数据。 +样例数据: ``` wget https://paddlenlp.bj.bcebos.com/pipelines/erniebot_researcher_example.tar.gz tar xvf erniebot_researcher_example.tar.gz ``` -首先需要在[AI Studio星河社区](https://aistudio.baidu.com/index)注册并登录账号,然后在AI Studio的[访问令牌页面](https://aistudio.baidu.com/index/accessToken)获取`Access Token`,最后设置环境变量: +url数据: +如果用户有文件对应的url链接,可以传入存储url链接的txt。在txt中,每一行存储url链接和对应文件的路径,例如: ``` -export EB_AGENT_ACCESS_TOKEN= -export AISTUDIO_ACCESS_TOKEN= +https://zhuanlan.zhihu.com/p/659457816 erniebot_researcher_example/Ai_Agent的起源.md ``` +如果用户不传入url文件,则默认文件的路径为其url链接 -如果用户有url链接,你可以传入存储url链接的txt。 -在txt中,每一行存储文件的路径和对应的url链接,例如: -'https://zhuanlan.zhihu.com/p/659457816 erniebot_researcher_example/Ai_Agent的起源.md' +摘要数据: -如果用户不传入url文件,则默认文件的路径为其url链接 +用户可以利用path_abstract参数传入自己文件对应摘要的存储路径。 +其中摘要需要用json文件存储。其中json文件内存储的是多个字典,每个字典有3组键值对, +- `page_content` : `str`, 文件摘要。 +- `url` : `str`, 文件url链接。 +- `name` : `str`, 文件名字。 + +例如: + +``` +[{"page_content":"文件摘要","url":"https://zhuanlan.zhihu.com/p/659457816","name":"Ai_Agent的起源"}, +...] 
+``` + +如果用户没有摘要路径,则无需改变path_abstract的默认值,我们会利用ernie-4.0来自动生成摘要,生成的摘要存储路径为abstract.json。 + +**创建索引** + +首先需要在[AI Studio星河社区](https://aistudio.baidu.com/index)注册并登录账号,然后在AI Studio的[访问令牌页面](https://aistudio.baidu.com/index/accessToken)获取`Access Token`,最后设置环境变量: + +**有摘要有url链接** -用户可以自己传入文件摘要的存储路径。其中摘要需要用json文件存储。其中json文件内存储的是多个字典,每个字典有3组键值对,"page_content"存储文件的摘要,"url"是文件的url链接,"name"是文章的名字。例如: -[{"page_content":"文章摘要","url":"https://zhuanlan.zhihu.com/p/659457816","name":Ai_Agent的起源},...] ``` +export EB_AGENT_ACCESS_TOKEN= +export AISTUDIO_ACCESS_TOKEN= python ./tools/preprocessing.py \ --index_name_full_text \ --index_name_abstract \ @@ -97,6 +120,16 @@ python ./tools/preprocessing.py \ --path_abstract ``` +**无摘要无url链接** + +``` +export EB_AGENT_ACCESS_TOKEN= +export AISTUDIO_ACCESS_TOKEN= +python ./tools/preprocessing.py \ +--index_name_full_text \ +--index_name_abstract \ +--path_full_text +``` > 第五步:运行 diff --git a/erniebot-agent/applications/erniebot_researcher/research_agent.py b/erniebot-agent/applications/erniebot_researcher/research_agent.py index 778668c1..cb1a4953 100644 --- a/erniebot-agent/applications/erniebot_researcher/research_agent.py +++ b/erniebot-agent/applications/erniebot_researcher/research_agent.py @@ -155,7 +155,6 @@ async def run(self, query: str): for sub_query in sub_queries: research_result = await self.run_search_summary(sub_query) paragraphs_item.extend(research_result) - paragraphs = [] for item in paragraphs_item: if item not in paragraphs: From 4aca819f2f720d0617b558672c629dac14039ef1 Mon Sep 17 00:00:00 2001 From: Lin Manhui Date: Thu, 8 Feb 2024 10:53:20 +0800 Subject: [PATCH 2/4] Add ernie-speed (#315) --- erniebot/src/erniebot/constants.py | 2 +- erniebot/src/erniebot/resources/chat_completion.py | 13 +++++++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/erniebot/src/erniebot/constants.py b/erniebot/src/erniebot/constants.py index 0267411a..e305f4f6 100644 --- a/erniebot/src/erniebot/constants.py +++ 
b/erniebot/src/erniebot/constants.py @@ -21,4 +21,4 @@ DEFAULT_REQUEST_TIMEOUT_SECS: Final[float] = 600 POLLING_INTERVAL_SECS: Final[float] = 5 -POLLING_TIMEOUT_SECS: Final[float] = 20 +POLLING_TIMEOUT_SECS: Final[float] = 600 diff --git a/erniebot/src/erniebot/resources/chat_completion.py b/erniebot/src/erniebot/resources/chat_completion.py index ee098283..ff3bfe2d 100644 --- a/erniebot/src/erniebot/resources/chat_completion.py +++ b/erniebot/src/erniebot/resources/chat_completion.py @@ -64,6 +64,9 @@ class ChatCompletion(EBResource, CreatableWithStreaming): "ernie-longtext": { "model_id": "ernie_bot_8k", }, + "ernie-speed": { + "model_id": "ernie_speed", + }, }, }, APIType.AISTUDIO: { @@ -478,8 +481,14 @@ def _set_val_if_key_exists(src: dict, dst: dict, key: str) -> None: # params params = {} - if model == "ernie-turbo": - for arg in ("functions", "stop", "disable_search", "enable_citation"): + if model in ("ernie-turbo", "ernie-speed"): + for arg in ( + "functions", + "stop", + "disable_search", + "enable_citation", + "tool_choice", + ): if arg in kwargs: raise errors.InvalidArgumentError(f"`{arg}` is not supported by the {model} model.") params["messages"] = messages From 2bfa1b1649d15c89b92794289341e6881a9656d7 Mon Sep 17 00:00:00 2001 From: w5688414 Date: Mon, 26 Feb 2024 14:35:44 +0800 Subject: [PATCH 3/4] [aistudio api] Update weipu api (#319) * Update weipu api * Update erniebot api * remove unused comments * restore erniebot * reformat * update * remove lines * fix ci * Add ernie speed * support no access token config * Fix unittest --- .../tools/test_llama_index_retrieval_tool.py | 2 +- erniebot/src/erniebot/backends/bce.py | 1 - erniebot/src/erniebot/backends/custom.py | 17 +++++++++++++++++ erniebot/src/erniebot/http_client.py | 1 - .../src/erniebot/resources/chat_completion.py | 11 ++++++++++- 5 files changed, 28 insertions(+), 4 deletions(-) diff --git a/erniebot-agent/tests/unit_tests/tools/test_llama_index_retrieval_tool.py 
b/erniebot-agent/tests/unit_tests/tools/test_llama_index_retrieval_tool.py index 164f1f27..88afee00 100644 --- a/erniebot-agent/tests/unit_tests/tools/test_llama_index_retrieval_tool.py +++ b/erniebot-agent/tests/unit_tests/tools/test_llama_index_retrieval_tool.py @@ -1,5 +1,5 @@ import pytest -from llama_index.schema import NodeWithScore, TextNode +from llama_index.core.schema import NodeWithScore, TextNode from erniebot_agent.tools.llama_index_retrieval_tool import LlamaIndexRetrievalTool diff --git a/erniebot/src/erniebot/backends/bce.py b/erniebot/src/erniebot/backends/bce.py index 9f311b45..4099a051 100644 --- a/erniebot/src/erniebot/backends/bce.py +++ b/erniebot/src/erniebot/backends/bce.py @@ -349,7 +349,6 @@ def handle_response(cls, resp: EBResponse) -> EBResponse: if "error_code" in resp and "error_msg" in resp: ecode = resp["error_code"] emsg = resp["error_msg"] - print(ecode) if ecode in (4, 17): raise errors.RequestLimitError(emsg, ecode=ecode) elif ecode in (13, 15, 18): diff --git a/erniebot/src/erniebot/backends/custom.py b/erniebot/src/erniebot/backends/custom.py index 90df4a3d..cd708751 100644 --- a/erniebot/src/erniebot/backends/custom.py +++ b/erniebot/src/erniebot/backends/custom.py @@ -12,8 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import os from typing import Any, AsyncIterator, ClassVar, Dict, Iterator, Optional, Union +import erniebot.utils.logging as logging from erniebot.api_types import APIType from erniebot.backends.bce import QianfanLegacyBackend from erniebot.response import EBResponse @@ -29,6 +31,10 @@ class CustomBackend(EBBackend): def __init__(self, config_dict: Dict[str, Any]) -> None: super().__init__(config_dict=config_dict) + access_token = self._cfg.get("access_token", None) + if access_token is None: + access_token = os.environ.get("AISTUDIO_ACCESS_TOKEN", None) + self._access_token = access_token def request( self, @@ -71,6 +77,8 @@ async def arequest( supplied_headers=headers, params=params, ) + if self._access_token is not None: + headers = self._add_aistudio_fields_to_headers(headers) return await self._client.asend_request( method, url, @@ -83,3 +91,12 @@ async def arequest( @classmethod def handle_response(cls, resp: EBResponse) -> EBResponse: return QianfanLegacyBackend.handle_response(resp) + + def _add_aistudio_fields_to_headers(self, headers: HeadersType) -> HeadersType: + if "Authorization" in headers: + logging.warning( + "Key 'Authorization' already exists in `headers`: %r", + headers["Authorization"], + ) + headers["Authorization"] = f"{self._access_token}" + return headers diff --git a/erniebot/src/erniebot/http_client.py b/erniebot/src/erniebot/http_client.py index 96245d71..f7d0390f 100644 --- a/erniebot/src/erniebot/http_client.py +++ b/erniebot/src/erniebot/http_client.py @@ -411,7 +411,6 @@ def _interpret_response_line( logging.debug("Decoded response body: %r", decoded_rbody) response = EBResponse(rcode=rcode, rbody=decoded_rbody, rheaders=dict(rheaders)) - if rcode != http.HTTPStatus.OK: raise errors.HTTPRequestError( f"The status code is not {http.HTTPStatus.OK}.", diff --git a/erniebot/src/erniebot/resources/chat_completion.py b/erniebot/src/erniebot/resources/chat_completion.py index ff3bfe2d..605b0bd4 100644 --- 
a/erniebot/src/erniebot/resources/chat_completion.py +++ b/erniebot/src/erniebot/resources/chat_completion.py @@ -92,6 +92,15 @@ class ChatCompletion(EBResource, CreatableWithStreaming): "ernie-3.5": { "model_id": "completions", }, + "ernie-4.0": { + "model_id": "completions_pro", + }, + "ernie-longtext": { + "model_id": "ernie_bot_8k", + }, + "ernie-speed": { + "model_id": "ernie_speed", + }, }, }, } @@ -514,7 +523,7 @@ def _set_val_if_key_exists(src: dict, dst: dict, key: str) -> None: # headers headers: HeadersType = {} - if self.api_type is APIType.AISTUDIO: + if self.api_type is APIType.AISTUDIO or self.api_type is APIType.CUSTOM: headers["Content-Type"] = "application/json" if "headers" in kwargs: headers.update(kwargs["headers"]) From 157ca7ea006dc302f173f0919d4df4562c666590 Mon Sep 17 00:00:00 2001 From: Sijun He Date: Wed, 28 Feb 2024 14:08:25 +0800 Subject: [PATCH 4/4] Update chat_completion_with_plugins.py (#324) --- .../src/erniebot/resources/chat_completion_with_plugins.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/erniebot/src/erniebot/resources/chat_completion_with_plugins.py b/erniebot/src/erniebot/resources/chat_completion_with_plugins.py index 3ee63db5..c26e63dc 100644 --- a/erniebot/src/erniebot/resources/chat_completion_with_plugins.py +++ b/erniebot/src/erniebot/resources/chat_completion_with_plugins.py @@ -46,13 +46,13 @@ class ChatCompletionWithPlugins(EBResource, CreatableWithStreaming): ) _API_INFO_DICT: ClassVar[Dict[APIType, Dict[str, Any]]] = { APIType.QIANFAN: { - "path": "/erniebot/plugin", + "path": "/erniebot/plugins", }, APIType.CUSTOM: { "path": "/erniebot/plugins_v3", }, APIType.AISTUDIO: { - "path": "/erniebot/plugin", + "path": "/erniebot/plugins", }, }