diff --git a/apps/setting/models_provider/impl/gemini_model_provider/credential/stt.py b/apps/setting/models_provider/impl/gemini_model_provider/credential/stt.py
new file mode 100644
index 00000000000..90e0164f051
--- /dev/null
+++ b/apps/setting/models_provider/impl/gemini_model_provider/credential/stt.py
@@ -0,0 +1,62 @@
+# coding=utf-8
+from typing import Dict
+
+from common import forms
+from common.exception.app_exception import AppApiException
+from common.forms import BaseForm
+from setting.models_provider.base_model_provider import BaseModelCredential, ValidCode
+
+
+class GeminiSTTModelCredential(BaseForm, BaseModelCredential):
+    """Credential form for Gemini speech-to-text models: a single required API key."""
+    api_key = forms.PasswordInputField('API Key', required=True)
+
+    def is_valid(self, model_type: str, model_name, model_credential: Dict[str, object], provider,
+                 raise_exception=False):
+        """Check that the credential can be used with the given model.
+
+        Verifies that the provider lists ``model_type``, that every required
+        key is present in ``model_credential``, and that ``check_auth()`` on
+        the model returned by ``provider.get_model`` does not raise.
+
+        :param model_type: value looked up in ``provider.get_model_type_list()``
+        :param model_name: model name handed to ``provider.get_model``
+        :param model_credential: credential values; must contain ``api_key``
+        :param provider: object exposing ``get_model_type_list`` and ``get_model``
+        :param raise_exception: raise ``AppApiException`` instead of returning
+            ``False`` when a key is missing or the auth check fails
+        :return: ``True`` when every check passes, otherwise ``False``
+        :raises AppApiException: for an unsupported model type, for any
+            ``AppApiException`` raised by the auth check, and for the other
+            failures when ``raise_exception`` is true
+        """
+        model_type_list = provider.get_model_type_list()
+        if not any(mt.get('value') == model_type for mt in model_type_list):
+            raise AppApiException(ValidCode.valid_error.value, f'{model_type} 模型类型不支持')
+
+        for key in ['api_key']:
+            if key not in model_credential:
+                if raise_exception:
+                    raise AppApiException(ValidCode.valid_error.value, f'{key} 字段为必填字段')
+                return False
+        try:
+            model = provider.get_model(model_type, model_name, model_credential)
+            model.check_auth()
+        except AppApiException:
+            # Application errors already carry their own code and message; let them through.
+            raise
+        except Exception as e:
+            if raise_exception:
+                # Chain the cause so the underlying failure stays visible in the traceback.
+                raise AppApiException(ValidCode.valid_error.value,
+                                      f'校验失败,请检查参数是否正确: {str(e)}') from e
+            return False
+        return True
+
+    def encryption_dict(self, model: Dict[str, object]):
+        """Return a copy of ``model`` with ``api_key`` replaced by its ``encryption()`` result."""
+        return {**model, 'api_key': super().encryption(model.get('api_key', ''))}
+
+    def get_model_params_setting_form(self, model_name):
+        """Return ``None``; this credential defines no parameter-setting form."""
+        return None
diff --git a/apps/setting/models_provider/impl/gemini_model_provider/gemini_model_provider.py b/apps/setting/models_provider/impl/gemini_model_provider/gemini_model_provider.py
index a9acd40cf66..2556328d798 100644
--- 
a/apps/setting/models_provider/impl/gemini_model_provider/gemini_model_provider.py
+++ b/apps/setting/models_provider/impl/gemini_model_provider/gemini_model_provider.py
@@ -13,12 +13,15 @@
     ModelInfoManage
 from setting.models_provider.impl.gemini_model_provider.credential.image import GeminiImageModelCredential
 from setting.models_provider.impl.gemini_model_provider.credential.llm import GeminiLLMModelCredential
+from setting.models_provider.impl.gemini_model_provider.credential.stt import GeminiSTTModelCredential
 from setting.models_provider.impl.gemini_model_provider.model.image import GeminiImage
 from setting.models_provider.impl.gemini_model_provider.model.llm import GeminiChatModel
+from setting.models_provider.impl.gemini_model_provider.model.stt import GeminiSpeechToText
 from smartdoc.conf import PROJECT_DIR
 
 gemini_llm_model_credential = GeminiLLMModelCredential()
 gemini_image_model_credential = GeminiImageModelCredential()
+gemini_stt_model_credential = GeminiSTTModelCredential()
 
 model_info_list = [
     ModelInfo('gemini-1.0-pro', '最新的Gemini 1.0 Pro模型,随Google更新而更新',
@@ -42,14 +45,26 @@
               GeminiImage),
 ]
 
-
+# Speech-to-text models; every entry shares the STT credential form.
+model_stt_info_list = [
+    ModelInfo('gemini-1.5-flash', '最新的Gemini 1.5 Flash模型,随Google更新而更新',
+              ModelTypeConst.STT,
+              gemini_stt_model_credential,
+              GeminiSpeechToText),
+    ModelInfo('gemini-1.5-pro', '最新的Gemini 1.5 Pro模型,随Google更新而更新',
+              ModelTypeConst.STT,
+              gemini_stt_model_credential,
+              GeminiSpeechToText),
+]
 
 model_info_manage = (
     ModelInfoManage.builder()
     .append_model_info_list(model_info_list)
     .append_model_info_list(model_image_info_list)
+    .append_model_info_list(model_stt_info_list)
     .append_default_model_info(model_info_list[0])
     .append_default_model_info(model_image_info_list[0])
+    .append_default_model_info(model_stt_info_list[0])
     .build()
 )
 
diff --git a/apps/setting/models_provider/impl/gemini_model_provider/model/stt.py b/apps/setting/models_provider/impl/gemini_model_provider/model/stt.py
new file mode 100644
index 00000000000..96fa6f0d433
--- 
/dev/null
+++ b/apps/setting/models_provider/impl/gemini_model_provider/model/stt.py
@@ -0,0 +1,72 @@
+import asyncio
+import io
+from typing import Dict
+
+from langchain_core.messages import HumanMessage
+from langchain_google_genai import ChatGoogleGenerativeAI
+from openai import OpenAI
+
+from common.config.tokenizer_manage_config import TokenizerManage
+from setting.models_provider.base_model_provider import MaxKBBaseModel
+from setting.models_provider.impl.base_stt import BaseSpeechToText
+import google.generativeai as genai
+
+
+def custom_get_token_ids(text: str):
+    """Encode ``text`` with the tokenizer supplied by ``TokenizerManage``."""
+    tokenizer = TokenizerManage.get_tokenizer()
+    return tokenizer.encode(text)
+
+
+class GeminiSpeechToText(MaxKBBaseModel, BaseSpeechToText):
+    """Speech-to-text model that asks a Gemini chat model to transcribe audio."""
+    api_key: str
+    model: str
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        self.api_key = kwargs.get('api_key')
+
+    @staticmethod
+    def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs):
+        """Build an instance from a model name and a credential dict.
+
+        ``max_tokens`` and ``temperature`` are forwarded to the constructor
+        only when present in ``model_kwargs`` with a value other than ``None``.
+        """
+        optional_params = {
+            name: model_kwargs[name]
+            for name in ('max_tokens', 'temperature')
+            if model_kwargs.get(name) is not None
+        }
+        return GeminiSpeechToText(
+            model=model_name,
+            api_key=model_credential.get('api_key'),
+            **optional_params,
+        )
+
+    def _new_client(self):
+        """Create a chat client for the configured model and API key."""
+        return ChatGoogleGenerativeAI(
+            model=self.model,
+            google_api_key=self.api_key
+        )
+
+    def check_auth(self):
+        """Send a short prompt; any error raised by the client reaches the caller."""
+        self._new_client().invoke('你好')
+
+    def speech_to_text(self, audio_file):
+        """Transcribe ``audio_file`` and return the content of the model's reply.
+
+        :param audio_file: object whose ``read()`` returns the audio data
+        :return: ``content`` of the response message
+        """
+        audio_data = audio_file.read()
+        # NOTE(review): the MIME type is fixed to audio/mp3 - confirm other upload formats work.
+        msg = HumanMessage(content=[
+            {'type': 'text', 'text': '把音频转成文字'},
+            {"type": "media", 'mime_type': 'audio/mp3', "data": audio_data}
+        ])
+        res = self._new_client().invoke([msg])
+        return res.content