From 15b9484af6070be928773f509c34413042f150b1 Mon Sep 17 00:00:00 2001 From: Brian Yang Date: Tue, 14 May 2024 10:06:21 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E5=A2=9E=E5=8A=A0Gemini=E5=A4=A7?= =?UTF-8?q?=E6=A8=A1=E5=9E=8B=E6=94=AF=E6=8C=81=20(#439)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../constants/model_provider_constants.py | 2 + .../azure_model_provider.py | 40 -------- .../model/azure_chat_model.py | 14 ++- .../impl/gemini_model_provider/__init__.py | 8 ++ .../gemini_model_provider.py | 99 +++++++++++++++++++ .../icon/gemini_icon_svg | 10 ++ .../model/gemini_chat_model.py | 30 ++++++ pyproject.toml | 1 + 8 files changed, 160 insertions(+), 44 deletions(-) create mode 100644 apps/setting/models_provider/impl/gemini_model_provider/__init__.py create mode 100644 apps/setting/models_provider/impl/gemini_model_provider/gemini_model_provider.py create mode 100644 apps/setting/models_provider/impl/gemini_model_provider/icon/gemini_icon_svg create mode 100644 apps/setting/models_provider/impl/gemini_model_provider/model/gemini_chat_model.py diff --git a/apps/setting/models_provider/constants/model_provider_constants.py b/apps/setting/models_provider/constants/model_provider_constants.py index 1e587bba..0a7565f3 100644 --- a/apps/setting/models_provider/constants/model_provider_constants.py +++ b/apps/setting/models_provider/constants/model_provider_constants.py @@ -17,6 +17,7 @@ from setting.models_provider.impl.kimi_model_provider.kimi_model_provider import from setting.models_provider.impl.xf_model_provider.xf_model_provider import XunFeiModelProvider from setting.models_provider.impl.zhipu_model_provider.zhipu_model_provider import ZhiPuModelProvider from setting.models_provider.impl.deepseek_model_provider.deepseek_model_provider import DeepSeekModelProvider +from setting.models_provider.impl.gemini_model_provider.gemini_model_provider import GeminiModelProvider class ModelProvideConstants(Enum): @@ -29,3 +30,4 @@ class ModelProvideConstants(Enum): model_zhipu_provider = ZhiPuModelProvider() model_xf_provider = XunFeiModelProvider() model_deepseek_provider = DeepSeekModelProvider() + model_gemini_provider = GeminiModelProvider() diff --git a/apps/setting/models_provider/impl/azure_model_provider/azure_model_provider.py b/apps/setting/models_provider/impl/azure_model_provider/azure_model_provider.py index 5731d7e3..3164dd8e 100644 --- a/apps/setting/models_provider/impl/azure_model_provider/azure_model_provider.py +++ b/apps/setting/models_provider/impl/azure_model_provider/azure_model_provider.py @@ -21,43 +21,6 @@ from setting.models_provider.base_model_provider import IModelProvider, ModelPro from setting.models_provider.impl.azure_model_provider.model.azure_chat_model import AzureChatModel from smartdoc.conf import PROJECT_DIR -""" -class AzureLLMModelCredential(BaseForm, BaseModelCredential): - - def is_valid(self, model_type: str, model_name, model_credential: Dict[str, object], raise_exception=False): - model_type_list = AzureModelProvider().get_model_type_list() - if not any(list(filter(lambda mt: mt.get('value') == model_type, model_type_list))): - raise AppApiException(ValidCode.valid_error.value, f'{model_type} 模型类型不支持') - - for key in ['api_base', 'api_key', 'deployment_name']: - if key not in model_credential: - if raise_exception: - raise AppApiException(ValidCode.valid_error.value, f'{key} 字段为必填字段') - else: - return False - try: - model = AzureModelProvider().get_model(model_type, model_name, model_credential) - model.invoke([HumanMessage(content='你好')]) - except Exception as e: - if isinstance(e, AppApiException): - raise e - if raise_exception: - raise AppApiException(ValidCode.valid_error.value, '校验失败,请检查参数是否正确') - else: - return False - - return True - - def encryption_dict(self, model: Dict[str, object]): - return {**model, 'api_key': super().encryption(model.get('api_key', ''))} - - api_base = forms.TextInputField('API 版本 (api_version)', required=True) - - api_key = forms.PasswordInputField("API Key(API 密钥)", required=True) - - deployment_name = forms.TextInputField("部署名(deployment_name)", required=True) -""" - class DefaultAzureLLMModelCredential(BaseForm, BaseModelCredential): @@ -97,8 +60,6 @@ class DefaultAzureLLMModelCredential(BaseForm, BaseModelCredential): deployment_name = forms.TextInputField("部署名 (deployment_name)", required=True) -# azure_llm_model_credential: AzureLLMModelCredential = AzureLLMModelCredential() - base_azure_llm_model_credential = DefaultAzureLLMModelCredential() model_dict = { @@ -114,7 +75,6 @@ class AzureModelProvider(IModelProvider): return 3 def get_model(self, model_type, model_name, model_credential: Dict[str, object], **model_kwargs) -> AzureChatModel: - model_info: ModelInfo = model_dict.get(model_name) azure_chat_open_ai = AzureChatModel( azure_endpoint=model_credential.get('api_base'), openai_api_version=model_credential.get('api_version', '2024-02-15-preview'), diff --git a/apps/setting/models_provider/impl/azure_model_provider/model/azure_chat_model.py b/apps/setting/models_provider/impl/azure_model_provider/model/azure_chat_model.py index f11249de..6388dbde 100644 --- a/apps/setting/models_provider/impl/azure_model_provider/model/azure_chat_model.py +++ b/apps/setting/models_provider/impl/azure_model_provider/model/azure_chat_model.py @@ -16,9 +16,15 @@ from common.config.tokenizer_manage_config import TokenizerManage class AzureChatModel(AzureChatOpenAI): def get_num_tokens_from_messages(self, messages: List[BaseMessage]) -> int: - tokenizer = TokenizerManage.get_tokenizer() - return sum([len(tokenizer.encode(get_buffer_string([m]))) for m in messages]) + try: + return super().get_num_tokens_from_messages(messages) + except Exception as e: + tokenizer = TokenizerManage.get_tokenizer() + return sum([len(tokenizer.encode(get_buffer_string([m]))) for m in messages]) def get_num_tokens(self, text: str) -> int: - tokenizer = TokenizerManage.get_tokenizer() - return len(tokenizer.encode(text)) + try: + return super().get_num_tokens(text) + except Exception as e: + tokenizer = TokenizerManage.get_tokenizer() + return len(tokenizer.encode(text)) diff --git a/apps/setting/models_provider/impl/gemini_model_provider/__init__.py b/apps/setting/models_provider/impl/gemini_model_provider/__init__.py new file mode 100644 index 00000000..43fd3dd0 --- /dev/null +++ b/apps/setting/models_provider/impl/gemini_model_provider/__init__.py @@ -0,0 +1,8 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- +""" +@Project :MaxKB +@File :__init__.py.py +@Author :Brian Yang +@Date :5/13/24 7:40 AM +""" diff --git a/apps/setting/models_provider/impl/gemini_model_provider/gemini_model_provider.py b/apps/setting/models_provider/impl/gemini_model_provider/gemini_model_provider.py new file mode 100644 index 00000000..5ddddf78 --- /dev/null +++ b/apps/setting/models_provider/impl/gemini_model_provider/gemini_model_provider.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- +""" +@Project :MaxKB +@File :gemini_model_provider.py +@Author :Brian Yang +@Date :5/13/24 7:47 AM +""" +import os +from typing import Dict + +from langchain.schema import HumanMessage + +from common import forms +from common.exception.app_exception import AppApiException +from common.forms import BaseForm +from common.util.file_util import get_file_content +from setting.models_provider.base_model_provider import IModelProvider, ModelProvideInfo, BaseModelCredential, \ + ModelInfo, ModelTypeConst, ValidCode +from setting.models_provider.impl.gemini_model_provider.model.gemini_chat_model import GeminiChatModel +from smartdoc.conf import PROJECT_DIR + + +class GeminiLLMModelCredential(BaseForm, BaseModelCredential): + + def is_valid(self, model_type: str, model_name, model_credential: Dict[str, object], raise_exception=False): + model_type_list = GeminiModelProvider().get_model_type_list() + if not any(list(filter(lambda mt: mt.get('value') == model_type, model_type_list))): + raise AppApiException(ValidCode.valid_error.value, f'{model_type} 模型类型不支持') + + for key in ['api_key']: + if key not in model_credential: + if raise_exception: + raise AppApiException(ValidCode.valid_error.value, f'{key} 字段为必填字段') + else: + return False + try: + model = GeminiModelProvider().get_model(model_type, model_name, model_credential) + model.invoke([HumanMessage(content='你好')]) + except Exception as e: + if isinstance(e, AppApiException): + raise e + if raise_exception: + raise AppApiException(ValidCode.valid_error.value, f'校验失败,请检查参数是否正确: {str(e)}') + else: + return False + return True + + def encryption_dict(self, model: Dict[str, object]): + return {**model, 'api_key': super().encryption(model.get('api_key', ''))} + + api_key = forms.PasswordInputField('API Key', required=True) + + +gemini_llm_model_credential = GeminiLLMModelCredential() + +model_dict = { + 'gemini-1.0-pro': ModelInfo('gemini-1.0-pro', '最新的Gemini 1.0 Pro模型,随Google更新而更新', + ModelTypeConst.LLM, + gemini_llm_model_credential, + ), + 'gemini-1.0-pro-vision': ModelInfo('gemini-1.0-pro-vision', '最新的Gemini 1.0 Pro Vision模型,随Google更新而更新', + ModelTypeConst.LLM, + gemini_llm_model_credential, + ), +} + + +class GeminiModelProvider(IModelProvider): + + def get_dialogue_number(self): + return 3 + + def get_model(self, model_type, model_name, model_credential: Dict[str, object], + **model_kwargs) -> GeminiChatModel: + gemini_chat = GeminiChatModel( + model=model_name, + google_api_key=model_credential.get('api_key') + ) + return gemini_chat + + def get_model_credential(self, model_type, model_name): + if model_name in model_dict: + return model_dict.get(model_name).model_credential + return gemini_llm_model_credential + + def get_model_provide_info(self): + return ModelProvideInfo(provider='model_gemini_provider', name='Gemini', icon=get_file_content( + os.path.join(PROJECT_DIR, "apps", "setting", 'models_provider', 'impl', 'gemini_model_provider', 'icon', + 'gemini_icon_svg'))) + + def get_model_list(self, model_type: str): + if model_type is None: + raise AppApiException(500, '模型类型不能为空') + return [model_dict.get(key).to_dict() for key in + list(filter(lambda key: model_dict.get(key).model_type == model_type, model_dict.keys()))] + + def get_model_type_list(self): + return [{'key': "大语言模型", 'value': "LLM"}] diff --git a/apps/setting/models_provider/impl/gemini_model_provider/icon/gemini_icon_svg b/apps/setting/models_provider/impl/gemini_model_provider/icon/gemini_icon_svg new file mode 100644 index 00000000..00c48a35 --- /dev/null +++ b/apps/setting/models_provider/impl/gemini_model_provider/icon/gemini_icon_svg @@ -0,0 +1,10 @@ + + + + + + + + + + diff --git a/apps/setting/models_provider/impl/gemini_model_provider/model/gemini_chat_model.py b/apps/setting/models_provider/impl/gemini_model_provider/model/gemini_chat_model.py new file mode 100644 index 00000000..7a972d9d --- /dev/null +++ b/apps/setting/models_provider/impl/gemini_model_provider/model/gemini_chat_model.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- +""" +@Project :MaxKB +@File :gemini_chat_model.py +@Author :Brian Yang +@Date :5/13/24 7:40 AM +""" +from typing import List + +from langchain_core.messages import BaseMessage, get_buffer_string +from langchain_google_genai import ChatGoogleGenerativeAI + +from common.config.tokenizer_manage_config import TokenizerManage + + +class GeminiChatModel(ChatGoogleGenerativeAI): + def get_num_tokens_from_messages(self, messages: List[BaseMessage]) -> int: + try: + return super().get_num_tokens_from_messages(messages) + except Exception as e: + tokenizer = TokenizerManage.get_tokenizer() + return sum([len(tokenizer.encode(get_buffer_string([m]))) for m in messages]) + + def get_num_tokens(self, text: str) -> int: + try: + return super().get_num_tokens(text) + except Exception as e: + tokenizer = TokenizerManage.get_tokenizer() + return len(tokenizer.encode(text)) diff --git a/pyproject.toml b/pyproject.toml index f70b250b..3eddbace 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,6 +37,7 @@ zhipuai = "^2.0.1" httpx = "^0.27.0" httpx-sse = "^0.4.0" websocket-client = "^1.7.0" +langchain-google-genai = "^1.0.3" [build-system] requires = ["poetry-core"]