替换embedding模型
This commit is contained in:
parent
1dd15185d6
commit
9b8c07a2e6
@ -39,7 +39,7 @@ RUN mkdir -p /app/public
|
|||||||
RUN mkdir -p /app/models
|
RUN mkdir -p /app/models
|
||||||
|
|
||||||
# 下载sentence-transformers模型到models目录
|
# 下载sentence-transformers模型到models目录
|
||||||
RUN python -c "from sentence_transformers import SentenceTransformer; model = SentenceTransformer('sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2'); model.save('/app/models/paraphrase-multilingual-MiniLM-L12-v2')"
|
RUN python -c "from sentence_transformers import SentenceTransformer; model = SentenceTransformer('TaylorAI/gte-tiny'); model.save('/app/models/gte-tiny')"
|
||||||
|
|
||||||
# 暴露端口
|
# 暴露端口
|
||||||
EXPOSE 8001
|
EXPOSE 8001
|
||||||
|
|||||||
@ -40,7 +40,7 @@ RUN mkdir -p /app/public
|
|||||||
RUN mkdir -p /app/models
|
RUN mkdir -p /app/models
|
||||||
|
|
||||||
# 从modelscope下载sentence-transformers模型到models目录
|
# 从modelscope下载sentence-transformers模型到models目录
|
||||||
#RUN python -c "from modelscope import snapshot_download; model_dir = snapshot_download('sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2'); import shutil; shutil.move(model_dir, '/app/models/paraphrase-multilingual-MiniLM-L12-v2')"
|
#RUN python -c "from modelscope import snapshot_download; model_dir = snapshot_download('TaylorAI/gte-tiny'); import shutil; shutil.move(model_dir, '/app/models/gte-tiny')"
|
||||||
|
|
||||||
# 复制应用代码
|
# 复制应用代码
|
||||||
COPY . .
|
COPY . .
|
||||||
|
|||||||
@ -25,9 +25,9 @@ logger = logging.getLogger(__name__)
|
|||||||
class GlobalModelManager:
|
class GlobalModelManager:
|
||||||
"""全局模型管理器"""
|
"""全局模型管理器"""
|
||||||
|
|
||||||
def __init__(self, model_name: str = 'sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2'):
|
def __init__(self, model_name: str = 'TaylorAI/gte-tiny'):
|
||||||
self.model_name = model_name
|
self.model_name = model_name
|
||||||
self.local_model_path = "./models/paraphrase-multilingual-MiniLM-L12-v2"
|
self.local_model_path = "./models/gte-tiny"
|
||||||
self._model: Optional[SentenceTransformer] = None
|
self._model: Optional[SentenceTransformer] = None
|
||||||
self._lock = asyncio.Lock()
|
self._lock = asyncio.Lock()
|
||||||
self._load_time = 0
|
self._load_time = 0
|
||||||
@ -64,8 +64,7 @@ class GlobalModelManager:
|
|||||||
None,
|
None,
|
||||||
lambda: SentenceTransformer(
|
lambda: SentenceTransformer(
|
||||||
model_path,
|
model_path,
|
||||||
device=self._device,
|
device=self._device
|
||||||
truncate_dim=128
|
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -126,6 +125,6 @@ def get_model_manager() -> GlobalModelManager:
|
|||||||
"""获取模型管理器实例"""
|
"""获取模型管理器实例"""
|
||||||
global _model_manager
|
global _model_manager
|
||||||
if _model_manager is None:
|
if _model_manager is None:
|
||||||
model_name = os.getenv("SENTENCE_TRANSFORMER_MODEL", "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
|
model_name = os.getenv("SENTENCE_TRANSFORMER_MODEL", "TaylorAI/gte-tiny")
|
||||||
_model_manager = GlobalModelManager(model_name)
|
_model_manager = GlobalModelManager(model_name)
|
||||||
return _model_manager
|
return _model_manager
|
||||||
|
|||||||
@ -198,7 +198,7 @@ def merge_embeddings_by_group(unique_id: str, group_name: str) -> Dict:
|
|||||||
dimensions = 0
|
dimensions = 0
|
||||||
chunking_strategy = 'unknown'
|
chunking_strategy = 'unknown'
|
||||||
chunking_params = {}
|
chunking_params = {}
|
||||||
model_path = 'sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2'
|
model_path = 'TaylorAI/gte-tiny'
|
||||||
|
|
||||||
for filename_stem, embedding_path in sorted(embedding_files):
|
for filename_stem, embedding_path in sorted(embedding_files):
|
||||||
try:
|
try:
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user