fix: 修复上传文档,高级分段设置分段长度为10w字符,生成预览还是4096个字符一段 (#884)
This commit is contained in:
parent
485eeb6ac1
commit
d935e9a836
@ -30,9 +30,6 @@ class Command(BaseCommand):
|
|||||||
|
|
||||||
def handle(self, *args, **options):
|
def handle(self, *args, **options):
|
||||||
log_format = '%(h)s %(t)s %(L)ss "%(r)s" %(s)s %(b)s '
|
log_format = '%(h)s %(t)s %(L)ss "%(r)s" %(s)s %(b)s '
|
||||||
print(options.get('worker_connections'))
|
|
||||||
print(options.get('threads'))
|
|
||||||
print(options)
|
|
||||||
cmd = [
|
cmd = [
|
||||||
'gunicorn', 'smartdoc.wsgi:application',
|
'gunicorn', 'smartdoc.wsgi:application',
|
||||||
'-b', options.get('b') if options.get('b') is not None else '0.0.0.0:8080',
|
'-b', options.get('b') if options.get('b') is not None else '0.0.0.0:8080',
|
||||||
|
|||||||
@ -280,11 +280,11 @@ def filter_special_char(content: str):
|
|||||||
|
|
||||||
class SplitModel:
|
class SplitModel:
|
||||||
|
|
||||||
def __init__(self, content_level_pattern, with_filter=True, limit=4096):
|
def __init__(self, content_level_pattern, with_filter=True, limit=100000):
|
||||||
self.content_level_pattern = content_level_pattern
|
self.content_level_pattern = content_level_pattern
|
||||||
self.with_filter = with_filter
|
self.with_filter = with_filter
|
||||||
if limit is None or limit > 4096:
|
if limit is None or limit > 100000:
|
||||||
limit = 4096
|
limit = 100000
|
||||||
if limit < 50:
|
if limit < 50:
|
||||||
limit = 50
|
limit = 50
|
||||||
self.limit = limit
|
self.limit = limit
|
||||||
@ -375,7 +375,7 @@ default_split_pattern = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def get_split_model(filename: str, with_filter: bool = False, limit: int = 4096):
|
def get_split_model(filename: str, with_filter: bool = False, limit: int = 100000):
|
||||||
"""
|
"""
|
||||||
根据文件名称获取分段模型
|
根据文件名称获取分段模型
|
||||||
:param limit: 每段大小
|
:param limit: 每段大小
|
||||||
|
|||||||
@ -788,7 +788,7 @@ class DocumentSerializers(ApiMixin, serializers.Serializer):
|
|||||||
file_list = self.data.get("file")
|
file_list = self.data.get("file")
|
||||||
return list(
|
return list(
|
||||||
map(lambda f: file_to_paragraph(f, self.data.get("patterns", None), self.data.get("with_filter", None),
|
map(lambda f: file_to_paragraph(f, self.data.get("patterns", None), self.data.get("with_filter", None),
|
||||||
self.data.get("limit", None)), file_list))
|
self.data.get("limit", 4096)), file_list))
|
||||||
|
|
||||||
class SplitPattern(ApiMixin, serializers.Serializer):
|
class SplitPattern(ApiMixin, serializers.Serializer):
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user