fix: 同步知识库,无法获取内容
This commit is contained in:
parent
8450b3598c
commit
22c319a2bf
@ -7,7 +7,7 @@ from typing import List, Set
|
|||||||
import requests
|
import requests
|
||||||
import html2text as ht
|
import html2text as ht
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from urllib.parse import urljoin, urlparse, ParseResult
|
from urllib.parse import urljoin, urlparse, ParseResult, urlsplit, parse_qs
|
||||||
|
|
||||||
requests.packages.urllib3.disable_warnings()
|
requests.packages.urllib3.disable_warnings()
|
||||||
|
|
||||||
@ -60,7 +60,11 @@ class Fork:
|
|||||||
|
|
||||||
def __init__(self, base_fork_url: str, selector_list: List[str]):
|
def __init__(self, base_fork_url: str, selector_list: List[str]):
|
||||||
self.base_fork_url = urljoin(base_fork_url if base_fork_url.endswith("/") else base_fork_url + '/', '.')
|
self.base_fork_url = urljoin(base_fork_url if base_fork_url.endswith("/") else base_fork_url + '/', '.')
|
||||||
|
parsed = urlsplit(base_fork_url)
|
||||||
|
query = parsed.query
|
||||||
self.base_fork_url = self.base_fork_url[:-1]
|
self.base_fork_url = self.base_fork_url[:-1]
|
||||||
|
if query is not None and len(query) > 0:
|
||||||
|
self.base_fork_url = self.base_fork_url + '?' + query
|
||||||
self.selector_list = [selector for selector in selector_list if selector is not None and len(selector) > 0]
|
self.selector_list = [selector for selector in selector_list if selector is not None and len(selector) > 0]
|
||||||
self.urlparse = urlparse(self.base_fork_url)
|
self.urlparse = urlparse(self.base_fork_url)
|
||||||
self.base_url = ParseResult(scheme=self.urlparse.scheme, netloc=self.urlparse.netloc, path='', params='',
|
self.base_url = ParseResult(scheme=self.urlparse.scheme, netloc=self.urlparse.netloc, path='', params='',
|
||||||
|
|||||||
@ -26,7 +26,7 @@ tiktoken = "^0.5.1"
|
|||||||
qianfan = "^0.1.1"
|
qianfan = "^0.1.1"
|
||||||
pycryptodome = "^3.19.0"
|
pycryptodome = "^3.19.0"
|
||||||
beautifulsoup4 = "^4.12.2"
|
beautifulsoup4 = "^4.12.2"
|
||||||
html2text = "^2020.1.16"
|
html2text = "^2024.2.26"
|
||||||
|
|
||||||
|
|
||||||
[build-system]
|
[build-system]
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user