fix: 同步web站点内容编码错误,导致乱码
This commit is contained in:
parent
a01d5beb59
commit
cf003aa2d2
@ -6,6 +6,7 @@ from functools import reduce
|
||||
from typing import List, Set
|
||||
from urllib.parse import urljoin, urlparse, ParseResult, urlsplit
|
||||
|
||||
import chardet
|
||||
import html2text as ht
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
@ -121,7 +122,7 @@ class Fork:
|
||||
|
||||
@staticmethod
def get_beautiful_soup(response):
    """Decode an HTTP response body and parse it with BeautifulSoup.

    The charset is chosen in this order:

    1. ``response.encoding`` when it is set and is not ``'ISO-8859-1'``.
       NOTE(review): ``'ISO-8859-1'`` is presumably the default that
       ``requests`` reports for ``text/*`` responses without an explicit
       charset, so it is treated as "not declared" -- confirm.
    2. ``response.apparent_encoding`` (detected from the body bytes).
    3. ``'utf-8'`` when detection yields nothing.

    :param response: object exposing ``encoding``, ``apparent_encoding``
        and ``content`` (bytes); presumably a ``requests.Response``.
    :return: ``BeautifulSoup`` tree built with the ``"html.parser"`` backend.
    """
    declared = response.encoding
    if declared and declared != 'ISO-8859-1':
        encoding = declared
    else:
        # apparent_encoding may be None (e.g. nothing to detect from);
        # decoding with None would raise TypeError, so fall back to UTF-8.
        encoding = response.apparent_encoding or 'utf-8'

    try:
        html_content = response.content.decode(encoding)
    except (LookupError, UnicodeDecodeError):
        # Unknown codec name or a wrong declared/detected charset: keep going
        # with a lossy UTF-8 decode instead of failing on the whole page.
        html_content = response.content.decode('utf-8', errors='replace')

    return BeautifulSoup(html_content, "html.parser")
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user