fix: 同步web站点内容编码错误,导致乱码
This commit is contained in:
parent
a01d5beb59
commit
cf003aa2d2
@@ -6,6 +6,7 @@ from functools import reduce
|
|||||||
from typing import List, Set
|
from typing import List, Set
|
||||||
from urllib.parse import urljoin, urlparse, ParseResult, urlsplit
|
from urllib.parse import urljoin, urlparse, ParseResult, urlsplit
|
||||||
|
|
||||||
|
import chardet
|
||||||
import html2text as ht
|
import html2text as ht
|
||||||
import requests
|
import requests
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
@@ -121,7 +122,7 @@ class Fork:
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_beautiful_soup(response):
|
def get_beautiful_soup(response):
|
||||||
encoding = response.apparent_encoding if response.apparent_encoding is not None else 'utf-8'
|
encoding = response.encoding if response.encoding and response.encoding != 'ISO-8859-1' is not None else response.apparent_encoding
|
||||||
html_content = response.content.decode(encoding)
|
html_content = response.content.decode(encoding)
|
||||||
return BeautifulSoup(html_content, "html.parser")
|
return BeautifulSoup(html_content, "html.parser")
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user