73 lines
2.6 KiB
Python
73 lines
2.6 KiB
Python
"""Zero-dependency HTTP helpers shared by all sinks (stdlib urllib only).

``http_request`` is the single seam tests monkeypatch.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import mimetypes
|
|
import urllib.error
|
|
import urllib.request
|
|
from typing import Optional
|
|
|
|
# User-Agent header value that http_request attaches to every outgoing request.
USER_AGENT = "MinerU-Skill-sink/1.0"
|
|
|
|
|
|
def http_request(method, url, *, headers=None, data=None, timeout=60):
    """Perform one HTTP request. Returns ``(status_code, body_bytes)``.

    Responses that ``urlopen`` reports as ``urllib.error.HTTPError`` are
    returned as ``(exc.code, body)`` instead of being raised. Any other
    exception raised by ``urlopen`` (for example ``urllib.error.URLError``)
    is not caught here and propagates to the caller.

    Args:
        method: HTTP method name passed to ``urllib.request.Request``.
        url: Target URL.
        headers: Optional mapping of request headers.
        data: Optional request body passed to ``urllib.request.Request``.
        timeout: Timeout passed to ``urlopen``.
    """
    req = urllib.request.Request(url, data=data, method=method, headers=headers or {})
    # Added after construction, so this value wins over a User-Agent in ``headers``.
    req.add_header("User-Agent", USER_AGENT)
    try:
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            return resp.getcode(), resp.read()
    except urllib.error.HTTPError as exc:
        if not hasattr(exc, "read"):
            return exc.code, b""
        try:
            return exc.code, exc.read()
        finally:
            # HTTPError wraps the still-open error response; close it explicitly
            # so the connection is released now rather than at garbage collection.
            exc.close()
|
|
|
|
|
|
def request_json(method, url, *, headers=None, payload=None, timeout=60):
    """Send a request with an optional JSON payload and decode the JSON reply.

    When ``payload`` is not ``None`` it is serialised as UTF-8 JSON and a
    ``Content-Type: application/json`` entry is added unless ``headers``
    already has a ``Content-Type`` key. The caller's ``headers`` mapping is
    copied, never mutated.

    Returns ``(status_code, decoded_json)``. An empty response body, or one
    that is not valid UTF-8 JSON, yields ``{}`` in place of the decoded value.
    """
    request_headers = dict(headers or {})
    encoded = None
    if payload is not None:
        request_headers.setdefault("Content-Type", "application/json")
        encoded = json.dumps(payload, ensure_ascii=False).encode("utf-8")

    status, raw = http_request(
        method, url, headers=request_headers, data=encoded, timeout=timeout
    )
    if not raw:
        return status, {}
    try:
        return status, json.loads(raw.decode("utf-8"))
    except ValueError:
        # JSONDecodeError and UnicodeDecodeError are both ValueError subclasses.
        return status, {}
|
|
|
|
|
|
def _escape_disposition_param(value):
    """Escape a value placed inside a quoted ``Content-Disposition`` parameter.

    A double quote would terminate the parameter early and CR/LF would start
    a new header line, so those three characters are percent-escaped.
    """
    return (
        str(value)
        .replace('"', "%22")
        .replace("\r", "%0D")
        .replace("\n", "%0A")
    )


def encode_multipart(fields=None, files=None):
    """Build a ``multipart/form-data`` body with stdlib only.

    ``fields``: dict of str -> str. ``files``: list of (field_name, filename, bytes).
    Field values are converted with ``str()`` and encoded as UTF-8; file
    content is inserted unchanged. Each file part's ``Content-Type`` is
    guessed from its filename, falling back to ``application/octet-stream``.
    Field names and filenames have ``"``, CR and LF percent-escaped so they
    cannot break out of the quoted header parameter.

    Returns ``(content_type, body_bytes)``.

    NOTE: the boundary is a fixed string; content that happens to contain it
    is not detected.
    """
    boundary = "----MinerUSinkBoundary7MA4YWxkTrZu0gW"
    delimiter = b"--" + boundary.encode()
    parts = []

    for name, value in (fields or {}).items():
        disposition = (
            f'Content-Disposition: form-data; name="{_escape_disposition_param(name)}"'
        )
        parts += [delimiter, disposition.encode(), b"", str(value).encode("utf-8")]

    for field_name, filename, content in files or []:
        # Guess from the raw filename; escaping only affects the header text.
        ctype = mimetypes.guess_type(filename)[0] or "application/octet-stream"
        disposition = (
            "Content-Disposition: form-data; "
            f'name="{_escape_disposition_param(field_name)}"; '
            f'filename="{_escape_disposition_param(filename)}"'
        )
        parts += [
            delimiter,
            disposition.encode(),
            f"Content-Type: {ctype}".encode(),
            b"",
            content,
        ]

    # Closing delimiter, then an empty element so the body ends with CRLF.
    parts += [delimiter + b"--", b""]
    return f"multipart/form-data; boundary={boundary}", b"\r\n".join(parts)
|