qwen_agent/skills_developing/z-card-image/scripts/render_article.py
2026-03-17 21:55:10 +08:00

267 lines
8.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
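"""
render_article.py — render text into a single article-3-4 card image.

The LLM is responsible for pagination and calls this script once per page.

Usage (single-page mode, recommended):
    python3 render_article.py \
        --title "Article title" \
        --text "Body text of this page (paragraphs separated by blank lines)" \
        --page-num 1 \
        --page-total 3 \
        --out /path/to/workspace/tmp/card_01.png \
        [--highlight "#22a854"] \
        [--bg "#e6f5ef"] \
        [--footer "公众号 · 早早集市"]

Usage (batch mode, kept for backward compatibility):
    python3 render_article.py \
        --title "Article title" \
        --text "Full text..." \
        --out-dir /path/to/output \
        [--chars-per-page 280]

In both modes the body may be read from a file with --text-file instead of
--text, and the header icon can be overridden with --icon.
"""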
import argparse, shutil, subprocess, sys, tempfile, re
from html import escape
from pathlib import Path
# Root of the skill: two levels above this script (the parent of scripts/).
SKILL_DIR = Path(__file__).parent.parent
# HTML template whose {{...}} placeholders are filled in by render_page().
TEMPLATE_PATH = SKILL_DIR / "assets" / "templates" / "article-3-4.html"
# Stylesheet path substituted for the {{MD_CSS_PATH}} placeholder.
MD_CSS_PATH = SKILL_DIR / "assets" / "styles" / "md.css"
# Directories holding the default icon/avatar images and the title font.
ICONS_DIR = SKILL_DIR / "assets" / "icons"
FONTS_DIR = SKILL_DIR / "assets" / "fonts"
# Browser candidates probed in order by find_chrome(): absolute macOS app
# bundle paths first, then bare command names.
CHROME_PATHS = [
    "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
    "/Applications/Chromium.app/Contents/MacOS/Chromium",
    "google-chrome",
    "chromium",
]
# Browser window size (width, height) passed to Chrome via --window-size.
W, H = 900, 1200
# Default value of the --chars-per-page option (batch mode).
CHARS_PER_PAGE = 280
def find_chrome():
    """Return the first usable Chrome/Chromium executable, or ``None``.

    Every entry of ``CHROME_PATHS`` is resolved with ``shutil.which``, which
    handles both absolute paths and bare command names and only accepts
    executable, non-directory files. A plain existence check is deliberately
    avoided: it would also match an unrelated file or directory that merely
    happens to be called ``chromium`` in the current working directory.

    Returns:
        The resolved executable path as a string, or ``None`` when no
        candidate is available.
    """
    for candidate in CHROME_PATHS:
        resolved = shutil.which(candidate)
        if resolved:
            return resolved
    return None
def split_at_sentence_boundary(text: str, limit: int) -> tuple:
    """Split ``text`` into a head of at most ``limit`` characters and the rest.

    The cut point errs on the short side. Only the second half of the first
    ``limit`` characters is searched, from the end backwards:

    1. the last sentence terminator (``。 ! ? …``, their full-width forms,
       or a newline);
    2. otherwise the last weak separator (comma, period, semicolon and the
       full-width comma/semicolon);
    3. otherwise a hard cut at 85% of ``limit``.

    Args:
        text: The text to split.
        limit: Maximum number of characters allowed in the head.

    Returns:
        A ``(taken, rest)`` tuple, both stripped of surrounding whitespace.
        When ``text`` already fits, it is returned unchanged with ``rest``
        equal to ``''``. Otherwise at least one character is always consumed,
        so callers that loop on ``rest`` are guaranteed to terminate.
    """
    if len(text) <= limit:
        return text, ''

    # \uff01 \uff1f are the full-width exclamation and question marks.
    strong_ends = set('。!?…\n\uff01\uff1f')
    # \uff0c \uff1b are the full-width comma and semicolon.
    weak_ends = set(',.;\uff0c\uff1b')

    # len(text) > limit here, so every index in the window is valid.
    # For limit <= 0 the range is empty and we fall through to the hard cut.
    window = range(limit - 1, max(limit // 2, 0) - 1, -1)

    def last_index_of(chars):
        """Return the right-most index in the window holding one of ``chars``, or -1."""
        return next((i for i in window if text[i] in chars), -1)

    cut = last_index_of(strong_ends)
    if cut == -1:
        cut = last_index_of(weak_ends)

    if cut != -1:
        candidate = cut + 1  # keep the separator with the head
    else:
        # No separator at all: cut conservatively at 85% of the limit, but
        # never at 0 -- an empty head would leave ``rest`` unchanged and make
        # the caller loop forever.
        candidate = max(1, int(limit * 0.85))
    return text[:candidate].strip(), text[candidate:].strip()
def split_text_into_pages(text: str, chars_per_page: int) -> list:
    """Paginate ``text``: paragraph boundaries first, sentence boundaries as fallback.

    Paragraphs are separated by one or more blank lines. Each page is filled
    to at most 90% of ``chars_per_page`` (a 10% safety buffer). A paragraph
    longer than that limit is cut with ``split_at_sentence_boundary`` and
    every cut-off piece is placed on a page of its own.

    Args:
        text: The full text to paginate.
        chars_per_page: Nominal number of characters per page.

    Returns:
        A list of pages, each page being a list of paragraph strings.
    """
    # Never let the limit drop below one character: with a limit of 0 no
    # paragraph could ever fit and the splitting loop below would not end.
    safe_limit = max(1, int(chars_per_page * 0.9))
    paragraphs = [p.strip() for p in re.split(r'\n{2,}', text.strip()) if p.strip()]

    pages = []
    current_chunks = []
    current_len = 0

    def flush_current():
        """Close the page being assembled, if it holds anything."""
        nonlocal current_chunks, current_len
        if current_chunks:
            pages.append(current_chunks)
            current_chunks = []
            current_len = 0

    for para in paragraphs:
        # Over-long paragraph: peel off sentence-sized pieces first.
        while len(para) > safe_limit:
            taken, rest = split_at_sentence_boundary(para, safe_limit)
            if not taken:
                # The helper consumed nothing; force progress with a hard cut
                # so this loop is guaranteed to terminate.
                taken, rest = para[:safe_limit].strip(), para[safe_limit:].strip()
            para = rest
            flush_current()
            pages.append([taken])
        if not para:
            continue
        # Start a new page when this paragraph would overflow the current one.
        if current_len + len(para) > safe_limit:
            flush_current()
        current_chunks.append(para)
        current_len += len(para)

    flush_current()
    return pages
def text_to_html(text: str) -> str:
    """Render ``text`` as Markdown and return the resulting HTML fragment.

    The ``fenced_code``, ``tables`` and ``nl2br`` extensions are enabled.
    Terminates the process via ``sys.exit`` when the third-party ``markdown``
    package cannot be imported.
    """
    try:
        from markdown import markdown as render_markdown
    except ImportError:
        sys.exit('需要安装 markdown 库pip install markdown')
    enabled_extensions = ['fenced_code', 'tables', 'nl2br']
    return render_markdown(text, extensions=enabled_extensions)
def md_to_html(text: str) -> str:
    """Convert Markdown source into an HTML fragment.

    Needs the third-party ``markdown`` package (``pip install markdown``);
    the process exits via ``sys.exit`` when importing it fails. Rendering
    uses the ``fenced_code``, ``tables`` and ``nl2br`` extensions.
    """
    try:
        import markdown
        html_fragment = markdown.markdown(
            text,
            extensions=['fenced_code', 'tables', 'nl2br'],
        )
    except ImportError:
        sys.exit('需要安装 markdown 库pip install markdown')
    return html_fragment
def render_page(chrome, tpl, out_path, title, content_html, page_label, bottom_tip,
                highlight, bg, footer, icon_path, avatar_path, font_path, md_css_path=''):
    """Fill the HTML template and screenshot it to ``out_path`` with headless Chrome.

    Args:
        chrome: Browser executable to launch.
        tpl: Template source containing ``{{NAME}}`` placeholders.
        out_path: Destination of the screenshot; printed on success.
        title, page_label, bottom_tip, footer: Plain text, HTML-escaped
            before insertion.
        content_html: Ready-made HTML inserted verbatim.
        highlight, bg: Inserted verbatim for ``{{HIGHLIGHT_COLOR}}`` and
            ``{{BG_COLOR}}``.
        icon_path, avatar_path, font_path: Inserted verbatim for the
            corresponding ``*_PATH`` placeholders.
        md_css_path: Inserted for ``{{MD_CSS_PATH}}``; empty string when falsy.

    Raises:
        SystemExit: If Chrome exits with a non-zero status.
    """
    replacements = {
        '{{MD_CSS_PATH}}': str(md_css_path) if md_css_path else '',
        '{{TITLE}}': escape(title),
        '{{CONTENT_HTML}}': content_html,
        '{{PAGE_LABEL}}': escape(page_label),
        '{{BOTTOM_TIP}}': escape(bottom_tip),
        '{{HIGHLIGHT_COLOR}}': highlight,
        '{{BG_COLOR}}': bg,
        '{{FOOTER_TEXT}}': escape(footer),
        '{{ICON_PATH}}': icon_path,
        '{{AVATAR_PATH}}': avatar_path,
        '{{FONT_PATH}}': font_path,
    }
    # Substitute all placeholders in ONE pass over the template. Chained
    # str.replace calls would rescan already-inserted values, so an article
    # that literally contains e.g. "{{FOOTER_TEXT}}" would get it replaced.
    placeholder_re = re.compile('|'.join(re.escape(key) for key in replacements))
    html = placeholder_re.sub(lambda m: replacements[m.group(0)], tpl)

    with tempfile.NamedTemporaryFile(suffix='.html', delete=False, mode='w', encoding='utf-8') as f:
        f.write(html)
        tmp_html = Path(f.name).resolve()

    try:
        cmd = [
            chrome, '--headless', '--disable-gpu', '--no-sandbox',
            f'--screenshot={out_path}',
            f'--window-size={W},{H}',
            # as_uri() yields a well-formed, percent-encoded file:// URL.
            tmp_html.as_uri(),
        ]
        result = subprocess.run(cmd, capture_output=True)
    finally:
        # Remove the temp file even when launching the browser itself fails.
        tmp_html.unlink(missing_ok=True)

    if result.returncode != 0:
        # Browser output is not guaranteed to be valid UTF-8.
        stderr_text = result.stderr.decode(errors='replace')
        sys.exit(f'Chrome failed:\n{stderr_text}')
    print(f'{out_path}')
def main():
    """Command-line entry point: render one card or a paginated batch of cards.

    Single-page mode is selected when both ``--page-num`` (> 0) and ``--out``
    are given: the whole input is rendered through ``text_to_html`` onto one
    card. Otherwise ``--out-dir`` is required (batch mode): the text is
    paginated with ``split_text_into_pages`` and written as ``card_NN.png``.

    Exits via ``sys.exit`` when no text is supplied, no browser is found, or
    neither output option is usable.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument('--title', required=True)
    ap.add_argument('--text', default='')
    ap.add_argument('--text-file', default='')
    # Single-page mode
    ap.add_argument('--page-num', type=int, default=0)
    ap.add_argument('--page-total', type=int, default=0)
    ap.add_argument('--out', default='')
    # Batch mode
    ap.add_argument('--out-dir', default='')
    ap.add_argument('--chars-per-page', type=int, default=CHARS_PER_PAGE)
    # Still accepted so existing invocations keep working; not consulted below.
    ap.add_argument('--md', action='store_true', help='输入为 Markdown自动转 HTML 渲染')
    # Styling
    ap.add_argument('--highlight', default='#3d6b4f')
    ap.add_argument('--bg', default='#f9fcfa')
    ap.add_argument('--footer', default='公众号 · 早早集市')
    ap.add_argument('--icon', default='')
    args = ap.parse_args()

    # --text-file takes precedence over --text.
    if args.text_file:
        text = Path(args.text_file).read_text(encoding='utf-8')
    elif args.text:
        text = args.text
    else:
        sys.exit('需要 --text 或 --text-file')

    chrome = find_chrome()
    if not chrome:
        sys.exit('Chrome/Chromium not found')

    tpl = TEMPLATE_PATH.read_text(encoding='utf-8')
    # Arguments shared by every render_page call in both modes.
    common = dict(
        chrome=chrome,
        tpl=tpl,
        title=args.title,
        highlight=args.highlight,
        bg=args.bg,
        footer=args.footer,
        icon_path=args.icon or str(ICONS_DIR / 'zzclub-logo-gray.svg'),
        avatar_path=str(ICONS_DIR / 'avatar_jinx_cartoon.jpg'),
        font_path=str(FONTS_DIR / 'AlimamaShuHeiTi-Bold.ttf'),
        md_css_path=str(MD_CSS_PATH),
    )

    # Single-page mode
    if args.page_num > 0 and args.out:
        page_total = args.page_total if args.page_total > 0 else args.page_num
        out_path = Path(args.out)
        # Create the target directory up front, as batch mode does.
        out_path.parent.mkdir(parents=True, exist_ok=True)
        render_page(
            out_path=out_path,
            content_html=text_to_html(text),
            page_label=f'{args.page_num} / {page_total}',
            bottom_tip='· 全文完' if args.page_num == page_total else '← 滑动查看更多',
            **common,
        )
        return

    # Batch mode
    if not args.out_dir:
        sys.exit('需要 --out (单页模式) 或 --out-dir (批量模式)')
    pages = split_text_into_pages(text, args.chars_per_page)
    total = len(pages)
    print(f'{total}')
    out_dir = Path(args.out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)
    for i, chunks in enumerate(pages, 1):
        parts = []
        for chunk in chunks:
            # Escape first, then re-enable the only supported markup: **bold**.
            c = escape(chunk)
            c = re.sub(r'\*\*(.+?)\*\*', r'<strong>\1</strong>', c)
            parts.append(f'<p>{c}</p>')
        render_page(
            out_path=out_dir / f'card_{i:02d}.png',
            content_html='\n'.join(parts),
            page_label=f'{i} / {total}',
            bottom_tip='· 全文完' if i == total else '← 滑动查看更多',
            **common,
        )
    print(f'\n🎉 完成,共输出 {total} 张图到 {out_dir}')


if __name__ == '__main__':
    main()