#!/usr/bin/env python3
"""字符合规检查器"""
import re, os

# Work from this script's own directory so the relative file names used below
# resolve regardless of where the script is launched from.
_script_path = os.path.abspath(__file__)
os.chdir(os.path.dirname(_script_path))

def strip_meta(body: str) -> str:
    """Return *body* without character-count annotation lines, trimmed.

    A line is dropped when, ignoring its surrounding whitespace, it either
    starts with ``"> 字"`` (which also covers ``"> 字符数"``) or matches
    ``>`` + optional whitespace + digits + optional whitespace + ``字``.
    Every other line is kept exactly as written; the re-joined text has its
    leading and trailing whitespace stripped.

    Only these annotation lines are removed; separators and markdown markers
    are left untouched.
    """
    def is_count_note(raw_line: str) -> bool:
        text = raw_line.strip()
        if text.startswith(("> 字", "> 字符数")):
            return True
        return re.match(r"^>\s*\d+\s*字", text) is not None

    kept = [raw_line for raw_line in body.split("\n") if not is_count_note(raw_line)]
    return "\n".join(kept).strip()

# ========== LinkedIn ==========
LINKEDIN_LIMIT = 3000  # character ceiling applied to every post below

with open("week1_linkedin.md", encoding="utf-8") as f:
    li = f.read()

# Each post runs from its "## 📌 Post N ..." heading to the next such heading
# (or EOF). Headers and bodies are both derived from the same anchored matches,
# so they stay paired even when the file starts with a heading, ends on one,
# or contains a deeper "### 📌 Post" line. Anything before the first heading
# (the file title section) is ignored.
li_marks = list(re.finditer(r"^## 📌 Post \d+[^\n]*", li, flags=re.MULTILINE))
li_headers = [m.group(0) for m in li_marks]
li_ends = [m.start() for m in li_marks[1:]] + [len(li)]
li_posts = [li[m.end():end] for m, end in zip(li_marks, li_ends)]

print("=" * 60)
print("LinkedIn 5 篇字符数检查(限制 3000)")
print("=" * 60)
li_results = []
for i, (h, body) in enumerate(zip(li_headers, li_posts), 1):
    # Hashtag lines are kept: they are published as part of the post.
    # Character-count annotation lines are notes to the author, not post text.
    body_clean = strip_meta(body)
    # Drop the "**【支柱:...】**" pillar tag, which is editorial metadata.
    body_clean = re.sub(r"\*\*【支柱:.*?】\*\*", "", body_clean)
    # Drop every "---" rule line, including one at the very start or end of
    # the body and consecutive ones (a "\n---\n" pattern misses those).
    body_clean = re.sub(r"^-{3,}[ \t]*$\n?", "", body_clean, flags=re.MULTILINE)
    body_clean = body_clean.strip()
    # Markdown bold markers are not visible on the platform, so do not count them.
    visible = body_clean.replace("**", "")
    # NOTE(review): len() counts Unicode code points; confirm this matches how
    # LinkedIn counts emoji against its limit.
    n = len(visible)
    ok = n <= LINKEDIN_LIMIT
    status = "✅ 通过" if ok else "❌ 超标"
    print(f"Post {i}: {n} 字符  {status}  ({h.strip()[:40]})")
    li_results.append((i, h.strip(), n, ok))

# ========== Twitter ==========
TWEET_LIMIT = 280  # character ceiling applied to every tweet below

with open("week1_twitter.md", encoding="utf-8") as f:
    tw = f.read()

print()
print("=" * 60)
print("Twitter 每条推文字符检查(限制 280)")
print("=" * 60)

# A tweet starts at a "### 🧵 n/m" or "### 🗓️ Day n" heading and runs to the
# next such heading (or EOF). Headings and blocks come from the same anchored
# matches, so they cannot drift apart when the file starts with a heading,
# ends on one, or contains a deeper "#### ..." line with the same text.
tw_marks = list(
    re.finditer(r"^### (?:🧵 \d+/\d+|🗓️ Day \d+)[^\n]*", tw, flags=re.MULTILINE)
)
tw_heads = [m.group(0) for m in tw_marks]
tw_ends = [m.start() for m in tw_marks[1:]] + [len(tw)]
tw_blocks = [tw[m.end():end] for m, end in zip(tw_marks, tw_ends)]

tw_results = []
for head, block in zip(tw_heads, tw_blocks):
    # Tweet text = the block minus blockquote lines (count annotations),
    # "---" rules and blank lines, cut off at the next markdown heading.
    # Blank lines are dropped, so paragraph breaks are not counted.
    lines = []
    for ln in block.split("\n"):
        s = ln.strip()
        if s.startswith("##"):
            break
        if not s or s.startswith(("---", "> ")):
            continue
        lines.append(ln)
    body = "\n".join(lines).strip()
    # Markdown bold markers are not part of the posted text.
    visible = body.replace("**", "")
    # NOTE(review): X/Twitter applies a weighted count (CJK characters and
    # emoji weigh 2, every URL counts as 23), while len() counts code points;
    # confirm whether the weighted count is wanted here.
    n = len(visible)
    ok = n <= TWEET_LIMIT
    status = "✅ 通过" if ok else "❌ 超标"
    print(f"{head.strip():50s}  {n:>4} 字符  {status}")
    tw_results.append((head.strip(), n, ok))

# Write a machine-readable summary of both checks next to the script.
import json

# li_results rows are (index, heading, char count, passed);
# tw_results rows are (heading, char count, passed).
linkedin_rows = [
    {"idx": idx, "title": title, "chars": chars, "pass": passed}
    for idx, title, chars, passed in li_results
]
twitter_rows = [
    {"title": title, "chars": chars, "pass": passed}
    for title, chars, passed in tw_results
]
summary = {"linkedin": linkedin_rows, "twitter": twitter_rows}

# ensure_ascii=False keeps the non-ASCII headings readable in the output file.
with open("_check_result.json", "w", encoding="utf-8") as f:
    json.dump(summary, f, ensure_ascii=False, indent=2)

print()
print("✅ 详细结果已写入 _check_result.json")
