import json import os import re from functools import lru_cache from html.parser import HTMLParser from pathlib import Path from urllib.error import HTTPError, URLError from urllib.request import Request, urlopen API_URL = "https://api.github.com/markdown" API_VERSION = "2022-11-28" TEMPLATE_PATH = Path(__file__).resolve().parent.parent / "template.html" class FirstHeadingParser(HTMLParser): def __init__(self) -> None: super().__init__() self._capture = False self._done = False self._parts: list[str] = [] def handle_starttag(self, tag: str, attrs) -> None: if self._done: return if tag in {"h1", "h2", "h3", "h4", "h5", "h6"}: self._capture = True def handle_endtag(self, tag: str) -> None: if self._capture and tag in {"h1", "h2", "h3", "h4", "h5", "h6"}: self._capture = False self._done = True def handle_data(self, data: str) -> None: if self._capture and not self._done: self._parts.append(data) def title(self) -> str: return "".join(self._parts).strip() def render_markdown(markdown_text: str) -> str: payload = json.dumps({"text": markdown_text}).encode("utf-8") headers = { "Accept": "text/html", "Content-Type": "application/json", "User-Agent": "md-to-html-service", "X-GitHub-Api-Version": API_VERSION, } github_token = os.getenv("GITHUB_TOKEN") if github_token: headers["Authorization"] = f"Bearer {github_token}" request = Request(API_URL, data=payload, headers=headers, method="POST") try: with urlopen(request, timeout=30) as response: return response.read().decode("utf-8") except HTTPError as exc: error_body = exc.read().decode("utf-8", errors="replace") raise RuntimeError( f"GitHub API error: {exc.code} {exc.reason}\n{error_body}" ) from exc except URLError as exc: raise RuntimeError(f"Failed to reach GitHub API: {exc.reason}") from exc def extract_title(html_text: str, fallback: str) -> str: parser = FirstHeadingParser() parser.feed(html_text) return parser.title() or fallback def apply_template(template_text: str, html_text: str, title: str) -> str: updated = re.sub( r"