From: Andreas Glashauser Date: Mon, 16 Mar 2026 14:47:41 +0000 (+0100) Subject: initial commit X-Git-Url: https://git.andreasglashauser.com/sitemap.xml?a=commitdiff_plain;h=HEAD;p=markdown-to-anki.git initial commit --- 09a378f1b0f10e652330d3862d39279929559679 diff --git a/markdown_to_anki.py b/markdown_to_anki.py new file mode 100644 index 0000000..e9fecad --- /dev/null +++ b/markdown_to_anki.py @@ -0,0 +1,549 @@ +#!/usr/bin/env python3 + +import argparse +import hashlib +import html +import json +import random +import re +import sqlite3 +import tempfile +import sys +import time +import zipfile +from dataclasses import dataclass +from pathlib import Path + +SCHEMA = """ +CREATE TABLE col ( + id integer PRIMARY KEY, + crt integer NOT NULL, + mod integer NOT NULL, + scm integer NOT NULL, + ver integer NOT NULL, + dty integer NOT NULL, + usn integer NOT NULL, + ls integer NOT NULL, + conf text NOT NULL, + models text NOT NULL, + decks text NOT NULL, + dconf text NOT NULL, + tags text NOT NULL +); +CREATE TABLE notes ( + id integer PRIMARY KEY, + guid text NOT NULL, + mid integer NOT NULL, + mod integer NOT NULL, + usn integer NOT NULL, + tags text NOT NULL, + flds text NOT NULL, + sfld integer NOT NULL, + csum integer NOT NULL, + flags integer NOT NULL, + data text NOT NULL +); +CREATE TABLE cards ( + id integer PRIMARY KEY, + nid integer NOT NULL, + did integer NOT NULL, + ord integer NOT NULL, + mod integer NOT NULL, + usn integer NOT NULL, + type integer NOT NULL, + queue integer NOT NULL, + due integer NOT NULL, + ivl integer NOT NULL, + factor integer NOT NULL, + reps integer NOT NULL, + lapses integer NOT NULL, + left integer NOT NULL, + odue integer NOT NULL, + odid integer NOT NULL, + flags integer NOT NULL, + data text NOT NULL +); +CREATE TABLE revlog ( + id integer PRIMARY KEY, + cid integer NOT NULL, + usn integer NOT NULL, + ease integer NOT NULL, + ivl integer NOT NULL, + lastIvl integer NOT NULL, + factor integer NOT NULL, + time integer NOT NULL, + type integer NOT NULL +); +CREATE TABLE graves ( + usn integer NOT NULL, + oid integer NOT NULL, + type integer NOT NULL +); +CREATE INDEX ix_notes_usn ON notes (usn); +CREATE INDEX ix_cards_usn ON cards (usn); +CREATE INDEX ix_revlog_usn ON revlog (usn); +CREATE INDEX ix_cards_nid ON cards (nid); +CREATE INDEX ix_cards_sched ON cards (did, queue, due); +CREATE INDEX ix_revlog_cid ON revlog (cid); +CREATE INDEX ix_notes_csum ON notes (csum); +""" + +INLINE_CODE_RE = re.compile(r"`([^`]+)`") +BOLD_RE = re.compile(r"\*\*([^*]+)\*\*") +ITALIC_RE = re.compile(r"\*([^*]+)\*") +LINK_RE = re.compile(r"\[([^\]]+)\]\(([^)]+)\)") + + +@dataclass +class Card: + deck: str + front: str + back: str + + +def parse_deck_heading(line: str, number: int) -> str: + deck = line[2:].strip() + if not deck: + raise ValueError(f"Line {number}: deck heading is empty") + return deck + + +def parse_card_heading(line: str, number: int, current_deck: str | None) -> str: + if current_deck is None: + raise ValueError(f"Line {number}: card found before any deck heading") + front = line[3:].strip() + if not front: + raise ValueError(f"Line {number}: card front is empty") + return front + + +def append_back_line( + line: str, + raw_line: str, + number: int, + current_front: str | None, + back_lines: list[str], +) -> None: + if current_front is not None: + back_lines.append(raw_line) + return + if line.strip(): + raise ValueError( + f"Line {number}: text outside a card; use '# Deck name' then '## Front text'" + ) + + +def parse_markdown(text: str) -> list[Card]: + cards: list[Card] = [] + current_deck: str | None = None + current_front: str | None = None + back_lines: list[str] = [] + + def flush_card() -> None: + nonlocal current_front, back_lines + if current_front is None: + return + back = "\n".join(back_lines).strip() + if not back: + raise ValueError(f"Card '{current_front}' is missing a back side") + cards.append( + Card(deck=current_deck or "Default", front=current_front.strip(), back=back) + ) + current_front = None + back_lines = [] + + for number, raw_line in enumerate(text.splitlines(), start=1): + line = raw_line.rstrip() + if line.startswith("# "): + flush_card() + current_deck = parse_deck_heading(line, number) + continue + if line.startswith("## "): + flush_card() + current_front = parse_card_heading(line, number, current_deck) + continue + append_back_line(line, raw_line, number, current_front, back_lines) + + flush_card() + + if not cards: + raise ValueError("No cards found. Use '# Deck name' and '## Front text'.") + return cards + + +def markdown_to_html(text: str) -> str: + blocks = re.split(r"\n\s*\n", text.strip()) + rendered: list[str] = [] + for block in blocks: + lines = [line.rstrip() for line in block.splitlines()] + if lines and all(line.lstrip().startswith(("- ", "* ")) for line in lines): + items: list[str] = [] + for line in lines: + item = line.lstrip()[2:].strip() + items.append(f"
  • {render_inline(item)}
  • ") + rendered.append("") + else: + parts = [render_inline(line.strip()) for line in lines] + rendered.append("

    " + "
    ".join(parts) + "

    ") + return "\n".join(rendered) + + +def render_inline(text: str) -> str: + escaped = html.escape(text, quote=False) + escaped = LINK_RE.sub(r'\1', escaped) + escaped = INLINE_CODE_RE.sub(r"\1", escaped) + escaped = BOLD_RE.sub(r"\1", escaped) + escaped = ITALIC_RE.sub(r"\1", escaped) + return escaped + + +def strip_html(text: str) -> str: + text = re.sub(r"<[^>]+>", "", text) + return html.unescape(text) + + +def make_guid(rng: random.Random) -> str: + alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789" + return "".join(rng.choice(alphabet) for _ in range(10)) + + +def checksum(text: str) -> int: + digest = hashlib.sha1(text.encode("utf-8")).hexdigest() + return int(digest[:8], 16) + + +def make_model(model_id: int, now_ms: int) -> dict[str, object]: + return { + str(model_id): { + "css": ".card { font-family: arial; font-size: 20px; line-height: 1.5; text-align: center; color: black; background-color: white; }", + "did": None, + "flds": [ + { + "collapsed": False, + "description": "", + "excludeFromSearch": False, + "font": "Arial", + "id": model_id + 1, + "name": "Front", + "ord": 0, + "plainText": False, + "preventDeletion": False, + "rtl": False, + "size": 20, + "sticky": False, + "tag": None, + }, + { + "collapsed": False, + "description": "", + "excludeFromSearch": False, + "font": "Arial", + "id": model_id + 2, + "name": "Back", + "ord": 1, + "plainText": False, + "preventDeletion": False, + "rtl": False, + "size": 20, + "sticky": False, + "tag": None, + }, + ], + "id": model_id, + "latexPost": "\\end{document}", + "latexPre": "\\documentclass[12pt]{article}\n\\special{papersize=3in,5in}\n\\usepackage[utf8]{inputenc}\n\\usepackage{amssymb,amsmath}\n\\pagestyle{empty}\n\\setlength{\\parindent}{0in}\n\\begin{document}\n", + "latexsvg": False, + "mod": now_ms, + "name": "Markdown Basic", + "originalStockKind": 0, + "req": [[0, "any", [0]]], + "sortf": 0, + "tmpls": [ + { + "afmt": "{{FrontSide}}\n\n
    \n\n{{Back}}", + "bafmt": "", + "bfont": "", + "bqfmt": "", + "bsize": 0, + "did": None, + "id": model_id + 3, + "name": "Card 1", + "ord": 0, + "qfmt": "{{Front}}", + } + ], + "type": 0, + "usn": 0, + "ver": None, + } + } + + +def make_decks( + deck_names: list[str], now_ms: int +) -> tuple[dict[str, object], dict[str, int]]: + decks: dict[str, object] = { + "1": { + "browserCollapsed": False, + "collapsed": False, + "conf": 1, + "desc": "", + "dyn": 0, + "extendNew": 0, + "extendRev": 0, + "id": 1, + "lrnToday": [0, 0], + "mod": now_ms, + "name": "Default", + "newLimit": None, + "newLimitToday": None, + "newToday": [0, 0], + "reviewLimit": None, + "reviewLimitToday": None, + "revToday": [0, 0], + "timeToday": [0, 0], + "usn": 0, + } + } + ids = {"Default": 1} + next_id = now_ms + for name in deck_names: + if name == "Default" or name in ids: + continue + next_id += 1 + ids[name] = next_id + decks[str(next_id)] = { + "browserCollapsed": False, + "collapsed": False, + "conf": 1, + "desc": "", + "dyn": 0, + "extendNew": 0, + "extendRev": 0, + "id": next_id, + "lrnToday": [0, 0], + "mod": now_ms, + "name": name, + "newLimit": None, + "newLimitToday": None, + "newToday": [0, 0], + "reviewLimit": None, + "reviewLimitToday": None, + "revToday": [0, 0], + "timeToday": [0, 0], + "usn": 0, + } + return decks, ids + + +def make_dconf() -> dict[str, object]: + return { + "1": { + "autoplay": True, + "dyn": False, + "id": 1, + "lapse": { + "delays": [10], + "leechAction": 1, + "leechFails": 8, + "minInt": 1, + "mult": 0, + }, + "maxTaken": 60, + "mod": 0, + "name": "Default", + "new": { + "bury": False, + "delays": [1, 10], + "initialFactor": 2500, + "ints": [1, 4, 0], + "order": 1, + "perDay": 20, + }, + "replayq": True, + "rev": { + "bury": False, + "ease4": 1.3, + "ivlFct": 1, + "maxIvl": 36500, + "perDay": 200, + }, + "timer": 0, + "usn": 0, + } + } + + +def make_conf(model_id: int, deck_id: int) -> dict[str, object]: + return { + "activeDecks": [deck_id], + "addToCur": True, + "collapseTime": 1200, + "creationOffset": 0, + "curDeck": deck_id, + "curModel": model_id, + "dayLearnFirst": False, + "dueCounts": True, + "estTimes": True, + "newSpread": 0, + "nextPos": 1, + "sched2021": True, + "schedVer": 2, + "sortBackwards": False, + "sortType": "noteFld", + "timeLim": 0, + } + + +def build_collection(cards: list[Card], db_path: Path) -> None: + now = int(time.time()) + now_ms = int(time.time() * 1000) + model_id = now_ms + deck_names = [card.deck for card in cards] + decks, deck_ids = make_decks(deck_names, now_ms) + first_deck_id = deck_ids[cards[0].deck] + conn = sqlite3.connect(db_path) + try: + _ = conn.executescript(SCHEMA) + _ = conn.execute( + "INSERT INTO col VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", + ( + 1, + now, + now_ms, + now_ms, + 11, + 0, + 0, + 0, + json.dumps(make_conf(model_id, first_deck_id), separators=(",", ":")), + json.dumps(make_model(model_id, now_ms), separators=(",", ":")), + json.dumps(decks, separators=(",", ":")), + json.dumps(make_dconf(), separators=(",", ":")), + "{}", + ), + ) + + rng = random.Random(now_ms) + for due, card in enumerate(cards, start=1): + note_id = now_ms + due + card_id = now_ms + 100000 + due + front_html = markdown_to_html(card.front) + back_html = markdown_to_html(card.back) + sort_field = strip_html(front_html) + _ = conn.execute( + "INSERT INTO notes VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", + ( + note_id, + make_guid(rng), + model_id, + now, + 0, + "", + front_html + "\x1f" + back_html, + sort_field, + checksum(sort_field), + 0, + "", + ), + ) + _ = conn.execute( + "INSERT INTO cards VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", + ( + card_id, + note_id, + deck_ids[card.deck], + 0, + now, + 0, + 0, + 0, + due, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + "", + ), + ) + + conn.commit() + finally: + conn.close() + + +def package_apkg(db_path: Path, output_path: Path) -> None: + media_path = db_path.parent / "media" + _ = media_path.write_text("{}", encoding="utf-8") + with zipfile.ZipFile(output_path, "w", compression=zipfile.ZIP_DEFLATED) as archive: + archive.write(db_path, arcname="collection.anki2") + archive.write(media_path, arcname="media") + + +def convert(markdown_path: Path, output_path: Path) -> None: + cards = parse_markdown(markdown_path.read_text(encoding="utf-8")) + output_path.parent.mkdir(parents=True, exist_ok=True) + with tempfile.TemporaryDirectory(prefix="markdown_to_anki_") as temp_dir: + temp_root = Path(temp_dir) + db_path = temp_root / "collection.anki2" + build_collection(cards, db_path) + package_apkg(db_path, output_path) + + +@dataclass +class Args(argparse.Namespace): + input: Path + output: Path | None + + def __init__(self): + super().__init__() + + +def parse_args() -> Args: + parser = argparse.ArgumentParser( + description="Convert a tiny markdown format into an Anki .apkg archive.", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" + Required Markdown Format: + # Deck name + ## Front of first card + Back of first card + ## Front of second card + Back of second card + # Another deck + ## Front + Back + """, + ) + _ = parser.add_argument("input", type=Path, help="Input markdown file") + _ = parser.add_argument( + "-o", + "--output", + type=Path, + help="Output .apkg path (default: same name as input)", + ) + return parser.parse_args(namespace=Args()) + + +def main(): + args = parse_args() + input_path = Path(args.input) + if not input_path.is_file(): + raise SystemExit(f"Input file not found: {input_path}") + + output_path = args.output or input_path.with_suffix(".apkg") + + try: + convert(input_path, output_path) + except ValueError as exc: + raise sys.exit(f"Error: {exc}") + + print(f"Wrote {output_path}") + + +if __name__ == "__main__": + main()