Andreas Glashauser Git - markdown-to-anki.git/commitdiff
initial commit main
author Andreas Glashauser <ag@andreasglashauser.com>
Mon, 16 Mar 2026 14:47:41 +0000 (15:47 +0100)
committer Andreas Glashauser <ag@andreasglashauser.com>
Mon, 16 Mar 2026 14:47:41 +0000 (15:47 +0100)
markdown_to_anki.py [new file with mode: 0644]

diff --git a/markdown_to_anki.py b/markdown_to_anki.py
new file mode 100644 (file)
index 0000000..e9fecad
--- /dev/null
@@ -0,0 +1,549 @@
+#!/usr/bin/env python3
+
+import argparse
+import hashlib
+import html
+import json
+import random
+import re
+import sqlite3
+import tempfile
+import sys
+import time
+import zipfile
+from dataclasses import dataclass
+from pathlib import Path
+
+# DDL script run by build_collection() via executescript(): creates the col,
+# notes, cards, revlog and graves tables plus their indexes in a new database.
+# build_collection() inserts rows positionally, so the column order here must
+# stay in sync with the value tuples used there.
+SCHEMA = """
+CREATE TABLE col (
+  id integer PRIMARY KEY,
+  crt integer NOT NULL,
+  mod integer NOT NULL,
+  scm integer NOT NULL,
+  ver integer NOT NULL,
+  dty integer NOT NULL,
+  usn integer NOT NULL,
+  ls integer NOT NULL,
+  conf text NOT NULL,
+  models text NOT NULL,
+  decks text NOT NULL,
+  dconf text NOT NULL,
+  tags text NOT NULL
+);
+CREATE TABLE notes (
+  id integer PRIMARY KEY,
+  guid text NOT NULL,
+  mid integer NOT NULL,
+  mod integer NOT NULL,
+  usn integer NOT NULL,
+  tags text NOT NULL,
+  flds text NOT NULL,
+  sfld integer NOT NULL,
+  csum integer NOT NULL,
+  flags integer NOT NULL,
+  data text NOT NULL
+);
+CREATE TABLE cards (
+  id integer PRIMARY KEY,
+  nid integer NOT NULL,
+  did integer NOT NULL,
+  ord integer NOT NULL,
+  mod integer NOT NULL,
+  usn integer NOT NULL,
+  type integer NOT NULL,
+  queue integer NOT NULL,
+  due integer NOT NULL,
+  ivl integer NOT NULL,
+  factor integer NOT NULL,
+  reps integer NOT NULL,
+  lapses integer NOT NULL,
+  left integer NOT NULL,
+  odue integer NOT NULL,
+  odid integer NOT NULL,
+  flags integer NOT NULL,
+  data text NOT NULL
+);
+CREATE TABLE revlog (
+  id integer PRIMARY KEY,
+  cid integer NOT NULL,
+  usn integer NOT NULL,
+  ease integer NOT NULL,
+  ivl integer NOT NULL,
+  lastIvl integer NOT NULL,
+  factor integer NOT NULL,
+  time integer NOT NULL,
+  type integer NOT NULL
+);
+CREATE TABLE graves (
+  usn integer NOT NULL,
+  oid integer NOT NULL,
+  type integer NOT NULL
+);
+CREATE INDEX ix_notes_usn ON notes (usn);
+CREATE INDEX ix_cards_usn ON cards (usn);
+CREATE INDEX ix_revlog_usn ON revlog (usn);
+CREATE INDEX ix_cards_nid ON cards (nid);
+CREATE INDEX ix_cards_sched ON cards (did, queue, due);
+CREATE INDEX ix_revlog_cid ON revlog (cid);
+CREATE INDEX ix_notes_csum ON notes (csum);
+"""
+
+# Inline markdown patterns used by render_inline(). Group 1 of each pattern is
+# the text that ends up inside the generated HTML element.
+# `code` spans (no backticks allowed inside).
+INLINE_CODE_RE = re.compile(r"`([^`]+)`")
+# **bold** spans (no asterisks allowed inside).
+BOLD_RE = re.compile(r"\*\*([^*]+)\*\*")
+# *italic* spans (no asterisks allowed inside).
+ITALIC_RE = re.compile(r"\*([^*]+)\*")
+# [label](target) links: group 1 is the label, group 2 the target.
+LINK_RE = re.compile(r"\[([^\]]+)\]\(([^)]+)\)")
+
+
+# One flashcard produced by parse_markdown().
+@dataclass
+class Card:
+    # Name of the deck the card belongs to (text of the enclosing '# ' heading).
+    deck: str
+    # Front side (text of the '## ' heading that opened the card).
+    front: str
+    # Back side: the markdown lines that followed the card heading, joined
+    # with newlines and stripped.
+    back: str
+
+
+def parse_deck_heading(line: str, number: int) -> str:
+    """Extract the deck name from a ``# `` heading line.
+
+    The first two characters (the heading marker) are dropped and the
+    remainder is stripped of surrounding whitespace.
+
+    Args:
+        line: The heading line.
+        number: 1-based line number, used only in the error message.
+
+    Returns:
+        The deck name.
+
+    Raises:
+        ValueError: If nothing but whitespace follows the marker.
+    """
+    name = line[2:].strip()
+    if name:
+        return name
+    raise ValueError(f"Line {number}: deck heading is empty")
+
+
+def parse_card_heading(line: str, number: int, current_deck: str | None) -> str:
+    """Extract the card front from a ``## `` heading line.
+
+    Args:
+        line: The heading line; its first three characters are discarded.
+        number: 1-based line number, used only in error messages.
+        current_deck: Deck the card would belong to, or ``None`` when no deck
+            heading has been seen yet.
+
+    Returns:
+        The front text with surrounding whitespace removed.
+
+    Raises:
+        ValueError: If no deck is open, or the heading carries no text. The
+            missing-deck check takes precedence.
+    """
+    if current_deck is None:
+        raise ValueError(f"Line {number}: card found before any deck heading")
+    if front := line[3:].strip():
+        return front
+    raise ValueError(f"Line {number}: card front is empty")
+
+
+def append_back_line(
+    line: str,
+    raw_line: str,
+    number: int,
+    current_front: str | None,
+    back_lines: list[str],
+) -> None:
+    """Handle a non-heading line: add it to the open card or reject it.
+
+    Args:
+        line: The line as examined by the parser.
+        raw_line: The unmodified line; this is what gets stored.
+        number: 1-based line number, used only in the error message.
+        current_front: Front of the card being collected, or ``None`` when no
+            card is open.
+        back_lines: Accumulator for the open card's back side; mutated in
+            place.
+
+    Raises:
+        ValueError: If no card is open and the line is not blank.
+    """
+    if current_front is None:
+        # Blank lines between headings are fine; anything else is stray text.
+        if line.strip():
+            raise ValueError(
+                f"Line {number}: text outside a card; use '# Deck name' then '## Front text'"
+            )
+        return
+    back_lines.append(raw_line)
+
+
+def parse_markdown(text: str) -> list[Card]:
+    """Parse the markdown flashcard format into a list of cards.
+
+    ``# Deck`` lines switch the current deck, ``## Front`` lines open a new
+    card, and every other line is added to the back of the open card.
+
+    Args:
+        text: Full contents of the markdown file.
+
+    Returns:
+        The cards in document order.
+
+    Raises:
+        ValueError: If a deck or card heading has no text, a card appears
+            before any deck heading, non-blank text appears outside a card,
+            a card has no back side, or no cards are found at all.
+    """
+    cards: list[Card] = []
+    current_deck: str | None = None
+    current_front: str | None = None
+    back_lines: list[str] = []
+
+    def is_heading(line: str, marker: str) -> bool:
+        # Lines arrive with trailing whitespace removed, so a heading without
+        # any text shows up as the bare marker. It must still be treated as a
+        # heading so the heading parser can report it as empty instead of it
+        # being silently folded into the previous card's back side.
+        return line == marker or line.startswith(marker + " ")
+
+    def flush_card() -> None:
+        # Finish the card being collected, if any, and reset the accumulator.
+        nonlocal current_front, back_lines
+        if current_front is None:
+            return
+        back = "\n".join(back_lines).strip()
+        if not back:
+            raise ValueError(f"Card '{current_front}' is missing a back side")
+        cards.append(
+            Card(deck=current_deck or "Default", front=current_front.strip(), back=back)
+        )
+        current_front = None
+        back_lines = []
+
+    for number, raw_line in enumerate(text.splitlines(), start=1):
+        line = raw_line.rstrip()
+        if is_heading(line, "#"):
+            # Flush first: the pending card belongs to the deck being left.
+            flush_card()
+            current_deck = parse_deck_heading(line, number)
+            continue
+        if is_heading(line, "##"):
+            flush_card()
+            current_front = parse_card_heading(line, number, current_deck)
+            continue
+        append_back_line(line, raw_line, number, current_front, back_lines)
+
+    flush_card()
+
+    if not cards:
+        raise ValueError("No cards found. Use '# Deck name' and '## Front text'.")
+    return cards
+
+
+def markdown_to_html(text: str) -> str:
+    """Render a small markdown subset as HTML.
+
+    The text is split into blocks on blank lines. A block in which every
+    line starts with ``- `` or ``* `` (after optional indentation) becomes a
+    ``<ul>``; any other block becomes a ``<p>`` whose lines are separated by
+    ``<br>``. Inline markup is delegated to ``render_inline``.
+
+    Args:
+        text: Markdown source of one card side.
+
+    Returns:
+        The rendered blocks joined by newlines.
+    """
+    bullet_markers = ("- ", "* ")
+    html_blocks: list[str] = []
+    for paragraph in re.split(r"\n\s*\n", text.strip()):
+        rows = [row.rstrip() for row in paragraph.splitlines()]
+        is_bullet_list = bool(rows) and all(
+            row.lstrip().startswith(bullet_markers) for row in rows
+        )
+        if is_bullet_list:
+            # Drop the two-character marker, then render what is left.
+            bullets = [
+                f"<li>{render_inline(row.lstrip()[2:].strip())}</li>" for row in rows
+            ]
+            html_blocks.append("<ul>" + "".join(bullets) + "</ul>")
+            continue
+        rendered_rows = [render_inline(row.strip()) for row in rows]
+        html_blocks.append("<p>" + "<br>".join(rendered_rows) + "</p>")
+    return "\n".join(html_blocks)
+
+
+def render_inline(text: str) -> str:
+    """Convert inline markdown (links, code, bold, italic) to HTML.
+
+    The text is HTML-escaped first, then the link, inline-code, bold and
+    italic patterns are substituted in that order.
+
+    Args:
+        text: A single line of markdown.
+
+    Returns:
+        The HTML fragment for the line.
+    """
+
+    def render_link(match: re.Match[str]) -> str:
+        # The text was escaped with quote=False, so a double quote inside the
+        # link target would terminate the href attribute and let the rest of
+        # the target be parsed as extra attributes. Escape it here.
+        href = match.group(2).replace('"', "&quot;")
+        return f'<a href="{href}">{match.group(1)}</a>'
+
+    escaped = html.escape(text, quote=False)
+    escaped = LINK_RE.sub(render_link, escaped)
+    escaped = INLINE_CODE_RE.sub(r"<code>\1</code>", escaped)
+    escaped = BOLD_RE.sub(r"<strong>\1</strong>", escaped)
+    escaped = ITALIC_RE.sub(r"<em>\1</em>", escaped)
+    return escaped
+
+
+def strip_html(text: str) -> str:
+    """Return ``text`` with ``<...>`` tags removed and HTML entities decoded."""
+    return html.unescape(re.sub(r"<[^>]+>", "", text))
+
+
+def make_guid(rng: random.Random) -> str:
+    """Return a 10-character identifier of ASCII letters and digits.
+
+    Args:
+        rng: Random source; ``choice`` is called on it ten times, so a seeded
+            generator yields a reproducible identifier.
+    """
+    alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
+    picks = [rng.choice(alphabet) for _ in range(10)]
+    return "".join(picks)
+
+
+def checksum(text: str) -> int:
+    """Return the first 32 bits of the SHA-1 of ``text`` (UTF-8) as an int."""
+    leading_bytes = hashlib.sha1(text.encode("utf-8")).digest()[:4]
+    # Four big-endian bytes are the same value as the first eight hex digits.
+    return int.from_bytes(leading_bytes, "big")
+
+
+def make_model(model_id: int, now_ms: int) -> dict[str, object]:
+    """Build the models mapping containing the single "Markdown Basic" model.
+
+    The model has two fields (Front, Back) and one card template. Field and
+    template ids are derived from ``model_id`` (+1, +2 and +3 respectively).
+
+    Args:
+        model_id: Id of the model; its string form is the mapping key.
+        now_ms: Value stored as the model's ``mod``.
+
+    Returns:
+        ``{str(model_id): model}``.
+    """
+
+    def field(name: str, position: int) -> dict[str, object]:
+        # Front and Back differ only in name, ordinal and id.
+        return {
+            "collapsed": False,
+            "description": "",
+            "excludeFromSearch": False,
+            "font": "Arial",
+            "id": model_id + 1 + position,
+            "name": name,
+            "ord": position,
+            "plainText": False,
+            "preventDeletion": False,
+            "rtl": False,
+            "size": 20,
+            "sticky": False,
+            "tag": None,
+        }
+
+    card_template: dict[str, object] = {
+        "afmt": "{{FrontSide}}\n\n<hr id=answer>\n\n{{Back}}",
+        "bafmt": "",
+        "bfont": "",
+        "bqfmt": "",
+        "bsize": 0,
+        "did": None,
+        "id": model_id + 3,
+        "name": "Card 1",
+        "ord": 0,
+        "qfmt": "{{Front}}",
+    }
+
+    model: dict[str, object] = {
+        "css": ".card { font-family: arial; font-size: 20px; line-height: 1.5; text-align: center; color: black; background-color: white; }",
+        "did": None,
+        "flds": [field("Front", 0), field("Back", 1)],
+        "id": model_id,
+        "latexPost": "\\end{document}",
+        "latexPre": "\\documentclass[12pt]{article}\n\\special{papersize=3in,5in}\n\\usepackage[utf8]{inputenc}\n\\usepackage{amssymb,amsmath}\n\\pagestyle{empty}\n\\setlength{\\parindent}{0in}\n\\begin{document}\n",
+        "latexsvg": False,
+        "mod": now_ms,
+        "name": "Markdown Basic",
+        "originalStockKind": 0,
+        "req": [[0, "any", [0]]],
+        "sortf": 0,
+        "tmpls": [card_template],
+        "type": 0,
+        "usn": 0,
+        "ver": None,
+    }
+    return {str(model_id): model}
+
+
+def make_decks(
+    deck_names: list[str], now_ms: int
+) -> tuple[dict[str, object], dict[str, int]]:
+    """Build the decks mapping and a name-to-id lookup.
+
+    A "Default" deck with id 1 is always present. Every other distinct name
+    gets the next id counting up from ``now_ms + 1``, in order of first
+    appearance.
+
+    Args:
+        deck_names: Deck names, possibly with repeats.
+        now_ms: Stored as each deck's ``mod`` and used as the id base.
+
+    Returns:
+        ``(decks, ids)``: ``decks`` maps the string form of each id to its
+        deck entry, ``ids`` maps each deck name to its integer id.
+    """
+
+    def deck_entry(deck_id: int, name: str) -> dict[str, object]:
+        # All decks share the same settings apart from id and name.
+        return {
+            "browserCollapsed": False,
+            "collapsed": False,
+            "conf": 1,
+            "desc": "",
+            "dyn": 0,
+            "extendNew": 0,
+            "extendRev": 0,
+            "id": deck_id,
+            "lrnToday": [0, 0],
+            "mod": now_ms,
+            "name": name,
+            "newLimit": None,
+            "newLimitToday": None,
+            "newToday": [0, 0],
+            "reviewLimit": None,
+            "reviewLimitToday": None,
+            "revToday": [0, 0],
+            "timeToday": [0, 0],
+            "usn": 0,
+        }
+
+    ids = {"Default": 1}
+    decks: dict[str, object] = {"1": deck_entry(1, "Default")}
+    last_id = now_ms
+    for name in deck_names:
+        # "Default" is seeded into ids, so this also skips the built-in deck.
+        if name in ids:
+            continue
+        last_id += 1
+        ids[name] = last_id
+        decks[str(last_id)] = deck_entry(last_id, name)
+    return decks, ids
+
+
+def make_dconf() -> dict[str, object]:
+    """Return the deck-options mapping with a single "Default" group (id 1)."""
+    lapse_options: dict[str, object] = {
+        "delays": [10],
+        "leechAction": 1,
+        "leechFails": 8,
+        "minInt": 1,
+        "mult": 0,
+    }
+    new_card_options: dict[str, object] = {
+        "bury": False,
+        "delays": [1, 10],
+        "initialFactor": 2500,
+        "ints": [1, 4, 0],
+        "order": 1,
+        "perDay": 20,
+    }
+    review_options: dict[str, object] = {
+        "bury": False,
+        "ease4": 1.3,
+        "ivlFct": 1,
+        "maxIvl": 36500,
+        "perDay": 200,
+    }
+    default_group: dict[str, object] = {
+        "autoplay": True,
+        "dyn": False,
+        "id": 1,
+        "lapse": lapse_options,
+        "maxTaken": 60,
+        "mod": 0,
+        "name": "Default",
+        "new": new_card_options,
+        "replayq": True,
+        "rev": review_options,
+        "timer": 0,
+        "usn": 0,
+    }
+    return {"1": default_group}
+
+
+# Build the collection-level configuration object; build_collection() stores
+# it as JSON in the col.conf column.
+#
+# Args:
+#   model_id: id stored as "curModel".
+#   deck_id: id stored as "curDeck" and as the only entry of "activeDecks".
+# Returns:
+#   A dict of settings; every value other than the three above is a fixed
+#   constant.
+def make_conf(model_id: int, deck_id: int) -> dict[str, object]:
+    return {
+        "activeDecks": [deck_id],
+        "addToCur": True,
+        "collapseTime": 1200,
+        "creationOffset": 0,
+        "curDeck": deck_id,
+        "curModel": model_id,
+        "dayLearnFirst": False,
+        "dueCounts": True,
+        "estTimes": True,
+        "newSpread": 0,
+        "nextPos": 1,
+        "sched2021": True,
+        "schedVer": 2,
+        "sortBackwards": False,
+        "sortType": "noteFld",
+        "timeLim": 0,
+    }
+
+
+# Create the SQLite collection at db_path and fill it from the parsed cards:
+# one row in col, then one notes row and one cards row per Card.
+#
+# Args:
+#   cards: parsed cards; must be non-empty because cards[0].deck is used as
+#          the current/active deck in the stored configuration.
+#   db_path: file the database is created in.
+#
+# All INSERTs are positional, so each value tuple below follows the column
+# order declared in SCHEMA.
+def build_collection(cards: list[Card], db_path: Path) -> None:
+    now = int(time.time())  # seconds
+    now_ms = int(time.time() * 1000)  # milliseconds; base for generated ids
+    model_id = now_ms
+    deck_names = [card.deck for card in cards]
+    decks, deck_ids = make_decks(deck_names, now_ms)
+    first_deck_id = deck_ids[cards[0].deck]
+    conn = sqlite3.connect(db_path)
+    try:
+        _ = conn.executescript(SCHEMA)
+        _ = conn.execute(
+            "INSERT INTO col VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
+            (
+                1,  # id
+                now,  # crt
+                now_ms,  # mod
+                now_ms,  # scm
+                11,  # ver
+                0,  # dty
+                0,  # usn
+                0,  # ls
+                json.dumps(make_conf(model_id, first_deck_id), separators=(",", ":")),  # conf
+                json.dumps(make_model(model_id, now_ms), separators=(",", ":")),  # models
+                json.dumps(decks, separators=(",", ":")),  # decks
+                json.dumps(make_dconf(), separators=(",", ":")),  # dconf
+                "{}",  # tags
+            ),
+        )
+
+        # GUID generator seeded with the same timestamp used for the ids.
+        rng = random.Random(now_ms)
+        # `due` is the card's 1-based position in the input; it is stored in
+        # cards.due and also used as the offset for the note and card ids.
+        for due, card in enumerate(cards, start=1):
+            note_id = now_ms + due
+            card_id = now_ms + 100000 + due
+            front_html = markdown_to_html(card.front)
+            back_html = markdown_to_html(card.back)
+            # Plain-text version of the front, stored as the sort field and
+            # used as the input of the checksum column.
+            sort_field = strip_html(front_html)
+            _ = conn.execute(
+                "INSERT INTO notes VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
+                (
+                    note_id,  # id
+                    make_guid(rng),  # guid
+                    model_id,  # mid
+                    now,  # mod
+                    0,  # usn
+                    "",  # tags
+                    front_html + "\x1f" + back_html,  # flds: fields joined by 0x1f
+                    sort_field,  # sfld
+                    checksum(sort_field),  # csum
+                    0,  # flags
+                    "",  # data
+                ),
+            )
+            _ = conn.execute(
+                "INSERT INTO cards VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
+                (
+                    card_id,  # id
+                    note_id,  # nid
+                    deck_ids[card.deck],  # did
+                    0,  # ord
+                    now,  # mod
+                    0,  # usn
+                    0,  # type
+                    0,  # queue
+                    due,  # due
+                    0,  # ivl
+                    0,  # factor
+                    0,  # reps
+                    0,  # lapses
+                    0,  # left
+                    0,  # odue
+                    0,  # odid
+                    0,  # flags
+                    "",  # data
+                ),
+            )
+
+        conn.commit()
+    finally:
+        # Close the connection even when an insert fails.
+        conn.close()
+
+
+def package_apkg(db_path: Path, output_path: Path) -> None:
+    """Zip the collection database and an empty media map into ``output_path``.
+
+    A file named ``media`` containing ``{}`` is written next to ``db_path``;
+    it and the database are then stored in a deflate-compressed archive as
+    ``media`` and ``collection.anki2``.
+
+    Args:
+        db_path: The SQLite collection file.
+        output_path: Archive to create (an existing file is overwritten).
+    """
+    media_path = db_path.parent / "media"
+    _ = media_path.write_text("{}", encoding="utf-8")
+    members = (
+        (db_path, "collection.anki2"),
+        (media_path, "media"),
+    )
+    with zipfile.ZipFile(output_path, "w", compression=zipfile.ZIP_DEFLATED) as archive:
+        for source, arcname in members:
+            archive.write(source, arcname=arcname)
+
+
+def convert(markdown_path: Path, output_path: Path) -> None:
+    """Convert a markdown file into an ``.apkg`` archive.
+
+    The input is read as UTF-8 and parsed, the output's parent directory is
+    created if needed, and the collection database is built in a temporary
+    directory before being packaged at ``output_path``.
+
+    Args:
+        markdown_path: Markdown file to read.
+        output_path: Archive to write.
+
+    Raises:
+        ValueError: Propagated from ``parse_markdown`` for malformed input.
+    """
+    source_text = markdown_path.read_text(encoding="utf-8")
+    cards = parse_markdown(source_text)
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    with tempfile.TemporaryDirectory(prefix="markdown_to_anki_") as workdir:
+        db_path = Path(workdir) / "collection.anki2"
+        build_collection(cards, db_path)
+        package_apkg(db_path, output_path)
+
+
+# Typed argparse namespace: declares the attributes that parse_args() fills
+# in so callers get annotated fields instead of an untyped Namespace.
+@dataclass
+class Args(argparse.Namespace):
+    # Positional "input" argument: the markdown file to convert.
+    input: Path
+    # Value of -o/--output, or None when the option was not given.
+    output: Path | None
+
+    # Explicit zero-argument constructor so parse_args() can create an empty
+    # instance (`Args()`) and let argparse populate it.
+    def __init__(self):
+        super().__init__()
+
+
+def parse_args() -> Args:
+    """Parse the command line into an ``Args`` namespace.
+
+    Accepts one positional ``input`` path and an optional ``-o/--output``
+    path. The expected markdown layout is shown verbatim as the help
+    epilog.
+    """
+    format_help = """
+        Required Markdown Format:
+          # Deck name
+          ## Front of first card
+          Back of first card
+          ## Front of second card
+          Back of second card
+          # Another deck
+          ## Front
+          Back
+        """
+    cli = argparse.ArgumentParser(
+        description="Convert a tiny markdown format into an Anki .apkg archive.",
+        # Raw formatter keeps the line breaks of the epilog example.
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog=format_help,
+    )
+    _ = cli.add_argument("input", type=Path, help="Input markdown file")
+    _ = cli.add_argument(
+        "-o",
+        "--output",
+        type=Path,
+        help="Output .apkg path (default: same name as input)",
+    )
+    namespace = Args()
+    return cli.parse_args(namespace=namespace)
+
+
+def main() -> None:
+    """Command-line entry point: convert the input file and report the result.
+
+    The output path defaults to the input path with its suffix replaced by
+    ``.apkg``.
+
+    Raises:
+        SystemExit: With a message (exit status 1) when the input file does
+            not exist, the markdown is malformed, or a file cannot be read
+            or written.
+    """
+    args = parse_args()
+    input_path = Path(args.input)
+    if not input_path.is_file():
+        raise SystemExit(f"Input file not found: {input_path}")
+
+    output_path = args.output or input_path.with_suffix(".apkg")
+
+    try:
+        convert(input_path, output_path)
+    except (OSError, ValueError) as exc:
+        # sys.exit() raises SystemExit by itself, so `raise sys.exit(...)`
+        # never reached the raise; raise the exception directly instead.
+        # OSError is included so file-system failures produce the same short
+        # message rather than a traceback.
+        raise SystemExit(f"Error: {exc}") from exc
+
+    print(f"Wrote {output_path}")
+
+
+# Run the converter only when the file is executed as a script, not when it
+# is imported.
+if __name__ == "__main__":
+    main()