#!/usr/bin/env python3 """ raw_to_md.py — Convert unstructured raw text into a structured, chronological Markdown file using Claude AI. Reads from --text, --input file, or stdin. Writes to --output or stdout. Usage: python raw_to_md.py --text "raw notes here" --output structured.md python raw_to_md.py --input dump.txt --output structured.md cat dump.txt | python raw_to_md.py --output structured.md Requires: ANTHROPIC_API_KEY environment variable """ import argparse import os import sys import anthropic _BASE_GUIDELINES = """\ - Add a ## Summary section at the very top with a 2-3 sentence overview. - Organise the rest chronologically when dates, versions, or sequence cues are present; \ otherwise group by logical topic. - Use ## for top-level sections, ### for subsections. - Use bullet lists for discrete items or steps. - Use **bold** for key terms, dates, names, and version numbers. - Use `code spans` for commands, file names, and technical strings. - Use fenced code blocks (``` … ```) for multi-line code or config samples. - Do NOT invent or embellish information that is not in the input. - Output ONLY the Markdown — no preamble, explanation, or trailing commentary.\ """ _FILTER_GENERIC = ( "- Silently discard any passages that are clearly noise, off-topic, or unrelated " "to the main subject of the input (e.g. unrelated chat messages, ads, system logs " "from other contexts). Do not mention the omission." ) _FILTER_TOPIC = ( "- This document is about: {topic}. " "Silently discard any passages that are clearly unrelated to this topic. " "Do not mention the omission." ) def _build_system_prompt(topic: str | None) -> str: filter_line = _FILTER_TOPIC.format(topic=topic) if topic else _FILTER_GENERIC return ( "You are a technical documentation writer. Your task is to convert raw, " "unstructured text into a clean, well-structured Markdown document.\n\n" f"Guidelines:\n{_BASE_GUIDELINES}\n{filter_line}" ) def convert(raw_text: str, model: str, topic: str | None = None) -> str: client = anthropic.Anthropic() response = client.messages.create( model=model, max_tokens=8192, system=_build_system_prompt(topic), messages=[ { 'role': 'user', 'content': ( 'Convert the following raw text into a structured, ' 'chronological Markdown document:\n\n' + raw_text ), } ], ) return response.content[0].text def main(): parser = argparse.ArgumentParser( description='Convert raw text to structured Markdown via Claude AI' ) src = parser.add_mutually_exclusive_group() src.add_argument('--text', '-t', help='Raw text passed directly as a string') src.add_argument('--input', '-i', help='Path to a plain-text input file') parser.add_argument('--output', '-o', help='Output .md file (default: stdout)') parser.add_argument( '--model', '-m', default='claude-sonnet-4-6', help='Claude model ID (default: claude-sonnet-4-6)', ) parser.add_argument( '--topic', '-p', default=None, help='Project topic or description — used to filter out unrelated content', ) args = parser.parse_args() if not os.environ.get('ANTHROPIC_API_KEY'): sys.exit('Error: ANTHROPIC_API_KEY environment variable is not set.') # Resolve input if args.text: raw = args.text elif args.input: path = args.input try: with open(path, encoding='utf-8') as f: raw = f.read() except OSError as e: sys.exit(f'Error reading {path}: {e}') else: if sys.stdin.isatty(): print('Paste raw text then press Ctrl+D:', file=sys.stderr) raw = sys.stdin.read() if not raw.strip(): sys.exit('Error: input is empty.') print(f'Sending to {args.model}…', file=sys.stderr) result = convert(raw, args.model, topic=args.topic) if args.output: try: with open(args.output, 'w', encoding='utf-8') as f: f.write(result) print(f'Saved → {args.output}', file=sys.stderr) except OSError as e: sys.exit(f'Error writing {args.output}: {e}') else: print(result) if __name__ == '__main__': main()