#!/usr/bin/env python3
"""
raw_to_md.py — Convert unstructured raw text into a structured, chronological
Markdown file using Claude AI.

Reads from --text, --input file, or stdin. Writes to --output or stdout.

Usage:
    python raw_to_md.py --text "raw notes here" --output structured.md
    python raw_to_md.py --input dump.txt --output structured.md
    cat dump.txt | python raw_to_md.py --output structured.md

Requires: ANTHROPIC_API_KEY environment variable
"""

import argparse
import os
import sys

try:
    import anthropic
except ImportError:
    # Deferred: convert() raises a readable ImportError when the SDK is
    # actually needed, so --help and argument errors work without it.
    anthropic = None

# System prompt sent with every request. The trailing backslashes are line
# continuations inside the literal: they join the wrapped source lines and
# suppress the leading/trailing newline.
SYSTEM_PROMPT = """\
You are a technical documentation writer. Your task is to convert raw, unstructured text \
into a clean, well-structured Markdown document.

Guidelines:
- Add a ## Summary section at the very top with a 2-3 sentence overview.
- Organise the rest chronologically when dates, versions, or sequence cues are present; \
otherwise group by logical topic.
- Use ## for top-level sections, ### for subsections.
- Use bullet lists for discrete items or steps.
- Use **bold** for key terms, dates, names, and version numbers.
- Use `code spans` for commands, file names, and technical strings.
- Use fenced code blocks (``` … ```) for multi-line code or config samples.
- Do NOT invent or embellish information that is not in the input.
- Output ONLY the Markdown — no preamble, explanation, or trailing commentary.\
"""


def convert(raw_text: str, model: str, max_tokens: int = 8192) -> str:
    """Send *raw_text* to the given Claude model and return the Markdown reply.

    Args:
        raw_text: The unstructured text to convert.
        model: Claude model ID passed through to the Messages API.
        max_tokens: Upper bound on generated tokens (default 8192).

    Returns:
        The concatenated text of every text block in the response; an empty
        string if the response contains no text block.

    Raises:
        ImportError: If the ``anthropic`` package is not installed.

    Errors raised by the ``anthropic`` client are not caught here. A warning
    is printed to stderr when the reply stopped at the ``max_tokens`` limit.
    """
    if anthropic is None:
        raise ImportError(
            "the 'anthropic' package is not installed (pip install anthropic)"
        )

    client = anthropic.Anthropic()
    response = client.messages.create(
        model=model,
        max_tokens=max_tokens,
        system=SYSTEM_PROMPT,
        messages=[
            {
                'role': 'user',
                'content': (
                    'Convert the following raw text into a structured, '
                    'chronological Markdown document:\n\n' + raw_text
                ),
            }
        ],
    )

    if response.stop_reason == 'max_tokens':
        # The document was cut off mid-generation; tell the user instead of
        # silently handing back a partial file.
        print(
            f'Warning: output was truncated at the {max_tokens}-token limit.',
            file=sys.stderr,
        )

    # Join every text block rather than indexing content[0], which fails on
    # an empty list and assumes the first block is text.
    return ''.join(
        block.text for block in response.content if block.type == 'text'
    )


def main() -> None:
    """Command-line entry point.

    Parses arguments, takes the raw text from --text, --input, or stdin (in
    that order), converts it with convert(), and writes the result to
    --output or stdout. Every failure ends the process through sys.exit()
    with an ``Error: …`` message.
    """
    parser = argparse.ArgumentParser(
        description='Convert raw text to structured Markdown via Claude AI'
    )
    src = parser.add_mutually_exclusive_group()
    src.add_argument('--text', '-t', help='Raw text passed directly as a string')
    src.add_argument('--input', '-i', help='Path to a plain-text input file')
    parser.add_argument('--output', '-o', help='Output .md file (default: stdout)')
    parser.add_argument(
        '--model', '-m',
        default='claude-sonnet-4-6',
        help='Claude model ID (default: claude-sonnet-4-6)',
    )
    args = parser.parse_args()

    if not os.environ.get('ANTHROPIC_API_KEY'):
        sys.exit('Error: ANTHROPIC_API_KEY environment variable is not set.')

    # Resolve input. Compare against None, not truthiness: an explicit
    # `--text ""` must reach the empty-input check below instead of falling
    # through to a blocking read of stdin.
    if args.text is not None:
        raw = args.text
    elif args.input is not None:
        path = args.input
        try:
            with open(path, encoding='utf-8') as f:
                raw = f.read()
        except OSError as e:
            sys.exit(f'Error reading {path}: {e}')
    else:
        if sys.stdin.isatty():
            print('Paste raw text then press Ctrl+D:', file=sys.stderr)
        raw = sys.stdin.read()

    if not raw.strip():
        sys.exit('Error: input is empty.')

    print(f'Sending to {args.model}…', file=sys.stderr)
    try:
        result = convert(raw, args.model)
    except ImportError as e:
        # Must stay first: when the SDK is missing, `anthropic` is None and
        # the next clause's expression must never be evaluated.
        sys.exit(f'Error: {e}')
    except anthropic.APIError as e:
        sys.exit(f'Error calling the Claude API: {e}')

    if not result.strip():
        sys.exit('Error: the model returned no text.')

    if args.output:
        try:
            with open(args.output, 'w', encoding='utf-8') as f:
                f.write(result)
        except OSError as e:
            sys.exit(f'Error writing {args.output}: {e}')
        print(f'Saved → {args.output}', file=sys.stderr)
    else:
        print(result)


if __name__ == '__main__':
    main()