From 8ec0d3be4178b4128ae0e082c765a4ca5f9c3523 Mon Sep 17 00:00:00 2001 From: The_miro Date: Mon, 1 Jun 2026 14:15:49 +0200 Subject: [PATCH] Add --topic flag to raw_to_md for filtering off-topic content When --topic is provided, Claude silently discards passages unrelated to that subject. Without it, a generic noise-filter instruction is used. Co-Authored-By: Claude Sonnet 4.6 --- raw_to_md.py | 38 ++++++++++++++++++++++++++++++-------- 1 file changed, 30 insertions(+), 8 deletions(-) diff --git a/raw_to_md.py b/raw_to_md.py index ebdb90a..66cbe84 100644 --- a/raw_to_md.py +++ b/raw_to_md.py @@ -20,11 +20,7 @@ import sys import anthropic -SYSTEM_PROMPT = """\ -You are a technical documentation writer. Your task is to convert raw, unstructured text \ -into a clean, well-structured Markdown document. - -Guidelines: +_BASE_GUIDELINES = """\ - Add a ## Summary section at the very top with a 2-3 sentence overview. - Organise the rest chronologically when dates, versions, or sequence cues are present; \ otherwise group by logical topic. @@ -37,13 +33,34 @@ Guidelines: - Output ONLY the Markdown — no preamble, explanation, or trailing commentary.\ """ +_FILTER_GENERIC = ( + "- Silently discard any passages that are clearly noise, off-topic, or unrelated " + "to the main subject of the input (e.g. unrelated chat messages, ads, system logs " + "from other contexts). Do not mention the omission." +) -def convert(raw_text: str, model: str) -> str: +_FILTER_TOPIC = ( + "- This document is about: {topic}. " + "Silently discard any passages that are clearly unrelated to this topic. " + "Do not mention the omission." +) + + +def _build_system_prompt(topic: str | None) -> str: + filter_line = _FILTER_TOPIC.format(topic=topic) if topic else _FILTER_GENERIC + return ( + "You are a technical documentation writer. 
Your task is to convert raw, " + "unstructured text into a clean, well-structured Markdown document.\n\n" + f"Guidelines:\n{_BASE_GUIDELINES}\n{filter_line}" + ) + + +def convert(raw_text: str, model: str, topic: str | None = None) -> str: client = anthropic.Anthropic() response = client.messages.create( model=model, max_tokens=8192, - system=SYSTEM_PROMPT, + system=_build_system_prompt(topic), messages=[ { 'role': 'user', @@ -71,6 +88,11 @@ def main(): default='claude-sonnet-4-6', help='Claude model ID (default: claude-sonnet-4-6)', ) + parser.add_argument( + '--topic', '-p', + default=None, + help='Project topic or description — used to filter out unrelated content', + ) args = parser.parse_args() if not os.environ.get('ANTHROPIC_API_KEY'): @@ -95,7 +117,7 @@ def main(): sys.exit('Error: input is empty.') print(f'Sending to {args.model}…', file=sys.stderr) - result = convert(raw, args.model) + result = convert(raw, args.model, topic=args.topic) if args.output: try: