Compare commits

..

No commits in common. "8ec0d3be4178b4128ae0e082c765a4ca5f9c3523" and "0eac070dace18eb04b55858f1b9b4548acd64218" have entirely different histories.

2 changed files with 11 additions and 45 deletions

View File

@ -18,7 +18,7 @@ from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.oxml import OxmlElement from docx.oxml import OxmlElement
from docx.oxml.ns import qn from docx.oxml.ns import qn
from docx.shared import Inches, Pt from docx.shared import Pt
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@ -159,15 +159,10 @@ def _md_to_doc(doc, content):
# Document assembly # Document assembly
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
def build_docx(md_dir: Path, output: Path, title: str, logo: Path | None = None): def build_docx(md_dir: Path, output: Path, title: str):
doc = Document() doc = Document()
# Title page # Title page
if logo is not None:
logo_para = doc.add_paragraph()
logo_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
logo_para.add_run().add_picture(str(logo), width=Inches(2))
heading = doc.add_heading(title, level=0) heading = doc.add_heading(title, level=0)
heading.alignment = WD_ALIGN_PARAGRAPH.CENTER heading.alignment = WD_ALIGN_PARAGRAPH.CENTER
date_para = doc.add_paragraph(f'Generated: {datetime.now().strftime("%Y-%m-%d")}') date_para = doc.add_paragraph(f'Generated: {datetime.now().strftime("%Y-%m-%d")}')
@ -221,20 +216,13 @@ def main():
parser.add_argument('--dir', '-d', default='.', help='Directory containing .md files (default: .)') parser.add_argument('--dir', '-d', default='.', help='Directory containing .md files (default: .)')
parser.add_argument('--output', '-o', default='documentation.docx', help='Output .docx path') parser.add_argument('--output', '-o', default='documentation.docx', help='Output .docx path')
parser.add_argument('--title', '-t', default='Project Documentation', help='Document title') parser.add_argument('--title', '-t', default='Project Documentation', help='Document title')
parser.add_argument('--logo', '-l', default=None, help='Path to logo image (PNG/JPG) for the title page')
args = parser.parse_args() args = parser.parse_args()
md_dir = Path(args.dir).resolve() md_dir = Path(args.dir).resolve()
if not md_dir.is_dir(): if not md_dir.is_dir():
sys.exit(f'Error: {md_dir} is not a directory') sys.exit(f'Error: {md_dir} is not a directory')
logo_path = None build_docx(md_dir, Path(args.output), args.title)
if args.logo:
logo_path = Path(args.logo).resolve()
if not logo_path.is_file():
sys.exit(f'Error: logo file not found: {logo_path}')
build_docx(md_dir, Path(args.output), args.title, logo=logo_path)
if __name__ == '__main__': if __name__ == '__main__':

View File

@ -20,7 +20,11 @@ import sys
import anthropic import anthropic
_BASE_GUIDELINES = """\ SYSTEM_PROMPT = """\
You are a technical documentation writer. Your task is to convert raw, unstructured text \
into a clean, well-structured Markdown document.
Guidelines:
- Add a ## Summary section at the very top with a 2-3 sentence overview. - Add a ## Summary section at the very top with a 2-3 sentence overview.
- Organise the rest chronologically when dates, versions, or sequence cues are present; \ - Organise the rest chronologically when dates, versions, or sequence cues are present; \
otherwise group by logical topic. otherwise group by logical topic.
@ -33,34 +37,13 @@ _BASE_GUIDELINES = """\
- Output ONLY the Markdown no preamble, explanation, or trailing commentary.\ - Output ONLY the Markdown no preamble, explanation, or trailing commentary.\
""" """
# Guideline bullet used when no topic is supplied: instructs the model to drop
# obvious noise or off-topic passages without mentioning that it did so.
_FILTER_GENERIC = (
"- Silently discard any passages that are clearly noise, off-topic, or unrelated "
"to the main subject of the input (e.g. unrelated chat messages, ads, system logs "
"from other contexts). Do not mention the omission."
)
_FILTER_TOPIC = ( def convert(raw_text: str, model: str) -> str:
"- This document is about: {topic}. "
"Silently discard any passages that are clearly unrelated to this topic. "
"Do not mention the omission."
)
def _build_system_prompt(topic: str | None) -> str:
    """Assemble the system prompt for the conversion request.

    The prompt is a fixed role introduction, the shared ``_BASE_GUIDELINES``
    text, and one trailing filter bullet. A truthy ``topic`` selects
    ``_FILTER_TOPIC`` with the topic substituted in; ``None`` or an empty
    string selects ``_FILTER_GENERIC``.
    """
    if topic:
        noise_rule = _FILTER_TOPIC.format(topic=topic)
    else:
        noise_rule = _FILTER_GENERIC

    intro = (
        "You are a technical documentation writer. Your task is to convert raw, "
        "unstructured text into a clean, well-structured Markdown document.\n\n"
    )
    return f"{intro}Guidelines:\n{_BASE_GUIDELINES}\n{noise_rule}"
def convert(raw_text: str, model: str, topic: str | None = None) -> str:
client = anthropic.Anthropic() client = anthropic.Anthropic()
response = client.messages.create( response = client.messages.create(
model=model, model=model,
max_tokens=8192, max_tokens=8192,
system=_build_system_prompt(topic), system=SYSTEM_PROMPT,
messages=[ messages=[
{ {
'role': 'user', 'role': 'user',
@ -88,11 +71,6 @@ def main():
default='claude-sonnet-4-6', default='claude-sonnet-4-6',
help='Claude model ID (default: claude-sonnet-4-6)', help='Claude model ID (default: claude-sonnet-4-6)',
) )
parser.add_argument(
'--topic', '-p',
default=None,
help='Project topic or description — used to filter out unrelated content',
)
args = parser.parse_args() args = parser.parse_args()
if not os.environ.get('ANTHROPIC_API_KEY'): if not os.environ.get('ANTHROPIC_API_KEY'):
@ -117,7 +95,7 @@ def main():
sys.exit('Error: input is empty.') sys.exit('Error: input is empty.')
print(f'Sending to {args.model}', file=sys.stderr) print(f'Sending to {args.model}', file=sys.stderr)
result = convert(raw, args.model, topic=args.topic) result = convert(raw, args.model)
if args.output: if args.output:
try: try: