Compare commits
2 Commits
0eac070dac
...
8ec0d3be41
| Author | SHA1 | Date |
|---|---|---|
|
|
8ec0d3be41 | |
|
|
5909a3ac9b |
|
|
@ -18,7 +18,7 @@ from docx import Document
|
|||
from docx.enum.text import WD_ALIGN_PARAGRAPH
|
||||
from docx.oxml import OxmlElement
|
||||
from docx.oxml.ns import qn
|
||||
from docx.shared import Pt
|
||||
from docx.shared import Inches, Pt
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -159,10 +159,15 @@ def _md_to_doc(doc, content):
|
|||
# Document assembly
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def build_docx(md_dir: Path, output: Path, title: str):
|
||||
def build_docx(md_dir: Path, output: Path, title: str, logo: Path | None = None):
|
||||
doc = Document()
|
||||
|
||||
# Title page
|
||||
if logo is not None:
|
||||
logo_para = doc.add_paragraph()
|
||||
logo_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
|
||||
logo_para.add_run().add_picture(str(logo), width=Inches(2))
|
||||
|
||||
heading = doc.add_heading(title, level=0)
|
||||
heading.alignment = WD_ALIGN_PARAGRAPH.CENTER
|
||||
date_para = doc.add_paragraph(f'Generated: {datetime.now().strftime("%Y-%m-%d")}')
|
||||
|
|
@ -216,13 +221,20 @@ def main():
|
|||
parser.add_argument('--dir', '-d', default='.', help='Directory containing .md files (default: .)')
|
||||
parser.add_argument('--output', '-o', default='documentation.docx', help='Output .docx path')
|
||||
parser.add_argument('--title', '-t', default='Project Documentation', help='Document title')
|
||||
parser.add_argument('--logo', '-l', default=None, help='Path to logo image (PNG/JPG) for the title page')
|
||||
args = parser.parse_args()
|
||||
|
||||
md_dir = Path(args.dir).resolve()
|
||||
if not md_dir.is_dir():
|
||||
sys.exit(f'Error: {md_dir} is not a directory')
|
||||
|
||||
build_docx(md_dir, Path(args.output), args.title)
|
||||
logo_path = None
|
||||
if args.logo:
|
||||
logo_path = Path(args.logo).resolve()
|
||||
if not logo_path.is_file():
|
||||
sys.exit(f'Error: logo file not found: {logo_path}')
|
||||
|
||||
build_docx(md_dir, Path(args.output), args.title, logo=logo_path)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
|
|||
38
raw_to_md.py
38
raw_to_md.py
|
|
@ -20,11 +20,7 @@ import sys
|
|||
|
||||
import anthropic
|
||||
|
||||
SYSTEM_PROMPT = """\
|
||||
You are a technical documentation writer. Your task is to convert raw, unstructured text \
|
||||
into a clean, well-structured Markdown document.
|
||||
|
||||
Guidelines:
|
||||
_BASE_GUIDELINES = """\
|
||||
- Add a ## Summary section at the very top with a 2-3 sentence overview.
|
||||
- Organise the rest chronologically when dates, versions, or sequence cues are present; \
|
||||
otherwise group by logical topic.
|
||||
|
|
@ -37,13 +33,34 @@ Guidelines:
|
|||
- Output ONLY the Markdown — no preamble, explanation, or trailing commentary.\
|
||||
"""
|
||||
|
||||
_FILTER_GENERIC = (
|
||||
"- Silently discard any passages that are clearly noise, off-topic, or unrelated "
|
||||
"to the main subject of the input (e.g. unrelated chat messages, ads, system logs "
|
||||
"from other contexts). Do not mention the omission."
|
||||
)
|
||||
|
||||
def convert(raw_text: str, model: str) -> str:
|
||||
_FILTER_TOPIC = (
|
||||
"- This document is about: {topic}. "
|
||||
"Silently discard any passages that are clearly unrelated to this topic. "
|
||||
"Do not mention the omission."
|
||||
)
|
||||
|
||||
|
||||
def _build_system_prompt(topic: str | None) -> str:
    """Assemble the system prompt sent with the conversion request.

    The prompt consists of the fixed role description, a ``Guidelines:``
    header followed by ``_BASE_GUIDELINES``, and exactly one filtering
    guideline appended on its own line.

    Args:
        topic: Optional description of what the document is about. When it
            contains non-whitespace text, ``_FILTER_TOPIC`` is used with the
            trimmed topic; otherwise ``_FILTER_GENERIC`` is used.

    Returns:
        The complete system prompt string.
    """
    # A whitespace-only topic (e.g. ``--topic " "``) carries no information;
    # treat it like a missing topic instead of emitting "about:   .".
    cleaned_topic = topic.strip() if topic else ""
    if cleaned_topic:
        filter_line = _FILTER_TOPIC.format(topic=cleaned_topic)
    else:
        filter_line = _FILTER_GENERIC

    return (
        "You are a technical documentation writer. Your task is to convert raw, "
        "unstructured text into a clean, well-structured Markdown document.\n\n"
        f"Guidelines:\n{_BASE_GUIDELINES}\n{filter_line}"
    )
|
||||
|
||||
|
||||
def convert(raw_text: str, model: str, topic: str | None = None) -> str:
|
||||
client = anthropic.Anthropic()
|
||||
response = client.messages.create(
|
||||
model=model,
|
||||
max_tokens=8192,
|
||||
system=SYSTEM_PROMPT,
|
||||
system=_build_system_prompt(topic),
|
||||
messages=[
|
||||
{
|
||||
'role': 'user',
|
||||
|
|
@ -71,6 +88,11 @@ def main():
|
|||
default='claude-sonnet-4-6',
|
||||
help='Claude model ID (default: claude-sonnet-4-6)',
|
||||
)
|
||||
parser.add_argument(
|
||||
'--topic', '-p',
|
||||
default=None,
|
||||
help='Project topic or description — used to filter out unrelated content',
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
if not os.environ.get('ANTHROPIC_API_KEY'):
|
||||
|
|
@ -95,7 +117,7 @@ def main():
|
|||
sys.exit('Error: input is empty.')
|
||||
|
||||
print(f'Sending to {args.model}…', file=sys.stderr)
|
||||
result = convert(raw, args.model)
|
||||
result = convert(raw, args.model, topic=args.topic)
|
||||
|
||||
if args.output:
|
||||
try:
|
||||
|
|
|
|||
Loading…
Reference in New Issue