Initial commit: laendleimmo.at scraper with auto-contact

- scraper.py: Playwright-based listing scraper with --minrooms, --maxprice,
  --house, --flat, --rent, --buy flags; auto-submits Anbieter-kontaktieren form;
  tracks contacted listings in contacted.json to avoid duplicates
- configure.py: questionary TUI wizard to set up contact_config.json
- .gitignore excludes contact_config.json and contacted.json

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
main
Amir Alexander Abdelbaki 2026-06-01 16:06:57 +02:00
commit 16203847aa
5 changed files with 698 additions and 0 deletions

15
.gitignore vendored Normal file
View File

@ -0,0 +1,15 @@
# user data — never commit
contact_config.json
contacted.json
# python
__pycache__/
*.py[cod]
.venv/
venv/
*.egg-info/
# editors
.vscode/
.idea/
*.swp

213
configure.py Normal file
View File

@ -0,0 +1,213 @@
#!/usr/bin/env python3
"""TUI wizard to configure contact details for the laendleimmo.at scraper."""
import json
import sys
from pathlib import Path
# questionary is the wizard's only third-party dependency; exit with an
# actionable hint instead of a traceback when it is not installed.
try:
    import questionary
    from questionary import Style
except ImportError:
    sys.exit("questionary is not installed. Run: pip install questionary")

# The contact settings live next to this script.
CONFIG_FILE = Path(__file__).parent / "contact_config.json"

# Colour scheme passed to every prompt by ask().
STYLE = Style(
    [
        ("qmark", "fg:#00aabb bold"),
        ("question", "bold"),
        ("answer", "fg:#00aabb bold"),
        ("pointer", "fg:#00aabb bold"),
        ("selected", "fg:#00aabb"),
        ("separator", "fg:#6c6c6c"),
        ("instruction", "fg:#858585 italic"),
    ]
)
# Built-in German enquiry text. Paragraphs are separated by blank lines, as in
# the sample contact_config.json; "{name}" stands in for the sender's name.
DEFAULT_MESSAGE = """\
Sehr geehrte Damen und Herren,

ich interessiere mich sehr für Ihre Immobilie und würde mich über einen \
Besichtigungstermin sehr freuen.

Bitte nehmen Sie Kontakt mit mir auf, damit wir einen Termin vereinbaren können.

Mit freundlichen Grüßen
{name}"""
# Horizontal rule printed around section headings. An empty string repeated
# 52 times is still empty, so the rule needs an actual glyph to be visible.
DIVIDER = "─" * 52
def ask(fn, *args, **kwargs):
    """Run a prompt with the shared STYLE and return its answer.

    ``fn`` is called with the given arguments plus ``style=STYLE`` and the
    returned object's ``.ask()`` result is handed back. A ``None`` answer is
    treated as a cancelled prompt (e.g. Ctrl-C): "Aborted." is printed and
    the process exits with status 0.
    """
    answer = fn(*args, style=STYLE, **kwargs).ask()
    if answer is not None:
        return answer
    print("\nAborted.")
    sys.exit(0)
def validate_email(v: str) -> bool | str:
return True if "@" in v and "." in v.split("@")[-1] else "Enter a valid e-mail address"
def enter_multiline(prompt: str, default: str = "") -> str:
    """Read a multi-line message from standard input.

    A single blank line is kept as a paragraph break; input ends when a blank
    line directly follows another blank line. The collected text is stripped,
    and ``default`` is returned when nothing was typed. Ctrl-C or end-of-input
    prints "Aborted." and exits with status 0.
    """
    print(f"\n {prompt}")
    # The on-screen instructions must match the two-blank-line terminator below.
    print(" (type your message; enter two blank lines in a row to finish)")
    if default:
        print(" Current value shown below — finish without typing anything to keep it:\n")
        print(" " + default.replace("\n", "\n "))
        print()

    lines: list[str] = []
    try:
        while True:
            line = input(" > ")
            if line == "" and lines and lines[-1] == "":
                # Second consecutive blank line: drop the first one and stop.
                lines.pop()
                break
            lines.append(line)
    except (KeyboardInterrupt, EOFError):
        print("\nAborted.")
        sys.exit(0)

    text = "\n".join(lines).strip()
    return text if text else default
def load_existing() -> dict:
    """Return the saved configuration, or ``{}`` when none is usable.

    A missing file yields an empty dict. A file that is not valid UTF-8 JSON,
    or whose top-level value is not an object, is reported and ignored so the
    wizard can start from scratch instead of crashing.
    """
    if not CONFIG_FILE.exists():
        return {}
    try:
        with open(CONFIG_FILE, encoding="utf-8") as f:
            data = json.load(f)
    except (json.JSONDecodeError, UnicodeDecodeError) as exc:
        print(f" [!] Ignoring unreadable config {CONFIG_FILE}: {exc}")
        return {}
    if not isinstance(data, dict):
        # Callers use .get() on the result, so anything but an object is unusable.
        print(f" [!] Ignoring config {CONFIG_FILE}: expected a JSON object")
        return {}
    return data
def show_summary(cfg: dict) -> None:
    """Print the stored settings and the location of the config file.

    The message is shortened to its first 60 characters with newlines
    flattened to spaces; the login password is never printed.
    """
    preview = cfg.get("message", "")[:60].replace("\n", " ")
    login = cfg.get("login_email", "")
    rows = [
        f"\n {DIVIDER}",
        " Saved configuration:",
        f" {DIVIDER}",
        f" Name : {cfg.get('name', '')}",
        f" E-mail : {cfg.get('email', '')}",
        f" Phone : {cfg.get('phone') or '(not set)'}",
        f" Message : {preview}",
        f" Login : {login if login else '(not configured)'}",
        f" {DIVIDER}",
        f" Config : {CONFIG_FILE}\n",
    ]
    for row in rows:
        print(row)
def main() -> None:
    """Interactively collect the contact settings and write them to CONFIG_FILE.

    The wizard has three steps: personal details, the contact message, and
    optional site login credentials. Values from an existing config are
    offered as defaults. When login is not configured, no login keys are
    written. The saved file contains the password in plain text, so its
    permissions are narrowed to the owner on a best-effort basis.
    """
    print()
    print(f" {DIVIDER}")
    print(" ländleimmo.at — Contact Configuration Wizard")
    print(f" {DIVIDER}\n")

    existing = load_existing()
    if existing:
        edit = ask(
            questionary.confirm,
            "Existing config found. Edit it?",
            default=True,
        )
        if not edit:
            show_summary(existing)
            return

    # ── Personal details ────────────────────────────────────────────────── #
    print(f"\n {DIVIDER}")
    print(" 1 / 3 Personal details")
    print(f" {DIVIDER}\n")
    name = ask(
        questionary.text,
        "Full name:",
        default=existing.get("name", ""),
    )
    email = ask(
        questionary.text,
        "E-mail address:",
        default=existing.get("email", ""),
        validate=validate_email,
    )
    phone = ask(
        questionary.text,
        "Phone number (optional — press Enter to skip):",
        default=existing.get("phone", ""),
    )

    # ── Message ─────────────────────────────────────────────────────────── #
    print(f"\n {DIVIDER}")
    print(" 2 / 3 Contact message")
    print(f" {DIVIDER}")
    print(" Tip: use {name} as a placeholder for your name.")
    choices = [
        questionary.Choice("Use built-in German template", value="default"),
        questionary.Choice("Write / edit message now", value="custom"),
    ]
    if existing.get("message"):
        # Only offered when there is something to keep.
        choices.append(questionary.Choice("Keep existing message", value="keep"))
    use_default_msg = ask(
        questionary.select,
        "Message template:",
        choices=choices,
    )
    if use_default_msg == "default":
        message = DEFAULT_MESSAGE
    elif use_default_msg == "keep":
        message = existing["message"]
    else:
        message = enter_multiline(
            "Your contact message:",
            default=existing.get("message", DEFAULT_MESSAGE),
        )

    # ── Site login (optional) ────────────────────────────────────────────── #
    print(f"\n {DIVIDER}")
    print(" 3 / 3 Site login (optional)")
    print(f" {DIVIDER}")
    print(" If you have an account on laendleimmo.at the scraper can log in")
    print(" first so personal data is pre-filled on the contact form.\n")
    use_login = ask(
        questionary.confirm,
        "Configure site login credentials?",
        default=bool(existing.get("login_email")),
    )
    login_email = ""
    login_password = ""
    if use_login:
        login_email = ask(
            questionary.text,
            "Site login e-mail:",
            # An empty stored value must not hide the contact e-mail default.
            default=existing.get("login_email") or email,
        )
        stored_password = existing.get("login_password", "")
        password_prompt = "Site login password:"
        if stored_password:
            password_prompt = "Site login password (leave blank to keep the current one):"
        # A blank answer keeps the stored password instead of wiping it.
        login_password = ask(questionary.password, password_prompt) or stored_password

    # ── Build and save ───────────────────────────────────────────────────── #
    cfg: dict = {
        "name": name,
        "email": email,
        "phone": phone,
        "message": message,
    }
    if use_login:
        cfg["login_email"] = login_email
        cfg["login_password"] = login_password

    CONFIG_FILE.parent.mkdir(parents=True, exist_ok=True)
    with open(CONFIG_FILE, "w", encoding="utf-8") as f:
        json.dump(cfg, f, indent=2, ensure_ascii=False)
    try:
        # The file may hold a plaintext password; restrict it to the owner.
        CONFIG_FILE.chmod(0o600)
    except OSError:
        # Best effort only: some filesystems do not support POSIX modes.
        pass

    show_summary(cfg)
    print(" Configuration saved successfully.\n")


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,8 @@
{
"name": "Max Mustermann",
"email": "max@example.com",
"phone": "+43 660 1234567",
"message": "Sehr geehrte Damen und Herren,\n\nich interessiere mich sehr für Ihre Immobilie und würde mich über einen Besichtigungstermin sehr freuen.\n\nBitte nehmen Sie Kontakt mit mir auf, damit wir einen Termin vereinbaren können.\n\nMit freundlichen Grüßen\nMax Mustermann",
"login_email": "",
"login_password": ""
}

2
requirements.txt Normal file
View File

@ -0,0 +1,2 @@
playwright>=1.44.0
questionary>=2.0.1

460
scraper.py Normal file
View File

@ -0,0 +1,460 @@
#!/usr/bin/env python3
"""laendleimmo.at scraper — finds listings and auto-submits the contact form."""
import argparse
import json
import sys
import time
import urllib.parse
from pathlib import Path
from typing import Optional
from playwright.sync_api import (
Browser,
Page,
sync_playwright,
TimeoutError as PWTimeout,
)
# Site root; search, listing and login URLs are built from it.
BASE_URL = "https://www.laendleimmo.at"
# Contact settings read by load_config(); stored next to this script.
CONFIG_FILE = Path(__file__).parent / "contact_config.json"
# JSON list of listing URLs whose contact form was already submitted.
CONTACTED_FILE = Path(__file__).parent / "contacted.json"
# --------------------------------------------------------------------------- #
# Config / state helpers
# --------------------------------------------------------------------------- #
def load_config() -> dict:
    """Return the parsed contact_config.json.

    Exits the process with a hint to run configure.py when the file is absent.
    """
    if CONFIG_FILE.exists():
        with open(CONFIG_FILE, encoding="utf-8") as fh:
            return json.load(fh)
    sys.exit(
        f"[!] Config not found at {CONFIG_FILE}\n"
        " Run: python configure.py"
    )
def load_contacted() -> set:
    """Return the entries stored in contacted.json as a set (empty if no file)."""
    if CONTACTED_FILE.exists():
        with open(CONTACTED_FILE, encoding="utf-8") as fh:
            return set(json.load(fh))
    return set()
def save_contacted(contacted: set) -> None:
    """Overwrite contacted.json with the sorted entries of ``contacted``."""
    with open(CONTACTED_FILE, "w", encoding="utf-8") as fh:
        fh.write(json.dumps(sorted(contacted), indent=2))
# --------------------------------------------------------------------------- #
# URL construction
# --------------------------------------------------------------------------- #
def build_search_urls(args) -> list[str]:
    """Build one search URL per (property type, marketing type) combination.

    ``args`` supplies ``flat``, ``house``, ``rent``, ``buy``, ``minrooms`` and
    ``maxprice``. With neither ``flat`` nor ``house`` set, both property types
    are searched; with neither ``rent`` nor ``buy`` set, no marketing-type
    filter is added. The room count is sent in float form (``2`` -> ``2.0``).
    """
    prop_types = [
        slug
        for wanted, slug in ((args.flat, "wohnung"), (args.house, "haus"))
        if wanted
    ] or ["wohnung", "haus"]

    # None means "no filter", i.e. all transaction types.
    marketing_types: list[Optional[str]] = [
        label
        for wanted, label in ((args.rent, "Mietobjekt"), (args.buy, "Kaufobjekt"))
        if wanted
    ] or [None]

    # Filters that are the same for every generated URL.
    shared: list[tuple[str, str]] = []
    if args.minrooms is not None:
        shared.append(("f[noOfRooms@f]", str(float(args.minrooms))))
    if args.maxprice is not None:
        shared.append(("f[price@t]", str(args.maxprice)))

    urls: list[str] = []
    for prop_type in prop_types:
        base = f"{BASE_URL}/{prop_type}/vorarlberg"
        for marketing_type in marketing_types:
            params = list(shared)
            if marketing_type:
                params.append(("f[marketingType]", marketing_type))
            if not params:
                urls.append(base)
                continue
            query = urllib.parse.urlencode(params, quote_via=urllib.parse.quote)
            urls.append(f"{base}?{query}")
    return urls
# --------------------------------------------------------------------------- #
# Scraping
# --------------------------------------------------------------------------- #
def scrape_listings(page: Page, url: str) -> tuple[list[dict], Optional[str]]:
    """Return ``(listings_on_page, next_page_url_or_None)`` for one result page.

    Each listing is a dict with ``url`` (absolute) and ``snippet`` (link text,
    newlines flattened, at most 120 characters). Links are resolved against
    ``url`` so relative and query-only hrefs become usable addresses. When the
    page cannot be loaded even with the weaker wait condition, a warning is
    printed and ``([], None)`` is returned.
    """
    try:
        page.goto(url, wait_until="networkidle", timeout=30_000)
    except PWTimeout:
        # The network may never go idle; settle for a parsed DOM instead.
        try:
            page.goto(url, wait_until="domcontentloaded", timeout=30_000)
        except PWTimeout:
            print(" [!] Page load timed out")
            return [], None

    listings: list[dict] = []
    seen: set[str] = set()
    for a in page.query_selector_all("a[href*='/immobilien/']"):
        href: Optional[str] = a.get_attribute("href")
        if not href:
            continue
        full_url = urllib.parse.urljoin(url, href)
        if full_url in seen:
            continue
        seen.add(full_url)
        snippet = (a.inner_text() or "").strip().replace("\n", " ")[:120]
        listings.append({"url": full_url, "snippet": snippet})

    # Next-page link — try common patterns
    next_url: Optional[str] = None
    for sel in [
        "a[aria-label='Nächste Seite']",
        "a[aria-label='Next']",
        "a[rel='next']",
        ".pagination a:last-child",
        "a.next",
    ]:
        el = page.query_selector(sel)
        if not el:
            continue
        href = el.get_attribute("href")
        # Fragment-only links stay on the current page and are not a next page.
        if href and not href.startswith("#"):
            next_url = urllib.parse.urljoin(url, href)
            break
    return listings, next_url
def collect_all_listings(
    page: Page,
    search_urls: list[str],
    max_listings: int,
    already_contacted: set,
) -> list[dict]:
    """Walk the result pages of every search URL and gather uncontacted listings.

    Listings whose URL is in ``already_contacted`` are skipped, and each
    listing URL is returned at most once even when it shows up on several
    pages or in several searches. Pagination stops when there is no next
    page, when the next page was already visited, or once ``max_listings``
    new listings have been collected. At most ``max_listings`` are returned.
    """
    all_new: list[dict] = []
    queued: set[str] = set()         # listing URLs already in all_new
    visited_pages: set[str] = set()  # result pages already scraped
    for search_url in search_urls:
        url: Optional[str] = search_url
        page_num = 1
        # "url not in visited_pages" breaks pagination cycles, e.g. a "next"
        # link on the last page that points back to an earlier page.
        while url and url not in visited_pages and len(all_new) < max_listings:
            visited_pages.add(url)
            print(f" [scrape] page {page_num}: {url}")
            listings, next_url = scrape_listings(page, url)
            new: list[dict] = []
            for listing in listings:
                link = listing["url"]
                if link in already_contacted or link in queued:
                    continue
                queued.add(link)
                new.append(listing)
            all_new.extend(new)
            print(f" {len(listings)} found, {len(new)} new (total new: {len(all_new)})")
            if len(all_new) >= max_listings:
                break
            url = next_url
            page_num += 1
            time.sleep(1.5)
    return all_new[:max_listings]
# --------------------------------------------------------------------------- #
# Contact form submission
# --------------------------------------------------------------------------- #
def _try_fill(page: Page, selectors: list[str], value: str) -> bool:
    """Fill the first visible element matched by ``selectors`` with ``value``.

    Selectors are tried in order. Returns True as soon as one element was
    filled, False when none matched, was visible, or could be filled.
    """
    for selector in selectors:
        try:
            field = page.query_selector(selector)
            if not field or not field.is_visible():
                continue
            field.fill(value)
            return True
        except Exception:
            # Best effort: a selector that errors counts as "no match".
            continue
    return False
def _try_check(page: Page, selectors: list[str]) -> None:
    """Tick the first visible, unchecked checkbox matched by ``selectors``.

    Selectors are tried in order and the function returns after ticking one
    box. Errors raised while probing a selector are ignored and the next
    selector is tried.
    """
    for selector in selectors:
        try:
            boxes = page.query_selector_all(selector)
            pending = next(
                (box for box in boxes if box.is_visible() and not box.is_checked()),
                None,
            )
            if pending is not None:
                pending.check()
                return
        except Exception:
            # Best effort: move on to the next selector.
            continue
def login(page: Page, config: dict) -> bool:
    """Fill in and submit the site's login form when credentials are configured.

    Returns False when ``login_email`` or ``login_password`` is missing, when
    the login page cannot be loaded, or when the e-mail field, the password
    field or a submit button cannot be found. Returns True once the form has
    been submitted; the page is not inspected afterwards, so True does not
    prove that the site accepted the credentials.
    """
    email = config.get("login_email", "")
    password = config.get("login_password", "")
    if not email or not password:
        return False

    login_url = f"{BASE_URL}/user/login"
    try:
        page.goto(login_url, wait_until="networkidle", timeout=30_000)
    except PWTimeout:
        # Same fallback as for listing pages: accept a parsed DOM.
        try:
            page.goto(login_url, wait_until="domcontentloaded", timeout=30_000)
        except PWTimeout:
            return False

    email_filled = _try_fill(
        page,
        ["input[type='email']", "input[name='email']", "input[name='username']"],
        email,
    )
    password_filled = _try_fill(
        page,
        ["input[type='password']", "input[name='password']"],
        password,
    )
    if not (email_filled and password_filled):
        # Submitting a half-filled form cannot log in; report failure instead.
        return False

    for sel in [
        "button[type='submit']",
        "input[type='submit']",
        "button:has-text('Anmelden')",
        "button:has-text('Login')",
    ]:
        btn = page.query_selector(sel)
        if btn and btn.is_visible():
            btn.click()
            try:
                page.wait_for_load_state("networkidle", timeout=10_000)
            except PWTimeout:
                pass
            return True
    return False
def submit_contact_form(page: Page, listing_url: str, config: dict) -> bool:
    """Navigate to a listing and submit its 'Anbieter kontaktieren' form.

    The name, e-mail and phone fields are filled when present. The message is
    taken from ``config["message"]`` with every ``{name}`` placeholder replaced
    by ``config["name"]``. Consent and other required checkboxes are ticked
    and the first visible submit button is clicked.

    Returns True when the resulting page contains a success keyword, or when
    it shows neither a CAPTCHA nor an error keyword. Returns False when the
    page cannot be loaded, no message field or submit button is found, a
    CAPTCHA is detected, or an error keyword is present.
    """
    try:
        page.goto(listing_url, wait_until="networkidle", timeout=30_000)
    except PWTimeout:
        try:
            page.goto(listing_url, wait_until="domcontentloaded", timeout=30_000)
        except PWTimeout:
            print(" [!] Page load timed out")
            return False

    # Scroll to bottom to trigger lazy-loaded form
    page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
    time.sleep(1.5)

    name = config.get("name", "")
    # The wizard advertises "{name}" as a placeholder; resolve it before sending.
    # str.replace is used so other braces in the message are left untouched.
    message = config.get("message", "").replace("{name}", name)

    # Personal data fields — may or may not be present (absent when logged in)
    _try_fill(
        page,
        [
            "input[name='name']",
            "input[name='fullName']",
            "input[name='contactName']",
            "input[placeholder*='Name']",
            "input[id*='name']:not([type='hidden'])",
        ],
        name,
    )
    _try_fill(
        page,
        [
            "input[type='email']",
            "input[name='email']",
            "input[name='contactEmail']",
            "input[placeholder*='Mail']",
        ],
        config.get("email", ""),
    )
    _try_fill(
        page,
        [
            "input[name='phone']",
            "input[name='telefon']",
            "input[name='tel']",
            "input[type='tel']",
            "input[placeholder*='Telefon']",
            "input[placeholder*='Phone']",
        ],
        config.get("phone", ""),
    )

    # Message textarea: known names first, then any visible textarea.
    filled_msg = _try_fill(
        page,
        [
            "textarea[name='message']",
            "textarea[name='nachricht']",
            "textarea[name='body']",
            "textarea[id*='message']",
            "textarea[id*='nachricht']",
        ],
        message,
    )
    if not filled_msg:
        for ta in page.query_selector_all("textarea"):
            if ta.is_visible():
                ta.fill(message)
                filled_msg = True
                break
    if not filled_msg:
        print(" [!] No message field found — form structure may have changed")
        return False

    # Required consent checkbox
    _try_check(
        page,
        [
            "input[type='checkbox'][name*='zustimm']",
            "input[type='checkbox'][name*='consent']",
            "input[type='checkbox'][name*='datenschutz']",
            "input[type='checkbox'][name*='agree']",
            "input[type='checkbox'][name*='accept']",
        ],
    )
    # All remaining visible required checkboxes
    for cb in page.query_selector_all("input[type='checkbox'][required]"):
        if cb.is_visible() and not cb.is_checked():
            cb.check()

    # Submit
    for sel in [
        "button:has-text('Anfrage senden')",
        "button:has-text('Anfrage absenden')",
        "button:has-text('Senden')",
        "button[type='submit']",
        "input[type='submit']",
    ]:
        btn = page.query_selector(sel)
        if not (btn and btn.is_visible()):
            continue
        btn.click()
        try:
            page.wait_for_load_state("networkidle", timeout=15_000)
        except PWTimeout:
            pass
        body = page.content().lower()
        # Check for success text
        if any(
            kw in body
            for kw in ["erfolgreich", "gesendet", "danke", "thank you", "wurde verschickt"]
        ):
            return True
        # Check for CAPTCHA
        if any(kw in body for kw in ["captcha", "robot", "recaptcha"]):
            print(" [!] CAPTCHA detected — manual action required")
            return False
        # Assume success if no obvious error page
        return "error" not in body and "fehler" not in body

    print(" [!] Submit button not found")
    return False
# --------------------------------------------------------------------------- #
# Main
# --------------------------------------------------------------------------- #
def main() -> None:
    """Parse the command line, scrape matching listings and contact them.

    With ``--dry-run`` the matches are only listed. Otherwise the contact
    form of each new listing is submitted, and every successful submission is
    written to contacted.json immediately so an interrupted run does not lose
    history. The browser is closed even when an error interrupts the run.
    """
    parser = argparse.ArgumentParser(
        description="Scrape laendleimmo.at and auto-contact matching listings.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
%(prog)s --flat --rent --minrooms 2 --maxprice 1200
%(prog)s --house --buy --minrooms 4 --maxprice 500000 --dry-run
%(prog)s --flat --rent --maxprice 900 --no-headless
""",
    )
    parser.add_argument("--minrooms", type=int, metavar="N", help="Minimum number of rooms")
    parser.add_argument("--maxprice", type=int, metavar="EUR", help="Maximum price in €")
    parser.add_argument("--house", action="store_true", help="Search for houses (Haus)")
    parser.add_argument("--flat", action="store_true", help="Search for flats/apartments (Wohnung)")
    parser.add_argument("--rent", action="store_true", help="Rentals only (Mietobjekte)")
    parser.add_argument("--buy", action="store_true", help="Purchases only (Kaufobjekte)")
    parser.add_argument(
        "--dry-run", action="store_true",
        help="List matches without sending any contact forms",
    )
    parser.add_argument(
        "--max-listings", type=int, default=50, metavar="N",
        help="Max new listings to process (default: 50)",
    )
    parser.add_argument(
        "--delay", type=float, default=3.0, metavar="SEC",
        help="Seconds between contact form submissions (default: 3)",
    )
    parser.add_argument(
        "--headless", action="store_true", default=True,
        help="Run browser headlessly (default)",
    )
    parser.add_argument(
        "--no-headless", dest="headless", action="store_false",
        help="Show browser window (useful for debugging / CAPTCHA solving)",
    )
    parser.add_argument(
        "--reset", action="store_true",
        help="Clear the contacted.json history and start fresh",
    )
    args = parser.parse_args()

    if args.reset:
        if CONTACTED_FILE.exists():
            CONTACTED_FILE.unlink()
        print("[i] contacted.json cleared.")

    config = load_config()
    contacted = load_contacted()
    search_urls = build_search_urls(args)
    print("Search URLs:")
    for u in search_urls:
        print(f" {u}")
    print(f"Already contacted: {len(contacted)} listings")
    print()

    with sync_playwright() as pw:
        browser: Browser = pw.chromium.launch(headless=args.headless)
        try:
            ctx = browser.new_context(
                user_agent=(
                    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                    "AppleWebKit/537.36 (KHTML, like Gecko) "
                    "Chrome/124.0.0.0 Safari/537.36"
                ),
            )
            page = ctx.new_page()

            # Optional login
            if config.get("login_email"):
                print("[i] Logging in...")
                ok = login(page, config)
                print(f" {'[OK] Logged in' if ok else '[!] Login failed — proceeding without login'}")
                print()

            # Collect listings
            print("[i] Scraping listings...")
            listings = collect_all_listings(page, search_urls, args.max_listings, contacted)
            print(f"\n[i] {len(listings)} new listings to process.\n")
            if not listings:
                print("Nothing new to contact.")
                return

            if args.dry_run:
                print("Dry-run mode — no forms will be submitted.\n")
                for i, listing in enumerate(listings, 1):
                    print(f" {i:3}. {listing['url']}")
                    if listing["snippet"]:
                        print(f" {listing['snippet']}")
            else:
                success_count = 0
                for i, listing in enumerate(listings, 1):
                    url = listing["url"]
                    print(f"[{i}/{len(listings)}] {url}")
                    if url in contacted:
                        # The same listing can be queued twice; never contact it again.
                        print(" [skip] Already contacted")
                        continue
                    ok = submit_contact_form(page, url, config)
                    if ok:
                        contacted.add(url)
                        # Persist right away so a crash does not lose history.
                        save_contacted(contacted)
                        success_count += 1
                        print(" [OK] Contact form submitted")
                    else:
                        print(" [FAIL] Could not submit form")
                    if i < len(listings):
                        time.sleep(args.delay)
                print(f"\nDone. {success_count}/{len(listings)} forms submitted.")
        finally:
            # Runs on normal completion, early return and errors alike.
            browser.close()


if __name__ == "__main__":
    main()