#!/usr/bin/env python3
"""
SYLink HoneyBot — Log forwarder vers UniSOC.

Toutes les 1 min :
  - Lit incrémental cowrie.json + opencanary/*.log + auth.log (offsets persistés)
  - Batch (max 200 events) → POST /api/honeypot/events/ingest (auth license_token)
  - Marque le offset si POST OK (au crash, on reprend où on s'est arrêté)

Mode "stockage local quand même" : on n'efface PAS les logs après forward
(la rotation 7j + le watchdog disque s'en chargent). Garde traçabilité locale
en cas d'investigation forensic VM.
"""
from __future__ import annotations

import json
import os
import sys
import urllib.request
from datetime import datetime, timezone
from pathlib import Path

# Install layout of the honeypot on the VM.
_INSTALL_ROOT = Path("/opt/sylink-honeypot")
CONFIG_DIR = _INSTALL_ROOT / "etc"
LOG_DIR = _INSTALL_ROOT / "log"

# State and configuration files, all kept under CONFIG_DIR.
OFFSETS = CONFIG_DIR.joinpath("forward_offsets.json")
LICENSE_FILE = CONFIG_DIR.joinpath("license.json")
LEAK_SIDECAR = CONFIG_DIR.joinpath("leak_creds.json")

# Base URL of the backend API; the HP_API_BASE environment variable overrides
# the default.
PT_API_BASE = os.getenv("HP_API_BASE", "https://api.unisoc.fr")

# Default cap on the number of lines read per source and per run.
MAX_BATCH = 200

# Source name (also the key used in the offsets file) → path of the log file
# to tail. Iteration order is the forwarding order.
_OPENCANARY_LOG_DIR = "/var/log/opencanary"
SOURCES = {
    "cowrie.json": "/var/log/cowrie/cowrie.json",
    "opencanary.log": f"{_OPENCANARY_LOG_DIR}/opencanary.log",
    "veeam-fake.log": f"{_OPENCANARY_LOG_DIR}/veeam-fake.log",
}


def log(msg: str):
    """Print a UTC-timestamped message and append it to the forwarder log file.

    The line is always printed to stdout (flushed immediately). The copy in
    ``LOG_DIR / "log_forwarder.log"`` is best-effort: if the directory cannot
    be created or the file cannot be written, a warning goes to stderr and the
    caller carries on, so a logging problem never aborts a forwarding run
    (``log`` is also called from ``except`` handlers).

    Args:
        msg: Text to log, without timestamp or trailing newline.
    """
    line = f"[{datetime.now(timezone.utc).isoformat()}] {msg}"
    print(line, flush=True)
    try:
        LOG_DIR.mkdir(parents=True, exist_ok=True)
        # Messages contain non-ASCII characters ("échec", "→"): force UTF-8 so
        # the write does not depend on the locale the scheduler runs under.
        with (LOG_DIR / "log_forwarder.log").open("a", encoding="utf-8") as f:
            f.write(line + "\n")
    except OSError as exc:
        print(f"log_forwarder: cannot write log file: {exc}", file=sys.stderr)


def read_token() -> str | None:
    """Return the ``license_token`` value stored in LICENSE_FILE, or None.

    None is returned when the file does not exist, when the key is absent, or
    when reading, parsing, or looking up the key raises any exception.
    """
    if LICENSE_FILE.exists():
        try:
            license_data = json.loads(LICENSE_FILE.read_text())
            return license_data.get("license_token")
        except Exception:
            # Unreadable or malformed license file: treated as "no license".
            pass
    return None


def load_offsets() -> dict:
    """Load the persisted per-source read offsets from OFFSETS.

    Returns:
        A dict mapping source name to offset. It is empty when the file is
        missing, unreadable, not valid JSON, or not a JSON object. Entries
        whose value is not a non-negative integer are dropped, so the
        corresponding source restarts from offset 0 instead of failing on
        every read.
    """
    if not OFFSETS.exists():
        return {}
    try:
        data = json.loads(OFFSETS.read_text())
    except Exception:
        # Best-effort: a corrupt offsets file must not stop the forwarder.
        return {}
    if not isinstance(data, dict):
        # Valid JSON but not an object (e.g. [] or null): callers use .get().
        return {}
    return {
        src: pos
        for src, pos in data.items()
        # bool is a subclass of int; True/False are not meaningful offsets.
        if isinstance(pos, int) and not isinstance(pos, bool) and pos >= 0
    }


def save_offsets(d: dict):
    """Persist the per-source offsets to OFFSETS as indented JSON.

    The data is first written to a temporary file in the same directory and
    then moved over OFFSETS with ``os.replace``. A crash in the middle of the
    write therefore cannot leave a truncated offsets file, which would be read
    back as "no offsets" and cause every log to be forwarded again from 0.

    Args:
        d: Mapping of source name to offset; must be JSON-serializable.

    Raises:
        OSError: If the temporary file cannot be written or renamed.
    """
    tmp_path = OFFSETS.with_name(OFFSETS.name + ".tmp")
    tmp_path.write_text(json.dumps(d, indent=2))
    os.replace(tmp_path, OFFSETS)


def load_leak_sidecar() -> dict:
    """Load the leaked-credentials index from LEAK_SIDECAR.

    ``enrich_event`` looks entries up by a ``"username|password"`` key and
    calls ``.get()`` on the matching value, so only dict-valued entries of a
    JSON object are usable.

    Returns:
        A dict of key → metadata dict. It is empty when the file is missing,
        unreadable, not valid JSON, or not a JSON object. Entries whose value
        is not a dict are dropped.
    """
    if not LEAK_SIDECAR.exists():
        return {}
    try:
        data = json.loads(LEAK_SIDECAR.read_text())
    except Exception:
        # Best-effort: enrichment is optional, never block forwarding on it.
        return {}
    if not isinstance(data, dict):
        return {}
    return {key: meta for key, meta in data.items() if isinstance(meta, dict)}


def enrich_event(e: dict, leak_index: dict) -> dict:
    """Attach leak metadata to a login event whose credentials are in the index.

    An event qualifies when it is a dict whose ``eventid`` contains "login"
    and whose ``"username|password"`` pair (missing/empty values count as "")
    is a key of ``leak_index``. In that case a copy of the event is returned
    with ``_leak_match``, ``_leak_source``, ``_leak_date`` and
    ``_leak_malware`` added; the input event is not modified. In every other
    case the input object is returned unchanged.
    """
    is_login = isinstance(e, dict) and "login" in str(e.get("eventid", ""))
    if not is_login:
        return e

    lookup = "{}|{}".format(e.get("username") or "", e.get("password") or "")
    if lookup not in leak_index:
        return e

    meta = leak_index[lookup]
    return {
        **e,
        "_leak_match": True,
        "_leak_source": meta.get("source"),
        "_leak_date": meta.get("leak_date"),
        "_leak_malware": meta.get("malware_family"),
    }


def read_new_lines(path: str, offset: int, max_lines: int = MAX_BATCH) -> tuple[list[str], int]:
    """Read up to ``max_lines`` complete lines of ``path`` starting at ``offset``.

    The file is read in binary mode so that ``offset`` and the returned
    position are real byte offsets, directly comparable with the file size
    (text-mode ``tell()`` values are opaque). Lines are decoded as UTF-8 with
    undecodable bytes replaced.

    Only newline-terminated lines are consumed: a last line that the writer
    has not finished yet is left in place and picked up by a later call, so an
    event is never forwarded in two halves.

    Truncation (e.g. logrotate copytruncate) is handled: if the file is now
    smaller than ``offset``, reading restarts from 0.

    Args:
        path: Path of the log file.
        offset: Byte position reached by the previous call.
        max_lines: Maximum number of lines to read, blank lines included.

    Returns:
        ``(lines, new_offset)``. ``lines`` holds the non-blank lines with
        trailing whitespace stripped; ``new_offset`` is the byte position just
        after the last complete line read. If the file does not exist the
        result is ``([], offset)``; if reading fails it is ``([], offset)``
        with ``offset`` already reset to 0 when a truncation was detected.
    """
    p = Path(path)
    if not p.exists():
        return [], offset
    try:
        size = p.stat().st_size
        if size < offset:
            log(f"  {path} truncated (size={size} < offset={offset}) → reset offset 0")
            offset = 0
        lines: list[str] = []
        new_offset = offset
        with open(p, "rb") as f:
            f.seek(offset)
            for _ in range(max_lines):
                raw = f.readline()
                if not raw.endswith(b"\n"):
                    # EOF, or a line still being written: do not advance past it.
                    break
                new_offset = f.tell()
                text = raw.decode("utf-8", errors="replace")
                if text.strip():
                    lines.append(text.rstrip())
        return lines, new_offset
    except Exception as e:
        log(f"  read {path} échec : {e}")
        return [], offset


def push_batch(token: str, source: str, events: list[dict]) -> bool:
    """POST one batch of events to the ingest endpoint of the backend API.

    The body is the JSON object ``{"events": events, "source": source}``,
    sent with a Bearer ``token`` and a 20-second timeout.

    Returns:
        True only when the response body is JSON with a truthy ``"ok"`` field.
        Any exception raised while sending or decoding the response is logged
        and reported as False.
    """
    url = f"{PT_API_BASE}/api/honeypot/events/ingest"
    payload = {"events": events, "source": source}
    headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
    }
    request = urllib.request.Request(
        url,
        data=json.dumps(payload).encode(),
        method="POST",
        headers=headers,
    )
    try:
        with urllib.request.urlopen(request, timeout=20) as response:
            body = response.read()
        reply = json.loads(body)
        accepted = bool(reply.get("ok"))
    except Exception as e:
        log(f"  POST {url} échec : {e}")
        accepted = False
    return accepted


def _parse_events(src: str, path: str, lines: list[str], leak_index: dict) -> list[dict]:
    """Convert raw log lines of one source into enriched event dicts."""
    is_json = path.endswith(".json")
    events = []
    for ln in lines:
        parsed = None
        if is_json:
            try:
                parsed = json.loads(ln)
            except json.JSONDecodeError:
                parsed = None
        if not isinstance(parsed, dict):
            # Text source, malformed JSON, or a JSON value that is not an
            # object: wrap the raw line so the batch only contains dicts.
            parsed = {"raw_line": ln, "eventid": f"text.{src}"}
        events.append(enrich_event(parsed, leak_index))
    return events


def main() -> int:
    """Run one forwarding pass over every source, then send a heartbeat if idle.

    For each entry of SOURCES: read the new lines since the stored offset,
    parse and enrich them, POST them as one batch, and advance the offset only
    when the POST succeeded. Offsets are saved once at the end. When no event
    was forwarded, a lightweight POST to /license/verify is sent instead so
    the backend still sees the honeypot as alive.

    Returns:
        0 normally (including when no license token is available, in which
        case nothing is done); 1 if the offsets file could not be saved.
    """
    log("== log_forwarder start ==")
    token = read_token()
    if not token:
        log("license absente — skip")
        return 0

    offsets = load_offsets()
    leak_index = load_leak_sidecar()
    total = 0

    for src, path in SOURCES.items():
        offset = offsets.get(src, 0)
        lines, new_offset = read_new_lines(path, offset)
        if not lines:
            # Nothing to send, but the position may still have moved
            # (truncation reset, blank lines skipped): keep it so the same
            # bytes are not re-examined on every run.
            if new_offset != offset:
                offsets[src] = new_offset
            continue

        events = _parse_events(src, path, lines, leak_index)

        if push_batch(token, src, events):
            offsets[src] = new_offset
            total += len(events)
            log(f"  {src} : {len(events)} events forwardés (offset {offset} -> {new_offset})")
        else:
            log(f"  {src} : POST échec, offset NON sauvegardé (réessai au prochain run)")

    exit_code = 0
    try:
        save_offsets(offsets)
    except OSError as e:
        # Keep going so the heartbeat is still sent; report failure on exit.
        log(f"  sauvegarde offsets échec : {e}")
        exit_code = 1

    # Guaranteed heartbeat: a lightweight POST to license/verify even when
    # there are no events, so that last_seen on the backend stays under
    # 1 min (otherwise the honeypot flaps to "offline").
    if total == 0:
        try:
            req = urllib.request.Request(
                f"{PT_API_BASE}/api/honeypot/license/verify",
                method="POST",
                data=b"{}",
                headers={"Authorization": f"Bearer {token}", "Content-Type": "application/json"},
            )
            # Context manager so the HTTP response is always closed.
            with urllib.request.urlopen(req, timeout=8) as resp:
                resp.read()
            log("heartbeat /license/verify ok")
        except Exception as e:
            log(f"heartbeat échec : {e}")

    log(f"== log_forwarder done (total {total} events) ==")
    return exit_code


if __name__ == "__main__":
    sys.exit(main())
