#!/usr/bin/env python3
"""
Validador local SII: valida un XML contra un XSD oficial (sin certificado).
- Descarga el XSD (y sus imports/includes) a caché local la primera vez.
- Valida usando lxml.etree.XMLSchema.

Uso:
  python validate_sii_xsd.py --xml archivo.xml --schema SuministroLR.xsd
"""
from __future__ import annotations
import argparse
import json
import re
import sys
import urllib.request
from pathlib import Path
from urllib.parse import urljoin, urlparse
from lxml import etree

# Default directory for cached XSD files (used as the default of --cache).
# It is a relative path, so it resolves against the current working directory.
DEFAULT_CACHE = Path("xsd_cache")
# JSON file located next to this script; ensure_schema() reads it as a
# mapping of schema name -> download URL.
SCHEMAS_JSON = Path(__file__).with_name("schemas.json")

def _download(url: str) -> bytes:
    req = urllib.request.Request(url, headers={"User-Agent": "SII-local-validator/1.0"})
    with urllib.request.urlopen(req, timeout=30) as resp:
        return resp.read()

def _safe_name(url: str) -> str:
    p = urlparse(url)
    name = Path(p.path).name or "schema.xsd"
    return re.sub(r"[^A-Za-z0-9_.-]+", "_", name)

def _rewrite_schema_locations(
    xsd_bytes: bytes,
    base_url: str,
    cache_dir: Path,
    _in_progress: set[str] | None = None,
) -> bytes:
    """Make an XSD document self-contained inside *cache_dir*.

    Every element carrying a non-empty ``schemaLocation`` attribute is
    processed as follows: the location is resolved against *base_url*, the
    target is downloaded into *cache_dir* if it is not cached yet (and is
    rewritten recursively in the same way), and the attribute is replaced by
    the local file name.

    Cache entries are keyed only by the sanitized last path component of the
    URL, so two different URLs with the same file name share one cache entry.

    Args:
        xsd_bytes: Raw XSD document.
        base_url: URL the document was fetched from; relative locations are
            resolved against it.
        cache_dir: Existing directory where dependencies are stored.
        _in_progress: Internal. Local names currently being rewritten further
            up the recursion; callers should leave it unset.

    Returns:
        The rewritten document serialized as UTF-8 with an XML declaration.

    Raises:
        RuntimeError: If a referenced schema cannot be downloaded.
        Parsing errors raised by lxml propagate unchanged.
    """
    if _in_progress is None:
        _in_progress = set()

    # Entities and network access are disabled: the document comes from the network.
    parser = etree.XMLParser(resolve_entities=False, no_network=True, recover=False)
    root = etree.fromstring(xsd_bytes, parser=parser)

    for el in root.xpath('//*[@schemaLocation]'):
        # xs:anyURI values are whitespace-collapsed, so surrounding blanks are not significant.
        loc = (el.get("schemaLocation") or "").strip()
        if not loc:
            continue
        target_url = loc if urlparse(loc).scheme else urljoin(base_url, loc)
        local_name = _safe_name(target_url)
        local_path = cache_dir / local_name

        # A name in _in_progress is being rewritten by an outer call and will be
        # written when that call returns. Skipping it here breaks circular
        # imports/includes (A -> B -> A), which would otherwise recurse forever.
        if local_name not in _in_progress and not local_path.exists():
            try:
                data = _download(target_url)
            except Exception as e:
                raise RuntimeError(f"No pude descargar import/include: {target_url}\nDetalle: {e}") from e
            _in_progress.add(local_name)
            try:
                rewritten = _rewrite_schema_locations(data, target_url, cache_dir, _in_progress)
            finally:
                _in_progress.discard(local_name)
            # Write to a temporary name and rename, so an interrupted write never
            # leaves a truncated file that later runs would accept as cached.
            tmp_path = cache_dir / (local_name + ".part")
            tmp_path.write_bytes(rewritten)
            tmp_path.replace(local_path)

        el.set("schemaLocation", local_name)

    return etree.tostring(root, xml_declaration=True, encoding="utf-8", pretty_print=True)

def ensure_schema(schema_name: str, cache_dir: Path) -> Path:
    """Return the local path of *schema_name*, downloading it on first use.

    The download URL is looked up in ``schemas.json`` (a name -> URL mapping
    stored next to this script). If the schema is not yet in *cache_dir*, it
    is downloaded together with its imports/includes, rewritten to reference
    local files, and stored as ``cache_dir / schema_name``.

    Args:
        schema_name: Key in ``schemas.json``; also used as the cache file name.
        cache_dir: Cache directory; created if it does not exist.

    Returns:
        Path of the cached, rewritten XSD.

    Raises:
        FileNotFoundError: If ``schemas.json`` does not exist.
        KeyError: If *schema_name* is not a key of ``schemas.json``.
        Download and parsing errors from the helpers propagate unchanged.
    """
    if not SCHEMAS_JSON.exists():
        raise FileNotFoundError(f"No existe {SCHEMAS_JSON}")
    mapping = json.loads(SCHEMAS_JSON.read_text(encoding="utf-8"))
    if schema_name not in mapping:
        raise KeyError(f"Schema '{schema_name}' no está en schemas.json. Disponibles: {', '.join(mapping.keys())}")
    url = mapping[schema_name]
    cache_dir.mkdir(parents=True, exist_ok=True)
    out = cache_dir / schema_name
    if not out.exists():
        data = _download(url)
        rewritten = _rewrite_schema_locations(data, url, cache_dir)
        # Existence of `out` is the only cache-validity check, so the file must
        # appear atomically: write to a temporary name, then rename over it.
        tmp = out.with_name(out.name + ".part")
        tmp.write_bytes(rewritten)
        tmp.replace(out)
    return out

def main() -> None:
    """Command-line entry point: validate an XML file against a cached XSD.

    Always terminates through ``sys.exit`` with one of these codes:
        0 - the XML is valid against the schema
        1 - the XML is not valid (errors are listed on stdout)
        2 - the XML file does not exist
        3 - the XSD could not be parsed/compiled
        4 - the XML could not be parsed
        5 - the XSD could not be obtained (missing schemas.json, unknown
            schema name, download or rewrite failure)
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--xml", required=True, help="Ruta del XML a validar")
    ap.add_argument("--schema", required=True, help="Nombre del XSD (ej: SuministroLR.xsd)")
    ap.add_argument("--cache", default=str(DEFAULT_CACHE), help="Directorio de caché de XSDs (default: xsd_cache/)")
    args = ap.parse_args()

    xml_path = Path(args.xml)
    if not xml_path.exists():
        print(f"ERROR: No existe el XML: {xml_path}", file=sys.stderr)
        sys.exit(2)

    cache_dir = Path(args.cache)
    # Without this handler a failure here would surface as a traceback with
    # exit status 1, indistinguishable from the "XML not valid" result.
    try:
        xsd_path = ensure_schema(args.schema, cache_dir)
    except Exception as e:
        # str(KeyError) is the repr of its message (quoted/escaped); use the raw text.
        detail = e.args[0] if isinstance(e, KeyError) and e.args else e
        print(f"ERROR: No pude obtener el XSD '{args.schema}':\n{detail}", file=sys.stderr)
        sys.exit(5)

    try:
        xsd_doc = etree.parse(str(xsd_path))
        schema = etree.XMLSchema(xsd_doc)
    except Exception as e:
        print(f"ERROR: No pude compilar el XSD {xsd_path}:\n{e}", file=sys.stderr)
        sys.exit(3)

    try:
        xml_doc = etree.parse(str(xml_path))
    except Exception as e:
        print(f"ERROR: El XML no es parseable: {xml_path}\n{e}", file=sys.stderr)
        sys.exit(4)

    ok = schema.validate(xml_doc)
    if ok:
        print("✅ XML VÁLIDO contra", args.schema)
        sys.exit(0)

    print("❌ XML NO válido contra", args.schema)
    for err in schema.error_log:
        print(f"- Línea {err.line}: {err.message}")
    sys.exit(1)

# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
