#!/usr/bin/env python3
"""
Hueprint Web Simulator — scan public websites for the same color-quality
issues we look for in Swift code.

Proof of concept: the same color science rules transfer 1-for-1 from
SwiftUI to CSS. Different parser, same math.
"""

import re
import urllib.request
import urllib.parse
import json
from datetime import datetime
from pathlib import Path

# Output directory for results.json; fetched pages/stylesheets are cached in
# the "cache" subdirectory.
ROOT = Path("/tmp/hueprint-simulator/web")
# parents=True: the intermediate "/tmp/hueprint-simulator" directory is not
# guaranteed to exist, and a plain mkdir() raises FileNotFoundError then.
ROOT.mkdir(parents=True, exist_ok=True)
CACHE = ROOT / "cache"
CACHE.mkdir(parents=True, exist_ok=True)

# (display name, homepage URL) pairs; main() audits them in this order.
SITES = [
    ("Apple",       "https://www.apple.com/"),
    ("Adobe Color", "https://color.adobe.com/"),
    ("Behance",     "https://www.behance.net/"),
    ("Figma",       "https://www.figma.com/"),
    ("Google",      "https://www.google.com/"),
    ("YouTube",     "https://www.youtube.com/"),
    ("Spotify",     "https://open.spotify.com/"),
    ("Linear",      "https://linear.app/"),
    ("Vercel",      "https://vercel.com/"),
    ("Shopify",     "https://www.shopify.com/"),
    ("Stripe",      "https://stripe.com/"),
    ("Tailwind",    "https://tailwindcss.com/"),
    ("Notion",      "https://www.notion.so/"),
    ("Framer",      "https://www.framer.com/"),
]

# User-Agent header sent by fetch(); identifies as desktop Safari 17 on macOS.
# NOTE(review): presumably chosen so sites serve browser markup instead of
# blocking the default urllib agent — confirm.
UA = "Mozilla/5.0 (Macintosh; Intel Mac OS X 14_0) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Safari/605.1.15"

def fetch(url, timeout=20):
    """Return the body of *url* as text, backed by an on-disk cache.

    The cache file name is the URL with every run of non-alphanumeric
    characters replaced by "_". Names longer than 200 characters are cut and
    suffixed with a hash of the full URL so that two long URLs sharing a
    prefix do not map to the same cache entry.

    Args:
        url: Absolute URL to download.
        timeout: Socket timeout in seconds passed to ``urlopen``.

    Returns:
        The response body decoded as UTF-8 (undecodable bytes dropped), or
        ``""`` when the request fails for any reason. This function is
        best-effort and never raises on network errors.
    """
    import hashlib  # local import: only needed for over-long cache keys

    cache_key = re.sub(r'[^A-Za-z0-9]+', '_', url)
    if len(cache_key) > 200:
        # Plain truncation would make distinct long URLs collide and return
        # each other's cached content; keep the length at 200 but make the
        # tail unique to the full URL.
        digest = hashlib.sha256(url.encode("utf-8")).hexdigest()[:16]
        cache_key = f"{cache_key[:183]}_{digest}"
    cache_path = CACHE / cache_key
    if cache_path.exists():
        return cache_path.read_text(encoding="utf-8", errors="ignore")

    try:
        # Request() itself raises ValueError on malformed URLs, so it belongs
        # inside the best-effort guard as well.
        req = urllib.request.Request(url, headers={"User-Agent": UA})
        with urllib.request.urlopen(req, timeout=timeout) as r:
            body = r.read().decode("utf-8", errors="ignore")
    except Exception as e:
        # Deliberately broad: one bad site must not abort the whole run,
        # but the reason is reported instead of being silently dropped.
        print(f"  [warn] fetch failed for {url}: {e}")
        return ""

    try:
        # Explicit encoding so the file round-trips with the cached read
        # above regardless of the platform's default encoding.
        cache_path.write_text(body, encoding="utf-8")
    except OSError as e:
        # A cache write failure must not discard a successfully fetched body.
        print(f"  [warn] could not cache {url}: {e}")
    return body

def absolutize(href, base):
    """Resolve *href* (possibly relative) against *base*; return the absolute URL."""
    return urllib.parse.urljoin(base, href)

def find_css_urls(html, base_url):
    """Return absolute URLs of <link rel=stylesheet ...> entries.

    Args:
        html: Raw HTML text of the page.
        base_url: URL the page was fetched from; relative hrefs are resolved
            against it.

    Returns:
        A list of absolute stylesheet URLs in document order, without
        duplicates.
    """
    # Local import: the `html` parameter shadows the stdlib module name.
    from html import unescape

    # Accept double-quoted, single-quoted and unquoted values. The lookbehind
    # keeps attributes such as data-href from being mistaken for href.
    href_re = re.compile(
        r'(?<![\w-])href\s*=\s*(?:"([^"]*)"|\'([^\']*)\'|([^\s"\'>]+))',
        re.I,
    )

    urls = []
    seen = set()
    for m in re.finditer(r'<link\s+[^>]*rel\s*=\s*["\']?stylesheet["\']?[^>]*>', html, re.I):
        href_m = href_re.search(m.group(0))
        if not href_m:
            continue
        raw = next(g for g in href_m.groups() if g is not None).strip()
        if not raw:
            continue
        # Attribute values are HTML-escaped in the markup ("&amp;" for "&");
        # decode before using the value as a URL.
        url = absolutize(unescape(raw), base_url)
        # The same sheet is often linked more than once; returning it once
        # keeps callers from fetching and counting it repeatedly.
        if url not in seen:
            seen.add(url)
            urls.append(url)
    return urls

def find_inline_styles(html):
    """Pull <style>...</style> blocks + style="..." inline attrs.

    Args:
        html: Raw HTML text of the page.

    Returns:
        A single string: the bodies of all ``<style>`` elements joined by
        newlines, then a newline, then all non-empty ``style`` attribute
        values joined by newlines.
    """
    blocks = re.findall(r'<style[^>]*>(.*?)</style>', html, re.I | re.S)
    # Each quote style is matched against itself so a value such as
    # style="background:url('a.png') ..." is not cut off at the inner quote.
    # The lookbehind skips look-alike attributes such as data-style.
    attr_values = re.findall(
        r'(?<![\w-])style\s*=\s*(?:"([^"]*)"|\'([^\']*)\')',
        html,
        re.I,
    )
    inline = [dq or sq for dq, sq in attr_values if dq or sq]
    return "\n".join(blocks) + "\n" + "\n".join(inline)

# ---------- Rules ----------

# R1: gradient functions without modern interpolation hint
# (\b also matches inside repeating-* and vendor-prefixed gradient names).
GRADIENT_RE = re.compile(
    r'\b(linear-gradient|radial-gradient|conic-gradient)\s*\(',
    re.I,
)
# "in <color-space>" for every interpolation space other than the sRGB-based
# ones (srgb, hsl, hwb). "xyz" also covers xyz-d50 / xyz-d65.
MODERN_INTERP_RE = re.compile(
    r'\bin\s+(?:oklch|oklab|srgb-linear|display-p3|a98-rgb|prophoto-rgb|rec2020'
    r'|lch|lab|xyz)\b',
    re.I,
)
# Upper bound on how far past "(" we look for the end of a gradient's first
# argument; keeps the scan bounded on truncated or malformed CSS.
_PRELUDE_SCAN_LIMIT = 512

# R2: sRGB color tokens that are NOT using wide-gamut color() / oklch() / oklab()
# Hex colors are only valid with 3, 4, 6 or 8 digits; the lookahead rejects
# longer hex runs and identifiers such as "#add-button". rgb()/hsl()/hwb() are
# all sRGB notations. Heuristic only: an id selector like "#fade" still counts.
SRGB_COLOR_RE = re.compile(
    r'#(?:[0-9a-f]{8}|[0-9a-f]{6}|[0-9a-f]{3,4})(?![\w-])'
    r'|\b(?:rgba?|hsla?|hwb)\(',
    re.I,
)
# Notations that can express colors outside the sRGB gamut.
WIDE_GAMUT_USAGE_RE = re.compile(
    r'\b(?:color\(\s*(?:display-p3|rec2020|a98-rgb|prophoto-rgb|xyz)'
    r'|oklch\(|oklab\(|lch\(|lab\()',
    re.I,
)

# R3: missing color-mix() / modern color functions (signal of modernity)
MODERN_COLOR_FN_RE = re.compile(
    r'\b(color-mix|oklch|oklab|color\(\s*display-p3)\b',
    re.I,
)

def _gradient_prelude(css_text, start):
    """Return the first top-level argument of a gradient call.

    *start* is the index just past the gradient's opening parenthesis. The
    result ends at the first comma or closing parenthesis that is not nested
    inside another function call, or after ``_PRELUDE_SCAN_LIMIT`` characters.
    """
    window = css_text[start:start + _PRELUDE_SCAN_LIMIT]
    depth = 0
    for offset, ch in enumerate(window):
        if ch == "(":
            depth += 1
        elif ch == ")":
            if depth == 0:
                return window[:offset]
            depth -= 1
        elif ch == "," and depth == 0:
            return window[:offset]
    return window

def scan_css(css_text):
    """Return findings dict and rule-applicability metrics.

    Args:
        css_text: Concatenated CSS source to scan.

    Returns:
        A dict with integer values under the keys ``gradients_total``,
        ``gradients_rgb``, ``gradients_modern``, ``srgb_color_tokens``,
        ``wide_gamut_uses``, ``modern_fn_uses`` and ``css_chars``.
    """
    rgb_gradients = 0
    modern_gradients = 0
    for m in GRADIENT_RE.finditer(css_text):
        # The interpolation hint lives in the gradient's first argument
        # ("to right in oklch", "from 0deg in oklab", ...). Looking only
        # there avoids crediting a gradient for an "in oklch" that belongs to
        # a nested color-mix() or to a later, unrelated declaration.
        if MODERN_INTERP_RE.search(_gradient_prelude(css_text, m.end())):
            modern_gradients += 1
        else:
            rgb_gradients += 1

    srgb_colors = len(SRGB_COLOR_RE.findall(css_text))
    wide_gamut_uses = len(WIDE_GAMUT_USAGE_RE.findall(css_text))
    modern_fn_uses = len(MODERN_COLOR_FN_RE.findall(css_text))

    return {
        "gradients_total":    rgb_gradients + modern_gradients,
        "gradients_rgb":      rgb_gradients,
        "gradients_modern":   modern_gradients,
        "srgb_color_tokens":  srgb_colors,
        "wide_gamut_uses":    wide_gamut_uses,
        "modern_fn_uses":     modern_fn_uses,
        "css_chars":          len(css_text),
    }

def gamut_score(metrics):
    """Turn scan metrics into a single 40..100 score.

    Starting from 100:
       - subtract up to 40, proportional to the share of gradients that
         carry no modern interpolation hint
       - subtract 30 when there are more than 50 sRGB color tokens and no
         wide-gamut usage at all
       - add 10 when any modern color function is used
    The result is clamped to the range [40, 100].
    """
    points = 100

    gradient_count = metrics["gradients_total"]
    if gradient_count > 0:
        # Truncate toward zero so the penalty is a whole number of points.
        points -= int(metrics["gradients_rgb"] / gradient_count * 40)

    color_heavy = metrics["srgb_color_tokens"] > 50
    if color_heavy and metrics["wide_gamut_uses"] == 0:
        points -= 30

    if metrics["modern_fn_uses"] > 0:
        points += 10

    if points < 40:
        return 40
    if points > 100:
        return 100
    return points

def audit_site(name, url):
    """Fetch one site, scan its CSS and return its score record.

    Downloads the page at *url*, gathers its inline CSS plus the first 8
    linked stylesheets, runs scan_css() and gamut_score() on the combined
    text and prints a one-line summary.

    Returns a dict with keys "name", "url", "score" and "metrics", or None
    when the page itself could not be fetched.
    """
    print(f"[fetch] {name} <{url}>")
    page = fetch(url)
    if not page:
        return None

    stylesheet_urls = find_css_urls(page, url)
    print(f"  {len(stylesheet_urls)} stylesheet links")

    # Inline CSS first, then at most 8 external sheets (avoids pulling
    # 50MB of CSS), all separated by newlines.
    sources = [find_inline_styles(page)]
    for sheet_url in stylesheet_urls[:8]:
        sources.append(fetch(sheet_url, timeout=15))
    css_text = "\n".join(sources)

    metrics = scan_css(css_text)
    score = gamut_score(metrics)

    summary = (
        f"  css_chars={metrics['css_chars']:,}  "
        f"gradients={metrics['gradients_total']} "
        f"(rgb={metrics['gradients_rgb']}, modern={metrics['gradients_modern']})  "
        f"srgb_tokens={metrics['srgb_color_tokens']}  "
        f"wide_gamut={metrics['wide_gamut_uses']}  "
        f"modern_fns={metrics['modern_fn_uses']}  "
        f"score={score}"
    )
    print(summary)

    record = {"name": name, "url": url}
    record["score"] = score
    record["metrics"] = metrics
    return record

def main():
    """Audit every entry in SITES and write the ranked results to results.json.

    Sites whose page cannot be fetched are reported and left out. The
    remaining records are ordered by ascending score (worst first) and
    written, together with a generation timestamp, under ROOT.
    """
    audited = []
    for site_name, site_url in SITES:
        record = audit_site(site_name, site_url)
        if not record:
            print("  (skipped: fetch failed)")
            continue
        audited.append(record)

    # Lowest score first.
    audited.sort(key=lambda entry: entry["score"])

    payload = {
        "generated": datetime.now().isoformat(),
        "results": audited,
    }
    out_path = ROOT / "results.json"
    out_path.write_text(json.dumps(payload, indent=2))
    print(f"\nWrote {out_path}")

if __name__ == "__main__":
    main()
