import asyncio
import logging
import os
import random
from contextlib import asynccontextmanager
from logging.handlers import RotatingFileHandler
from pathlib import Path
from typing import Optional, Tuple
from urllib.parse import urlparse

from fastapi import FastAPI, HTTPException, Query, Request
from fastapi.responses import HTMLResponse, PlainTextResponse
from playwright.async_api import (
    Browser,
    BrowserContext,
    Page,
    Playwright,
    TimeoutError as PlaywrightTimeout,
    async_playwright,
)
from playwright_stealth import Stealth


# Record layout shared by the console and file handlers below.
LOG_FORMAT = "%(asctime)s | %(levelname)s | %(message)s"
# Parent of the directory that contains this file.
BASE_DIR = Path(__file__).resolve().parent.parent
# Log file location; overridable through the PROXY_LOG_FILE environment
# variable, defaulting to <BASE_DIR>/tmp/proxy.log.
LOG_FILE = Path(os.getenv("PROXY_LOG_FILE", str(BASE_DIR / "tmp" / "proxy.log")))
# Import-time side effect: make sure the log directory exists before the
# file handler tries to open the file.
LOG_FILE.parent.mkdir(parents=True, exist_ok=True)

logger = logging.getLogger("firmycz_proxy")
logger.setLevel(logging.INFO)
# Records are handled only by the handlers attached here, not by ancestor loggers.
logger.propagate = False

# Attach handlers only once: if this logger already has handlers (for example
# because the module body ran before in the same process), adding them again
# would duplicate every log line.
if not logger.handlers:
    formatter = logging.Formatter(LOG_FORMAT)

    # Console output (StreamHandler defaults to stderr).
    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(formatter)

    # Size-based rotation: roll over at about 1 MB and keep 3 backup files.
    file_handler = RotatingFileHandler(
        LOG_FILE,
        maxBytes=1_000_000,
        backupCount=3,
        encoding="utf-8",
    )
    file_handler.setFormatter(formatter)

    logger.addHandler(stream_handler)
    logger.addHandler(file_handler)


# Title passed to the FastAPI application.
APP_TITLE = "Firmy.cz HTML Proxy"
# Navigation timeout (page.goto and the page's default navigation timeout), in ms.
RENDER_TIMEOUT_MS = 45_000
# Default per-operation page timeout, also used for the h1/content waits, in ms.
CONTENT_TIMEOUT_MS = 15_000
# Once this many successful renders are counted, the browser is flagged for a restart.
MAX_RENDERS_BEFORE_RESTART = 60
# Size of the render semaphore; read from the MAX_CONCURRENT_RENDERS environment
# variable (default 5) and clamped to at least 1. A non-integer value raises
# ValueError at import time.
MAX_CONCURRENT_RENDERS = max(1, int(os.getenv("MAX_CONCURRENT_RENDERS", "5")))
# Values the URL's netloc must match exactly for a render request to be accepted.
ALLOWED_HOSTS = {"www.firmy.cz", "firmy.cz"}
# Pool of User-Agent strings; one is chosen at random for every new browser context.
USER_AGENTS = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:135.0) Gecko/20100101 Firefox/135.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36",
]
# Selectors for cookie-consent buttons/links, tried in this order by
# maybe_accept_consent; the first one that becomes visible is clicked.
CONSENT_SELECTORS = [
    "button:has-text('Souhlasím')",
    "button:has-text('Povolit vše')",
    "button:has-text('Povolit')",
    "button:has-text('Přijmout')",
    "button:has-text('Rozumím')",
    "a:has-text('Souhlasím')",
    "a:has-text('Povolit')",
]


# Process-wide state shared by all requests.
# NOTE(review): the asyncio primitives below are created at import time, before
# any event loop is running — confirm the target Python version binds them to
# the loop lazily (believed to be 3.10+).

# Playwright driver handle; set by ensure_browser, cleared by close_browser.
playwright_instance: Optional[Playwright] = None
# The single shared Chromium instance; None until the first render needs it.
browser: Optional[Browser] = None
# Held by ensure_browser and close_browser while they launch or tear down the browser.
browser_lock = asyncio.Lock()
# Limits the number of renders in flight to MAX_CONCURRENT_RENDERS.
render_semaphore = asyncio.Semaphore(MAX_CONCURRENT_RENDERS)
# Held by acquire_render_slot, release_render_slot and register_successful_render
# while they update the counters and flag below.
render_state_lock = asyncio.Lock()
# Successful renders counted since restart_browser last reset it to 0.
render_counter = 0
# Number of renders that currently hold a slot.
active_renders = 0
# Set when render_counter reaches MAX_RENDERS_BEFORE_RESTART; consumed by
# release_render_slot once active_renders drops to 0.
restart_requested = False
# Stealth helper applied to every new browser context in build_context.
stealth = Stealth()


def is_allowed_url(url: str) -> bool:
    """Return True when *url* is an HTTPS URL on an allowed firmy.cz host.

    The URL must use the ``https`` scheme, its netloc must match one of
    ``ALLOWED_HOSTS`` exactly (so a netloc carrying a port or userinfo is
    rejected) and its path must be non-empty.

    Malformed input that ``urlparse`` refuses outright (for example an
    unbalanced ``[`` in the netloc) is treated as "not allowed" instead of
    letting the ``ValueError`` escape to the caller.
    """
    try:
        parsed = urlparse(url)
    except ValueError:
        # urlparse raises on syntactically broken netlocs such as "https://[".
        return False

    return parsed.scheme == "https" and parsed.netloc in ALLOWED_HOSTS and bool(parsed.path)


async def ensure_browser() -> Browser:
    """Return the shared Chromium browser, launching a new one when needed.

    While holding ``browser_lock``: if the current browser exists and is still
    connected it is returned as is. Otherwise any stale browser and Playwright
    driver handles are torn down and a fresh driver and headless Chromium are
    started.

    Teardown of the stale handles is best-effort. The globals are cleared
    before closing and close/stop failures are only logged, so one failing
    ``close()`` cannot leave the module stuck with an unusable handle that
    makes every later call fail the same way.

    Returns:
        The connected browser instance.

    Raises:
        Whatever Playwright raises when the driver or Chromium cannot start.
    """
    global playwright_instance, browser

    async with browser_lock:
        if browser is not None and browser.is_connected():
            return browser

        # Drop the global references first so a failing teardown cannot leave
        # stale handles behind.
        stale_browser, browser = browser, None
        if stale_browser is not None:
            try:
                await stale_browser.close()
            except Exception:
                logger.warning("Zavření starého browseru selhalo", exc_info=True)

        stale_playwright, playwright_instance = playwright_instance, None
        if stale_playwright is not None:
            try:
                await stale_playwright.stop()
            except Exception:
                logger.warning("Zastavení staré Playwright instance selhalo", exc_info=True)

        logger.info("Spouštím nový Chromium browser")
        playwright_instance = await async_playwright().start()
        browser = await playwright_instance.chromium.launch(
            headless=True,
            args=[
                "--no-sandbox",
                "--disable-setuid-sandbox",
                "--disable-dev-shm-usage",
                "--disable-gpu",
            ],
        )
        return browser


async def close_browser() -> None:
    """Close the shared browser and stop the Playwright driver, if present.

    Runs under ``browser_lock``. Both globals are always reset to ``None`` and
    the driver stop is always attempted, even when closing the browser raises.
    Without that, a failing ``browser.close()`` would skip stopping the driver
    and leave stale handles in the module globals.

    Raises:
        Whatever ``browser.close()`` or ``playwright_instance.stop()`` raises;
        the error still propagates after the state has been reset.
    """
    global playwright_instance, browser

    async with browser_lock:
        try:
            if browser is not None:
                await browser.close()
        finally:
            browser = None
            try:
                if playwright_instance is not None:
                    await playwright_instance.stop()
            finally:
                playwright_instance = None


async def restart_browser(reason: str) -> None:
    """Close the shared browser and reset the successful-render counter.

    No new browser is launched here; ``ensure_browser`` starts one the next
    time it finds no connected browser.

    Args:
        reason: Human-readable explanation written to the log.
    """
    global render_counter

    logger.info("Restartuji browser: %s", reason)
    await close_browser()
    # Reset only after the close succeeded; if close_browser raises, the
    # counter keeps its value.
    render_counter = 0


async def acquire_render_slot() -> None:
    """Wait for a free render slot and count the caller as an active render.

    Acquires one permit of ``render_semaphore`` (blocking while
    ``MAX_CONCURRENT_RENDERS`` renders are already running), then increments
    ``active_renders`` under ``render_state_lock``.

    If the bookkeeping step is interrupted, for example because the task is
    cancelled while waiting for the state lock, the permit is handed back
    before the exception propagates. Callers only invoke
    ``release_render_slot`` after this function has returned, so without this
    the permit would be lost.
    """
    global active_renders

    await render_semaphore.acquire()
    try:
        async with render_state_lock:
            active_renders += 1
            logger.info(
                "Pridelen render slot | aktivnich renderu: %s/%s",
                active_renders,
                MAX_CONCURRENT_RENDERS,
            )
    except BaseException:
        # BaseException so that asyncio.CancelledError is covered as well.
        render_semaphore.release()
        raise


async def release_render_slot() -> None:
    """Give back a render slot and run a pending preventive browser restart.

    Decrements ``active_renders`` under ``render_state_lock``. When a restart
    has been requested and this was the last active render, the browser is
    restarted while the state lock is still held. ``acquire_render_slot`` needs
    that lock to register a new render, so no request can start using the
    browser while it is being closed.

    A failing restart is logged and not raised, so it does not turn an
    otherwise finished request into an error. ``restart_browser`` only resets
    ``render_counter`` after a successful close, so the next successful render
    requests the restart again.

    The semaphore permit is always released, even if the bookkeeping above is
    interrupted.
    """
    global active_renders, restart_requested

    try:
        async with render_state_lock:
            active_renders -= 1
            logger.info(
                "Uvolnen render slot | aktivnich renderu: %s/%s",
                active_renders,
                MAX_CONCURRENT_RENDERS,
            )
            if restart_requested and active_renders == 0:
                restart_requested = False
                # active_renders == 0 means no render is using the browser, and
                # holding the lock keeps new renders from registering until the
                # restart is done.
                try:
                    await restart_browser("preventivni restart po limitu renderu")
                except Exception:
                    logger.exception("Preventivni restart browseru selhal")
    finally:
        render_semaphore.release()


async def maybe_accept_consent(page: Page) -> None:
    """Click the first cookie-consent control that shows up, if any.

    The selectors in ``CONSENT_SELECTORS`` are tried in order. Each one gets up
    to 1.5 s to become visible; a timeout while waiting or while clicking moves
    on to the next selector. After a successful click the function pauses for
    one second, waits up to 5 s for the network to go idle (a timeout there is
    only logged at debug level) and returns without trying further selectors.
    If no selector matches, the function returns without doing anything.
    """
    for css in CONSENT_SELECTORS:
        consent_button = page.locator(css).first
        try:
            await consent_button.wait_for(state="visible", timeout=1_500)
            # A randomized press delay between mouse down and mouse up.
            await consent_button.click(delay=random.randint(100, 350))
        except PlaywrightTimeout:
            continue

        logger.info("Kliknuto na consent tlačítko přes selector %s", css)
        await asyncio.sleep(1.0)
        try:
            await page.wait_for_load_state("networkidle", timeout=5_000)
        except PlaywrightTimeout:
            logger.debug("Po consentu nenastal networkidle, pokračuji dál")
        return


async def wait_for_rendered_content(page: Page) -> None:
    """Block until the page shows real content rather than a skeleton.

    Two checks run one after the other, each limited by
    ``CONTENT_TIMEOUT_MS``: first a visible ``h1`` must exist, then an in-page
    probe must report that the ``h1`` has non-empty text and the body's
    visible text is longer than 400 characters. A short random pause
    (0.6-1.2 s) follows once both checks pass.

    Raises:
        RuntimeError: When either check times out; the Playwright timeout is
            attached as the cause.
    """
    # Evaluated inside the page by wait_for_function until it returns a truthy value.
    content_ready_script = """
            () => {
                const body = document.body;
                const h1 = document.querySelector("h1");
                if (!body || !h1) {
                    return false;
                }

                const text = (body.innerText || "").trim();
                return h1.textContent.trim().length > 0 && text.length > 400;
            }
            """

    try:
        await page.wait_for_selector("h1", state="visible", timeout=CONTENT_TIMEOUT_MS)
    except PlaywrightTimeout as heading_timeout:
        raise RuntimeError("Stránka nenahrála očekávaný nadpis h1") from heading_timeout

    try:
        await page.wait_for_function(content_ready_script, timeout=CONTENT_TIMEOUT_MS)
    except PlaywrightTimeout as content_timeout:
        raise RuntimeError(
            "Stránka zůstala ve skeleton stavu bez dostatečného obsahu"
        ) from content_timeout

    settle_delay = random.uniform(0.6, 1.2)
    await asyncio.sleep(settle_delay)


async def build_context(active_browser: Browser) -> Tuple[BrowserContext, Page]:
    """Create an isolated browser context with one ready-to-use page.

    The context uses a 1366x768 viewport, a randomly chosen entry of
    ``USER_AGENTS``, the ``cs-CZ`` locale and the ``Europe/Prague`` time zone.
    Stealth patches are applied to it before the page is opened. The page gets
    ``CONTENT_TIMEOUT_MS`` as its default timeout and ``RENDER_TIMEOUT_MS`` as
    its default navigation timeout.

    If any step after the context has been created fails, the context is
    closed before the error propagates. The caller never receives the context
    in that case, so nobody else could close it.

    Args:
        active_browser: Connected browser to create the context in.

    Returns:
        ``(context, page)``; the caller is responsible for closing both.
    """
    context = await active_browser.new_context(
        viewport={"width": 1366, "height": 768},
        user_agent=random.choice(USER_AGENTS),
        locale="cs-CZ",
        timezone_id="Europe/Prague",
    )
    try:
        await stealth.apply_stealth_async(context)
        page = await context.new_page()
        page.set_default_timeout(CONTENT_TIMEOUT_MS)
        page.set_default_navigation_timeout(RENDER_TIMEOUT_MS)
    except BaseException:
        # BaseException so that cancellation also releases the context.
        await context.close()
        raise
    return context, page


async def render_page(url: str) -> str:
    """Render *url* in a fresh browser context and return the resulting HTML.

    Steps: navigate until ``domcontentloaded``, give the network up to 10 s to
    go idle (best-effort), dismiss a cookie-consent dialog if one appears, wait
    for real content, then serialize the DOM. The page and context are always
    closed afterwards.

    Args:
        url: Address to load; the caller is expected to have validated it.

    Returns:
        The serialized HTML of the rendered page.

    Raises:
        RuntimeError: When rendering fails: a Playwright timeout, any other
            Playwright error during navigation or rendering (for example a
            network failure), missing content, or HTML shorter than 1000
            characters. The ``/render`` endpoint maps ``RuntimeError`` to
            HTTP 502, so these failures are reported as upstream errors.
    """
    # Imported locally because the module-level import list only brings in
    # Playwright's TimeoutError, which is a subclass of this Error.
    from playwright.async_api import Error as PlaywrightError

    active_browser = await ensure_browser()
    context, page = await build_context(active_browser)

    try:
        logger.info("Začínám render: %s", url)
        await page.goto(url, wait_until="domcontentloaded", timeout=RENDER_TIMEOUT_MS)

        try:
            await page.wait_for_load_state("networkidle", timeout=10_000)
        except PlaywrightTimeout:
            logger.debug("Networkidle nenastal včas, pokračuji na vlastní kontroly obsahu")

        await maybe_accept_consent(page)
        await wait_for_rendered_content(page)

        html = await page.content()
        if len(html) < 1_000:
            raise RuntimeError("Vyrenderované HTML je podezřele krátké")

        return html
    except PlaywrightTimeout as exc:
        raise RuntimeError("Vypršel čas při renderování stránky") from exc
    except PlaywrightError as exc:
        # Must come after the timeout clause: TimeoutError derives from Error.
        raise RuntimeError("Prohlížeč selhal při renderování stránky") from exc
    finally:
        try:
            await page.close()
        finally:
            await context.close()


async def register_successful_render() -> None:
    """Count one successful render and flag the browser for restart at the limit.

    Under ``render_state_lock`` the counter is incremented and logged. Once it
    reaches ``MAX_RENDERS_BEFORE_RESTART`` the ``restart_requested`` flag is
    set; the restart itself is carried out elsewhere.
    """
    global render_counter, restart_requested

    async with render_state_lock:
        render_counter = render_counter + 1
        logger.info("Dokončeno | nový counter: %s", render_counter)

        if render_counter < MAX_RENDERS_BEFORE_RESTART:
            return

        restart_requested = True
        logger.info("Browser oznacen k restartu po dokonceni aktivnich renderu")


@asynccontextmanager
async def lifespan(_: FastAPI):
    """Application lifespan: log the startup settings, close the browser on exit.

    The browser is launched lazily by the first render, so startup only logs
    the configuration. The shutdown part runs in a ``finally`` block, so the
    browser and Playwright driver are also closed when the context is left
    through an exception or cancellation rather than a normal shutdown.
    """
    logger.info(
        "Proxy startuje | max concurrent renders: %s | log file: %s",
        MAX_CONCURRENT_RENDERS,
        LOG_FILE,
    )
    try:
        yield
    finally:
        await close_browser()
        logger.info("Server ukončen – browser zavřen")


# ASGI application object; `lifespan` logs the startup configuration and
# closes the shared browser when the server shuts down.
app = FastAPI(title=APP_TITLE, lifespan=lifespan)


@app.exception_handler(HTTPException)
async def http_exception_handler(_: Request, exc: HTTPException) -> PlainTextResponse:
    """Render an ``HTTPException`` as a plain-text response.

    The body is ``str(exc.detail)`` and the status code is taken from the
    exception. Headers attached to the exception are forwarded as well, so
    they are not silently dropped by this custom handler.
    """
    return PlainTextResponse(
        str(exc.detail),
        status_code=exc.status_code,
        # getattr keeps this working with exception objects that carry no
        # headers attribute.
        headers=getattr(exc, "headers", None),
    )


@app.exception_handler(Exception)
async def unhandled_exception_handler(_: Request, exc: Exception) -> PlainTextResponse:
    """Log an unexpected exception with its traceback and answer with HTTP 500.

    The response body is a fixed generic message; exception details go only
    to the log.
    """
    # Pass the exception object itself so the logged traceback is always the
    # one belonging to `exc`, independent of whether an exception is currently
    # being handled in the calling frame.
    logger.error("Neočekávaná chyba: %s", exc, exc_info=exc)
    return PlainTextResponse("Interní chyba proxy serveru", status_code=500)


@app.get("/health", response_class=PlainTextResponse)
@app.get("/healthz", response_class=PlainTextResponse)
async def healthcheck() -> str:
    # Health probe registered under both /health and /healthz. It always
    # returns the plain-text body "ok" and does not touch the browser.
    return "ok"


@app.get("/render", response_class=HTMLResponse)
async def render(url: str = Query(..., description="HTTPS URL z webu firmy.cz")) -> HTMLResponse:
    # Endpoint flow: validate the URL (400 when not allowed), wait for a free
    # render slot, render the page and count the success. A RuntimeError from
    # rendering becomes HTTP 502 carrying the error text. The slot is released
    # on every path once it has been acquired.
    if not is_allowed_url(url):
        raise HTTPException(400, detail="Povoleny jsou pouze HTTPS URL z firmy.cz")

    await acquire_render_slot()
    try:
        rendered_html = await render_page(url)
        await register_successful_render()
    except RuntimeError as render_error:
        logger.exception("Render selhal pro %s: %s", url, render_error)
        raise HTTPException(502, detail=str(render_error)) from render_error
    finally:
        await release_render_slot()

    return HTMLResponse(content=rendered_html)


if __name__ == "__main__":
    # Imported here so uvicorn is only required when this file is run as a script.
    import uvicorn

    # Serve on the loopback interface only, port 3000, in a single worker process.
    # NOTE(review): the browser handle and render counters are module-level
    # globals and therefore per-process — presumably the reason for workers=1;
    # confirm before changing it.
    uvicorn.run(
        app,
        host="127.0.0.1",
        port=3000,
        log_level="info",
        workers=1,
    )
