a
    i(                     @   sp  U d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dlm	Z	 d dl
mZmZ d dlmZ d dlmZmZmZmZ d dlmZmZ d d	lmZmZmZmZmZmZ d d
lm Z  dZ!e	e"# j$j$Z%e	e&de'e%d d Z(e(j$j)ddd e*dZ+e+,ej- de+_.e+j/sde0e!Z1e2 Z3e34e1 ee(ddddZ5e54e1 e+6e3 e+6e5 dZ7dZ8dZ9dZ:e;de<e&ddZ=ddhZ>g d Z?g d!Z@daAee eBd"< daCee eBd#< e D ZEe Fe=ZGe D ZHd aId aJdaKe  ZLe'eMd$d%d&ZNed'd(d)ZOdd'd*d+ZPe'dd,d-d.ZQdd'd/d0ZRdd'd1d2ZSedd3d4d5ZTedd3d6d7ZUeeeef d8d9d:ZVe'e'd$d;d<ZWdd'd=d>ZXeed?d@dAZYee7eYdBZZeZ[eeeedCdDdEZ\eZ[e]ee]edCdFdGZ^eZj_dHedIeZj_dJedIe'd'dKdLZ`eZj_dMedIedNdOdPfe'ed$dQdRZaebdSkrld dlcZcecjdeZdTdUdVddW dS )X    N)asynccontextmanager)RotatingFileHandler)Path)OptionalTuple)urlparse)FastAPIHTTPExceptionQueryRequest)HTMLResponsePlainTextResponse)BrowserBrowserContextPage
PlaywrightTimeoutErrorasync_playwright)Stealthz)%(asctime)s | %(levelname)s | %(message)sZPROXY_LOG_FILEtmpz	proxy.logT)parentsexist_okZfirmycz_proxyFi@B    zutf-8)ZmaxBytesZbackupCountencodingzFirmy.cz HTML Proxyiȯ  i:  <      MAX_CONCURRENT_RENDERS5zwww.firmy.czzfirmy.cz)zoMozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36zPMozilla/5.0 (Windows NT 10.0; Win64; x64; rv:135.0) Gecko/20100101 Firefox/135.0zuMozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36zeMozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36)u   button:has-text('Souhlasím')u   button:has-text('Povolit vše')zbutton:has-text('Povolit')u   button:has-text('Přijmout')u   button:has-text('Rozumím')u   a:has-text('Souhlasím')za:has-text('Povolit')playwright_instancebrowser)urlreturnc                 C   s&   t | }|jdko$|jtv o$t|jS )NZhttps)r   ZschemeZnetlocALLOWED_HOSTSboolpath)r    Zparsed r%   +/data/www/virtuals/stahovac/proxy/server.pyis_allowed_urlX   s    r'   )r!   c                	      s   t 4 I d H  td ur6t r6tW  d   I d H  S td urPt I d H  d atd urjt I d H  d atd t 	 I d H atj
jdg ddI d H atW  d   I d H  S 1 I d H s0    Y  d S )Nu!   Spouštím nový Chromium browserT)z--no-sandboxz--disable-setuid-sandboxz--disable-dev-shm-usagez--disable-gpu)Zheadlessargs)browser_lockr   Zis_connectedcloser   stoploggerinfor   startZchromiumZlaunchr%   r%   r%   r&   ensure_browser]   s     
	r/   c                	      sp   t 4 I d H J td ur(t I d H  d atd urBt I d H  d aW d   I d H  ql1 I d H sb0    Y  d S )N)r)   r   r*   r   r+   r%   r%   r%   r&   close_browserz   s    r0   )reasonr!   c                    s    t d|  t I d H  dad S )NzRestartuji browser: %sr   )r,   r-   r0   render_counter)r1   r%   r%   r&   restart_browser   s    r3   c                	      s`   t  I d H  t4 I d H , td7 atdtt W d   I d H  q\1 I d H sR0    Y  d S )Nr   z/Pridelen render slot | aktivnich renderu: %s/%s)render_semaphoreacquirerender_state_lockactive_rendersr,   r-   r   r%   r%   r%   r&   acquire_render_slot   s    r8   c               	      s   d} t 4 I d H @ td8 atdtt tr<tdkr<dad} W d   I d H  qf1 I d H s\0    Y  t  | rtdI d H  d S )NFr   z.Uvolnen render slot | aktivnich renderu: %s/%sr   Tz%preventivni restart po limitu renderu)	r6   r7   r,   r-   r   restart_requestedr4   releaser3   )Zshould_restartr%   r%   r&   release_render_slot   s    .r;   )pager!   c              	      s   t D ]}| |j}z|jdddI d H  |jtdddI d H  td| t	
dI d H  z| jd	d
dI d H  W n ty   td Y n0 W  d S  ty   Y qY q0 qd S )Nvisiblei  statetimeoutd   i^  )Zdelayu0   Kliknuto na consent tlačítko přes selector %sg      ?networkidlei  r@   u1   Po consentu nenastal networkidle, pokračuji dál)CONSENT_SELECTORSlocatorfirstZwait_forZclickrandomZrandintr,   r-   asynciosleepwait_for_load_statePlaywrightTimeoutdebug)r<   ZselectorrE   r%   r%   r&   maybe_accept_consent   s    rM   c              
      s   z| j ddtdI d H  W n. tyH } ztd|W Y d }~n
d }~0 0 z| jdtdI d H  W n. ty } ztd|W Y d }~n
d }~0 0 ttdd	I d H  d S )
NZh1r=   r>   u*   Stránka nenahrála očekávaný nadpis h1aw  
            () => {
                const body = document.body;
                const h1 = document.querySelector("h1");
                if (!body || !h1) {
                    return false;
                }

                const text = (body.innerText || "").trim();
                return h1.textContent.trim().length > 0 && text.length > 400;
            }
            rC   u=   Stránka zůstala ve skeleton stavu bez dostatečného obsahug333333?g333333?)	Zwait_for_selectorCONTENT_TIMEOUT_MSrK   RuntimeErrorZwait_for_functionrH   rI   rG   Zuniform)r<   excr%   r%   r&   wait_for_rendered_content   s      rQ   )active_browserr!   c                    s^   | j dddttdddI d H }t|I d H  | I d H }|t |	t
 ||fS )NiV  i   )widthZheightzcs-CZzEurope/Prague)ZviewportZ
user_agentZlocaleZtimezone_id)Znew_contextrG   ZchoiceUSER_AGENTSstealthZapply_stealth_asyncZnew_pageZset_default_timeoutrN   Zset_default_navigation_timeoutRENDER_TIMEOUT_MS)rR   contextr<   r%   r%   r&   build_context   s    

rX   c                    s  t  I d H }t|I d H \}}z8ztd|  |j| dtdI d H  z|jdddI d H  W n tyz   td Y n0 t	|I d H  t
|I d H  | I d H }t|dk rtd	|W W z | I d H  W | I d H  S | I d H  0 S  ty" } ztd
|W Y d }~n
d }~0 0 W z | I d H  W | I d H  n| I d H  0 n4z | I d H  W | I d H  n| I d H  0 0 d S )Nu   Začínám render: %sZdomcontentloaded)Z
wait_untilr@   rB   i'  rC   uB   Networkidle nenastal včas, pokračuji na vlastní kontroly obsahui  u*   Vyrenderované HTML je podezřele krátkéu)   Vypršel čas při renderování stránky)r/   rX   r,   r-   ZgotorV   rJ   rK   rL   rM   rQ   contentlenrO   r*   )r    rR   rW   r<   htmlrP   r%   r%   r&   render_page   s6     ""r\   c                	      sf   t 4 I d H @ td7 atdt ttkr8datd W d   I d H  qb1 I d H sX0    Y  d S )Nr   u   Dokončeno | nový counter: %sTz9Browser oznacen k restartu po dokonceni aktivnich renderu)r6   r2   r,   r-   MAX_RENDERS_BEFORE_RESTARTr9   r%   r%   r%   r&   register_successful_render  s    r^   _c                 C  s.   t dtt d V  t I d H  t d d S )Nz:Proxy startuje | max concurrent renders: %s | log file: %su#   Server ukončen – browser zavřen)r,   r-   r   LOG_FILEr0   r_   r%   r%   r&   lifespan  s    rb   )titlerb   )r`   rP   r!   c                    s   t t|j|jdS )Nstatus_code)r   strdetailre   r`   rP   r%   r%   r&   http_exception_handler%  s    ri   c                    s   t jd|dd tdddS )Nu   Neočekávaná chyba: %sTexc_infou   Interní chyba proxy serverui  rd   )r,   errorr   rh   r%   r%   r&   unhandled_exception_handler*  s    rm   z/health)Zresponse_classz/healthzc                      s   dS )Nokr%   r%   r%   r%   r&   healthcheck0  s    ro   z/render.zHTTPS URL z webu firmy.cz)Zdescriptionc              
      s   t | stdddt I d H  zxzt| I d H }t I d H  W nH ty } z0tjd| |dd tdt|d|W Y d }~n
d }~0 0 W t	 I d H  nt	 I d H  0 t
|dS )	Ni  z(Povoleny jsou pouze HTTPS URL z firmy.cz)rg   zRender selhal pro %s: %sTrj   i  )rY   )r'   r	   r8   r\   r^   rO   r,   rl   rf   r;   r   )r    r[   rP   r%   r%   r&   render6  s    *rp   __main__z	127.0.0.1i  r-   )ZhostZportZ	log_levelZworkers)erH   ZloggingosrG   
contextlibr   Zlogging.handlersr   pathlibr   typingr   r   Zurllib.parser   Zfastapir   r	   r
   r   Zfastapi.responsesr   r   Zplaywright.async_apir   r   r   r   r   rK   r   Zplaywright_stealthr   Z
LOG_FORMAT__file__ZresolveparentZBASE_DIRgetenvrf   ra   mkdirZ	getLoggerr,   ZsetLevelINFOZ	propagatehandlersZ	FormatterZ	formatterZStreamHandlerZstream_handlerZsetFormatterZfile_handlerZ
addHandlerZ	APP_TITLErV   rN   r]   maxintr   r"   rT   rD   r   __annotations__r   ZLockr)   Z	Semaphorer4   r6   r2   r7   r9   rU   r#   r'   r/   r0   r3   r8   r;   rM   rQ   rX   r\   r^   rb   ZappZexception_handlerri   	Exceptionrm   getro   rp   __name__Zuvicornrunr%   r%   r%   r&   <module>   s   
 





	

