o
    ;i5                  
   @   s  d dl Z d dlZd dlZd dlZd dlmZ d dlZd dlmZmZm	Z	m
Z
mZ d dlmZ d dlmZ d dlZdZdZdZeed	d
ZeeddZeeddZedddv Zd/dedede
eeef  ddfddZd0ddZdefddZdefddZ d/dede	e d e
ee  ddfd!d"Z!d#edeeef fd$d%Z"deeef defd&d'Z#d(e	eeef  d)eeef deeef fd*d+Z$d,eeef deeef fd-d.Z%dS )1    N)datetime)AnyDictListOptionalSet)request)	HTTPError)GEMINI_API_KEYGOOGLE_API_KEYKYC_GEMINI_API_KEYzgemini-2.5-flashzOhttps://generativelanguage.googleapis.com/v1beta/models/{model}:generateContentKYC_OCR_MAX_OUTPUT_TOKENS4096KYC_OCR_MAX_RETRIES2KYC_OCR_RETRY_BASE_SLEEPz0.5KYC_GEMINI_DEBUG0>   1onyesTRUEtrueeventdetailpayloadreturnc              
   C   s   t sd S zdd}t|dddP}|dt   d|  d| d |d urPz|tj|d	d
d d d  W n tyO   |t	|d d  d Y nw |d W d    W d S 1 saw   Y  W d S  tyr   Y d S w )Nz/tmp/gcv_gemini_debug.logautf-8encoding[z] : 
F)ensure_asciiiP  zQ--------------------------------------------------------------------------------
)
_GEMINI_DEBUGopenwriter   now	isoformatjsondumps	Exceptionstr)r   r   r   pathf r0   
gcv_ocr.py
_debug_log   s"   &$ &r2   c               	   C   s  t jt jtdfD ]{} t j| d}t j|sqzbt|dddQ}|D ]F}| }|r4|	dr5q'|	drD|t
dd  }d	|vrIq'|d	d
\}}| }|sXq'|  d}|rm|t jvrm|t j|< q'W d   n1 sxw   Y  W q ty   Y qw dS )z-Load .env values into environment if missing.z/var/www/htmlz.envrr   r   #zexport N=   z"')osr.   dirnameabspath__file__joinisfiler&   strip
startswithlensplitenvironOSError)baser.   r/   raw_linelinekeyvaluer0   r0   r1   _load_dotenv'   s:   

rH   c                  C   s:   t   tD ]} t| }|r| r|   S qtd)NzTGemini API key is not set. Set GEMINI_API_KEY, GOOGLE_API_KEY or KYC_GEMINI_API_KEY.)rH   _GEMINI_API_KEY_ENVr7   getenvr=   RuntimeError)env_namerF   r0   r0   r1   _get_gemini_api_keyC   s   
rM   c                   C   s   t   tdptdptS )NKYC_GEMINI_MODELKYC_OCR_MODEL)rH   r7   rJ   _DEFAULT_GEMINI_MODELr0   r0   r0   r1   _get_gemini_modelL   s   rQ   nodetextsseenc                 C   s   |d u rt  }t| trBt| }||v rd S || |  D ] \}}|dkr2t|tr2|| qt|ttfr?t	||| qd S t| trat| }||v rQd S || | D ]
}t	||| qXd S d S )Ntext)
set
isinstancedictidadditemsr-   appendlist_extract_text_like_fields)rR   rS   rT   oidrF   rG   itemr0   r0   r1   r^   Q   s0   




r^   sc                    s  | pd  } | stdg }tdtj}|| D ]}|dp"d  }|r,|| q||  dd t| D D ]}|| |d   q;t	
 }dtdtfd	d
dtdtfdddtdttttf  f fdd dtdtt ffdd}|D ]}||D ]z}	zt	|	}
 |
}|d ur|W     S W n	 ty   Y nw d|	v rt|	D ]0\}}|dkrqz||	|d  \}
} |
}|d ur|W       S W q ty   Y qw zt|	}
 |
}|d ur|W     S W q ty   Y qw q~td)N zEmpty model outputz```(?:json|JSON)?\s*(.*?)\s*```r6   c                 S   s   g | ]
\}}|d kr|qS ){r0   ).0ichr0   r0   r1   
<listcomp>z   s    z(_extract_json_strict.<locals>.<listcomp>rU   r   c                 S   s   t dd| S )Nz,\s*([}\]])z\1)resub)rU   r0   r0   r1   _strip_trailing_commas   s   z4_extract_json_strict.<locals>._strip_trailing_commasobjc                 S   sf   t | tsdS d| v rdS | r d| v r t|  h dr dS | s$dS tdd |  D r1dS dS )NFfieldsTerror>   coderm   statusmessagec                 s   s(    | ]\}}t |tot |tV  qd S N)rW   r-   rX   )rd   kvr0   r0   r1   	<genexpr>   s   & zA_extract_json_strict.<locals>._is_payload_like.<locals>.<genexpr>)rW   rX   rV   keysissubsetallr[   rk   r0   r0   r1   _is_payload_like   s   
"z._extract_json_strict.<locals>._is_payload_likec                    sN   t | tr| r| S d S t | tr%t| dkr%t | d tr% | d S d S )Nr6   r   )rW   rX   r]   r?   rx   )_as_payloadry   r0   r1   rz      s
   
$z)_extract_json_strict.<locals>._as_payloadcandc                    s"   |   }tdd|}| |gS )Nz^\uFEFFrb   )r=   rh   ri   )r{   cleaned)rj   r0   r1   _repair_candidate   s
   z/_extract_json_strict.<locals>._repair_candidaterc   zModel output is not valid JSON)r=   
ValueErrorrh   compileSfinditergroupr\   	enumerater*   JSONDecoderr-   r   boolr   r   r   loadsr,   
raw_decodeastliteral_eval)ra   
candidatesfence_patternmatchchunkstartdecoderr}   r{   variantrk   r   re   rf   _r0   )rz   ry   rj   r1   _extract_json_strictk   sj   

$	

r   c           
      C   s  |  dpg }t|tsg }|sg }t| | d| S g }|D ]Q}t|ts*q"| d}t|trZ| dp:g }t|trZ|D ]}t|tsJqB| d}t|trY|| qB| d}	t|	tri||	 t|trs|| q"|sg }t| | |rd| S d| S )Nr   rb   contentpartsrU   )	getrW   r]   r^   r;   r=   rX   r-   r\   )
r   r   extra_textsrS   	candidater   r   ptdirect_textr0   r0   r1   "_extract_text_from_gemini_response   sD   















r   fields_plandatac              	   C   s  | d}t|tst|trt|ni }| D ]}| d}t|tr%|s&q| |}t|tr3|}n!dd dd d}|d urNt| rJt| nd |d< |}|||< zt| dd|d< W n tym   d|d< Y nw | d}|d u r|d |d< nt| rt| nd |d< | d}|d u rd|d< nzt|}	tdt	d	|	|d< W n ty   d|d< Y nw | d
}
|
d u rd |d
< qt|
 }
|
sd n|
|d
< q| D ]}| d}||vrdd dd d||< qd|iS )Nrl   rF   r           )candidate_idxrU   
confidence
normalizedrU   r   r   g      ?r   )
r   rW   rX   r-   r=   intr,   floatmaxmin)r   r   rl   r/   rF   rs   	value_boxr   cccnr0   r0   r1   _normalize_output_fields   s`   




 

 




r   bundlec                 C   s(  t | tr
| dng }t |tsg }tj| pi }|s!di iS t }t }t	j
|d d| }d|dgdtddd	}d }t}ttd
 D ]A}	z||d d< tj|dt|dddddd}
tj|
dd}| jddd}W d    n1 sw   Y  t|}tddt|t |trt| ng d t|}tdddt |tr|d d ndi |stdd| t|}t||W   S  ty } z:|}td d!t||	d" |	tk r|d#k rtd#|d$ }t !t"d$|	   W Y d }~qIt|di iW  Y d }~  S d }~w t#ya } zCd}z| jddd}W n
 t$y1   Y nw t%d%|j& d&|j' d'| }|	tk r[|j&d(v r[t !t"d$|	   W Y d }~qI|d }~w t$y } z|}|	tk rt !t"d$|	   W Y d }~qIt%d)| |d }~ww t%d*| |)+Nrl   )modelz?key=user)roler   r   zapplication/json)temperaturemaxOutputTokensresponseMimeType)contentsgenerationConfigr6   r   r   POSTr   zkyc-gemini-ocr/1.0)zContent-TypeAcceptz
User-Agent)methodr   headers   )timeoutreplace)errorsgemini_raw_responsezresponse parsed)raw_lenru   gemini_text_extractzextracted textrU   i  rb   gemini_no_textz Gemini response has no text partgemini_parse_errorzJSON parse error)rm   attempti       zGemini HTTP r"   z; body=>             zGemini OCR failed: z!Gemini OCR failed after retries: )(rW   rX   r   r]   kyc	KYCEnginegemini_parts_from_bundlerM   rQ   _GEMINI_ENDPOINTformat_MAX_OUTPUT_TOKENSrange_MAX_RETRIESr   Requestr*   r+   encodeurlopenreaddecoder   r2   r?   ru   r   r-   r   r   r~   r   timesleep_RETRY_BASE_SLEEPr	   r,   rK   rn   reason)r   r   r   api_keyr   urlr   last_errmax_output_tokensr   reqresprawr   rU   
result_objebodyr0   r0   r1   llm_ocr3  s   


,&
r   rq   )r   N)&r   r*   r7   rh   r   r   typingr   r   r   r   r   urllibr   urllib.errorr	   kyc_policy_engine_llm_v2r   rI   rP   r   r   rJ   r   r   r   r   r%   r-   r2   rH   rM   rQ   r^   r   r   r   r   r0   r0   r0   r1   <module>   s4    (
	(\2*&B