
    ;i5              
          d dl Z d dlZd dlZd dlZd dlmZ d dlZd dlmZmZm	Z	m
Z
mZ d dlmZ d dlmZ d dlZdZdZdZ e ej        d	d
                    Z e ej        dd                    Z e ej        dd                    Z ej        dd          dv Zd&dedede
eeef                  ddfdZd'dZdefdZdefdZ d&dede	e         de
ee                  ddfdZ!dedeeef         fdZ"deeef         defd Z#d!e	eeef                  d"eeef         deeef         fd#Z$d$eeef         deeef         fd%Z%dS )(    N)datetime)AnyDictListOptionalSet)request)	HTTPError)GEMINI_API_KEYGOOGLE_API_KEYKYC_GEMINI_API_KEYzgemini-2.5-flashzOhttps://generativelanguage.googleapis.com/v1beta/models/{model}:generateContentKYC_OCR_MAX_OUTPUT_TOKENS4096KYC_OCR_MAX_RETRIES2KYC_OCR_RETRY_BASE_SLEEPz0.5KYC_GEMINI_DEBUG0>   1onyesTRUEtrueeventdetailpayloadreturnc           
      .   t           sd S 	 d}t          |dd          5 }|                    dt          j                                                     d|  d| d           |s	 |                    t          j        |d	
          d d         dz              n=# t          $ r0 |                    t          |          d d          d           Y nw xY w|                    d           d d d            d S # 1 swxY w Y   d S # t          $ r Y d S w xY w)Nz/tmp/gcv_gemini_debug.logautf-8encoding[z] : 
F)ensure_asciiiP  zQ--------------------------------------------------------------------------------
)
_GEMINI_DEBUGopenwriter   now	isoformatjsondumps	Exceptionstr)r   r   r   pathfs        /var/www/html/gcv_ocr.py
_debug_logr3      s    *$g... 	%!GGI0022IIeIIvIIIJJJ"9GGDJwUCCCFUFKdRSSSS  9 9 9GGs7||FUF3777888889GGO$$$	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	%    sY   D AC9%4BC97CC9CC9,D 9C==D  C=D 
DDc                     t           j                            t           j                            t                              dfD ]|} t           j                            | d          }t           j                            |          sC	 t          |dd          5 }|D ]}|                                }|r|	                    d          r.|	                    d          r)|t          d          d                                         }d	|vrq|                    d	d
          \  }}|                                }|s|                                                                                    d          }|r|t           j        vr|t           j        |<   	 ddd           n# 1 swxY w Y   m# t          $ r Y zw xY wdS )z-Load .env values into environment if missing.z/var/www/htmlz.envrr    r!   #zexport N=   z"')osr0   dirnameabspath__file__joinisfiler(   strip
startswithlensplitenvironOSError)baser0   r1   raw_linelinekeyvalues          r2   _load_dotenvrJ   '   s   !:!:;;_M  w||D&))w~~d## 		dC'222 0a ! 0 0H#>>++D !4??3#7#7 ! y11 =#C	NNOO4::<<$ !%C!3!3JC))++C ! !KKMM//1177>>E 0s"*44*/
300 0 0 0 0 0 0 0 0 0 0 0 0 0 0   	 	 	H	+ s7   F1C=F$F1$F(	(F1+F(	,F11
F?>F?c                      t                       t          D ]B} t          j        |           }|r*|                                r|                                c S Ct          d          )NzTGemini API key is not set. Set GEMINI_API_KEY, GOOGLE_API_KEY or KYC_GEMINI_API_KEY.)rJ   _GEMINI_API_KEY_ENVr9   getenvr?   RuntimeError)env_namerH   s     r2   _get_gemini_api_keyrP   C   sd    NNN'  i!! 	399;; 	99;;
m
n
nn    c                  |    t                       t          j        d          pt          j        d          pt          S )NKYC_GEMINI_MODELKYC_OCR_MODEL)rJ   r9   rM   _DEFAULT_GEMINI_MODEL rQ   r2   _get_gemini_modelrW   L   s1    NNN9'((_BIo,F,F_J__rQ   nodetextsseenc                 F   |t                      }t          | t                    rt          |           }||v rd S |                    |           |                                 D ]c\  }}|dk    r+t          |t                    r|                    |           6t          |t          t          f          rt          |||           dd S t          | t                    r@t          |           }||v rd S |                    |           | D ]}t          |||           d S d S )Ntext)
set
isinstancedictidadditemsr/   appendlist_extract_text_like_fields)rX   rY   rZ   oidrH   rI   items          r2   re   re   Q   s1   |uu$ hh$;;F**,, 	> 	>JCf}}E3!7!7}U###%$.. >)%===$ 9hh$;;F 	9 	9D%dE488889 9
	9 	9rQ   sc                    | pd                                 } | st          d          g }t          j        dt          j                  }|                    |           D ]B}|                    d          pd                                 }|r|                    |           C|                    |            d t          |           D             D ]}|                    | |d                      t          j
                    }dt          dt          fdd	t          dt          fd
d	t          dt          t          t          t          f                  ffddt          dt           t                   ffd}|D ]} ||          D ]}		 t          j        |	          }
 |
          }||c c S n# t$          $ r Y nw xY wd|	v rbt          |	          D ]R\  }}|dk    r	 |                    |	|d                    \  }
} |
          }||c c c S C# t$          $ r Y Ow xY w	 t)          j        |	          }
 |
          }||c c S # t$          $ r Y w xY wt          d          )N zEmpty model outputz```(?:json|JSON)?\s*(.*?)\s*```r8   c                 $    g | ]\  }}|d k    |S ){rV   ).0ichs      r2   
<listcomp>z(_extract_json_strict.<locals>.<listcomp>z   s!    ;;;2s!rQ   r\   r   c                 .    t          j        dd|           S )Nz,\s*([}\]])z\1)resub)r\   s    r2   _strip_trailing_commasz4_extract_json_strict.<locals>._strip_trailing_commas   s    vneT222rQ   objc                    t          | t                    sdS d| v rdS | r<d| v r8t          |                                                               h d          rdS | sdS t          d |                                 D                       rdS dS )NFfieldsTerror>   coderx   statusmessagec              3   p   K   | ]1\  }}t          |t                    ot          |t                    V  2d S N)r^   r/   r_   )rm   kvs      r2   	<genexpr>zA_extract_json_strict.<locals>._is_payload_like.<locals>.<genexpr>   s?      RRdaz!S!!9jD&9&9RRRRRRrQ   )r^   r_   r]   keysissubsetallrb   )ru   s    r2   _is_payload_likez._extract_json_strict.<locals>._is_payload_like   s    #t$$ 	5s??4 	7c>>c#((**oo&>&>?e?e?e&f&f>5 	5RRciikkRRRRR 	4 trQ   c                     t          | t                    r |           r| nd S t          | t                    r?t          |           dk    r,t          | d         t                    r | d                   S d S )Nr8   r   )r^   r_   rd   rA   )ru   _as_payloadr   s    r2   r   z)_extract_json_strict.<locals>._as_payload   sz    c4   	:**3//933T9c4   	'SXX]]z#a&$7O7O];s1v&&&trQ   candc                 r    |                                  }t          j        dd|          }| |          gS )Nz^\uFEFFrj   )r?   rr   rs   )r   cleanedrt   s     r2   _repair_candidatez/_extract_json_strict.<locals>._repair_candidate   s>    **,,&R11""7++
 	
rQ   rl   zModel output is not valid JSON)r?   
ValueErrorrr   compileSfinditergrouprc   	enumerater,   JSONDecoderr/   r   boolr   r   r   loadsr.   
raw_decodeastliteral_eval)rh   
candidatesfence_patternmatchchunkstartdecoderr   r   variantru   r   rn   ro   _r   r   rt   s                  @@@r2   _extract_json_strictr   k   sU   	
bA /-...J JA24HHM''** % %Q%2,,.. 	%e$$$ a;;1;;; % %!EFF)$$$$  G3S 3S 3 3 3 3c d      $sCx.!9       
 
S	 
 
 
 
 
 
   ((.. 	 	Gj))%+c**&"NNNNN '    g~~&w// 	! 	!EArSyy !!(!3!3GABBK!@!@Q"-+c"2"2".#*NNNNNNN /$ ! ! ! !&w//%+c**&"NNNNN '   9	> 5
6
66s6   "F44
G G$.H
H'	&H'	+"I
I I c                    |                      d          pg }t          |t                    sg }|s9g }t          | |           d                    |                                          S g }|D ]2}t          |t                    s|                     d          }t          |t                    r|                     d          pg }t          |t                    rZ|D ]W}t          |t                    s|                     d          }t          |t                    r|                    |           X|                     d          }	t          |	t                    r|                    |	           t          |t                    r|                    |           4|s;g }t          | |           |r'd                    |                                          S d                    |                                          S )Nr   rj   contentpartsr\   )	getr^   rd   re   r=   r?   r_   r/   rc   )
r   r   extra_textsrY   	candidater   r   ptdirect_texts
             r2   "_extract_text_from_gemini_responser      s   \**0bJj$'' 
 ,!#!';777ww{##))+++E " "	)T** 	--	**gt$$ 	(KK((.BE%&& ( ( (A%a.. ! fA!!S)) (QmmF++k3'' 	&LL%%%gs## 	"LL!!! 0!#!';777 	077;''--///775>>!!!rQ   fields_plandatac                    |                     d          }t          |t                    s&t          |t                    rt          |          ni }| D ]}|                     d          }t          |t                    r|s0|                     |          }t          |t                    r|}nWdd dd d}|Gt          |                                          r!t          |                                          nd |d<   |}|||<   	 t          |                     dd                    |d<   n# t          $ r d|d<   Y nw xY w|                     d          }|d |d<   nGt          |                                          r!t          |                                          nd |d<   |                     d          }|d|d<   nG	 t          |          }	t          dt          d	|	                    |d<   n# t          $ r d|d<   Y nw xY w|                     d
          }
|
d |d
<   t          |
                                          }
|
sd n|
|d
<   | D ]%}|                     d          }||vr
dd dd d||<   &d|iS )Nrw   rH   r           )candidate_idxr\   
confidence
normalizedr\   r   r   g      ?r   )
r   r^   r_   r/   r?   intr.   floatmaxmin)r   r   rw   r1   rH   r   	value_boxr   cccns              r2   _normalize_output_fieldsr      s   XXhFfd## >)$55=d2 23 23eeEll#s## 	3 	JJsOOa 	$II "#!"	 I }69!ffllnn$NCFFLLNNN$	&!A#F3K	#!$QUU?A%>%>!?!?Ao 	# 	# 	#!"Ao	# EE&MM9AfII*-a&&,,..BAdAfI EE,9!AlOO&1XX"%c3sB<<"8"8, & & &"%,& EE,9"AlOOAA*+2ddAlOO  d deeEllf,-t3^bccF3Kfs$   &D//E E0G77H	H	bundlec           
         t          | t                    r|                     d          ng }t          |t                    sg }t          j                            | pi           }|sdi iS t                      }t                      }t          
                    |           d| }d|dgdt          ddd	}d }t          }t          t          d
z             D ]}		 ||d         d<   t          j        |dt!          j        |                              d          dddd          }
t          j        |
d          5 }|                                                    dd          }d d d            n# 1 swxY w Y   t!          j        |          }t/          ddt1          |          t          |t                    r!t          |                                          ng d           t5          |          }t/          dddt          |t6                    r
|d d         ndi           |st/          dd|           t9          |          }t;          ||          c S # t<          $ r}|}t/          d d!t7          |          |	d"           |	t          k     r>|d#k     rt?          d#|d$z            }tA          j!        tD          d$|	z  z             Y d }~t;          |di i          cY d }~c S d }~wtF          $ r}d}	 |                                                    dd          }n# tH          $ r Y nw xY wtK          d%|j&         d&|j'         d'|           }|	t          k     r.|j&        d(v r%tA          j!        tD          d$|	z  z             Y d }~|d }~wtH          $ rJ}|}|	t          k     r%tA          j!        tD          d$|	z  z             Y d }~tK          d)|           |d }~ww xY wtK          d*|           |)+Nrw   )modelz?key=user)roler   r   zapplication/json)temperaturemaxOutputTokensresponseMimeType)contentsgenerationConfigr8   r   r   POSTr    zkyc-gemini-ocr/1.0)zContent-TypeAcceptz
User-Agent)methodr   headers   )timeoutreplace)errorsgemini_raw_responsezresponse parsed)raw_lenr   gemini_text_extractzextracted textr\   i  rj   gemini_no_textz Gemini response has no text partgemini_parse_errorzJSON parse error)rx   attempti       zGemini HTTP r$   z; body=>             zGemini OCR failed: z!Gemini OCR failed after retries: )(r^   r_   r   rd   kyc	KYCEnginegemini_parts_from_bundlerP   rW   _GEMINI_ENDPOINTformat_MAX_OUTPUT_TOKENSrange_MAX_RETRIESr	   Requestr,   r-   encodeurlopenreaddecoder   r3   rA   r   r   r/   r   r   r   r   timesleep_RETRY_BASE_SLEEPr
   r.   rN   ry   reason)r   r   r   api_keyr   urlr   last_errmax_output_tokensr   reqresprawr   r\   
result_objebodys                     r2   llm_ocrr   3  s$   *4VT*B*BJ&**X&&&Kk4(( "%-"H"HSU"V"VE "~!##GE$$5$11
A
A
A
AC
  
 1 2
 
 G %)H*)** 6A 6A5	A=NG&'(9:/Z((//88$60"6 		 	 	C b111 DTiikk(((CCD D D D D D D D D D D D D D D :c??D,.?SQTXXt~  @D  FJ  uK  uK  `S_cdhdmdmdodo_p_p_p  QS  BT  BT  U  U  U5d;;D,.>XbcgilXmXmIuetesu@vwww W+-OQUVVV-d33J+KDDDDD 
	I 
	I 
	I H+-?3q66^eAfAfggg%%$t++(+D2Ca2G(H(H%
,W=>>>+K(BHHHHHHHHHH 
	 
	 
	DvvxxwyAA   #$T16$T$TQX$T$Td$T$TUUH%%!&4M*M*M
,W=>>>N 	A 	A 	AH%%
,W=>>>8Q8899q@	A E8EE
F
FHTs   A"I ?*E5)I 5E9	9I <E9	=C I  
O-*A&K/K/'O-/O-<N?)L)(N)
L63N5L66ANNO-#,O(O((O-r}   )r   N)&r   r,   r9   rr   r   r   typingr   r   r   r   r   urllibr	   urllib.errorr
   kyc_policy_engine_llm_v2r   rL   rU   r   r   rM   r   r   r   r   r'   r/   r3   rJ   rP   rW   re   r   r   r   r   rV   rQ   r2   <module>r      s   



  				 				        1 1 1 1 1 1 1 1 1 1 1 1 1 1       " " " " " " & & & &P * d S#>GGHH s9292C8899E)")$>FFGG 	,c226XX c 3 $sCx.1I UY    "   8oS o o o o`3 ` ` ` `
9 9C 9S	 9#c(AS 9_c 9 9 9 94Y7C Y7DcN Y7 Y7 Y7 Y7x'"S#X '"3 '" '" '" '"T?$tCH~*> ?d3PS8n ?Y]^acf^fYg ? ? ? ?DUUDcN UUtCH~ UU UU UU UU UU UUrQ   