
    ެi4                        U d dl Z d dlZd dlZd dlmZmZmZ d dlZ	 d dl	m
Z
 d dl	mZmZmZmZ n# e$ r dZ
exZxZxZZY nw xY w e j        dd          ZdgZdZ e e j        dd	                    Z e e j        d
d                    Z e e j        dd                    Z e e j        dd                    Z e j        dd                                                                          Z e j        dd                                                                          dv Zh dZdaed         e d<   de!e"         fdZ#de"de$fdZ%d1de"dedee"         dee"ef         fdZ&de!ee"                  fdZ'd ede$fd!Z(d"eddfd#Z)d$ed%e!e"         ddfd&Z*d2d'Z+d(ede"fd)Z,dede!fd*Z-d+dde"dede"fd,Z.d-e"dee"ef         fd.Z/d/ee"ef         dee"ef         fd0Z0dS )3    N)AnyDictOptional)OpenAI)RateLimitErrorAPITimeoutErrorAPIConnectionErrorInternalServerErrorKYC_OCR_MODELgpt-5.2)	zdoes not existzis not valid for this APIzmodel is not supportedzmodel is not availablezunknown modelzunsupported reasoningzinvalid reasoningtemperaturezdoes not support temperatureKYC_OCR_MAX_OUTPUT_TOKENS2200KYC_OCR_MAX_RETRIES2KYC_OCR_RETRY_BASE_SLEEPz0.4KYC_OCR_OPENAI_TIMEOUT90KYC_OCR_GPT5_REASONING_EFFORTxhighKYC_OCR_DEBUG0)1trueyeson>   lowhighnoner   mediumminimalr   _OAI_CLIENTreturnc                      t                      } g }t          D ]I}|rt          |t                    s|| v r|                     |           |                    |           J|sg d}|S )N)r   z
gpt-5-miniz
gpt-5-nanogpt-5zgpt-4o-mini)set_MODEL_FALLBACKS
isinstancestraddappend)seen
candidatesmodels      /var/www/html/llm_ocr_openai.py_normalize_model_candidatesr0   7   s    55DJ! ! ! 	Juc22 	D==%     UTTT
    r.   c                 J    t          | o|                     d                    S )Nr%   )bool
startswith)r.   s    r/   _is_gpt5_familyr5   F   s#    3%**733444r1   content_itemsreasoning_effortc                 `    | d|dgdddiit           dd}t          |           r	|rd|i|d	<   |S )
Nuserrolecontentformattypejson_objectF)r.   inputtextmax_output_tokensstoreeffort	reasoning)_MAX_OUTPUT_TOKENSr5   )r.   r6   r7   kwargss       r/   _build_request_kwargsrH   J   sa    !m<<=FM23/ F u ;"2 ;')9:{Mr1   c                      t           t          vrd gS t           g} t           dk    r|                     d           |                     d            g }| D ]}||vr|                    |           |S )Nr   )_GPT5_REASONING_EFFORT_GPT5_ALLOWED_REASONING_EFFORTr+   )chainuniqvs      r/   _reasoning_chainrO   W   s~    %CCCv$&E''V	LL "D  D==KKNNNKr1   errc                     t          |                                           t          fdt          D                       S )Nc              3       K   | ]}|v V  	d S N ).0fragmentmsgs     r/   	<genexpr>z)_is_model_access_error.<locals>.<genexpr>i   s'      II8x3IIIIIIr1   )r)   lowerany_KNOWN_UNSUPPORTED_SUBSTR)rP   rW   s    @r/   _is_model_access_errorr\   g   s<    
c((..

CIIII/HIIIIIIr1   partsc                  2    t           rt          dg| R   d S d S )Nz[KYC-OCR-DEBUG])_DEBUGprint)r]   s    r/   
_debug_logra   l   s1     )(%(((((() )r1   nodeoutc                    t          | t                    r-|                                 }|r|                    |           dS t          | t                    r3|                     d          }t          |t                    r|                    |           |                     d          }t          |t                    r|                    |           nt          |t                    r~|                    d          }t          |t                    r|                    |           |                    d          }t          |t                    r|                    |           dD ]}|| v rt          | |         |           dS t          | t          t          f          r| D ]}t          ||           dS dS )zMRecursively collect text-like fields from SDK objects and dict/list payloads.Noutput_textrA   value)r<   outputchoicesmessagesresponseresult)	r(   r)   stripr+   dictget_collect_textlisttuple)rb   rc   txtrN   maybekits          r/   ro   ro   q   s   $ jjll 	JJsOOO$ HH]##a 	JJqMMMHHVa 	"JJqMMMM4   	"EE'NNE%%% "

5!!!EE&MME%%% "

5!!!S 	, 	,ADyyd1gs+++$u&& # 	# 	#B"c""""# #	# 	#r1   c                      t           t           S t          t          d          t          j        d          } | st          d          t          | t
                    a t           S )Nzopenai package not availableOPENAI_API_KEYzOPENAI_API_KEY is not set)api_keytimeout)r"   r   RuntimeErrorosgetenv_OPENAI_TIMEOUT)rx   s    r/   _get_clientr~      sa    ~9:::i())G 86777/BBBKr1   respc                    | dS t          | dd           }t          |t                    r|                                S d }t	          | d          r(	 |                                 }n# t          $ r d }Y nw xY w|t          | dd           }t          |t                    sdS |                    d          }t          |t                    r|                                S g }t          ||           |r'd
                    |                                          S |                    d          }t          |t                    r%g }|D ]}t          |t                    r|                    d          nd }t          |t                    sD|D ]}	t          |	t                    s|	                    d          dk    r2|	                    d	          }t          |t                    r*|                    d
          p|                    d	          }t          |t                    r|                    |           |r'd
                    |                                          S dS )N re   
model_dump__dict__
rg   r<   r>   rA   rf   )getattrr(   r)   rl   hasattrr   	Exceptionrm   rn   ro   joinrp   r+   )
r   rr   payloadrA   
txt_chunksrg   chunksitemr<   cs
             r/   _response_to_textr      sr   |r $t
,
,C#s yy{{ Gt\"" 	oo''GG 	 	 	GGG	 $
D11gt$$ r;;}%%D$ zz|| J':&&& -yy$$**,,,[[""F&$ + 	' 	'D-7d-C-CMdhhy)))Ggt,,  	' 	'!!T** 55==M11eeFmmc4(( >'''**=cggfooCc3'' 'MM#&&&	'  	+776??((***2s   A' 'A65A6c                    g }t          | t                    s|S | D ]}t          |t                    st          |                    d          pd          }|dv rD|                    dd          }t          |t                    r|                    d|d           |dk    rX|                    d          }t          |t                    r.|                                r|                    dd|id	           |S )
Nr>   r   )
input_textrA   rA   )r>   rA   input_image	image_urlurl)r>   r   )r(   rp   rm   r)   rn   r+   rl   )r6   user_contentr   trr   r   s         r/   _content_items_to_chat_messager      s%   LmT**   $%% 	  &B''&&&((62&&C#s## C##VS$A$ABBB---I)S)) ioo.?.? ##'"'!3% %   
 r1   clientc                    t          |          }|sdS 	 d|dg}| j        j                            ||ddit                    }t          |d          ra|j        rZ|j        d         }t          |d	d           }|:t          |d
d           }t          |t                    r|
                                S n5# t          $ r(}	t          dt          |	                     Y d }	~	dS d }	~	ww xY wdS )Nr   r9   r:   r>   r?   )r.   ri   response_format
max_tokensrh   r   messager<   zchat fallback failed:)r   chatcompletionscreaterF   r   rh   r   r(   r)   rl   r   ra   repr)
r   r.   r6   r   ri   	chat_respc0rW   rr   es
             r/   _call_chat_fallbackr      s"   1-@@L r55
 K+22#]3)	 3 
 
	 9i(( 	'Y-> 	'"1%B"i..Cc9d33c3'' '99;;&   *DGG444rrrrr 2s   B B7 7
C)C$$C)sc                 b   | pd                                 } | st          d          	 t          j        |           S # t          $ r Y nw xY w|                     d          }|                     d          }|dk    r%||k    rt          j        | ||dz                      S t          d          )Nr   Empty model output{}r      zModel output is not valid JSON)rl   
ValueErrorjsonloadsr   findrfind)r   startends      r/   _extract_json_strictr     s    	
bA /-...z!}}    FF3KKE
''#,,CzzcEkkz!E#'M*+++
5
6
66s   = 
A
	A
bundlec                 J	   t                      }t          j                            |           }t	                      }d }d }t                      }|D ]}|D ]}t          t          dz             D ]}		 t          |||          }
t          d||	dz   |t          |          d            |j        j        di |
}t          |          }|s7t          dd|i           t          |||          }|st          dd|i           t          |          }|                    d          }t#          |t$                    st'          d          t)          |                                          D ] \  }}t#          |t$                    sd	d
dd d||<   &	 t-          |                    dd	                    |d<   n# t.          $ r d	|d<   Y nw xY w|                    dd
          }|d
nt1          |          |d<   |                    dd           }|d |d<   nG	 t3          |          }t5          dt7          d|                    |d<   n# t.          $ r d |d<   Y nw xY w|                    dd           }|d nt1          |          |d<   "d|ic c c S # t8          t:          t<          t>          f$ ru}|}t          dtA          |          j!        t1          |          d|d|	dz              |	t          k     r%tE          j#        tH          d|	z  z             Y d }~Y d }~ nCd }~wt&          $ rv}|}t1          |          %                    d          rt          d|||	dz   d           |	t          k     r%tE          j#        tH          d|	z  z             Y d }~.Y d }~ nd }~wt.          $ r}tM          |          r|}Y d }~ nt          dtA          |          j!        t1          |          d|           t1          |          '                                }|(                    |          t          |          dz
  k     r
d|v rY d }~ ntS          d|           |d }~ww xY w|tS          d|           ||tS          d|           |tS          d          ) Nr   zOCR request)r.   attemptrE   itemsz,responses output empty, trying chat fallbackr.   zchat fallback output emptyfieldsz(Missing/invalid 'fields' in OCR responser   r   g        )candidate_idxrA   
confidence
normalizedr   rA   r   g      ?r   zretryable error:zmodel=zattempt=   r   zempty output, retrying)r.   rE   r   znon-retryable OCR error:rE   zllm_ocr non-retryable failure: zllm_ocr model not available: zVllm_ocr retryable failure: unable to get successful response after fallback attempts: z1llm_ocr failed without receiving a valid responserT   )*r~   kyc	KYCEngine openai_content_items_from_bundler0   rO   range_MAX_RETRIESrH   ra   len	responsesr   r   r   r   rn   r(   rm   r   rp   r   intr   r)   floatmaxminr   r   r	   r
   r>   __name__timesleep_RETRY_BASE_SLEEPr4   r\   rY   indexrz   )r   r   r6   model_candidateslast_model_errorlast_retry_errorreasoning_chainr.   r7   r   reqr   raw_textdatar   rt   rN   r   r   ccnr   rW   s                          r/   llm_ocrr   !  s   ]]FMBB6JJM 344,0,0&((O! RU RU / Q	U Q	U !122 PU PUOU/}FVWWC%%*'.{)9%(%7%7	    36+299S99D066H# W"#QT[]bScddd#6vum#T#T' W&'CguEUVVV/99D!XXh//F%fd33 U()STTT !%V\\^^ 4 4 H H1)!T22 %:;RWZjn(o(oF1I$314QUU?A5N5N1O1OAo..( 3 3 312Ao...3 EE&"--*+)BBQ&	EE,559.2AlOO7%*1XX25c3sB<<2H2H,#, 7 7 726,7 EE,5523)$$Q,$f--------&9KM`a   '($14773CSVVXW\^hjqtujuvvv--
#4W#EFFF EEEEE!   '($1vv(()=>> F"#;u[kx  CD  yD  >E  >E  F  F  F--
#4W#EFFF EEEEE  	U 	U 	U-a00 +,(9477;KSQRVVU]_deeea&&,,..C&,,-=>>_AUAUXYAYYY^imp^p^p&'L'L'LMMSTT	UQQ	Uf #M;KMMNNTdd#  G  uE  G  G  H  H  N^  	^
J
K
KKs   3DJ&F:9J:G	JGAJ0I
JIJI4JQ%	5A#L%	%Q%	2A%N#	#Q%	0Q 	B Q 	Q 	 Q%	rS   )r#   r   )1r{   r   r   typingr   r   r   kyc_policy_engine_llm_v2r   openair   r   r   r	   r
   r   r|   _DEFAULT_MODELr'   r[   r   rF   r   r   r   r}   rl   rY   rJ   r_   rK   r"   __annotations__rp   r)   r0   r3   r5   rH   rO   r\   ra   ro   r~   r   r   r   r   r   rT   r1   r/   <module>r      s   
				   & & & & & & & & & & & & & &\_____________ \ \ \FR[[N[_['9<O<O<O\
 ?I66 
  S#>GGHH s9292C8899E)")$>FFGG %		":DAABB"#BGLLRRTTZZ\\ 	?C	(	(	.	.	0	0	6	6	8	8<V	V" " "  #'Xh & & &T#Y    53 54 5 5 5 5
 
 
S 
HUXM 
eijmorjres 
 
 
 
$x}-     J	 Jd J J J J
)s )t ) ) ) )
# #$s) # # # # #B   "4C 4C 4 4 4 4n# $    0  S S    :7C 7DcN 7 7 7 7$bLDcN bLtCH~ bL bL bL bL bL bLs   0 AA