o
    ެi4                     @   sH  U d dl Z d dlZd dlZd dlmZmZmZ d dlZzd dl	m
Z
 d dl	mZmZmZmZ W n eyB   dZ
e Z Z ZZY nw e ddZdgZdZee dd	Zee d
dZee ddZee ddZe dd  Ze dd  dv Zh dZdaed e d< de!e" fddZ#de"de$fddZ%d>de"dedee" dee"ef fd d!Z&de!ee"  fd"d#Z'd$ede$fd%d&Z(d'eddfd(d)Z)d*ed+e!e" ddfd,d-Z*d?d.d/Z+d0ede"fd1d2Z,dede!fd3d4Z-d5dde"dede"fd6d7Z.d8e"dee"ef fd9d:Z/d;ee"ef dee"ef fd<d=Z0dS )@    N)AnyDictOptional)OpenAI)RateLimitErrorAPITimeoutErrorAPIConnectionErrorInternalServerErrorKYC_OCR_MODELgpt-5.2)	zdoes not existzis not valid for this APIzmodel is not supportedzmodel is not availablezunknown modelzunsupported reasoningzinvalid reasoningtemperaturezdoes not support temperatureKYC_OCR_MAX_OUTPUT_TOKENS2200KYC_OCR_MAX_RETRIES2KYC_OCR_RETRY_BASE_SLEEPz0.4KYC_OCR_OPENAI_TIMEOUT90KYC_OCR_GPT5_REASONING_EFFORTxhighKYC_OCR_DEBUG0)1trueyeson>   lowhighnoner   mediumminimalr   _OAI_CLIENTreturnc                  C   sR   t  } g }tD ]}|rt|tsq|| v rq| | || q|s'g d}|S )N)r   z
gpt-5-miniz
gpt-5-nanogpt-5zgpt-4o-mini)set_MODEL_FALLBACKS
isinstancestraddappend)seen
candidatesmodel r-   llm_ocr_openai.py_normalize_model_candidates7   s   
r/   r,   c                 C   s   t | o| dS )Nr#   )bool
startswith)r,   r-   r-   r.   _is_gpt5_familyF   s   r2   content_itemsreasoning_effortc                 C   s<   | d|dgdddiit dd}t| r|rd|i|d	< |S )
Nuserrolecontentformattypejson_objectF)r,   inputtextmax_output_tokensstoreeffort	reasoning)_MAX_OUTPUT_TOKENSr2   )r,   r3   r4   kwargsr-   r-   r.   _build_request_kwargsJ   s   

rD   c                  C   sT   t tvrd gS t g} t dkr| d | d  g }| D ]}||vr'|| q|S )Nr   )_GPT5_REASONING_EFFORT_GPT5_ALLOWED_REASONING_EFFORTr)   )chainuniqvr-   r-   r.   _reasoning_chainW   s   


rJ   errc                    s"   t |   t fddtD S )Nc                 3   s    | ]}| v V  qd S Nr-   ).0fragmentmsgr-   r.   	<genexpr>i   s    z)_is_model_access_error.<locals>.<genexpr>)r'   lowerany_KNOWN_UNSUPPORTED_SUBSTR)rK   r-   rO   r.   _is_model_access_errorg   s   rU   partsc                  G   s   t rtdg| R   d S d S )Nz[KYC-OCR-DEBUG])_DEBUGprint)rV   r-   r-   r.   
_debug_logl   s   rY   nodeoutc                 C   s   t | tr|  }|r|| dS t | trk| d}t |tr&|| | d}t |tr6|| n#t |trY|d}t |trJ|| |d}t |trY|| dD ]}|| v rht| | | q[dS t | ttfr|| D ]	}t|| qtdS dS )zMRecursively collect text-like fields from SDK objects and dict/list payloads.Noutput_textr=   value)r8   outputchoicesmessagesresponseresult)	r&   r'   stripr)   dictget_collect_textlisttuple)rZ   r[   txtrI   maybekitr-   r-   r.   rf   q   s:   














rf   c                  C   sB   t d urt S td u rtdtd} | stdt| tda t S )Nzopenai package not availableOPENAI_API_KEYzOPENAI_API_KEY is not set)api_keytimeout)r!   r   RuntimeErrorosgetenv_OPENAI_TIMEOUT)rn   r-   r-   r.   _get_client   s   
rt   respc           
      C   sx  | d u rdS t | dd }t|tr| S d }t| dr.z|  }W n ty-   d }Y nw |d u r8t | dd }t|ts?dS |d}t|trM| S g }t	|| |r]d
| S |d}t|trg }|D ]E}t|trw|dnd }t|tsqk|D ].}	t|	tsq|	ddkrq|	d	}t|tr|d
p|d	}t|tr|| qqk|rd
| S dS )N r\   
model_dump__dict__
r^   r8   r:   r=   r]   )getattrr&   r'   rc   hasattrrw   	Exceptionrd   re   rf   joinrg   r)   )
ru   ri   payloadr=   
txt_chunksr^   chunksitemr8   cr-   r-   r.   _response_to_text   sZ   














r   c                 C   s   g }t | ts	|S | D ]E}t |tsqt|dpd}|dv r4|dd}t |tr3|d|d q|dkrP|d}t |trP| rP|dd|id	 q|S )
Nr:   rv   )
input_textr=   r=   )r:   r=   input_image	image_urlurl)r:   r   )r&   rg   rd   r'   re   r)   rc   )r3   user_contentr   tri   r   r-   r-   r.   _content_items_to_chat_message   s*   



r   clientc           
   
   C   s   t |}|sdS zFd|dg}| jjj||dditd}t|drC|jrF|jd }t|d	d }|d urIt|d
d }t|t	rL|
 W S W dS W dS W dS W dS  tyh }	 ztdt|	 W Y d }	~	dS d }	~	ww )Nrv   r5   r6   r:   r;   )r,   r`   response_format
max_tokensr_   r   messager8   zchat fallback failed:)r   chatcompletionscreaterB   r{   r_   rz   r&   r'   rc   r|   rY   repr)
r   r,   r3   r   r`   	chat_respc0rP   ri   er-   r-   r.   _call_chat_fallback   s@   


r   sc                 C   sz   | pd  } | stdzt| W S  ty   Y nw | d}| d}|dkr9||kr9t| ||d  S td)Nrv   Empty model output{}r      zModel output is not valid JSON)rc   
ValueErrorjsonloadsr|   findrfind)r   startendr-   r-   r.   _extract_json_strict  s   

r   bundlec                 C   s  t  }tj| }t }d }d }t }|D ]}|D ]}ttd D ]}	zt|||}
t	d||	d |t
|d |jjdi |
}t|}|s`t	dd|i t|||}|s`t	dd|i t|}|d}t|tsrtdt| D ]~\}}t|tsd	d
dd d||< qxzt|dd	|d< W n ty   d	|d< Y nw |dd
}|d u rd
nt||d< |dd }|d u rd |d< nzt|}tdtd||d< W n ty   d |d< Y nw |dd }|d u rd nt||d< qxd|iW       S  ttttfy@ } z.|}t	dt |j!t|d|d|	d  |	tk r5t"#t$d|	   W Y d }~q#W Y d }~ nd }~w ty| } z0|}t|%dr]t	d|||	d d |	tk rqt"#t$d|	   W Y d }~q#W Y d }~ nRd }~w ty } zAt&|r|}W Y d }~ n8t	dt |j!t|d| t|' }|(|t
|d k rd|v rW Y d }~ nt)d| |d }~ww qq|d urt)d| ||d urt)d| |t)d) Nr   zOCR request)r,   attemptrA   itemsz,responses output empty, trying chat fallbackr,   zchat fallback output emptyfieldsz(Missing/invalid 'fields' in OCR responser   rv   g        )candidate_idxr=   
confidence
normalizedr   r=   r   g      ?r   zretryable error:zmodel=zattempt=   r   zempty output, retrying)r,   rA   r   znon-retryable OCR error:rA   zllm_ocr non-retryable failure: zllm_ocr model not available: zVllm_ocr retryable failure: unable to get successful response after fallback attempts: z1llm_ocr failed without receiving a valid responser-   )*rt   kyc	KYCEngine openai_content_items_from_bundler/   rJ   range_MAX_RETRIESrD   rY   len	responsesr   r   r   r   re   r&   rd   r   rg   r   intr|   r'   floatmaxminr   r   r   r	   r:   __name__timesleep_RETRY_BASE_SLEEPr1   rU   rR   indexrp   )r   r   r3   model_candidateslast_model_errorlast_retry_errorreasoning_chainr,   r4   r   reqru   raw_textdatar   rk   rI   r   r   ccnr   rP   r-   r-   r.   llm_ocr!  s   

	



"


"
S
r   rL   )r"   r   )1rq   r   r   typingr   r   r   kyc_policy_engine_llm_v2r   openair   r   r   r   r	   r|   rr   _DEFAULT_MODELr%   rT   r   rB   r   r   r   rs   rc   rR   rE   rW   rF   r!   __annotations__rg   r'   r/   r0   r2   rD   rJ   rU   rY   rf   rt   r   r   r   r   r   r-   r-   r-   r.   <module>   sJ   
(
!7&