o
    .Ēi                     @   s>  U d dl Z d dlZd dlZd dlmZmZmZ d dlZzd dl	m
Z
 d dl	mZmZmZmZ W n eyB   dZ
e Z Z ZZY nw e ddZee ddZee d	d
Zee ddZdaed ed< dddZdedeeef fddZdeeef deeef fddZdeeef deeef fddZdS )    N)AnyDictOptional)	Anthropic)APIConnectionErrorAPITimeoutErrorRateLimitErrorAPIStatusErrorKYC_OCR_MODELzclaude-sonnet-4-5KYC_OCR_MAX_OUTPUT_TOKENS1400KYC_OCR_MAX_RETRIES2KYC_OCR_RETRY_BASE_SLEEPz0.5r   _ANTH_CLIENTreturnc                  C   s@   t d urt S td u rtdtd} | stdt| da t S )Nz7anthropic package not available (pip install anthropic)ANTHROPIC_API_KEYzANTHROPIC_API_KEY is not setapi_key)r   r   RuntimeErrorosgetenvr    r   /var/www/html/llm_ocr_claude.py_get_client1   s   

r   sc                 C   sz   | pd  } | stdzt| W S  ty   Y nw | d}| d}|dkr9||kr9t| ||d  S td)N zEmpty model output{}r      zModel output is not valid JSON)strip
ValueErrorjsonloads	Exceptionfindrfind)r   startendr   r   r   _extract_json_strictQ   s   

r)   bundlec                    s   |  dpg }g }|D ]}| d}t|tr"| r"||  qdddiddiddiddidg dd	d
  fdd|D }ddd|d	didgd	d
S )z

    Enforce:

      { "fields": { "<key>": {candidate_idx:int, text:string, confidence:number, normalized:string } } }

    We do NOT require all keys here; we fill missing keys in post-processing.

    fieldskeyobjecttypeintegerstringnumbercandidate_idxtext
confidence
normalizedF)r.   
propertiesrequiredadditionalPropertiesc                    s   i | ]}| qS r   r   ).0kfield_value_schemar   r   
<dictcomp>   s    z(_build_output_schema.<locals>.<dictcomp>)r.   r7   r9   )get
isinstancestrr    append)r*   fields_plankeysfr;   fields_propsr   r<   r   _build_output_schemau   s6   
rG   c                 C   s*  t  }tj| }t| }g }| dpg D ]}|d}t|tr-| r-|	|  qt
td D ]]}z|jjttd|dgdd|did	d
}d}	t|dd pUg }
|
D ]*}t|trp|ddkro|	|dpmd7 }	qXt|dd dkr|	t|ddpd7 }	qXt|	}|d}t|tsi }|D ]}||}t|tsd	dddd}|||< zt|dd	|d< W n ty   d	|d< Y nw |dd}|d u rdnt||d< |dd}zt|}tdtd||d< W n ty   d|d< Y nw |dd}|d u r	dnt|}| sd n||d< qd|iW   S  tttfyK } z|tk r?ttd|   W Y d }~q4td| |d }~w ty~ } z't|dd }|dv rr|tk rrttd|   W Y d }~q4td| |d }~w ty } ztd| |d }~ww d S )Nr+   r,   r   user)rolecontentformatjson_schema)r.   schemar   )model
max_tokensmessagesoutput_configtemperaturer   rJ   r.   r4   g        r2   r3   r5   g      ?r6      zllm_ocr retryable failure: status_code)i  i  zllm_ocr APIStatusError: zllm_ocr non-retryable failure: ) r   kyc	KYCEngine#anthropic_content_items_from_bundlerG   r?   r@   rA   r    rB   range_MAX_RETRIESrP   create_DEFAULT_MODEL_MAX_OUTPUT_TOKENSgetattrdictr)   intr$   floatmaxminr   r   r   timesleep_RETRY_BASE_SLEEPr   r	   )r*   clientcontent_blocksoutput_schemarD   rE   r;   attemptrespout_textrJ   blockdatar+   vtcccnecoder   r   r   llm_ocr   s   







 ru   )r   r   )r   r"   rc   typingr   r   r   kyc_policy_engine_llm_v2rU   	anthropicr   r   r   r   r	   r$   r   r[   r_   r\   rY   r`   re   r   __annotations__r   rA   r)   rG   ru   r   r   r   r   <module>   s*   


 "$&T