o
    4(i%                    @  sf  U d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	m
Z
mZ d dlmZmZmZmZmZmZmZ d dlZd dlZd dlZd dlmZ ei dddd	d
ddddddddddddddddddd	ddddddddd ddddd!Zed"d"d#d$d$d%d&d&d&d'd"d(d'd)Zed*Zd+Z e!e"d,d-Z#e!e"d.d/Z$e"d0d1Z%d2Z&d3Z'e(e"d4d5Z)e"d6d* + d7v Z,e-e.Z/d*d;d<Z0d*d=d>Z1d*d?d@Z2d*dAdBZ3d*dCdDZ4d*dEdFZ5d+dJdKZ6d+dLdMZ7d,dOdPZ8d-dSdTZ9d.dYdZZ:d/d\d]Z;d0d^d_Z<d1dadbZ=d2dhdiZ>d3dldmZ?d4dodpZ@d5dsdtZAd6dxdyZBd7d{d|ZCeeeeef geeDee f f ZEd8d~dZFd-ddZGddddd9ddZHd:ddZId;ddZJd.ddZKd.ddZLd.ddZMd.ddZNd.ddZOeKeLeMe:eNeNeOdZPdeQd< d<ddZRd=ddZSd>ddZTd?ddZUd@ddZVdAddZWdAddZXdAddZYdBddZZdCdDddZ[dd d ddEddĄZ\dFdGddɄZ]dde#e$d̜dHddфZ^G ddӄ dӃZ_G ddՄ de_Z`G ddׄ de_ZadabdeQd< ec ZddIddۄZedJddZfe
G dd dZge
G dd dZhdKddZii ZjdeQd< ec ZkdLddZlG dd dZmdMddZndNddZodOddZp	dPdQddZqdRddZrdddddddddddddSd(d)ZsdS (T      )annotationsN)	dataclassfield)DictAnyListOptionalTupleIterableCallable)YOLOu   ۰0u   ۱1u   ۲2u   ۳3u   ۴4u   ۵5u   ۶6u   ۷7u   ۸8u   ۹9u   ٠u   ١u   ٢u   ٣u   ٤u   ٥u   ٦)u   ٧u   ٨u   ٩u   یu   کu   هu   وu   ا u   لا)u   يu   ىu   كu   ةu   ۀu   ؤu   إu   أu   ٱu   ءu   ئu   ﻻu   ـz/[\u0610-\u061A\u064B-\u065F\u0670\u06D6-\u06ED]u   ‌KYC_OCR_MAX_DIM2200KYC_OCR_JPEG_QUALITY98KYC_EDENAI_URLz'https://api.edenai.run/v3/universal-ai/zocr/ocr/googlezeyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VyX2lkIjoiNTc1MDVlYTgtOGI3Yy00ZDQyLTlhYmMtOGMxYzliOGEyZDRjIiwidHlwZSI6ImFwaV90b2tlbiJ9.PLCBmy1gNDTpqfzCJwfQlaIVY4EkcE0TL7lsXiOHTMYKYC_EDENAI_TIMEOUT_SEC40!KYC_EDENAI_SHOW_ORIGINAL_RESPONSE)r   trueyesonsstrreturnc                 C  s   t dd| pd S )Nz\s+ r   )resubstripr#    r+   kyc_policy_engine_llm_v2.pycollapse_spacesH   s   r-   c                 C  s   | pd tS )Nr   )	translate_FA_AR_TO_EN_DIGITSr*   r+   r+   r,   to_en_digitsL      r0   c                 C  s:   | pd} t | } | t} | td} td| } t| S )Nr   r&   )r0   r.   _AR_FA_LETTERSreplace_ZWNJ_DIACRITICS_REr(   r-   r*   r+   r+   r,   normalize_fa_textP   s   
r6   c                 C  s(   t | } tjdd| tjd} t|  S )Nz0[^\w\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\s\-]r   flags)r6   r'   r(   UNICODEr-   lowerr*   r+   r+   r,   normalize_nameY   s   r;   c                 C  s   t | } tdd| S )Nz[^0-9]r   )r0   r'   r(   r*   r+   r+   r,   normalize_digits_only_   s   r<   c                 C  s   t | } tdd|  S )Nz[^0-9A-Za-z]r   )r0   r'   r(   upperr*   r+   r+   r,   normalize_id_alphanumd   s   r>   abfloatc           
   
   C  s   | pd} |pd}| |krdS | r|sdS t tt|d }t| ddD ]5\}}|g}t|ddD ]%\}}||kr;dnd}|t|| d ||d  d ||d  |  q1|}q$|d }	d|	tt| t|  S )Nr         ?           )startr   )listrangelen	enumerateappendminmax)
r?   r@   previcacurjcbcostdistr+   r+   r,   levenshtein_ratiom   s   4rV   c                 C  sr   t | } t |}| r|sdS t|  }t| }t||@ }t||B }|r,|| nd}d| dt| |  S )NrC   333333?g?)r;   setsplitrI   rV   )r?   r@   sasbinterunionrR   r+   r+   r,   token_set_similarity   s   r^   Optional[Tuple[int, int, int]]c              	   C  s8  t | pd} | dddd} td| }t|dk rdS dd	 |dd D }|d
 |d |d f|d
 |d |d f|d |d
 |d f|d |d |d
 f|d |d
 |d f|d |d |d
 fg}|D ]0\}}}d|  krxdkrn qid|  krdkrn qid|  krdkrn qi|||f  S qidS )z~
    Accepts common formats and returns (YYYY, M, D) if parseable.
    This is only for validation/matching, not for OCR.
    r   -/.z\d{1,4}   Nc                 S     g | ]}t |qS r+   int.0xr+   r+   r,   
<listcomp>       z$parse_date_loose.<locals>.<listcomp>r   rD        i        )r0   r3   r'   findallrI   )r#   numsvalspermsymdr+   r+   r,   parse_date_loose   s$   Hrw   codeboolc                   s   t | } t| dkrdS tt| dkrdS dd | D  t fddtdD }|d	 } d }|d
k r:||kS |d	| kS )N
   FrD   c                 S  rd   r+   re   rh   cr+   r+   r,   rj      rk   z/iran_national_code_is_valid.<locals>.<listcomp>c                 3  s     | ]} | d |  V  qdS )rz   Nr+   rh   rO   digitsr+   r,   	<genexpr>   s    z.iran_national_code_is_valid.<locals>.<genexpr>	      rl   )r<   rI   rX   sumrH   )rx   r#   rcheckr+   r~   r,   iran_national_code_is_valid   s   r   
value_normconstraintsDict[str, Any]Tuple[bool, Optional[str]]c           	      C  s~  t | }t|dkrtdS |ddr |dd dkr tdS |d	dr2|d
d dkr2tdS |dd
 }zt|dd }t|dd }t|dd }W n ty_   td Y S w d|  krjdkrqn n|d }nd|  kr{dkrn tdS |}ntdS d|  krd
kstdS  tdS ddddddddddddd}d|  kr|| kstdS  tdS t S )z
    Indonesia NIK (KTP/e-KTP) validation:
    - Must be 16 digits
    - DOB segment: positions 7-12 (DDMMYY) where DD+40 indicates female
    - No checksum (structural validation only)
       
nik_lengthreject_region_000000FN   000000nik_region_all_zeroreject_serial_0000rn   0000nik_serial_all_zeror   rl      nik_dob_parse)   G   (   rD   ro   nik_day_rangenik_month_range      )rD   rl   rc   r      r         r   rz   r   rn   nik_day_invalid)r<   rI   _v_failgetrf   	Exception_v_ok)	r   r   vdobddmm_yydd_real
month_daysr+   r+   r,   validate_id_nik   s>   
r   countryc                 C  s    | pd   }|dkrdS dS )zj
    Conservative OCR locale hint. Keep this deterministic; override via policy when you know better.
    r   IRfaen)r)   r=   )r   r|   r+   r+   r,   default_locale_hint   s   r   c                  C  s   t  } | s
td| S )Nz"Missing hardcoded Eden AI API key.)_EDENAI_API_KEYr)   
ValueErrorkeyr+   r+   r,   _get_edenai_api_key   s   r   locale_hintc                 C  s@   | pd   }|sdS |ddd ddd }|sdS |S )Nr   r   r`   rD   r   _)r)   r:   rY   )r   lhbaser+   r+   r,   _edenai_language_from_locale   s   r   datar   keysIterable[str]	List[Any]c                   s0   dd |D  g d	 fdd|  S )
Nc                 S  s   h | ]}t | qS r+   )r$   r:   )rh   kr+   r+   r,   	<setcomp>
  s    z(_deep_values_for_keys.<locals>.<setcomp>noder   r%   Nonec                   sf   t | tr!|  D ]\}}t|  v r| | q	d S t | tr/| D ]}| q(d S d S N)
isinstancedictitemsr$   r:   rK   rG   )r   r   r   ri   keysetoutwalkr+   r,   r     s   




z#_deep_values_for_keys.<locals>.walk)r   r   r%   r   r+   )r   r   r+   r   r,   _deep_values_for_keys	  s
   
r   r   Optional[str]c                 C  s   t | trt| }|pd S d S r   )r   r$   r-   )r   r#   r+   r+   r,   _as_nonempty_text  s   
r   Optional[float]c                 C  sV   zt | }W n
 ty   Y d S w |dkr|dkr|d }|dk r#d}|dkr)d}|S )NrB   g      Y@rC   )rA   r   )r   r|   r+   r+   r,   
_to_conf01"  s   r   output%Tuple[Optional[str], Optional[float]]c           	      C  s   d}d}d }t | |D ]}t|}|r!|d u st|t|kr!|}qg }t | |D ]}t|}|d ur8|| q)|r?t|nd }||fS )N)text	full_textraw_textocr_textcontenttranscriptionvalue)
confidencescoreocr_confidenceglobal_scoreprobability)r   r   rI   r   rK   rM   )	r   	text_keys	conf_keys	best_textvvtconfsr|   	best_confr+   r+   r,   _extract_eden_text_conf0  s    
r   image_data_urllanguageapi_keyc              
   C  s   zdd l }W n ty } ztd|d }~ww d| dd}t| |dtd}|jt||td}z| }W n tyN } z
td	|j	 d
|d }~ww |
ddkrm|
dpe|
dped|j	 }	td|	 |
d}
t|
S )Nr   z-requests package is required for Eden AI OCR.zBearer zapplication/json)AuthorizationzContent-Type)filer   )modelinputshow_original_response)headersjsontimeoutz#Eden AI response is not JSON (http=z).statussuccesserrormessagehttp_zEden AI OCR failed: r   )requestsr   RuntimeError_EDENAI_MODEL_EDENAI_SHOW_ORIGINAL_RESPONSEpost_EDENAI_URL_EDENAI_TIMEOUT_SECr   status_coder   r   )r   r   r   r   er   payloadrespr   errr   r+   r+   r,   _call_edenai_ocr_singleD  s6   
	 
r  request_bundlec                 C  s  t  }t| dp
d}t|}i }| dpg }|D ]}t|ts#qt|dp*d }|s1q|dp7g }d}	d}
d}d	}t|D ]n\}}t|tsNqD|d
}t|tr\| s]qDzt|||d\}}W n t	y } zt
d||| W Y d}~qDd}~ww d}|r|d7 }|dur||7 }||kr|}z	t|d}	W n t	y   |}	Y nw |}
|}qD|	|
|dur|nddd||< qd|iS )zw
    Adapter for KYC bundle -> Eden AI OCR.
    Returns response in the same shape expected by extract_with_llm().
    r   r   fieldsr   r   
candidatesr   Ng      r   )r   r   r   z+Eden OCR failed for key=%s candidate=%s: %srC         ?idxcandidate_idxr   r   
normalized)r   r$   r   r   r   r   r)   rJ   r  r   _LOGwarningrf   )r  r   r   r   
out_fieldsr  r   r   r  best_idxr   r   
best_scorerO   candr   r   confr   r   r+   r+   r,   eden_llm_ocre  sl   


r  Optional[List[int]]c              	   C  s  | d u rd S t | tr(g }| D ]}z	|t| W q ty%   Y  d S w |S t | tr|  }|s5d S |drZ|drZzt	
|}t |trOt|W S W n	 tyY   Y nw td|}g }|D ]}|siqdz	|t| W qd ty}   Y  d S w |S d S )N[]z[\s,;]+)r   rG   rK   rf   r   r$   r)   
startswithendswithr   loads_parse_int_listr'   rY   )r   r   ri   r#   arrpartspr+   r+   r,   r    sH   




r  c                 C  s   t | } t| dk rdS dd | D }d}t|d d }t|dd D ]\}}|d |kr@|d }|dkr;|d8 }||7 }q%||7 }q%|d }|| d	 dkS )
zStandard Luhn mod-10 checksum.rl   Fc                 S  rd   r+   re   r{   r+   r+   r,   rj     rk   z!luhn_is_valid.<locals>.<listcomp>r   NrF   r   rz   )r<   rI   rJ   )rx   r   checksumparityrO   rv   r   r   r+   r+   r,   luhn_is_valid  s   

r   r   mod_minus_r)modcheck_index
check_ruleweights	List[int]r"  rf   r#  Optional[int]r$  c                  s8  t | } | sdS dd | D t}|dk rdS |du r!|d nt|  dk r-|    dk s5 |kr7dS   } fddt|D }t|t|krQdS td	d
 t||D }|t| }	|pfd  }
|
dkr||	dk ru|	nt||	 }n|
dv rt||	 t| }n	|
dkr|	}ndS t|t|kS )a  
    Generic weighted mod-11 validator.

    Parameters are taken from constraints:
      - weights: list[int] for all digits EXCEPT the check digit.
      - mod: default 11
      - check_index: index of check digit (default last)
      - check_rule:
          * "iran"        : expected = r if r < 2 else (11 - r)
          * "mod_minus_r" : expected = (mod - r) % mod
          * "r"           : expected = r
    Fc                 S  rd   r+   re   r{   r+   r+   r,   rj     rk   z+mod11_weighted_is_valid.<locals>.<listcomp>rl   NrD   r   c                   s   g | ]
}| kr| qS r+   r+   r}   cir   r+   r,   rj     s    c                 s      | ]	\}}|| V  qd S r   r+   )rh   rv   wr+   r+   r,   r         z*mod11_weighted_is_valid.<locals>.<genexpr>r   iran>   mod-11r!  r   )r<   rI   rf   rH   r   zipr)   r:   )rx   r%  r"  r#  r$  ncheck_digitdata_digitsr#   r   ruleexpectedr+   r(  r,   mod11_weighted_is_valid  s6   r5  c                   C  s   dS )NTNr+   r+   r+   r+   r,   r      s   r   reasonc                 C  s   d| fS )NFr+   r7  r+   r+   r,   r   $     r   c                 C     t | rt S tdS )Niran_national_code_invalid)r   r   r   r   r   r+   r+   r,   validate_iran_national_code(     r=  c                 C  r:  )Nluhn_invalid)r   r   r   r<  r+   r+   r,   validate_luhn.  r>  r@  c                 C  s   t |dp
|d}|stdS |dd}|d}|dd}zt| |t||d ur2t|nd t|d	}W n tyE   d
}Y nw |rKt S tdS )Nr%  weight_listmod11_missing_weightsr"  r   r#  r$  r!  )r%  r"  r#  r$  Fmod11_invalid)r  r   r   r5  rf   r$   r   r   )r   r   r%  r"  r#  r$  okr+   r+   r,   validate_mod11_weighted4  s(   

rE  c                 C  s   t  S r   )r   r<  r+   r+   r,   validate_noneM     rF  c                 C  s   t dS )Nidentity_rule_missing)r   r<  r+   r+   r,   validate_missing_identity_ruleQ  r9  rI  )iran_national_codeluhnmod11_weightedid_niknoneskipmissing_identity_rulezDict[str, ValidatorFn]_VALIDATOR_REGISTRYvalidator_namec                 C  sL   | pd   }|st S t|}|du rtd| S ||p!d|p$i S )z
    Dispatches to a deterministic validator.
    Unknown validator names are treated as invalid (safer than silently skipping).
    r   Nzunknown_validator:)r)   r:   r   rQ  r   r   )rR  r   r   namefnr+   r+   r,   run_validator`  s   
rU  quad
np.ndarrayscalec                 C  s.   |  tj} | jddd}|| | t|  S )Nr   T)axiskeepdims)astypenpfloat32meanrA   )rV  rX  r|   r+   r+   r,   expand_quads  s   r_  ptsc                 C  s|   |  tj} | jdd}| t| }| t| }tj| ddd}| t| }| t| }tj||||gtjdS )NrD   )rY  rF   dtype)	r[  r\  r]  r   argminargmaxdiffreshapearray)r`  r#   tlbrrv   trblr+   r+   r,   order_points_clockwisex  s   rl  r+  hc                 C  sX   t | d d df d|d | d d df< t | d d df d|d | d d df< | S )Nr   rD   )r\  clip)r`  r+  rm  r+   r+   r,   clip_points  s   **ro  img_bgrc                 C  s
  | j d d \}}|tj}t|||}t|}|\}}}}tj|| }tj|| }	tt	||	}
tj|| }tj|| }tt	||}t	|
d}
t	|d}tj
ddg|
d dg|
d |d gd|d ggtjd}t||}tj| ||
|ftjdS )Nrl   r   rD   ra  r7   )shaper[  r\  r]  ro  rl  linalgnormrf   rM   rg  cv2getPerspectiveTransformwarpPerspectiveINTER_CUBIC)rp  rV  HWrh  rj  ri  rk  widthAwidthBmaxWheightAheightBmaxHdstMr+   r+   r,   warp_quad_to_rect  s    

8r  c              	   C  z   t | t j}t |ddt j\}}|jdd \}}tdttdt|| }t j	|||||t j
dd}t |t jS )zj
    Classic threshold preprocessing (requested):
    - Convert to gray
    - BINARY threshold at 120
    x      Nrl   rn   Q?r   rt  cvtColorCOLOR_BGR2GRAY	thresholdTHRESH_BINARYrq  rM   rf   roundcopyMakeBorderBORDER_CONSTANTCOLOR_GRAY2BGRrp  grayr   threshrm  r+  padr+   r+   r,   apply_numeric_ocr_filter_cv2  s   r  c              	   C  s   t | t j}d}d}t j|||d}tg dg dg dg}t |d|}|j\}}d}	t j||	|	|	|	t j	dd}
t |
t j
S )	zE
    Apply Grayscale + Contrast + Sharpening for better LLM OCR.
    gffffff?rz   )alphabetar   rF   r   rF   r   rF   rF   r  r  )rt  r  r  convertScaleAbsr\  rg  filter2Drq  r  r  r  )rp  r  r  r  contrastkernelsharprm  r+  r  paddedr+   r+   r,   apply_smart_ocr_filter_cv2  s   

r  c              	   C  r  )z:Simple binary threshold for card OCR (requested approach).r  r  Nrl   rn   r  r  r  r  r+   r+   r,   apply_adaptive_ocr_threshold  s   r  imgc                 C  s.   t g dg dg dg}t| d|}|S )z(Sharpen OCR input with requested kernel.r  r  rF   )r\  rg  rt  r  )r  r  	sharpenedr+   r+   r,   apply_sharp_ocr_filter_cv2  s   
r  rm   max_dimc                 C  sv   | j d d \}}t||}||kr| S |t| }tdtt|| }tdtt|| }tj| ||ftjdS )Nrl   interpolation)rq  rM   rA   rf   r  rt  resize
INTER_AREA)r  r  rm  r+  ru   rX  nwnhr+   r+   r,   _resize_max_dim  s   
r  ix  r  min_hmin_wr  r  c          	   	   C  s   | j d d \}}t||}||kr>|t| }tj| tdtt|| tdtt|| ftjd} | j d d \}}d}|rO||k rOt||t| }|r^||k r^t||t| }|dkrytj| tt|| tt|| ftjd} | S )Nrl   r  rB   g)\(?)	rq  rM   rA   rt  r  rf   r  r  rw  )	r  r  r  r  rm  r+  ru   r#   s_upr+   r+   r,   _resize_to_range  s   
:.r  b   r   defaultc                 C  s4   zt | }W n ty   | Y S w tdtd|S )Nr   d   )rf   r   rM   rL   )r   r  qr+   r+   r,   _normalize_jpeg_quality  s   r  r   smartkindvariantr  jpeg_qualityr  r  r  c          
      C  s   |pd  }|p	d  }|dv rt| |ddd} nt| |ddd} |d	kr*t| }n|d
kr3t| }n|dkrE|dv r@t| }n	t| }nt| }t|t}t	d|tj
|g\}}|s`tdt| d}	d|	 S )Nr   r  numericdate   iX  r  r  r   rawsharpenr  .jpgzcv2.imencode(.jpg) failedasciizdata:image/jpeg;base64,)r:   r  r  r  r  r  r  _OCR_JPEG_QUALITYrt  imencodeIMWRITE_JPEG_QUALITYr   base64	b64encodetobytesdecode)
rp  r  r  r  r  	processedr  rD  bufb64r+   r+   r,   _encode_image_for_llm  s(   





r  c                   @     e Zd Zd	ddZdS )
FaceMatcherBasedoc_photo_bgrrW  
selfie_bgrr%   r   c                 C  s   t  r   )NotImplementedErrorselfr  r  r+   r+   r,   match*  rG  zFaceMatcherBase.matchNr  rW  r  rW  r%   r   __name__
__module____qualname__r  r+   r+   r+   r,   r  )      r  c                   @  r  )
FaceMatcherUnavailabler  rW  r  r%   r   c                 C  s   d d dd dS )Nface_module_unavailablescore01cosiner7  detailsr+   r  r+   r+   r,   r  /  r1   zFaceMatcherUnavailable.matchNr  r  r+   r+   r+   r,   r  .  r  r  c                   @  s<   e Zd ZddddZed	d
 ZedddZdddZdS )InsightFaceMatcher  r  Fdet_sizeTuple[int, int]
prefer_gpury   c                 C  sB   ddl m} |rddgndg}|dd|d| _| jjd|d	 d S )
Nr   )FaceAnalysisCUDAExecutionProviderCPUExecutionProvider	buffalo_lz/var/www/html/runtime)rS  root	providersrF   )ctx_idr  )insightface.appr  appprepare)r  r  r  r  r  r+   r+   r,   __init__4  s   zInsightFaceMatcher.__init__c                 C  s   | sd S dd }t | |dS )Nc                 S  s"   | j \}}}}t|| ||  S r   )bboxrA   )fx1y1x2y2r+   r+   r,   area?  s   z.InsightFaceMatcher._largest_face.<locals>.arear   )rM   )facesr  r+   r+   r,   _largest_face:  s   z InsightFaceMatcher._largest_facer?   rW  r@   r%   rA   c                 C  sP   |  tj} | tj}tj| d }tj|d }tt| |||  S )Ng&.>)r[  r\  r]  rr  rs  rA   dot)r?   r@   nanbr+   r+   r,   _cosineE  s
   zInsightFaceMatcher._cosiner  r  r   c           	      C  s   | j |}| j |}| |}| |}|d u r%d d ddt|idS |d u r4d d ddt|idS | |j|j}|d d }t|t|dd dS )	Nno_face_in_document_photo	doc_facesr  no_face_in_selfieselfie_facesrB          @rD  )r  r   r  rI   r   normed_embeddingrA   )	r  r  r  r  	sel_facesfdfscosr  r+   r+   r,   r  M  s   

zInsightFaceMatcher.matchN)r  F)r  r  r  ry   )r?   rW  r@   rW  r%   rA   r  )r  r  r  r  staticmethodr  r   r  r+   r+   r+   r,   r  3  s    

r  zOptional[FaceMatcherBase]_FACE_MATCHERc                	   C  sn   t + td urtW  d    S ztddaW n ty#   t aY nw tW  d    S 1 s0w   Y  d S )NF)r  )
_FACE_LOCKr  r  r   r  r+   r+   r+   r,   get_face_matcherb  s   
$r  doc_image_pathselfie_image_pathdoc_photo_quadOptional[List[List[float]]]c                 C  s   |s	dddddS |sdddddS t | }t |}|du r'dddddS |du r2dddddS t|tj|tjd}t }|||S )z^
    Computes face match pack once. Caller decides whether to use it depending on policy.
    Nselfie_missingr  doc_photo_not_detectedcannot_read_doc_imagecannot_read_selfie_imagera  )rt  imreadr  r\  rg  r]  r  r  )r  r  r  doc_imgsel_imgcropmatcherr+   r+   r,   compute_face_packn  s   	

r  c                   @  s   e Zd ZU ded< ded< ded< dZded	< d
Zded< dZded< dZded< dZded< dZ	ded< dZ
ded< eedZded< dZded< dZded< dZded< d Zded!< dZd"ed#< dS )$	FieldRule	List[str]class_namesr$   r   ry   required      ?rA   min_det_confg?min_ocr_confr   ocr_kindoptional
match_type333333?match_thresholdr   rf   expected_lenNr   	validatordefault_factoryr   r   rB   weightF
must_match
match_gaterD   max_candidateszOptional[List[str]]input_aliases)r  r  r  __annotations__r"  r#  r$  r&  r(  r)  r*  r   r   r   r-  r.  r/  r0  r1  r+   r+   r+   r,   r    s"   
 r  c                   @  s   e Zd ZU ded< ded< dZded< dZded< dZded	< d
Zded< dZded< dZ	ded< dZ
ded< dZded< dZded< dZded< dZded< dZded< dZded < dZded!< eed"Zd#ed$< d%Zded&< d'Zded(< d
S ))	DocConfigr$   doc_idzList[FieldRule]rulesr   r   doc_typer   ocr_locale_hintNr'  min_detected_fields_countrB   rA   approve_min_coverage(\?approve_min_extractionq=
ףp?approve_min_match_corerC   approve_min_match_all      ?review_min_coverager  reject_below_coverage皙?approve_no_input_extra_bufferFry   require_face_matchr  face_metricface_match_thresholdr+  zList[Tuple[str, str]]
swap_pairsTenable_name_swapr  name_swap_margin)r  r  r  r2  r   r6  r7  r8  r9  r;  r=  r>  r@  rA  rC  rD  rE  rF  r   rG   rG  rH  rI  r+   r+   r+   r,   r3    s(   
 r3  r   c                 C  s  g }|  dg D ]f}t|}| dp| d}t|tr#|g|d< nt|p'g |d< |dd  | d}|d u r?i |d< n%t|tr[z	t||d< W n tyZ   i |d< Y n
w t|tsdi |d< |	t
d*i | q|  dpug }dd |D }td*i dt|  dp|  d	pd
d|dt|  dpd dt|  dpddt|  dp|  dp|  dptt|  dpdd|  dd dt|  dddt|  dddt|  d|  dddt|  dddt|  dddt|  dddt|  dd d!t|  d!d"d#t|  d#d$d%t|  d%dd|d&t|  d&d'd(t|  d(d)S )+Nr5  r  
class_namer   rG  c                 S  s.   g | ]}t |ttfrt|d krt|qS )rl   )r   rG   tuplerI   rg   r+   r+   r,   rj     s   . z+doc_config_from_payload.<locals>.<listcomp>r4  iddocr   r   r6  r7  r.   r   r8  r9  rB   r;  r:  r=  approve_min_matchr<  r>  rC   r@  r?  rA  r  rC  rB  rD  FrE  r  rF  rH  TrI  r  r+   )r   r   r   r$   rG   popr   r  r   rK   r  r3  r=   r   rA   ry   )r   r5  r   rrcncstrG  r+   r+   r,   doc_config_from_payload  s   





rS  zDict[str, 'KYCEngine']_ENGINE_CACHE
model_path'KYCEngine'c                 C  sL   t | }t |tvrt| t|< t| W  d    S 1 sw   Y  d S r   )r$   _ENGINE_LOCKrT  	KYCEngine)rU  r   r+   r+   r,   
get_engine  s   $rY  c                   @  s   e Zd Zd<ddZed=dd	Zd>ddZd?ddZeddd@dd ZedAd"d#Z	edAd$d%Z
edAd&d'Zd(d(d)eefdBd5d6ZdCd:d;Zd(S )DrX  yolo_model_pathr$   c                 C  sT   t || _| jj}t|trdd | D | _n
dd t|D | _t	 | _
d S )Nc                 S  s   i | ]\}}t |t|qS r+   )rf   r$   )rh   r   r   r+   r+   r,   
<dictcomp>  s    z&KYCEngine.__init__.<locals>.<dictcomp>c                 S  s   i | ]	\}}|t |qS r+   )r$   )rh   rO   r0  r+   r+   r,   r[  !  s    )r   r   namesr   r   r   r  rJ   	threadingLock_infer_lock)r  rZ  r\  r+   r+   r,   r    s   

zKYCEngine.__init__ri   rA   r%   c                 C  s   t dtdt| S )NrC   rB   )rM   rL   rA   ri   r+   r+   r,   _clamp01%  s   zKYCEngine._clamp01r3  r  
user_inputDict[str, str]r   c                 C  s@   | |j}|r
|S |jr|jD ]}| |}|r|  S qd S r   )r   r   r1  )r  r3  rb  r   akr   r+   r+   r,   _get_expected)  s   

zKYCEngine._get_expected
image_pathr  ioudevicemax_detrf   List[Dict[str, Any]]c              	   C  s>  | j  | jj|||||dd}W d    n1 sw   Y  |s#g S |d }g }t|dd d ur|jd ur|jj}	|jj}
|jj}t|	t	j
rP|	   }	t|
t	j
r^|
   }
t|t	j
rl|   }tt|	D ]$}t|
| }||| j|t|t|| |	| t d qr|S t|dd d ur|jd ur|jj}|jj}
|jj}t|t	j
r|   }t|
t	j
r|
   }
t|t	j
r|   }tt|D ]8}dd || D \}}}}t|
| }||g||g||g||gg}||| j|t|t|| |d q|S )	NF)sourcer  rg  rh  ri  verboser   obb)class_idrJ  r  rV  boxesc                 S  rd   r+   )rA   rg   r+   r+   r,   rj   e  rk   z$KYCEngine.detect.<locals>.<listcomp>)r_  r   predictgetattrrm  xyxyxyxyclsr  r   torchTensordetachcpunumpyrH   rI   rf   rK   r  r   r$   rA   r[  tolistro  xyxy)r  rf  r  rg  rh  ri  predsr   detsquadsrs  cfrO   cidrz  r  r  r  r  rV  r+   r+   r,   detect4  sb   



zKYCEngine.detectr   )r   fields_planr   r   c                 C  s(   dd|| ddgddddd	d
d}|S )a  
        Returns a single, self-contained bundle to be sent to your LLM layer.
        You can convert it to OpenAI Responses API (or any other provider) on your server.

        fields_plan: list of:
          {
            "key": str,
            "kind": "text|numeric|date|mrz",
            "candidates": [{"idx": int, "det_conf": float, "image_data_url": str}, ...]
          }

        Expected LLM response:
          {
            "fields": {
              "<key>": {
                 "candidate_idx": 0,
                 "text": "...",          # what you read
                 "confidence": 0.0-1.0,  # optional but recommended
                 "normalized": "..."     # optional; server can re-normalize anyway
              }, ...
            }
          }
        rD   kyc_field_ocrr   r  zint (required)zstring|null (required)zfloat 0..1 (optional)zstring|null (optional)r	  )typetop_level_keysfield_object)schema_versiontaskr   r  output_contractr+   )r  r   schemar+   r+   r,   build_llm_ocr_prompt_bundles  s   z%KYCEngine.build_llm_ocr_prompt_bundlebundlec              	   C  s   |  dpg }dd |D }dd| }d|dg}|D ]K}| d}| d	}| d
p0g }d| d| d}	|dkrB|	d7 }	|d|	d |D ]}
d|
v rh|
d dd\}}|ddd|dd qLq|ddd |S )Nr  c                 S  s   g | ]}| d qS r   )r   rh   r  r+   r+   r,   rj         zAKYCEngine.anthropic_content_items_from_bundle.<locals>.<listcomp>uv  You are a specialized OCR engine for Iranian ID cards (National Card/Shenasnameh).
CRITICAL INSTRUCTION FOR DIGITS:
The images contain Persian/Arabic numerals (۰, ۱, ۲, ۳, ۴, ۵, ۶, ۷, ۸, ۹).
You MUST transcribe them directly as standard English digits (0-9).
 - Visible: '۳۴۸' -> Output: '348'
 - Visible: '۱۴۰۲' -> Output: '1402'

OUTPUT RULES:
1. Return ONLY valid JSON. No markdown.
2. If a field is blurry or unreadable, set 'text': null.
3. Do not include any explanations.

JSON Response Structure:
{
  "fields": {
    "KEY": {"text": "value", "confidence": 0.99, "candidate_idx": 0}
  }
}
REQUIRED FIELDS: , r   r  r   r   r  r  z
Target Field: 'z	' (Type: )r  z -> EXTRACT ONLY DIGITS (0-9)r   ,rD   imager  	image/png)r  
media_typer   )r  rk  z
JSON Output:)r   joinrK   rY   )r  r  	keys_listintror   r  r   r  cands
field_hintr|   r   b64_datar+   r+   r,   #anthropic_content_items_from_bundle  s:   

z-KYCEngine.anthropic_content_items_from_bundlec                 C  s  |  dpg }d}d|dg}|D ]f}| d}| d}| dp#g }t| dp+d	}| d
p3d }	|dd| d| d| d|	 dt| 
d |D ]%}
|
 d}|
 d}|dd| d| d |d|
 dd qQq|ddd |S )aL  
        Helper: convert bundle into OpenAI Responses API `input` content items in ONE request.
        (You can do the same on your server; provided for clarity.)

        Output is a list of items like:
          {"type":"input_text","text":"..."}
          {"type":"input_image","image_url":"data:image/jpeg;base64,..."}
        r  a  You are a deterministic OCR engine for KYC field crops.
Return ONLY valid JSON. No markdown.

Global rules:
- Do NOT guess. If unreadable or ambiguous: text=null and confidence<=0.3.
- Never invent missing characters.
- Preserve native letters for names (do not transliterate).

Numeric/date rules:
- For kind=numeric: output ONLY ASCII digits 0-9 (no spaces, no separators).
- For kind=date: output as seen; if you can normalize to YYYY-MM-DD, also set normalized.
- NEVER drop leading zeros.

Constraint handling (IMPORTANT):
- Each FIELD may provide expected_len and/or regex.
- If kind=numeric and expected_len>0: you MUST return exactly that many digits.
- If regex is provided: your text MUST match it exactly.
- If you cannot satisfy constraints with high confidence: text=null.

Output JSON format:
{ "fields": { "<key>": {"candidate_idx":0, "text":null, "confidence":0.0, "normalized":null} } }

input_textr  r   r  r  r)  r   regexr   
FIELD key= kind= expected_len= regex= candidates=r  det_confCandidate idx=
 det_conf=input_imager   )r  	image_urlz
Return ONLY JSON. No markdown.)r   rf   r)   rK   rI   )r  r  r  r   r  r   r  r  r)  r  r|   r  r  r+   r+   r,    openai_content_items_from_bundle  s,   


$

z*KYCEngine.openai_content_items_from_bundlec                 C  s  |  dpg }dd |D }dd| }d|ig}td}|D ]}t|ts*q"| d}t|tr8| s9q"| d	p?d }| d
pHg }	| d}
| dpTd }zt	|
p\d}W n t
yj   d}Y nw |dd| d| d| d| dt|	 
i |	D ]S}t|tsq| d}| d}| dd}t|trd|vrq|dd| d| i || }d}|r|dp|}|dd\}}|d||di qq"|ddi |S )Nr  c                 S  s4   g | ]}t |d tr|d rt|d qS r   )r   r   r$   r  r+   r+   r,   rj     s   4 z6KYCEngine.gemini_parts_from_bundle.<locals>.<listcomp>a  You are a deterministic OCR engine for KYC field crops.
Return ONLY valid JSON. No markdown.

Global rules:
- Do NOT guess. If unreadable or ambiguous: text=null and confidence<=0.3.
- Never invent missing characters.
- Preserve native letters for names (do not transliterate).

Numeric/date rules:
- For kind=numeric: output ONLY ASCII digits 0-9 (no spaces, no separators).
- For kind=date: output as seen; if you can normalize to YYYY-MM-DD, also set normalized.
- NEVER drop leading zeros.

Constraint handling (IMPORTANT):
- Each field may provide expected_len and/or regex.
- If kind=numeric and expected_len>0: you MUST return exactly that many digits.
- If regex is provided: your text MUST match it exactly.
- If you cannot satisfy constraints with high confidence: text=null.

Output JSON format:
{"fields": {"<key>": {"candidate_idx":0, "text": null, "confidence":0.0, "normalized": null}}}
REQUIRED FIELDS: r  r   z^data:([^;]+);base64,r   r  r  r)  r  r   r   r  r  r  r  r  r  r  r   zbase64,r  r  r  rD   inline_data)	mime_typer   zReturn ONLY JSON. No markdown.)r   r  r'   compiler   r   r$   r)   r:   rf   r   rK   rI   r  grouprY   )r  r  r  r  r  data_url_rer  r   r  r  r)  r  expected_len_ir|   r  r  r   ru   mimer   r   r+   r+   r,   gemini_parts_from_bundle  sX   




&


z"KYCEngine.gemini_parts_from_bundleNFr  doc_cfgr3  r|  llm_ocr*Callable[[Dict[str, Any]], Dict[str, Any]]Optional[Dict[str, str]]debugry   r  r  c
           9        sZ  |pi }t |}
|
du rtd| i }|D ]}||d g | q|D ]}|| jdd d q(i }dCdd}g }i t|
jdd d}i }|jD ]I g } j	D ]}|
||g  qW|sfqO fdd|D }|jdd d |d|  }|sqO jpd }|dkr|d } j	d t|d dddd|d dd| j< |d |d  j< qOt jp jpi dpd}t jpi dpd } j|||g d}g | j< d}tj|st| |D ]}tj|d tjd}t||dv rd nd!}t|
|}d"g}|D ]}t|d# } t||||	|d$}!z; j d%| d%|  d&}"tj ||"}#|!!d'd(\}$}%t"|#d)}&|&#t$%|% W d   n	1 sTw   Y  W n t&yt }' zt'd*|'  W Y d}'~'nd}'~'ww |d# | t|d |!d+ | j | qq|d# r|| qOt|p|j(pt)|j*}(| j+||(d,})||)pi }*t,|*t-r|*d-nd}+t,|+t-si }+dDd1d2},dEdFd6d7}-|jD ]  jpd }|dkrqو j|vrq|+ jpi }.t,|.t-si }.|.d8}/zt|/}/W n t&y   d}/Y nw | jp$g }0|0s*q|/dk s6|/t|0kr8d}/|0|/ }t|dd9}1|.d}2|2du rS|.d0}2|2du rZd}2t|2}2|.d:}3|3du r||-||2t jpx jdpxdd;}3zt|3}3W n t&y   |-||2t jp jdpdd;}3Y nw | .|3}3|.d<}4|4du st|4 dkr|, ||2}4n|, |t|4}4 j	d |1|3|2 |4d=|d |/d| j< |d |d  j< q||d>}5|r+ddl/}6|60|)}7z|7d-g D ]}&|&d#g D ]}8d?|8v rd@|8d?< qq W n
 t&y"   Y nw |7|5dA< |*|5dB< |5S )Gz
        1) Uses YOLO detections + rules to crop candidates.
        2) Builds ONE bundle and calls llm_ocr(bundle).
        3) Maps response back to per-field extraction pack (same shape as before).
        NzCannot read image: rJ  c                 S     t | dd S Nr  rC   rA   r   r`  r+   r+   r,   <lambda>      z,KYCEngine.extract_with_llm.<locals>.<lambda>r   r3  r  r%   rf   c                 S  s$   t | jpd}|dkrd}t|dS )NrD   r   r   )rf   r0  rL   )r3  r   r+   r+   r,   top_k  s   
z)KYCEngine.extract_with_llm.<locals>.top_krl   )r}  doc_image_shapec                   s*   g | ]}t |d dt  jkr|qS )r  rC   )rA   r   r"  r{   r3  r+   r,   rj     s   * z.KYCEngine.extract_with_llm.<locals>.<listcomp>c                 S  r  r  r  r`  r+   r+   r,   r    r  r   rN  r   r  rV  )rJ  r  ocr_conf	value_rawr   
ocr_methodrV  r
  r}  lengthr  r   )r   r  r)  r  r  llm_ocr_debugra  r  gffffff?gHzG?r  r  r  r   r  r  rD   wbzFailed to save debug image: )r  r  r   )r  r   r  r  r$   r   c                 S  s`   |dkrt |S |dkrtt|S | jdv rt|S | jdv r,|dkr(t |S t|S t|S )Nr  r  
first_name	last_name	full_name)	id_numberpassport_no)r<   r-   r0   r   r;   r>   r6   )r3  r  r   r+   r+   r,   normalize_value  s   

z3KYCEngine.extract_with_llm.<locals>.normalize_valuer   r)  rA   c                 S  s   |pd  }|s
dS | dkrt|}|rt||krdS dS | dkr*t|r(dS dS | d	kr>d
|v r<t| dkr<dS dS dS )Nr   rC   r  g??r  r'  g?mrz<rl   gffffff?)r)   r<   rI   rw   
splitlines)r  r   r)  r   rv   r+   r+   r,   heuristic_conf  s"   z2KYCEngine.extract_with_llm.<locals>.heuristic_confr
  rC   r   )r)  r  llm)r  	internalsr   z<BASE64_REMOVED_FOR_LOG>
llm_bundlellm_response_raw)r3  r  r%   rf   )r3  r  r  r$   r   r$   r%   r$   )r   )r  r$   r   r   r)  rf   r%   rA   )1rt  r  r   
setdefaultrK   sortrG   rq  r5  r  extendr   r$  r:   rA   r   rf   r)  r   r$   r)   ospathexistsmakedirsr\  rg  r]  r_  r  rI   r  r  rY   openwriter  	b64decoder   printr7  r   r   r  r   r   ra  copydeepcopy)9r  r  r  r|  r  r   rb  r  r  r  r  by_classrv   r   candidates_for_keyr  r  r  r  r  rQ  r  bestr)  r  
plan_entry	debug_dirdetrV  r  variantsr   r  data_urlimg_filenameimg_pathheaderencodedr  r   r   r  llm_resp
llm_fieldsr  r  r  cand_idxdet_listr  r   r  	norm_textr   r  clean_bundler|   r+   r  r,   extract_with_llmg  sF  



















(,





zKYCEngine.extract_with_llmextracted_fields	face_packOptional[Dict[str, Any]]c           6      C  s  |pi }dd |j D }d}t }g }	g }
g }i }g }g }dDdd}dEdd}|j D ]}||j}|sBddddd dd||j< q*| t|dd}|t|jk}|r\||j |jp`d	 }|dkrwd}t
|d }d }d }d }n4| t|ddpd}|dpd}|d pd}|t|jkott| }t
|td!| }|jr|r|d"7 }d}d }|dkr|jrt|jt|pd|jpi \}}|sd}|p|j d#}|||||\}} |sd}|p| }|s|d$|j d%|  |	t|t|jf | ||}!d }"d&}#|!r|dkr|jpd'	 }$|$d(krd|p)d}%|!}&|d)kr:t|%}%t|&}&n|jd*v rIt|%}%t|&}&nt|%}%t|&}&|%|&krXdnd}"|"dkrad+nd,}#nI|$d-krtt|pod|!}"|"t|jkr}d-nd.}#n-|$d/krt|pd}'t|!}(|'r|(r|'|(krdnd}"|"dkrd0nd1}#nd }"d2}#|"d ur|
t|"t|jf |jr|t|"t|jf |j r|"dkr|#d3v r|d4|j  |#d.kr|d4|j  |||||t||"|#||d5
})|r|)!|||d6|d7|d8|d9d: |)||j< q*|r/|td"t"| nd}*||	}+|
r<||
nd },|rE||nd }-d}.|j#d urXt"|t$|j#k}.d }/d }0|d urw|d;}0|j%d<krr|d<}/n|d=}/t|-d ur|-n|,pd}1| d>t|+ d>|1  }2|2}3|j&r|/d ur| d?|2 d@| t|/  }3n|2}3t'||*|+|,|-|.t"||||/|0dA\}4}5|4|5|*|+|,|-t"||.|/|j%|2|3dB
|dCS )FNc                 S  s   g | ]}|j r|qS r+   )r   )rh   r   r+   r+   r,   rj   m  r  z#KYCEngine.score.<locals>.<listcomp>r   r3  r  r  r$   r  r   rs  r%   r   c                 S  s  | j pi }|s	dS |pd}|pd}|d}|d}|d}	|d urBzt|}
t||
kr7dd|
 fW S W n	 tyA   Y nw |d urdzt|}
t||
k rYdd|
 fW S W n	 tyc   Y nw |	d urzt|	}
t||
kr{dd	|
 fW S W n	 ty   Y nw |d
}|rt|}t||sdd| fS |d}|rztt|t|d u rW dS W n
 tj	y   Y nw |dkr|ddrt
|d u rdS dS )Nr6  r   r  
min_length
max_lengthFlength_expected_min_length_max_length_prefixprefix_r  )Fregex_mismatchr  
must_parse)Fdate_unparsed)r   r   rf   rI   r   r$   r  r'   	fullmatchr   rw   )r3  r  r  rs  r|   raw_snorm_sr  r  r  Lr  prefr  r+   r+   r,   validate_constraintsz  sj   





z-KYCEngine.score.<locals>.validate_constraintspairsList[Tuple[float, float]]rA   c                 S  s@   | sdS t dd | D }|dkrtt dd | D | S dS )NrC   c                 s  s    | ]\}}|V  qd S r   r+   )rh   r   r+  r+   r+   r,   r     s    z0KYCEngine.score.<locals>.wavg.<locals>.<genexpr>r   c                 s  r*  r   r+   )rh   r   r+  r+   r+   r,   r     r,  )r   rA   )r  swr+   r+   r,   wavg  s   &zKYCEngine.score.<locals>.wavgFrC   missing)presentdet_okocr_ok
extractionr  r7  r  r   rN  TrB   r  r  r   r   g{Gz?rD   _invalidzinvalid::no_user_inputr%  exactr  r  exact_matchexact_mismatchfuzzyfuzzy_below_thresholdr  
date_matchdate_mismatchr   )r  r  z	mismatch:)
r  r  r  r  r  r  r  r7  validvalid_reasonrJ  r  rV  r
  )r  r  rJ  r  rV  r
  r7  r  r  r  r  gffffff?)r  coverager  	match_all
match_corecount_gate_okdetected_countmismatch_flagsinvalid_flags
face_valueface_reason)
r  r  r  r  detected_fields_countr  facerE  	doc_scorefinal_score)decisionreasonsscores	per_field)
r3  r  r  r$   r  r   rs  r   r%   r   )r  r  r%   rA   )(r5  rX   r   r   ra  rA   r"  addr$  r:   mathsqrtr#  ry   r$   r)   rM   r   r*  rU  r   rK   r-  re  r&  r<   r;   r6   r^   r(  rw   r/  r.  updaterI   r8  rf   rE  rD  decide)6r  r  r  rb  r  r  required_rulesfound_required_detdet_ok_keysextraction_pairsmatch_pairs_allmatch_pairs_corer,  r!  r"  r  r
  r3  r  r  r  r  r  r  r  	raw_value
norm_valuevalid_okr  rD  r7  c_okc_reasonr4  mscoremreasonmtgot_normexp_normgotexprecr  extraction_scorer  r  r  face_valr$  
base_matchr'  r(  r)  r*  r+   r+   r,   r   c  s>  

6
 












"
zKYCEngine.score)rZ  r$   )ri   rA   r%   rA   )r3  r  rb  rc  r%   r   )rf  r$   r  rA   rg  rA   rh  r$   ri  rf   r%   rj  )r  rj  r   r$   r%   r   )r  r   r%   rj  )r  r$   r  r3  r|  rj  r  r  r   r   rb  r  r  ry   r  rf   r  rf   r%   r   )r  r3  r  r   rb  r  r  r  r  ry   r%   r   )r  r  r  r  r  ra  re  r  r  r  r  r  r  _OCR_MAX_DIMr  r   r+   r+   r+   r,   rX    s.    


?.:;S }rX  r  r  r  r  r  r  r   r!  r  r"  r#  r$  Tuple[str, List[str]]c                 C  s  g }| j d ur|s|d |d| d| j   d|fS || jk r,|d d|fS |r;||d d  d|fS |rJ||d d  d|fS | jr|	d u r_|d|
pWd	  d|fS t|	t| jk r|d
 |d|	dd| jd d|fS || jkr|| jkr|d u r|| j| j	 krd|fS |d d|fS || j
krd|fS |d |d ur|d|d d|fS || jkr|| jk r|d || jk r|d d|fS |d d|fS )Ndetected_fields_count_too_lowz	detected=z_min=REJECTcoverage_too_lowrn   REVIEWzface_required_but_unavailable:unknownface_below_thresholdzface=z.3fz_thr=APPROVEno_core_user_inputcore_match_below_thresholdz
match_all=extraction_lowpartial_coveragecoverage_insufficient)r8  rK   rA  r  rD  rA   rF  r9  r;  rC  r=  r@  )r  r  r  r  r  r  r   r!  r"  r#  r$  r*  r+   r+   r,   r1  r  sT   












r1  r  r  r   c                 C  s~   || v r|| v r| | | | | |< | |< | di }||v r0||v r0|| || ||< ||< |dg |||d d S )Nr}  swap_events)r?   r@   r7  )r   r  rK   )r  r  r?   r@   r7  r  r+   r+   r,   swap_field_keys  s   rW  cfgc                 C  s&   |j D ]\}}t| |||dd qd S )Nconfig_swap_pairsr8  )rG  rW  )r  r  rX  r?   r@   r+   r+   r,   apply_config_swaps  s   rZ  r  ocr_first_rawocr_last_raw	exp_firstexp_lastmarginc                 C  s   t | pd|pd}t |pd|pd}|| d }t | pd|pd}t |p%d|p(d}	||	 d }
|
|| kr@d||
||	ddS d||
||ddS )Nr   r  T)zfirst->lastzlast->first)swapscore_normalscore_swappedr  F)zfirst->firstz
last->last)r^   )r[  r\  r]  r^  r_  s11s22normals12s21swappedr+   r+   r,   best_name_assignment  s   ri  rb  rc  c           	      C  s   |j sd S d| vsd| vrd S |dpd}|dpd}|r!|s#d S | d dp+d}| d dp4d}t|||||jd}|dr]t| |dddd |d	g ddd|d
 d S d S )Nr  r  r   r  )r_  r`  heuristic_name_swapr8  rV  )r?   r@   r7  meta)rH  r   ri  rI  rW  r  rK   )	r  r  rX  rb  r]  r^  f_rawl_rawinfor+   r+   r,   apply_name_swap_if_needed  s   
 ro  r!  rW      TF)doc_config_payload
doc_configr  rb  r  	yolo_confyolo_iourh  ri  cache_enginer  r4  rq  r  rr  Optional[DocConfig]r  4Optional[Callable[[Dict[str, Any]], Dict[str, Any]]]r  rs  rt  rh  ri  ru  r  c                 C  s  |d ur	t |}n|d ur|}ntd|d u rt}|r t| nt| }|j|||	|
|d}|j||||||d}|d }|d }t||| t||||pNi  d }|j	rt|rm|d 
dpd|d 
d}t|||d	}nd d d
d d}|j|||||d}||d |d |d |d d}|j	rdnd}g }|j	r|s|d |d dkr|d ||d< ||d< |r||d< ||d< ||d< ||d< |
d|d< |
d|d< |S )NzBdoc_config_payload or doc_config must be provided (server-driven).)r  rg  rh  ri  )r  rb  r  r  r  r}  	doc_photophoto)r  r  r  r  r  )rb  r  r  r)  r+  r*  r,  )r4  r)  r+  r*  r,  r   disabledupload_selfie_requiredrK  retake_document_photoselfie_mode
next_steps
detectionsr&  r  r  )rS  r   r  rY  rX  r  r  rZ  ro  rD  r   r  r   rK   )rU  r  r4  rq  rr  r  rb  r  rs  rt  rh  ri  ru  r  rX  enginer|  packr  r  r  rV  scoringresultr}  r~  r+   r+   r,   run_kyc  sd   
	


r  )r#   r$   r%   r$   )r?   r$   r@   r$   r%   rA   )r#   r$   r%   r_   )rx   r$   r%   ry   )r   r$   r   r   r%   r   )r   r$   r%   r$   )r%   r$   )r   r$   r%   r$   )r   r   r   r   r%   r   )r   r   r%   r   )r   r   r%   r   )r   r   r%   r   )r   r$   r   r$   r   r$   r%   r   )r  r   r%   r   )r   r   r%   r  )rx   r$   r%  r&  r"  rf   r#  r'  r$  r$   r%   ry   )r%   r   )r7  r$   r%   r   )rR  r   r   r$   r   r   r%   r   )rV  rW  rX  rA   r%   rW  )r`  rW  r%   rW  )r`  rW  r+  rf   rm  rf   r%   rW  )rp  rW  rV  rW  r%   rW  )rp  rW  r%   rW  )r  rW  r%   rW  )rm   )r  rW  r  rf   r%   rW  )
r  rW  r  rf   r  rf   r  rf   r%   rW  )r  )r   rf   r  rf   r%   rf   )rp  rW  r  r$   r  r$   r  rf   r  rf   r%   r$   )r%   r  )r  r$   r  r$   r  r  r%   r   )r   r   r%   r3  )rU  r$   r%   rV  )r  r3  r  rA   r  rA   r  r   r  r   r  ry   r   rf   r!  r  r"  r  r#  r   r$  r   r%   rI  )r  r   r  r   r?   r$   r@   r$   r7  r$   r%   r   )r  r   r  r   rX  r3  r%   r   )r  )r[  r$   r\  r$   r]  r$   r^  r$   r_  rA   r%   r   )
r  r   r  r   rX  r3  rb  rc  r%   r   )rU  r$   r  r$   r4  r$   rq  r  rr  rv  r  rw  rb  r  r  r   rs  rA   rt  rA   rh  r$   ri  rf   ru  ry   r  ry   r%   r   )t
__future__r   r  r'   r.  r   r  loggingr]  dataclassesr   r   typingr   r   r   r   r	   r
   r   rt  rx  r\  rt  ultralyticsr   r$   	maketransr/   r2   r  r5   r4   rf   getenvrH  r  r   r   r   rA   r   r)   r:   r   	getLoggerr  r  r-   r0   r6   r;   r<   r>   rV   r^   rw   r   r   r   r   r   r   r   r   r   r  r  ry   ValidatorFnr  r   r5  r   r   r=  r@  rE  rF  rI  rQ  r2  rU  r_  rl  ro  r  r  r  r  r  r  r  r  r  r  r  r  r  r^  r  r  r  r  r3  rS  rT  rW  rY  rX  r1  rW  rZ  ri  ro  r  r+   r+   r+   r,   <module>   s^  
$


		/

!$D$9

&+ A      _K	
