o
    ƧjI                     @  s  U d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlm	Z	m
Z
 d dlmZmZmZmZmZmZmZ d dlZd dlZd dlZd dlmZ ei dddd	d
ddddddddddddddddddd	ddddddddd ddddd!Zed"d"d#d$d$d%d&d&d&d'd"d(d'd)Zed*Zd+Zdd/d0Z dd1d2Z!dd3d4Z"dd5d6Z#dd7d8Z$dd9d:Z%dd>d?Z&ddCdDZ'ddEdFZ(ddHdIZ)ddJdKZ*ddMdNZ+dOZ,ddRdSZ-ddTdUZ.ddXdYZ/dd]d^Z0ddadbZ1dddfdgZ2dddjdkZ3G dldm dmZ4G dndo doe4Z5G dpdq dqe4Z6da7dre8ds< e9 Z:ddtduZ;dd{d|Z<e	G d}d~ d~Z=e	G dd dZ>dddZ?i Z@de8d< e9 ZAdddZBG dd dZCdddZDdddZEdddZF	ddddZGdddZHdddddddddddddddƄZIdS )    )annotationsN)	dataclassfield)DictAnyListOptionalTupleIterableCallable)YOLOu   ۰0u   ۱1u   ۲2u   ۳3u   ۴4u   ۵5u   ۶6u   ۷7u   ۸8u   ۹9u   ٠u   ١u   ٢u   ٣u   ٤u   ٥u   ٦)u   ٧u   ٨u   ٩u   یu   کu   هu   وu   ا u   لا)u   يu   ىu   كu   ةu   ۀu   ؤu   إu   أu   ٱu   ءu   ئu   ﻻu   ـz/[\u0610-\u061A\u064B-\u065F\u0670\u06D6-\u06ED]u   ‌sstrreturnc                 C  s   t dd| pd S )Nz\s+ r   )resubstripr    r    &/var/www/html/kyc_policy_engine_llm.pycollapse_spaces;   s   r"   c                 C  s   | pd tS )Nr   )	translate_FA_AR_TO_EN_DIGITSr   r    r    r!   to_en_digits?      r%   c                 C  s:   | pd} t | } | t} | td} td| } t| S )Nr   r   )r%   r#   _AR_FA_LETTERSreplace_ZWNJ_DIACRITICS_REr   r"   r   r    r    r!   normalize_fa_textC   s   
r+   c                 C  s(   t | } tjdd| tjd} t|  S )Nz0[^\w\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\s\-]r   flags)r+   r   r   UNICODEr"   lowerr   r    r    r!   normalize_nameL   s   r0   c                 C  s   t | } tdd| S )Nz[^0-9]r   )r%   r   r   r   r    r    r!   normalize_digits_onlyR   s   r1   c                 C  s   t | } tdd|  S )Nz[^0-9A-Za-z]r   )r%   r   r   upperr   r    r    r!   normalize_id_alphanumW   s   r3   vr   boolc                 C  sB   t | tr| S t | ttfr| dkS t | tr|   dv S dS )Nr   >   r   yonyestrueenabledrequiredF)
isinstancer5   intfloatr   r   r/   )r4   r    r    r!   	is_truthy\   s   

r?   abr>   c           
   
   C  s   | pd} |pd}| |krdS | r|sdS t tt|d }t| ddD ]5\}}|g}t|ddD ]%\}}||kr;dnd}|t|| d ||d  d ||d  |  q1|}q$|d }	d|	tt| t|  S )Nr         ?           )startr   )listrangelen	enumerateappendminmax)
r@   rA   previcacurjcbcostdistr    r    r!   levenshtein_ratioj   s   4rV   c                 C  sr   t | } t |}| r|sdS t|  }t| }t||@ }t||B }|r,|| nd}d| dt| |  S )NrC   333333?g?)r0   setsplitrI   rV   )r@   rA   sasbinterunionrR   r    r    r!   token_set_similarity~   s   r^   set[str]c                 C  s,   t | }|s	t S dd |dd D S )Nc                 S  s   h | ]}|r|qS r    r    ).0tokr    r    r!   	<setcomp>       z"_name_token_set.<locals>.<setcomp>-r   )r0   rX   r(   rY   )r   normr    r    r!   _name_token_set   s   rf   c                 C  s,   t | }t |}|r|sdS ||k p||k S )u   
    True when one normalized name token set is a strict subset of the other.
    Example: "محمد غلام" vs "محمد غلام نژاد".
    F)rf   )r@   rA   tatbr    r    r!   names_have_strict_subset   s
   ri   Optional[Tuple[int, int, int]]c              	   C  s8  t | pd} | dddd} td| }t|dk rdS dd	 |dd D }|d
 |d |d f|d
 |d |d f|d |d
 |d f|d |d |d
 f|d |d
 |d f|d |d |d
 fg}|D ]0\}}}d|  krxdkrn qid|  krdkrn qid|  krdkrn qi|||f  S qidS )z~
    Accepts common formats and returns (YYYY, M, D) if parseable.
    This is only for validation/matching, not for OCR.
    r   rd   /.z\d{1,4}   Nc                 S     g | ]}t |qS r    r=   r`   xr    r    r!   
<listcomp>   rc   z$parse_date_loose.<locals>.<listcomp>r   rD      i  i        )r%   r(   r   findallrI   )r   numsvalspermsr6   mdr    r    r!   parse_date_loose   s$   Hr|   )348343344codeOptional[str]c                   s   t | } tD ]}| |rd|   S qt| dkrdS tt| dkr'dS dd | D  t fddtd	D }|d
 } d	 }|dk rO||krMd S dS |d
| krWd S dS )Nz*iran_national_code_sanctioned_city_prefix:
   iran_national_code_invalidrD   c                 S  rn   r    ro   r`   cr    r    r!   rr      rc   z5iran_national_code_invalid_reason.<locals>.<listcomp>c                 3  s     | ]} | d |  V  qdS )r   Nr    )r`   rO   digitsr    r!   	<genexpr>   s    z4iran_national_code_invalid_reason.<locals>.<genexpr>	      rs   )r1   #IRAN_NATIONAL_CODE_BLOCKED_PREFIXES
startswithrI   rX   sumrH   )r   prefixr   rcheckr    r   r!   !iran_national_code_invalid_reason   s    
r   c                 C  s   t | d u S N)r   )r   r    r    r!   iran_national_code_is_valid   s   r   pts
np.ndarrayc                 C  s|   |  tj} | jdd}| t| }| t| }tj| ddd}| t| }| t| }tj||||gtjdS )NrD   )axisrF   dtype)	astypenpfloat32r   argminargmaxdiffreshapearray)r   r   tlbrr{   trblr    r    r!   order_points_clockwise   s   r   wr=   hc                 C  sX   t | d d df d|d | d d df< t | d d df d|d | d d df< | S )Nr   rD   )r   clip)r   r   r   r    r    r!   clip_points   s   **r   img_bgrquadc                 C  s
  | j d d \}}|tj}t|||}t|}|\}}}}tj|| }tj|| }	tt	||	}
tj|| }tj|| }tt	||}t	|
d}
t	|d}tj
ddg|
d dg|
d |d gd|d ggtjd}t||}tj| ||
|ftjdS )Nrs   r   rD   r   r,   )shaper   r   r   r   r   linalgre   r=   rM   r   cv2getPerspectiveTransformwarpPerspectiveINTER_CUBIC)r   r   HWr   r   r   r   widthAwidthBmaxWheightAheightBmaxHdstMr    r    r!   warp_quad_to_rect   s    

8r     imgmax_dimc                 C  sv   | j d d \}}t||}||kr| S |t| }tdtt|| }tdtt|| }tj| ||ftjdS )Nrs   )interpolation)r   rM   r>   r=   roundr   resize
INTER_AREA)r   r   r   r   rz   scalenwnhr    r    r!   _resize_max_dim  s   
r   U   jpeg_qualityc                 C  sV   t | |d} td| ttjt|g\}}|stdt| 	d}d| S )zD
    Converts an image to a compact data URL to feed to an LLM.
    )r   z.jpgzcv2.imencode(.jpg) failedasciizdata:image/jpeg;base64,)
r   r   imencoder=   IMWRITE_JPEG_QUALITYRuntimeErrorbase64	b64encodetobytesdecode)r   r   r   okbufb64r    r    r!   _encode_jpeg_data_url  s    
r   c                   @     e Zd Zd	ddZdS )
FaceMatcherBasedoc_photo_bgrr   
selfie_bgrr   Dict[str, Any]c                 C  s   t  r   )NotImplementedErrorselfr   r   r    r    r!   match  s   zFaceMatcherBase.matchNr   r   r   r   r   r   __name__
__module____qualname__r   r    r    r    r!   r         r   c                   @  r   )
FaceMatcherUnavailabler   r   r   r   r   c                 C  s   d d dd dS )Nface_module_unavailablescore01cosinereasondetailsr    r   r    r    r!   r   #  r&   zFaceMatcherUnavailable.matchNr   r   r    r    r    r!   r   "  r   r   c                   @  s<   e Zd ZddddZed	d
 ZedddZdddZdS )InsightFaceMatcher  r   Fdet_sizeTuple[int, int]
prefer_gpur5   c                 C  s@   ddl m} |rddgndg}|d|d| _| jjd|d d S )	Nr   )FaceAnalysisCUDAExecutionProviderCPUExecutionProvider	buffalo_l)name	providersrF   )ctx_idr   )insightface.appr   appprepare)r   r   r   r   r   r    r    r!   __init__(  s   zInsightFaceMatcher.__init__c                 C  s   | sd S dd }t | |dS )Nc                 S  s"   | j \}}}}t|| ||  S r   )bboxr>   )fx1y1x2y2r    r    r!   area3  s   z.InsightFaceMatcher._largest_face.<locals>.areakey)rM   )facesr  r    r    r!   _largest_face.  s   z InsightFaceMatcher._largest_facer@   r   rA   r   r>   c                 C  sP   |  tj} | tj}tj| d }tj|d }tt| |||  S )Ng&.>)r   r   r   r   re   r>   dot)r@   rA   nanbr    r    r!   _cosine9  s
   zInsightFaceMatcher._cosiner   r   r   c           	      C  s   | j |}| j |}| |}| |}|d u r%d d ddt|idS |d u r4d d ddt|idS | |j|j}|d d }t|t|dd dS )	Nno_face_in_document_photo	doc_facesr   no_face_in_selfieselfie_facesrB          @r   )r   getr  rI   r  normed_embeddingr>   )	r   r   r   r  	sel_facesfdfscosr   r    r    r!   r   A  s   

zInsightFaceMatcher.matchN)r   F)r   r   r   r5   )r@   r   rA   r   r   r>   r   )r   r   r   r   staticmethodr  r  r   r    r    r    r!   r   '  s    

r   zOptional[FaceMatcherBase]_FACE_MATCHERc                	   C  sn   t + td urtW  d    S ztddaW n ty#   t aY nw tW  d    S 1 s0w   Y  d S )NF)r   )
_FACE_LOCKr  r   	Exceptionr   r    r    r    r!   get_face_matcherV  s   
$r  doc_image_pathselfie_image_pathdoc_photo_quadOptional[List[List[float]]]r   c                 C  s   |s	dddddS |sdddddS t | }t |}|du r'dddddS |du r2dddddS t|tj|tjd}t }|||S )z^
    Computes face match pack once. Caller decides whether to use it depending on policy.
    Nselfie_missingr   doc_photo_not_detectedcannot_read_doc_imagecannot_read_selfie_imager   )r   imreadr   r   r   r   r  r   )r  r  r  doc_imgsel_imgcropmatcherr    r    r!   compute_face_packb  s   	

r)  c                   @  s   e Zd ZU ded< ded< ded< dZded	< d
Zded< dZded< dZded< dZded< dZ	ded< dZ
ded< eedZded< dZded< dZded< dZded< d Zded!< dZd"ed#< dS )$	FieldRule	List[str]class_namesr   r  r5   r;         ?r>   min_det_confg?min_ocr_conftextocr_kindoptional
match_type333333?match_thresholdr   r=   expected_lenNr   	validatordefault_factoryr   constraintsrB   weightF
must_match
match_gaterD   max_candidateszOptional[List[str]]input_aliases)r   r   r   __annotations__r.  r/  r1  r3  r5  r6  r7  r   dictr:  r;  r<  r=  r>  r?  r    r    r    r!   r*    s"   
 r*  c                   @  s   e Zd ZU ded< ded< dZded< dZd	ed
< dZd	ed< dZd	ed< dZd	ed< dZ	d	ed< dZ
d	ed< dZd	ed< dZded< dZded< dZd	ed< eedZded< d Zded!< d"Zd	ed#< dS )$	DocConfigr   doc_idzList[FieldRule]rulesNzOptional[int]min_detected_fields_countrB   r>   approve_min_coverage(\?approve_min_extractionq=
ףp?approve_min_match_corerC   approve_min_match_all      ?review_min_coverage      ?reject_below_coverage皙?approve_no_input_extra_bufferFr5   require_face_matchr   face_metricface_match_thresholdr8  zList[Tuple[str, str]]
swap_pairsTenable_name_swapQ?name_swap_margin)r   r   r   r@  rE  rF  rH  rJ  rK  rM  rO  rQ  rR  rS  rT  r   rG   rU  rV  rX  r    r    r    r!   rB    s"   
 rB  payloadc                 C  s  g }|  dg D ]f}t|}| dp| d}t|tr#|g|d< nt|p'g |d< |dd  | d}|d u r?i |d< n%t|tr[z	t||d< W n tyZ   i |d< Y n
w t|tsdi |d< |	t
d$i | q|  dpug }dd |D }td$i dt|  dp|  d	pd
d|d|  dd dt|  dddt|  dddt|  d|  dddt|  dddt|  dddt|  dddt|  dddt|  dddt|  dddt|  ddd|d t|  d d!d"t|  d"d#S )%NrD  r,  
class_namer:  rU  c                 S  s.   g | ]}t |ttfrt|d krt|qS )rs   )r<   rG   tuplerI   rp   r    r    r!   rr     s   . z+doc_config_from_payload.<locals>.<listcomp>rC  iddocrE  rF  rB   rH  rG  rJ  approve_min_matchrI  rK  rC   rM  rL  rO  rN  rQ  rP  rR  FrS  r   rT  rV  TrX  rW  r    )r  rA  r<   r   rG   popjsonloadsr  rK   r*  rB  r>   r5   )rY  rD  r   rrcncstrU  r    r    r!   doc_config_from_payload  sn   





re  zDict[str, 'KYCEngine']_ENGINE_CACHE
model_path'KYCEngine'c                 C  sL   t | }t |tvrt| t|< t| W  d    S 1 sw   Y  d S r   )r   _ENGINE_LOCKrf  	KYCEngine)rg  r  r    r    r!   
get_engine  s   $rk  c                   @  sj   e Zd Zd7ddZed8dd	Zd9ddZd:ddZeddd;dd Z	!	"	#	$d<d=d0d1Z	d>d5d6Z
d!S )?rj  yolo_model_pathr   c                 C  sT   t || _| jj}t|trdd | D | _n
dd t|D | _t	 | _
d S )Nc                 S  s   i | ]\}}t |t|qS r    )r=   r   )r`   kr4   r    r    r!   
<dictcomp>  s    z&KYCEngine.__init__.<locals>.<dictcomp>c                 S  s   i | ]	\}}|t |qS r    )r   )r`   rO   nr    r    r!   rn    s    )r   modelnamesr<   rA  itemsr,  rJ   	threadingLock_infer_lock)r   rl  rq  r    r    r!   r     s   

zKYCEngine.__init__rq   r>   r   c                 C  s   t dtdt| S )NrC   rB   )rM   rL   r>   rq   r    r    r!   _clamp01
  s   zKYCEngine._clamp01ruler*  
user_inputDict[str, str]r   c                 C  s@   | |j}|r
|S |jr|jD ]}| |}|r|  S qd S r   )r  r  r?  )r   rx  ry  r4   akvvr    r    r!   _get_expected  s   

zKYCEngine._get_expected
image_pathconfioudevicemax_detr=   List[Dict[str, Any]]c              	   C  s>  | j  | jj|||||dd}W d    n1 sw   Y  |s#g S |d }g }t|dd d ur|jd ur|jj}	|jj}
|jj}t|	t	j
rP|	   }	t|
t	j
r^|
   }
t|t	j
rl|   }tt|	D ]$}t|
| }||| j|t|t|| |	| t d qr|S t|dd d ur|jd ur|jj}|jj}
|jj}t|t	j
r|   }t|
t	j
r|
   }
t|t	j
r|   }tt|D ]8}dd || D \}}}}t|
| }||g||g||g||gg}||| j|t|t|| |d q|S )	NF)sourcer  r  r  r  verboser   obb)class_idrZ  r  r   boxesc                 S  rn   r    )r>   rp   r    r    r!   rr   J  rc   z$KYCEngine.detect.<locals>.<listcomp>)ru  rp  predictgetattrr  xyxyxyxyclsr  r<   torchTensordetachcpunumpyrH   rI   r=   rK   r,  r  r   r>   r   tolistr  xyxy)r   r~  r  r  r  r  predsr   detsquadsr  cfrO   cidr  r   r   r  r  r   r    r    r!   detect  sb   



zKYCEngine.detectfa)locale_hintfields_planr  r   c                 C  s(   dd|| ddgddddd	d
d}|S )a  
        Returns a single, self-contained bundle to be sent to your LLM layer.
        You can convert it to OpenAI Responses API (or any other provider) on your server.

        fields_plan: list of:
          {
            "key": str,
            "kind": "text|numeric|date|mrz",
            "candidates": [{"idx": int, "det_conf": float, "image_data_url": str}, ...]
          }

        Expected LLM response:
          {
            "fields": {
              "<key>": {
                 "candidate_idx": 0,
                 "text": "...",          # what you read
                 "confidence": 0.0-1.0,  # optional but recommended
                 "normalized": "..."     # optional; server can re-normalize anyway
              }, ...
            }
          }
        rD   kyc_field_ocrr`  fieldszint (required)zstring|null (required)zfloat 0..1 (optional)zstring|null (optional))candidate_idxr0  
confidence
normalized)typetop_level_keysfield_object)schema_versiontaskr  r  output_contractr    )r  r  schemar    r    r!   build_llm_ocr_prompt_bundleX  s   z%KYCEngine.build_llm_ocr_prompt_bundleNF\     r  doc_cfgrB  r  llm_ocr*Callable[[Dict[str, Any]], Dict[str, Any]]Optional[Dict[str, str]]debugr5   r   r   c	           )        s  |pi }t |}	|	du rtd| i }
|D ]}|
|d g | q|
D ]}|
| jdd d q(i }d7dd}g }i t|	jdd d}i }|jD ] g } j	D ]}|
|
|g  qV|seqO fdd|D }|jdd d |d|  }|sqO jpd }|dkr|d } j	d t|d dddd|d dd| j< |d |d  j< qO j|g d}g | j< t|D ]1\}}tj|d tjd}t|	|}t|||d}|d |t|d |d | j | q|| qO| j|dd}||pi }t|tr|d nd}t|tsi }d8d$d%}d9d:d)d*} |jD ]  jp2d }|dkr<q* j|vrDq*| jpLi }!t|!tsUi }!|!d+}"zt|"}"W n tyl   d}"Y nw | jpug }#|#s{q*|"dk s|"t|#krd}"|#|" }t|dd,}$|!d}%|%du r|!d#}%|%du rd-}%t|%}%|!d.}&|&du r| ||%t jpɈ jd/pdd0}&zt|&}&W n ty   | ||%t jp jd/pdd0}&Y nw | |&}&|!d1}'|'du s
t|'  d-kr| ||%}'n| |t|'}' j	d |$|&|%  |'d2|d |"d| j< |d |d  j< q*||d3}(|rM||(d4< ||(d5< ||(d6< |(S );z
        1) Uses YOLO detections + rules to crop candidates.
        2) Builds ONE bundle and calls llm_ocr(bundle).
        3) Maps response back to per-field extraction pack (same shape as before).
        NzCannot read image: rZ  c                 S     t | dd S Nr  rC   r>   r  rv  r    r    r!   <lambda>      z,KYCEngine.extract_with_llm.<locals>.<lambda>r  rx  r*  r   r=   c                 S  s$   t | jpd}|dkrd}t|dS )NrD   r      )r=   r>  rL   )rx  rm  r    r    r!   top_k  s   
z)KYCEngine.extract_with_llm.<locals>.top_krs   )r  doc_image_shapec                   s*   g | ]}t |d dt  jkr|qS )r  rC   )r>   r  r.  r   rx  r    r!   rr     s   * z.KYCEngine.extract_with_llm.<locals>.<listcomp>c                 S  r  r  r  rv  r    r    r!   r    r  r0  noner   r  r   )rZ  det_confocr_conf	value_raw
value_norm
ocr_methodr   r  r  )r  kind
candidatesr   )r   r   r  )idxr  image_data_urlr  )r  r  r  r  r   valuec                 S  s`   |dkrt |S |dkrtt|S | jdv rt|S | jdv r,|dkr(t |S t|S t|S )Nnumericdate
first_name	last_name	full_name)	id_numberpassport_no)r1   r"   r%   r  r0   r3   r+   )rx  r  r  r    r    r!   normalize_value  s   

z3KYCEngine.extract_with_llm.<locals>.normalize_valuer   r6  r>   c                 S  s   |pd  }|s
dS | dkrt|}|rt||krdS dS | dkr*t|r(dS dS | d	kr>d
|v r<t| dkr<dS dS dS )Nr   rC   r  g??r  r4  g?mrz<rs   gffffff?)r   r1   rI   r|   
splitlines)r  r0  r6  tr{   r    r    r!   heuristic_conf  s"   z2KYCEngine.extract_with_llm.<locals>.heuristic_confr  rC   r   r  length)r6  r  llm)r  	internals
detections
llm_bundlellm_response_raw)rx  r*  r   r=   )rx  r*  r  r   r  r   r   r   )r   )r  r   r0  r   r6  r=   r   r>   )!r   r$  r   
setdefaultrK   sortrG   r   rD  r,  extendr  r1  r/   r>   r  rJ   r   r   r   r   r   r  r<   rA  r=   r  rI   r   r6  r:  rw  r   ))r   r  r  r  r  ry  r  r   r   r   by_classr{   rm  candidates_for_keyr  r  r  r  r  rc  r  best
plan_entryr  detr   r'  data_urlbundlellm_resp
llm_fieldsr  r  respcand_idxdet_listr  raw_textr  	norm_textoutr    r  r!   extract_with_llm  s   

















(,



zKYCEngine.extract_with_llmextracted_fields	face_packOptional[Dict[str, Any]]c           <      C  s  |pi }dd |j D }d}t }g }	g }
g }i }g }g }dJdd}dKdd}|j D ]a}||j}|sBddddd dd||j< q*| t|dd}|t|jk}|r\||j |jp`d	 }|dkrwd}t
|d }d }d }d }n4| t|ddpd}|dpd}|d pd}|t|jkott| }t
|td!| }|jr|r|d"7 }| ||}d}d }d }d }d } d }!|jd#kr|dkrt|pd}"|"d u }|"}|"d urd}|"}|r|d$ks|jd%v rt|nt|}#t|#}$|$d u } |$}!|$d urd}|d u sd&|$v r|$}|||||\}%}&|%s)d}|p(|&}|s8|d'|j d(|  |	t|t|jf d }'d)}(|r|dkr|jpUd*	 })|)d+kr|pad}*|}+|d$krrt|*}*t|+}+n|jd,v rt|*}*t|+}+nt|*}*t|+}+|*|+krdnd}'|'dkrd-nd.}(nk|)d/krt|jpi d0},|jd,v r|,rt|pd|rd}'d1}(nDtt|pd|}'|'t|jkrd/nd2}(n-|)d3krt |pd}-t |}.|-r|.r|-|.krdnd}'|'dkr d4nd5}(nd }'d6}(|'d ur(|
t|'t|jf |j!r(|t|'t|jf |j"rM|'dkr?|(d7v r?|d8|j  |(d9v rM|d8|j  |||||t||'|(||d:
}/|jrl|/#|j||| |!d; |r|/#|||d<|d=|d>|d?d@ |/||j< q*|r|td"t$| nd}0||	}1|
r||
nd }2|r||nd }3d}4|j%d urt$|t&|j%k}4d }5d }6|d ur|dA}6|j'dBkr|dB}5n|dC}5t|3d ur|3n|2pd}7| dDt|1 dD|7  }8|8}9|j(r|5d ur| dE|8 dF| t|5  }9n|8}9t)||0|1|2|3|4t$||||5|6dG\}:};|:|;|0|1|2|3t$||4|5|j'|8|9dH
|dIS )LNc                 S  s   g | ]}|j r|qS r    )r;   )r`   r   r    r    r!   rr   X  s    z#KYCEngine.score.<locals>.<listcomp>r   rx  r*  r  r   rawr   re   r   Tuple[bool, Optional[str]]c                 S  s  | j pi }|s	dS |pd}|pd}|d}|d}|d}	|d urBzt|}
t||
kr7dd|
 fW S W n	 tyA   Y nw |d urdzt|}
t||
k rYdd|
 fW S W n	 tyc   Y nw |	d urzt|	}
t||
kr{dd	|
 fW S W n	 ty   Y nw |d
}|rt|}t||sdd| fS |d}|rztt|t|d u rW dS W n
 tj	y   Y nw |dkr|ddrt
|d u rdS dS )N)TNr   r  
min_length
max_lengthFlength_expected_min_length_max_length_r   prefix_regex)Fregex_mismatchr  
must_parse)Fdate_unparsed)r:  r  r=   rI   r  r   r   r   	fullmatcherrorr|   )rx  r  r  re   r   raw_snorm_sr  r  r  Lr   prefr  r    r    r!   validate_constraintse  sj   





z-KYCEngine.score.<locals>.validate_constraintspairsList[Tuple[float, float]]r>   c                 S  s@   | sdS t dd | D }|dkrtt dd | D | S dS )NrC   c                 s  s    | ]\}}|V  qd S r   r    )r`   _r   r    r    r!   r         z0KYCEngine.score.<locals>.wavg.<locals>.<genexpr>r   c                 s  s    | ]	\}}|| V  qd S r   r    )r`   r4   r   r    r    r!   r     s    )r   r>   )r  swr    r    r!   wavg  s   &zKYCEngine.score.<locals>.wavgFrC   missing)presentdet_okocr_ok
extractionr   r   r  r0  r  TrB   r  r  r   r  g{Gz?rD   iran_national_coder  >   r  r  sanctioned_city_prefixzinvalid::no_user_inputr2  exactr  exact_matchexact_mismatchfuzzyreject_incomplete_namename_incompletefuzzy_below_thresholdr  
date_matchdate_mismatchr  )r  r  z	mismatch:)r  r  )
r
  r  r  r  r  r  r   r   validvalid_reason)r7  validator_okvalidator_reasonexpected_validator_okexpected_validator_reasonrZ  r  r   r  )r  r  rZ  r  r   r  r   r   r   rN  r  gffffff?)r  coverager  	match_all
match_corecount_gate_okdetected_countmismatch_flagsinvalid_flags
face_valueface_reason)
r!  r  r"  r#  detected_fields_countr$  facerS  	doc_scorefinal_score)decisionreasonsscores	per_field)
rx  r*  r  r   r  r   re   r   r   r  )r  r  r   r>   )*rD  rX   r  r  rw  r>   r.  addr1  r/   mathsqrtr/  r5   r   r   rM   r;   r}  r7  r   r1   rK   r;  r3  r0   r+   r?   r:  ri   r^   r5  r|   r=  r<  updaterI   rE  r=   rS  rR  decide)<r   r  r  ry  r  r  required_rulesfound_required_detdet_ok_keysextraction_pairsmatch_pairs_allmatch_pairs_corer1  r&  r'  r  r  rx  r   r  r  r  r  r  r  	raw_value
norm_valueexpectedvalid_okr  r  r  r  r   r   expected_norm
exp_reasonc_okc_reasonmscoremreasonmtgot_normexp_normr  gotexprecr!  extraction_scorer"  r#  r$  face_valr)  
base_matchr,  r-  r.  r/  r    r    r!   scoreN  s|  

6
"














"
zKYCEngine.score)rl  r   )rq   r>   r   r>   )rx  r*  ry  rz  r   r   )r~  r   r  r>   r  r>   r  r   r  r=   r   r  )r  r  r  r   r   r   )NFr  r  )r  r   r  rB  r  r  r  r  ry  r  r  r5   r   r=   r   r=   r   r   )r  rB  r  r   ry  r  r  r  r  r5   r   r   )r   r   r   r   r  rw  r}  r  r  r  rP  r    r    r    r!   rj    s     


?4 Hrj  r  r!  r  r"  Optional[float]r#  r$  r%  r&  r+  r'  r(  r)  Tuple[str, List[str]]c                 C  s  g }| j d ur|s|d |d| d| j   d|fS || jk r,|d d|fS |rH||d d  tdd |D rDd|fS d	|fS |rW||d d  d	|fS | jr|	d u rl|d
|
pdd  d	|fS t|	t| jk r|d |d|	dd| jd d	|fS || jkr|| j	kr|d u r|| j	| j
 krd|fS |d d	|fS || jkrd|fS |d |d ur|d|d d	|fS || jkr|| j	k r|d || jk r|d d	|fS |d d	|fS )Ndetected_fields_count_too_lowz	detected=z_min=REJECTcoverage_too_lowrt   c                 s  s    | ]}d |v V  qdS ))iran_national_code_sanctioned_city_prefixNr    )r`   flagr    r    r!   r     r  zdecide.<locals>.<genexpr>REVIEWzface_required_but_unavailable:unknownface_below_thresholdzface=z.3fz_thr=APPROVEno_core_user_inputcore_match_below_thresholdz
match_all=extraction_lowpartial_coveragecoverage_insufficient)rE  rK   rO  r  anyrR  r>   rT  rF  rH  rQ  rJ  rM  )r  r!  r  r"  r#  r$  r%  r&  r'  r(  r)  r/  r    r    r!   r6  }  sX   












r6  r  r  r   Nonec                 C  s~   || v r|| v r| | | | | |< | |< | di }||v r0||v r0|| || ||< ||< |dg |||d d S )Nr  swap_events)r@   rA   r   )r  r  rK   )r  r  r@   rA   r   qr    r    r!   swap_field_keys  s   re  cfgc                 C  s&   |j D ]\}}t| |||dd qd S )Nconfig_swap_pairsr   )rU  re  )r  r  rf  r@   rA   r    r    r!   apply_config_swaps  s   ri  rW  ocr_first_rawocr_last_raw	exp_firstexp_lastmarginc                 C  s   t | pd|pd}t |pd|pd}|| d }t | pd|pd}t |p%d|p(d}	||	 d }
|
|| kr@d||
||	ddS d||
||ddS )Nr   r  T)zfirst->lastzlast->first)swapscore_normalscore_swappedr   F)zfirst->firstz
last->last)r^   )rj  rk  rl  rm  rn  s11s22normals12s21swappedr    r    r!   best_name_assignment  s   rx  ry  rz  c           	      C  s   |j sd S d| vsd| vrd S |dpd}|dpd}|r!|s#d S | d dp+d}| d dp4d}t|||||jd}|dr]t| |dddd |d	g ddd|d
 d S d S )Nr  r  r   r  )rn  ro  heuristic_name_swaprh  rc  )r@   rA   r   meta)rV  r  rx  rX  re  r  rK   )	r  r  rf  ry  rl  rm  f_rawl_rawinfor    r    r!   apply_name_swap_if_needed  s   
 r~  r-  rW      TF)doc_config_payload
doc_configr  ry  r  	yolo_confyolo_iour  r  cache_enginer  rC  r  r  r  Optional[DocConfig]r  4Optional[Callable[[Dict[str, Any]], Dict[str, Any]]]r  r  r  r  r  r  r  c                 C  s  |d ur	t |}n|d ur|}ntd|d u rtd|r"t| nt| }|j|||	|
|d}|j||||||d}|d }|d }t||| t||||pPi  d }|jrv|ro|d 	dpf|d 	d	}t
|||d
}nd d dd d}|j|||||d}||d |d |d |d d}|jrdnd}g }|jr|s|d |d dkr|d ||d< ||d< |r||d< ||d< ||d< ||d< |	d|d< |	d|d< |S )NzBdoc_config_payload or doc_config must be provided (server-driven).z0llm_ocr callback is required in LLM-OCR edition.)r  r  r  r  )r  ry  r  r  r  r  	doc_photophoto)r  r  r  r   r   )ry  r  r  r.  r0  r/  r1  )rC  r.  r0  r/  r1  r;   disabledupload_selfie_requiredrT  retake_document_photoselfie_mode
next_stepsr  r+  r  r  )re  
ValueErrorrk  rj  r  r  ri  r~  rR  r  r)  rP  rK   )rg  r  rC  r  r  r  ry  r  r  r  r  r  r  r  rf  enginer  packr  r  r  r   scoringresultr  r  r    r    r!   run_kyc  sd   
	


r  )r   r   r   r   )r4   r   r   r5   )r@   r   rA   r   r   r>   )r   r   r   r_   )r@   r   rA   r   r   r5   )r   r   r   rj   )r   r   r   r   )r   r   r   r5   )r   r   r   r   )r   r   r   r=   r   r=   r   r   )r   r   r   r   r   r   )r   )r   r   r   r=   r   r   )r   r   )r   r   r   r=   r   r=   r   r   )r   r   )r  r   r  r   r  r  r   r   )rY  r   r   rB  )rg  r   r   rh  )r  rB  r!  r>   r  r>   r"  rQ  r#  rQ  r$  r5   r%  r=   r&  r+  r'  r+  r(  rQ  r)  r   r   rR  )r  r   r  r   r@   r   rA   r   r   r   r   rb  )r  r   r  r   rf  rB  r   rb  )rW  )rj  r   rk  r   rl  r   rm  r   rn  r>   r   r   )
r  r   r  r   rf  rB  ry  rz  r   rb  )rg  r   r  r   rC  r   r  r  r  r  r  r  ry  r  r  r   r  r>   r  r>   r  r   r  r=   r  r5   r  r5   r   r   )J
__future__r   osr   r3  r`  r   rs  dataclassesr   r   typingr   r   r   r   r	   r
   r   r   r  r   r  ultralyticsr   r   	maketransr$   r'   compiler*   r)   r"   r%   r+   r0   r1   r3   r?   rV   r^   rf   ri   r|   r   r   r   r   r   r   r   r   r   r   r   r  r@  rt  r  r  r)  r*  rB  re  rf  ri  rk  rj  r6  re  ri  rx  r~  r  r    r    r    r!   <module>   s   	$





	












+


7
     

M
	

