
    %i,                       U d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlm	Z	m
Z
 d dlmZmZmZmZmZmZmZ d dlZd dlZd dlZd dlmZ e                    i dddd	d
ddddddddddddddddddd	ddddddddd ddddd!          Ze                    d"d"d#d$d$d%d&d&d&d'd"d(d'd)          Z ej        d*          Zd+Zdd/Z dd0Z!dd1Z"dd2Z#dd3Z$dd4Z%dd8Z&dd9Z'dd;Z(d<Z)dd?Z*ddAZ+ddDZ,ddHZ-ddKZ.dddOZ/dddRZ0 G dS dT          Z1 G dU dVe1          Z2 G dW dXe1          Z3da4dYe5dZ<    ej6                    Z7dd[Z8ddaZ9e	 G db dc                      Z:e	 G dd de                      Z;ddgZ<i Z=dhe5di<    ej6                    Z>ddlZ? G dm dn          Z@dd}ZAddZBddZC	 dddZDddZEddddddddddddddZFdS )    )annotationsN)	dataclassfield)DictAnyListOptionalTupleIterableCallable)YOLOu   ۰0u   ۱1u   ۲2u   ۳3u   ۴4u   ۵5u   ۶6u   ۷7u   ۸8u   ۹9u   ٠u   ١u   ٢u   ٣u   ٤u   ٥u   ٦)u   ٧u   ٨u   ٩u   یu   کu   هu   وu   ا u   لا)u   يu   ىu   كu   ةu   ۀu   ؤu   إu   أu   ٱu   ءu   ئu   ﻻu   ـz/[\u0610-\u061A\u064B-\u065F\u0670\u06D6-\u06ED]u   ‌sstrreturnc                V    t          j        dd| pd                                          S )Nz\s+ r   )resubstripr   s    kyc_policy_engine_llm.pycollapse_spacesr#   ;   s$    6&#R00111    c                :    | pd                     t                    S )Nr   )	translate_FA_AR_TO_EN_DIGITSr!   s    r"   to_en_digitsr(   ?   s    G2333r$   c                    | pd} t          |           } |                     t                    } |                     t          d          } t
                              d|           } t          |           S )Nr   r   )r(   r&   _AR_FA_LETTERSreplace_ZWNJ_DIACRITICS_REr   r#   r!   s    r"   normalize_fa_textr.   C   s]    	
bAQA	N##A			%A2q!!A1r$   c                    t          |           } t          j        dd| t          j                  } t	          |                                           S )Nz0[^\w\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\s\-]r   flags)r.   r   r   UNICODEr#   lowerr!   s    r"   normalize_namer4   L   sF    !A
BBQSQ[\\\A1##%%%r$   c                L    t          |           } t          j        dd|           S )Nz[^0-9]r   )r(   r   r   r!   s    r"   normalize_digits_onlyr6   R   s!    QA6)R###r$   c                p    t          |           } t          j        dd|                                           S )Nz[^0-9A-Za-z]r   )r(   r   r   upperr!   s    r"   normalize_id_alphanumr9   W   s-    QA6/2q))//111r$   abfloatc                   | pd} |pd}| |k    rdS | r|sdS t          t          t          |          dz                       }t          | d          D ]p\  }}|g}t          |d          D ]T\  }}||k    rdnd}|                    t          ||         dz   ||dz
           dz   ||dz
           |z                        U|}q|d         }	d|	t          t          |           t          |                    z  z
  S )Nr         ?           )startr   )listrangelen	enumerateappendminmax)
r:   r;   previcacurjcbcostdists
             r"   levenshtein_ratiorR   `   s'   	RA	RAAvvs A sc!ffqj!!""D1A&&&  2cq*** 	M 	MEArb11aDJJs47Q;AE
QQUd8JKKLLLL8D$SVVSVV,,,--r$   c                Z   t          |           } t          |          }| r|sdS t          |                                           }t          |                                          }t          ||z            }t          ||z            }|r||z  nd}d|z  dt	          | |          z  z   S )Nr?   333333?g?)r4   setsplitrE   rR   )r:   r;   sasbinterunionrN   s          r"   token_set_similarityr[   t   s    qAqA A s	QWWYYB	QWWYYBRLLERLLE'CA7S,Q22222r$   Optional[Tuple[int, int, int]]c           	        t          | pd          } |                     dd                              dd          } t          j        d|           }t	          |          dk     rdS d |dd         D             }|d	         |d
         |d         f|d	         |d         |d
         f|d
         |d	         |d         f|d
         |d         |d	         f|d         |d	         |d
         f|d         |d
         |d	         fg}|D ]=\  }}}d|cxk    rdk    r*n d
|cxk    rdk    rn &d
|cxk    rdk    r
n 6|||fc S >dS )z~
    Accepts common formats and returns (YYYY, M, D) if parseable.
    This is only for validation/matching, not for OCR.
    r   -/.z\d{1,4}   Nc                ,    g | ]}t          |          S  int.0xs     r"   
<listcomp>z$parse_date_loose.<locals>.<listcomp>   s    %%%qCFF%%%r$   r   r@      i  i        )r(   r+   r   findallrE   )r   numsvalspermsymds          r"   parse_date_loosert      s   
 	QW"A			#s##C--A:j!$$D
4yy1}}t%%D!H%%%D	a$q'47#	a$q'47#	a$q'47#	a$q'47#	a$q'47#	a$q'47#E   1a1a2!q,,,,B,,,,,q!94r$   )348343344codeOptional[str]c                   t          |           } t          D ]}|                     |          rd| c S t          |           dk    rdS t          t	          |                     dk    rdS d | D             t          fdt          d          D                       }|dz  }d         }|d	k     r
||k    rd ndS |d|z
  k    rd ndS )
Nz*iran_national_code_sanctioned_city_prefix:
   iran_national_code_invalidr@   c                ,    g | ]}t          |          S rc   rd   )rg   cs     r"   ri   z5iran_national_code_invalid_reason.<locals>.<listcomp>   s    ###c!ff###r$   c              3  4   K   | ]}|         d |z
  z  V  dS )r{   Nrc   )rg   rK   digitss     r"   	<genexpr>z4iran_national_code_invalid_reason.<locals>.<genexpr>   s0      33QF1Ia 333333r$   	      rj   )r6   #IRAN_NATIONAL_CODE_BLOCKED_PREFIXES
startswithrE   rU   sumrD   )rx   prefixr   rcheckr   s        @r"   !iran_national_code_invalid_reasonr      s    &&D5 I I??6"" 	IHHHHHH	I 4yyB++
3t99~~++##d###F3333%((33333A	BA1IE1uuzztt'CCR!V$$44*FFr$   boolc                $    t          |           d u S N)r   )rx   s    r"   iran_national_code_is_validr      s    ,T22d::r$   pts
np.ndarrayc                   |                      t          j                  } |                     d          }| t          j        |                   }| t          j        |                   }t          j        | d                              d          }| t          j        |                   }| t          j        |                   }t          j        ||||gt          j                  S )Nr@   )axisrB   dtype)	astypenpfloat32r   argminargmaxdiffreshapearray)r   r   tlbrrs   trbls          r"   order_points_clockwiser      s    
**RZ
 
 CQA	RYq\\	B	RYq\\	B
!$$R((A	RYq\\	B	RYq\\	B8RR$BJ7777r$   wre   hc                    t          j        | d d df         d|dz
            | d d df<   t          j        | d d df         d|dz
            | d d df<   | S )Nr   r@   )r   clip)r   r   r   s      r"   clip_pointsr      se    AAAqD	1a!e,,C1IAAAqD	1a!e,,C1IJr$   img_bgrquadc                X   | j         d d         \  }}|                    t          j                  }t	          |||          }t          |          }|\  }}}}t          j                            ||z
            }t          j                            ||z
            }	t          t          ||	                    }
t          j                            ||z
            }t          j                            ||z
            }t          t          ||                    }t          |
d          }
t          |d          }t          j
        ddg|
dz
  dg|
dz
  |dz
  gd|dz
  ggt          j                  }t          j        ||          }t          j        | ||
|ft          j                  S )Nrj   r   r@   r   r0   )shaper   r   r   r   r   linalgnormre   rI   r   cv2getPerspectiveTransformwarpPerspectiveINTER_CUBIC)r   r   HWr   r   r   r   widthAwidthBmaxWheightAheightBmaxHdstMs                   r"   warp_quad_to_rectr      sl   =!DAq;;rz""DtQ""D!$''DNBBY^^BG$$FY^^BG$$Fs66""##DinnR"W%%GinnR"W%%Gs7G$$%%DtQ<<DtQ<<D
(QFTAXqMD1HdQh+?!TAXOWYWa
b
b
bC#D#..AwD$<sOOOOr$     imgmax_dimc           	     t   | j         d d         \  }}t          ||          }||k    r| S |t          |          z  }t          dt          t	          ||z                                }t          dt          t	          ||z                                }t          j        | ||ft
          j                  S )Nrj   )interpolation)r   rI   r<   re   roundr   resize
INTER_AREA)r   r   r   r   rr   scalenwnhs           r"   _resize_max_dimr      s    9RaR=DAqAq		AG||
eAhhE	QE!e)$$%%	&	&B	QE!e)$$%%	&	&B:cB83>BBBBr$   U   jpeg_qualityc                @   t          | |          } t          j        d| t          t          j                  t          |          g          \  }}|st          d          t          j        |                                          	                    d          }d| S )zD
    Converts an image to a compact data URL to feed to an LLM.
    )r   z.jpgzcv2.imencode(.jpg) failedasciizdata:image/jpeg;base64,)
r   r   imencodere   IMWRITE_JPEG_QUALITYRuntimeErrorbase64	b64encodetobytesdecode)r   r   r   okbufb64s         r"   _encode_jpeg_data_urlr      s     gw777Gl67S1I-J-JCP\L]L],^__GB 86777

3;;==
)
)
0
0
9
9C*S***r$   c                      e Zd ZddZdS )	FaceMatcherBasedoc_photo_bgrr   
selfie_bgrr   Dict[str, Any]c                    t                      r   )NotImplementedErrorselfr   r   s      r"   matchzFaceMatcherBase.match  s    !###r$   Nr   r   r   r   r   r   __name__
__module____qualname__r   rc   r$   r"   r   r      s(        $ $ $ $ $ $r$   r   c                      e Zd ZddZdS )	FaceMatcherUnavailabler   r   r   r   r   c                    d d dd dS )Nface_module_unavailablescore01cosinereasondetailsrc   r   s      r"   r   zFaceMatcherUnavailable.match  s    4;Taefffr$   Nr   r   rc   r$   r"   r   r     s.        g g g g g gr$   r   c                  N    e Zd ZdddZed             Zedd            ZddZdS )InsightFaceMatcher  r   Fdet_sizeTuple[int, int]
prefer_gpur   c                    ddl m} |rddgndg} |d|          | _        | j                            d|           d S )	Nr   )FaceAnalysisCUDAExecutionProviderCPUExecutionProvider	buffalo_l)name	providersrB   )ctx_idr   )insightface.appr   appprepare)r   r   r   r   r   s        r"   __init__zInsightFaceMatcher.__init__  se    000000ISq,.DEEZpYq	<[IFFFX66666r$   c                2    | sd S d }t          | |          S )Nc                J    | j         \  }}}}t          ||z
  ||z
  z            S r   )bboxr<   )fx1y1x2y2s        r"   areaz.InsightFaceMatcher._largest_face.<locals>.area  s,    VNBB"r'b2g.///r$   key)rI   )facesr
  s     r"   _largest_facez InsightFaceMatcher._largest_face  s3     	4	0 	0 	0 5d####r$   r:   r   r;   r   r<   c                V   |                      t          j                  } |                     t          j                  }t          j                            |           dz   }t          j                            |          dz   }t          t          j        | |          ||z  z            S )Ng&.>)r   r   r   r   r   r<   dot)r:   r;   nanbs       r"   _cosinezInsightFaceMatcher._cosine  sy    HHRZ  HHRZ  Y^^A%Y^^A%RVAq\\R"W-...r$   r   r   r   c                   | j                             |          }| j                             |          }|                     |          }|                     |          }|d d ddt          |          idS |d d ddt          |          idS |                     |j        |j                  }|dz   dz  }t          |          t          |          dd dS )	Nno_face_in_document_photo	doc_facesr   no_face_in_selfieselfie_facesr>          @r   )r   getr  rE   r  normed_embeddingr<   )	r   r   r   r  	sel_facesfdfscosr   s	            r"   r   zInsightFaceMatcher.match$  s   HLL//	HLL,,		**	**:#t?Zhsux  zC  vD  vD  hE  F  F  F:#t?R`npst}p~p~_  A  A  All2.0CDD9# >>U3ZZ4\`aaar$   N)r   F)r   r   r   r   )r:   r   r;   r   r   r<   r   )r   r   r   r  staticmethodr  r  r   rc   r$   r"   r   r   
  s        7 7 7 7 7 $ $ \$ / / / \/b b b b b br$   r   zOptional[FaceMatcherBase]_FACE_MATCHERc                     t           5  t          t          cd d d            S 	 t          d          an# t          $ r t	                      aY nw xY wt          cd d d            S # 1 swxY w Y   d S )NF)r   )
_FACE_LOCKr!  r   	Exceptionr   rc   r$   r"   get_face_matcherr%  9  s    	  $        	5.%@@@MM 	5 	5 	5244MMM	5                 s1   A&5A&AA&A	A&&A*-A*doc_image_pathselfie_image_pathdoc_photo_quadOptional[List[List[float]]]r   c                >   |sdddddS |sdddddS t          j        |           }t          j        |          }|dddddS |dddddS t          |t          j        |t          j                            }t                      }|                    ||          S )z^
    Computes face match pack once. Caller decides whether to use it depending on policy.
    Nselfie_missingr   doc_photo_not_detectedcannot_read_doc_imagecannot_read_selfie_imager   )r   imreadr   r   r   r   r%  r   )r&  r'  r(  doc_imgsel_imgcropmatchers          r"   compute_face_packr4  E  s      ^4;KX\]]] f4;S`deeej((Gj*++G4;R_cddd4;UbfgggWbh~RZ&P&P&PQQD  G==w'''r$   c                      e Zd ZU ded<   ded<   ded<   dZded	<   d
Zded<   dZded<   dZded<   dZded<   dZ	ded<   dZ
ded<    ee          Zded<   dZded<   dZded<   dZded<   d Zded!<   dZd"ed#<   dS )$	FieldRule	List[str]class_namesr   r  r   required      ?r<   min_det_confg?min_ocr_conftextocr_kindoptional
match_type333333?match_thresholdr   re   expected_lenNry   	validatordefault_factoryr   constraintsr>   weightF
must_match
match_gater@   max_candidateszOptional[List[str]]input_aliases)r   r   r   __annotations__r;  r<  r>  r@  rB  rC  rD  r   dictrG  rH  rI  rJ  rK  rL  rc   r$   r"   r6  r6  c  s        HHHNNNL LH J    !O!!!!L#I####"'%"="="=K====FJJN)-M------r$   r6  c                      e Zd ZU ded<   ded<   dZded<   dZd	ed
<   dZd	ed<   dZd	ed<   dZd	ed<   dZ	d	ed<   dZ
d	ed<   dZd	ed<   dZded<   dZded<   dZd	ed<    ee          Zded<   d Zded!<   d"Zd	ed#<   dS )$	DocConfigr   doc_idzList[FieldRule]rulesNzOptional[int]min_detected_fields_countr>   r<   approve_min_coverage(\?approve_min_extractionq=
ףp?approve_min_match_corer?   approve_min_match_all      ?review_min_coverage      ?reject_below_coverage皙?approve_no_input_extra_bufferFr   require_face_matchr   face_metricface_match_thresholdrE  zList[Tuple[str, str]]
swap_pairsTenable_name_swapQ?name_swap_margin)r   r   r   rM  rS  rT  rV  rX  rY  r[  r]  r_  r`  ra  rb  r   rC   rc  rd  rf  rc   r$   r"   rP  rP    s2        KKK/33333"%%%%%$((((($(((((#&&&&&!%%%%%#'''''+/!////  %$$$$ K    "&&&&& ).d(C(C(CJCCCC!!!!!"""""""r$   rP  payloadc                   g }|                      dg           D ]}t          |          }|                     d          p|                     d          }t          |t                    r|g|d<   nt	          |pg           |d<   |                    dd            |                     d          }|i |d<   n]t          |t                    r.	 t          j        |          |d<   n/# t          $ r i |d<   Y nw xY wt          |t                    si |d<   |	                    t          d#i |           |                      d          pg }d |D             }t          d#i dt          |                      d          p|                      d          pd	          d|d
|                      d
d           dt          |                      dd                    dt          |                      dd                    dt          |                      d|                      dd                              dt          |                      dd                    dt          |                      dd                    dt          |                      dd                    dt          |                      dd                    dt          |                      dd                    dt          |                      dd                    dt          |                      dd                    d|dt          |                      dd                     d!t          |                      d!d"                    S )$NrR  r8  
class_namerG  rc  c                    g | ]@}t          |t          t          f          t          |          d k    1t          |          AS )rj   )
isinstancerC   tuplerE   rf   s     r"   ri   z+doc_config_from_payload.<locals>.<listcomp>  sF    ___q*Qu2N2N_SVWXSYSY]^S^S^%((S^S^S^r$   rQ  iddocrS  rT  r>   rV  rU  rX  approve_min_matchrW  rY  r?   r[  rZ  r]  r\  r_  r^  r`  Fra  r   rb  rd  Trf  re  rc   )r  rN  rk  r   rC   popjsonloadsr$  rG   r6  rP  r<   r   )rg  rR  r   rrcncstrc  s          r"   doc_config_from_payloadrv    s   E[["%% & &!WWVVM"":bff\&:&:b# 	/!#B} $RX2B}
|T"""ff]##; "B}S!! 	#'$(JsOO=!! ' ' '$&=!!!'C&& 	# "B}Y____%%%%\**0bJ__J___J   7;;x((FGKK,=,=FGGGe #*++.I4"P"P"P
 #7;;/Es#K#KLLL  %W[[1I4%P%PQQQ  %W[[1I7;;WjlpKqKq%r%rsss $GKK0G$M$MNNN "'++.CT"J"JKKK $GKK0G$N$NOOO ',GKK8WY],^,^&_&_&_  ,@% H HIII M9==>>>  #7;;/Et#L#LMMM!$ :%& gkk*<dCCDDD'( w{{+=tDDEEE) s   C&&C87C8zDict[str, 'KYCEngine']_ENGINE_CACHE
model_path'KYCEngine'c                    t          |           }t          5  |t          vrt          |           t          |<   t          |         cd d d            S # 1 swxY w Y   d S r   )r   _ENGINE_LOCKrw  	KYCEngine)rx  r  s     r"   
get_enginer}    s    
j//C	 " "m##!*:!6!6M#S!" " " " " " " " " " " " " " " " " "s   -AAAc                  v    e Zd Zd0dZed1d            Zd2dZd3dZeddd4d            Z	 	 	 	 d5d6d+Z	d7d/Z
dS )8r|  yolo_model_pathr   c                $   t          |          | _        | j        j        }t          |t                    r$d |                                D             | _        nd t          |          D             | _        t          j	                    | _
        d S )Nc                N    i | ]"\  }}t          |          t          |          #S rc   )re   r   )rg   kvs      r"   
<dictcomp>z&KYCEngine.__init__.<locals>.<dictcomp>  s*    III41aAAIIIr$   c                4    i | ]\  }}|t          |          S rc   )r   )rg   rK   ns      r"   r  z&KYCEngine.__init__.<locals>.<dictcomp>  s$    GGGda3q66GGGr$   )r   modelnamesrk  rN  itemsr8  rF   	threadingLock_infer_lock)r   r  r  s      r"   r  zKYCEngine.__init__  s    /**

 eT"" 	HII5;;==IIIDGGi6F6FGGGD$>++r$   rh   r<   r   c           	     X    t          dt          dt          |                               S )Nr?   r>   )rI   rH   r<   rh   s    r"   _clamp01zKYCEngine._clamp01  s"    3Cq**+++r$   ruler6  
user_inputDict[str, str]ry   c                    |                     |j                  }|r|S |j        r%|j        D ]}|                     |          }|r|c S d S r   )r  r  rL  )r   r  r  r  akvvs         r"   _get_expectedzKYCEngine._get_expected  sk    NN48$$ 	H 	(  ^^B'' IIItr$   
image_pathconfioudevicemax_detre   List[Dict[str, Any]]c           
     :   | j         5  | j                            |||||d          }d d d            n# 1 swxY w Y   |sg S |d         }g }t          |dd           |j        |j        j        }	|j        j        }
|j        j        }t          |	t          j
                  r8|	                                                                                                }	t          |
t          j
                  r8|
                                                                                                }
t          |t          j
                  r8|                                                                                                }t          t          |	                    D ]}t!          |
|                   }|                    || j                            |t)          |                    t+          ||                   |	|                             t*                                                    d           |S t          |dd           |j        |j        j        }|j        j        }
|j        j        }t          |t          j
                  r8|                                                                                                }t          |
t          j
                  r8|
                                                                                                }
t          |t          j
                  r8|                                                                                                }t          t          |                    D ]}d ||         D             \  }}}}t!          |
|                   }||g||g||g||gg}|                    || j                            |t)          |                    t+          ||                   |d           |S )NF)sourcer  r  r  r  verboser   obb)class_idri  r  r   boxesc                ,    g | ]}t          |          S rc   )r<   rf   s     r"   ri   z$KYCEngine.detect.<locals>.<listcomp>-  s    !<!<!<q%((!<!<!<r$   )r  r  predictgetattrr  xyxyxyxyclsr  rk  torchTensordetachcpunumpyrD   rE   re   rG   r8  r  r   r<   r   tolistr  xyxy)r   r  r  r  r  r  predsr   detsquadsr  cfrK   cidr  r  r  r  r	  r   s                       r"   detectzKYCEngine.detect  s    	 	J&&!#fg_d '  E	 	 	 	 	 	 	 	 	 	 	 	 	 	 	
  	I!H%' 1eT"".153DENE%)CB%.. 5**,,2244#u|,, 1jjll&&((..00"el++ /YY[[__&&,,..3u::&&  #a&kk #"&"2"6"6sCHH"E"E!"Q%LL!!HOOE2299;;	      K 1gt$$0QW5H7<D'+CB$-- 3{{}}((**0022#u|,, 1jjll&&((..00"el++ /YY[[__&&,,..3t99%% 	 	!<!<DG!<!<!<BB#a&kkR2r(RHr2h? #"&"2"6"6sCHH"E"E!"Q%LL 	      s   !599fa)locale_hintfields_planr  r   c                *    dd|| ddgddddd	d
d}|S )a  
        Returns a single, self-contained bundle to be sent to your LLM layer.
        You can convert it to OpenAI Responses API (or any other provider) on your server.

        fields_plan: list of:
          {
            "key": str,
            "kind": "text|numeric|date|mrz",
            "candidates": [{"idx": int, "det_conf": float, "image_data_url": str}, ...]
          }

        Expected LLM response:
          {
            "fields": {
              "<key>": {
                 "candidate_idx": 0,
                 "text": "...",          # what you read
                 "confidence": 0.0-1.0,  # optional but recommended
                 "normalized": "..."     # optional; server can re-normalize anyway
              }, ...
            }
          }
        r@   kyc_field_ocrrq  fieldszint (required)zstring|null (required)zfloat 0..1 (optional)zstring|null (optional))candidate_idxr=  
confidence
normalized)typetop_level_keysfield_object)schema_versiontaskr  r  output_contractrc   )r  r  schemas      r"   build_llm_ocr_prompt_bundlez%KYCEngine.build_llm_ocr_prompt_bundle;  sK    <  #&!#+*%54"9":	! !	  	 
 
  r$   NF\     r&  doc_cfgrP  r  llm_ocr*Callable[[Dict[str, Any]], Dict[str, Any]]Optional[Dict[str, str]]debugr   r   r   c	                  ) |pi }t          j        |          }	|	t          d|           i }
|D ]1}|
                    |d         g                               |           2|
D ]}|
|                             d             i }d2d
}g }i t          |	j        dd                   d}i }|j        D ]Ɗ)g })j	        D ]+}|
                    |
                    |g                      ,|s;)fd|D             }|                    d            |d |)                   }|sv)j        pd                                }|dk    rV|d         })j	        d         t          |d                   dddd|d         dd|)j        <   |d         |d         )j        <   )j        |g d}g |)j        <   t!          |          D ]\  }}t#          j        |d         t"          j                  }t)          |	|          }t+          |||          }|d                             |t          |d                   |d           |)j                                     |           |                    |           |                     |d          } ||          pi }t/          |t0                    r|                    d          nd}t/          |t0                    si }d3d!}d4d5d%} |j        D ]))j        pd                                }|dk    r%)j        |vr/|                    )j                  pi }!t/          |!t0                    si }!|!                    d&          }"	 t3          |"          }"n# t4          $ r d}"Y nw xY w|                    )j                  pg }#|#s|"dk     s|"t7          |#          k    rd}"|#|"         }t          |                    dd'                    }$|!                    d          }%|%|!                    d           }%|%d(}%t9          |%          }%|!                    d)          }&|&< | ||%t3          )j        p)j                            d*          pd          +          }&	 t          |&          }&nL# t4          $ r?  | ||%t3          )j        p)j                            d*          pd          +          }&Y nw xY w|                     |&          }&|!                    d,          }'|'%t9          |'                                           d(k    r |)||%          }'n |)|t9          |'                    }')j	        d         |$|&|%                                 |'d-|d         |"d|)j        <   |d         |d         )j        <   ||d.}(|r||(d/<   ||(d0<   ||(d1<   |(S )6z
        1) Uses YOLO detections + rules to crop candidates.
        2) Builds ONE bundle and calls llm_ocr(bundle).
        3) Maps response back to per-field extraction pack (same shape as before).
        NzCannot read image: ri  c                J    t          |                     dd                     S Nr  r?   r<   r  r  s    r"   <lambda>z,KYCEngine.extract_with_llm.<locals>.<lambda>  s    E!%%2D2D,E,E+E r$   r  r  r6  r   re   c                ^    t          | j        pd          }|dk    rd}t          |d          S )Nr@   r      )re   rK  rH   )r  r  s     r"   top_kz)KYCEngine.extract_with_llm.<locals>.top_k  s3    D',1--AAvvq!99r$   rj   )r  doc_image_shapec                    g | ]=}t          |                    d d                    t          j                  k    ;|>S )r  r?   )r<   r  r;  )rg   r~   r  s     r"   ri   z.KYCEngine.extract_with_llm.<locals>.<listcomp>  sF    iii5vs9K9K3L3LPUVZVgPhPh3h3h!3h3h3hr$   c                J    t          |                     dd                     S r  r  r  s    r"   r  z,KYCEngine.extract_with_llm.<locals>.<lambda>  s    5vs1C1C+D+D*D r$   r=  noner   r  r   )ri  det_confocr_conf	value_raw
value_norm
ocr_methodr   r  r  )r  kind
candidatesr   )r   r   r  )idxr  image_data_urlr  )r  r  r  r  r   valuec                   |dk    rt          |          S |dk    rt          t          |                    S | j        dv rt	          |          S | j        dv r$|dk    rt          |          nt          |          S t          |          S )Nnumericdate
first_name	last_name	full_name)	id_numberpassport_no)r6   r#   r(   r  r4   r9   r.   )r  r  r  s      r"   normalize_valuez3KYCEngine.extract_with_llm.<locals>.normalize_value  s    y  ,U333v~~&|E':':;;;xCCC%e,,,x7777;y7H7H,U333NcdiNjNjj$U+++r$   ry   rC  r<   c                .   |pd                                 }|sdS | dk    r(t          |          }|rt          |          |k    rdS dS | dk    rt          |          rdS dS | d	k    r-d
|v r't          |                                          dk    rdS dS dS )Nr   r?   r  g??r  rA  g?mrz<rj   gffffff?)r    r6   rE   rt   
splitlines)r  r=  rC  trs   s        r"   heuristic_confz2KYCEngine.extract_with_llm.<locals>.heuristic_conf  s    ""$$A sy  )!,, CFFl$:$:3tv~~#A&&  4tu}}!88ALLNN 3 3q 8 84t4r$   r  r?   r   r  length)rC  r  llm)r  	internals
detections
llm_bundlellm_response_raw)r  r6  r   re   )r  r6  r  r   r  r   r   r   )r   )r  r   r=  ry   rC  re   r   r<   )!r   r/  r   
setdefaultrG   sortrC   r   rR  r8  extendr  r>  r3   r<   r  rF   r   r   r   r   r   r  rk  rN  re   r$  rE   r   rC  rG  r  r    )*r   r&  r  r  r  r  r  r   r   r   by_classrs   r  candidates_for_keyr  r  r  r  r  rt  r  best
plan_entryr  detr   r2  data_urlbundlellm_resp
llm_fieldsr  r  respcand_idxdet_listr  raw_textr  	norm_textoutr  s*                                            @r"   extract_with_llmzKYCEngine.extract_with_llmj  s      %2
j((;E^EEFFF46 	? 	?A,44;;A>>>> 	G 	GAQK!E!EFFFF ?A	 	 	 	 -/.0T#)TVUVTV-EXEX$Y$Y	!#M /	+ /	+D/1J& 8 8!!(,,r2"6"67777 iiiiZiiiJOO D DOEEE#LUU4[[L1J M+V2244D v~~!!}"&"21"5 %d6l 3 3 $!%"&"& L%&	$ 	$tx  04F|	'"48, "&4rJJJ+-tx(%j11 	9 	9SxF2:>>>(d330LZabbb<(// %c&k 2 2&.1 1   
 #48,33C8888z**** 11kW[1\\76??(b/9(D/I/ISX\\(+++t
*d++ 	J		, 		, 		, 		,	 	 	 	 	& M ;	7 ;	7DM+V2244Dv~~x111>>$(++1rDdD)) xx00Hx==    *--dh77=2H !||x3x==888$CSWWVS1122Hxx''H88G,,8}}H88L))D|%~dH3tGXG\`\l\p\pqy\z\zG~  DA  DA  B  B  BBT{{ B B B%~dH3tGXG\`\l\p\pqy\z\zG~  DA  DA  B  B  BB==&&D..I C	NN$8$8$:$:b$@$@+OD$AA		+OD$IGG	 #.q1$ %^^--'#F!)	  	 F48 ,/v;Igtx((i88 	/ $C &C&.C"#
s%   -M==NNRAS S extracted_fields	face_packOptional[Dict[str, Any]]c                   |pi }d |j         D             }d}t                      }g }	g }
g }i }g }g }dDd}dEd}|j         D ]}|                    |j                  }|sddddd dd||j        <   1|                     t          |                    dd                              }|t          |j                  k    }|r|                    |j                   |j        pd	                                }|dk    r d}t          j        |dz            }d }d }d }n|                     t          |                    dd          pd                    }|                    d          pd}|                    d          pd}|t          |j                  k    o-t          t          |                                                    }t          j        |t!          d|          z            }|j        r|r|dz  }|                     ||          }d}d }d }d }d } d }!|j        d k    rs|dk    rmt)          |pd          }"|"d u }|"}|"d}|"}|rN|d!k    s	|j        d"v rt+          |          nt          |          }#t)          |#          }$|$d u } |$}!|$
d}|d#|$v r|$} |||||          \  }%}&|%sd}|p|&}|s |                    d$|j         d%|            |	                    t          |          t          |j                  f           d }'d&}(|r3|dk    r,|j        pd'	                                })|)d(k    r|pd}*|}+|d!k    rt+          |*          }*t+          |+          }+nF|j        d)v rt3          |*          }*t3          |+          }+nt5          |*          }*t5          |+          }+|*|+k    rdnd}'|'dk    rd*nd+}(n|)d,k    r<t          t7          |pd|                    }'|'t          |j                  k    rd,nd-}(nC|)d.k    r=t;          |pd          },t;          |          }-|,r|-r|,|-k    rdnd}'|'dk    rd/nd0}(nd }'d1}(|'s|
                    t          |'          t          |j                  f           |j        r6|                    t          |'          t          |j                  f           |j        rJ|'dk    r!|(d2v r|                    d3|j                    |(d-k    r|                    d3|j                    |||||t          |          |'|(||d4
}.|j        r |.                     |j        ||| |!d5           |rh|.                     |||                    d6          |                    d7          |                    d8          |                    d9          d:           |.||j        <   |r |t!          dtC          |                    z  nd}/ ||	          }0|
r ||
          nd }1|r ||          nd }2d}3|j"        %tC          |          tG          |j"                  k    }3d }4d }5|K|                    d;          }5|j$        d<k    r|                    d<          }4n|                    d=          }4t          |2|2n|1pd          }6|                     d>t          |0          z  d>|6z  z             }7|7}8|j%        rC|4?|                     d?|7z  d@|                     t          |4                    z  z             }8n|7}8tM          ||/|0|1|2|3tC          |          |||4|5A          \  }9}:|9|:|/|0|1|2tC          |          |3|4|j$        |7|8dB
|dCS )FNc                     g | ]}|j         	|S rc   )r9  )rg   r   s     r"   ri   z#KYCEngine.score.<locals>.<listcomp>;  s    AAAajA!AAAr$   r   r  r6  r  r   rawry   r   r   Tuple[bool, Optional[str]]c                   | j         pi }|sdS |pd}|pd}|                    d          }|                    d          }|                    d          }	|;	 t          |          }
t          |          |
k    rdd|
 fS n# t          $ r Y nw xY w|;	 t          |          }
t          |          |
k     rdd|
 fS n# t          $ r Y nw xY w|	;	 t          |	          }
t          |          |
k    rdd	|
 fS n# t          $ r Y nw xY w|                    d
          }|r8t          |          }t          |                              |          sdd| fS |                    d          }|rH	 t          j        t          |          t          |                    dS n# t          j	        $ r Y nw xY w|dk    r'|                    dd          rt          |          dS dS )N)TNr   r  
min_length
max_lengthFlength_expected_min_length_max_length_r   prefix_regex)Fregex_mismatchr  
must_parse)Fdate_unparsed)rG  r  re   rE   r$  r   r   r   	fullmatcherrorrt   )r  r  r  r   r~   raw_snorm_sr  r  r  Lr   prefr  s                 r"   validate_constraintsz-KYCEngine.score.<locals>.validate_constraintsH  sm    &BA "!zI2EZRFUU8__F|,,J|,,J!FA6{{a''$&<&<&<<< (    D%JA6{{Q$&7A&7&777 '    D%JA6{{Q$&7A&7&777 '    D UU8__F 36{{6{{--d33 3 "2D"2"222EE'NNE |CJJF<<D66 Ex   D v~~!%%e"<"<~#E**211:sH   (B 
BB(B? ?
CC(C< <
D	D	3/F% %F76F7pairsList[Tuple[float, float]]r<   c                    | sdS t          d | D                       }|dk    r)t          t          d | D                       |z            ndS )Nr?   c              3      K   | ]	\  }}|V  
d S r   rc   )rg   _r   s      r"   r   z0KYCEngine.score.<locals>.wavg.<locals>.<genexpr>  s&      ))41aQ))))))r$   r   c              3  &   K   | ]\  }}||z  V  d S r   rc   )rg   r  r   s      r"   r   z0KYCEngine.score.<locals>.wavg.<locals>.<genexpr>  s*      55tq!QU555555r$   )r   r<   )r&  sws     r"   wavgzKYCEngine.score.<locals>.wavg~  sb     s))5)))))B?AAvv555u55555:;;;3Nr$   Fr?   missing)presentdet_okocr_ok
extractionr   r   r  r=  r  Tr>   r  r  r   r  g{Gz?r@   iran_national_coder  >   r  r  sanctioned_city_prefixzinvalid::no_user_inputr?  exactr  exact_matchexact_mismatchfuzzyfuzzy_below_thresholdr  
date_matchdate_mismatchr  )r9  r=  z	mismatch:)
r/  r0  r1  r  r  r2  r   r   validvalid_reason)rD  validator_okvalidator_reasonexpected_validator_okexpected_validator_reasonri  r  r   r  )r  r  ri  r  r   r  r   r   r   r\  r  gffffff?)r  coverager2  	match_all
match_corecount_gate_okdetected_countmismatch_flagsinvalid_flags
face_valueface_reason)
rD  r2  rE  rF  detected_fields_countrG  facera  	doc_scorefinal_score)decisionreasonsscores	per_field)
r  r6  r  r   r  ry   r   ry   r   r  )r&  r'  r   r<   )'rR  rU   r  r  r  r<   r;  addr>  r3   mathsqrtr<  r   r   r    rI   r9  r  rD  r   r6   rG   rH  r@  r4   r.   r[   rB  rt   rJ  rI  updaterE   rS  re   ra  r`  decide);r   r  r  r  r  r  required_rulesfound_required_detdet_ok_keysextraction_pairsmatch_pairs_allmatch_pairs_corerT  rI  rJ  r%  r-  r  r  r  r0  r  r1  r2  r  	raw_value
norm_valueexpectedvalid_okr?  r@  rA  rB  rC  r   expected_norm
exp_reasonc_okc_reasonmscoremreasonmtgot_normexp_normgotexprecrD  extraction_scorerE  rF  rG  face_valrL  
base_matchrO  rP  rQ  rR  s;                                                              r"   scorezKYCEngine.score1  s6	     %2
AAW]AAA55685768$&	$&#%4	 4	 4	 4	l	O 	O 	O 	O M O	& O	&D $$TX..A $VY!Y' '	$(# }}U155S+A+A%B%BCCHt'8!9!99F *)))M+V2244Dv~~!Yx#~66
 	!

==quuZ/E/E/L)M)MNNEE+..4"	UU<006B
"eD,=&>&>>aDZI^I^I`I`DaDa!Yx#dH2E2E'EFF
} ( ("a'"))$
;;HHL+/L.248!7;%~!555$&..::;KLL%~#) %$H#)L 6HLPYHYHY]a]e  jF  ^F  ^F$9($C$C$C  MP  QY  MZ  MZM!B=!Q!QJ,6$,>)0:-!-#('/3Kz3Y3Y+5L11$iTTND( 8 +7x K$$%I%I%I<%I%IJJJ##U:%6%6dk8J8J$KLLL&*F%G  2DFNNo3::<<==)/RH'Hy((#8#B#B#8#B#B%MMM#1(#;#;#1(#;#;#4X#>#>#4X#>#>$,$8$8SScF/5}}mmBRGG7]]"#7	R#R#RSSF)/59M3N3N)N)NggTkGG6\\*9?;;C*844C 2s 2(+s

28C--,,_!%"1!&&fuT[7I7I'JKKK? Q$++U6]]E$+<N<N,OPPP BS==W0S%S%S"))*@dh*@*@AAA555"))*@dh*@*@AAA "  $$#J//!! ,# #C ~ 

!%$0(8-B1J      

$","#%%"5"5"#%%"5"5EE&MM%&UU?%;%;     #&IdhIW`&QN0C0C)D)DDD]`4 011.=GTT/***4	0@Jdd+,,,d
,8 --W5V1W1WWM #--11K"h..$==22$==33)?::iFVSVXX
MM#.>(?(?"?#
BR"RSS	  % 	(#"mmD9,<tdmmTYZbTcTcFdFd?d,dee'"'!'{++)'#
 
 
' !$.&(),[)9)9!. &2&*  #
 
 	
r$   )r  r   )rh   r<   r   r<   )r  r6  r  r  r   ry   )r  r   r  r<   r  r<   r  r   r  re   r   r  )r  r  r  r   r   r   )NFr  r  )r&  r   r  rP  r  r  r  r  r  r  r  r   r   re   r   re   r   r   )r  rP  r  r   r  r  r  r  r  r   r   r   )r   r   r   r  r   r  r  r  r  r  rs  rc   r$   r"   r|  r|    s        	, 	, 	, 	, , , , \,	 	 	 	; ; ; ;~   , , , , , \,h 04C C C C CN_
 _
 _
 _
 _
 _
r$   r|  r  rD  r2  rE  Optional[float]rF  rG  rH  rI  r7  rJ  rK  rL  Tuple[str, List[str]]c                   g }| j         ;|s9|                    d           |                    d| d| j                     d|fS || j        k     r|                    d           d|fS |r>|                    |d d                    t	          d |D                       rd|fS d|fS |r!|                    |d d                    d|fS | j        r|	|                    d	|
pd
            d|fS t          |	          t          | j                  k     r;|                    d           |                    d|	dd| j        d           d|fS || j        k    r|| j	        k    ru|0|| j	        | j
        z   k    rd|fS |                    d           d|fS || j        k    rd|fS |                    d           ||                    d|d           d|fS || j        k    rD|| j	        k     r|                    d           || j        k     r|                    d           d|fS |                    d           d|fS )Ndetected_fields_count_too_lowz	detected=z_min=REJECTcoverage_too_lowrk   c              3     K   | ]}d |v V  	dS ))iran_national_code_sanctioned_city_prefixNrc   )rg   flags     r"   r   zdecide.<locals>.<genexpr>r  s(      ]]t:dB]]]]]]r$   REVIEWzface_required_but_unavailable:unknownface_below_thresholdzface=z.3fz_thr=APPROVEno_core_user_inputcore_match_below_thresholdz
match_all=extraction_lowpartial_coveragecoverage_insufficient)rS  rG   r]  r  anyr`  r<   rb  rT  rV  r_  rX  r[  )r  rD  r2  rE  rF  rG  rH  rI  rJ  rK  rL  rR  s               r"   rY  rY  W  s    G(4]46777[>[[8Y[[\\\  '///)***    !}SbS)***]]}]]]]] 	%W$$   !~crc*+++   ! %NNVK<T9VVWWWW$$uW%ABBBBNN1222NNZ:ZZZ8TZZZ[[[W$$ 7///J'B`4`4`g<w?ddee '))NN/000W$$777g%%3444 NN7	777888   7...666NN+,,,g222NN-...  NN*+++Wr$   r  r  r   Nonec                   || v r|| v r| |         | |         c| |<   | |<   |                     di           }||v r||v r||         ||         c||<   ||<   |                    dg                               |||d           d S )Nr  swap_events)r:   r;   r   )r  r  rG   )r  r  r:   r;   r   qs         r"   swap_field_keysr    s    F{{qF{{%ay&)q	6!9gr""AAvv!q&&qT1Q4
!ad++22f3U3UVVVVVr$   cfgc                H    |j         D ]\  }}t          | |||d           d S )Nconfig_swap_pairsr   )rc  r  )r  r  r  r:   r;   s        r"   apply_config_swapsr    sB     M M1	1a8KLLLLLM Mr$   re  ocr_first_rawocr_last_raw	exp_firstexp_lastmarginc                    t          | pd|pd          }t          |pd|pd          }||z   dz  }t          | pd|pd          }t          |pd|pd          }	||	z   dz  }
|
||z   k    r
d||
||	ddS d||
||ddS )Nr   r  T)zfirst->lastzlast->first)swapscore_normalscore_swappedr   F)zfirst->firstz
last->last)r[   )r  r  r  r  r  s11s22normals12s21swappeds              r"   best_name_assignmentr    s     }2IO
D
DC
|1r8>r
B
BCCi3F
}2HN
C
CC
|1r9?
C
CCSyCG&  fwkn  @C  \D  \D  E  E  	E6Gil|  YA  YA  B  B  Br$   r  r  c                   |j         sd S d| vsd| vrd S |                    d          pd}|                    d          pd}|r|sd S | d                             d          pd}| d                             d          pd}t          |||||j                  }|                    d          rDt	          | |ddd           |                    d	g                               ddd|d
           d S d S )Nr  r  r   r  )r  r  heuristic_name_swapr  r  )r:   r;   r   meta)rd  r  r  rf  r  r  rG   )	r  r  r  r  r  r  f_rawl_rawinfos	            r"   apply_name_swap_if_neededr    sN    6!![%>%>|,,2I~~k**0bH H < $$[117RE;##K006BEui#J^___Dxx M	<Mbcccc]B//66\P[g|  GK  8L  8L  	M  	M  	M  	M  	MM Mr$   r:  rT      TF)doc_config_payload
doc_configr  r  r'  	yolo_confyolo_iour  r  cache_enginer  rQ  r  r  r  Optional[DocConfig]r  4Optional[Callable[[Dict[str, Any]], Dict[str, Any]]]r  r  r  r  r  r  r  c                   |t          |          }n||}nt          d          |t          d          |rt          |           nt          |           }|                    |||	|
|          }|                    ||||||          }|d         }|d         }t          |||           t          ||||pi            d }|j        rR|rI|d         	                    d          p|d         	                    d	          }t          |||
          }nd d dd d}|                    |||||          }||d         |d         |d         |d         d}|j        rdnd}g }|j        r|s|                    d           |d         dk    r|                    d           ||d<   ||d<   |rD||d<   ||d<   ||d<   ||d<   |	                    d          |d<   |	                    d          |d<   |S )NzBdoc_config_payload or doc_config must be provided (server-driven).z0llm_ocr callback is required in LLM-OCR edition.)r  r  r  r  )r  r  r  r  r  r  	doc_photophoto)r&  r'  r(  r+  r   )r  r  r  rQ  rS  rR  rT  )rQ  rQ  rS  rR  rT  r9  disabledupload_selfie_requiredrx  retake_document_photoselfie_mode
next_stepsr  rN  r  r  )rv  
ValueErrorr}  r|  r  r  r  r  r`  r  r4  rs  rG   )rx  r&  rQ  r  r  r  r  r'  r  r  r  r  r  r  r  enginer  packr  r  r  r   scoringresultr  r  s                             r"   run_kycr    s   , %%&899		]^^^KLLL'3NZ
###:9N9NF==iXf^e=ffD"" #  D (^F[!Ivy#...fij6FBGGG I
 g 	gW%))+66Y)G:L:P:PQX:Y:YD)[l  ~B  C  C  CII$(DDTaeffIll3:Z_l``G J'(#9%[) F !$ 6F**JKJ
 4&7 42333jX%%1222'F=%F< B#|!x'{"v#xx55|%)XX.@%A%A!"Mr$   )r   r   r   r   )r:   r   r;   r   r   r<   )r   r   r   r\   )rx   r   r   ry   )rx   r   r   r   )r   r   r   r   )r   r   r   re   r   re   r   r   )r   r   r   r   r   r   )r   )r   r   r   re   r   r   )r   r   )r   r   r   re   r   re   r   r   )r   r   )r&  r   r'  r   r(  r)  r   r   )rg  r   r   rP  )rx  r   r   ry  )r  rP  rD  r<   r2  r<   rE  rt  rF  rt  rG  r   rH  re   rI  r7  rJ  r7  rK  rt  rL  ry   r   ru  )r  r   r  r   r:   r   r;   r   r   r   r   r  )r  r   r  r   r  rP  r   r  )re  )r  r   r  r   r  r   r  r   r  r<   r   r   )
r  r   r  r   r  rP  r  r  r   r  )rx  r   r&  r   rQ  r   r  r  r  r  r  r  r  r  r'  ry   r  r<   r  r<   r  r   r  re   r  r   r  r   r   r   )G
__future__r   osr   rV  rq  r   r  dataclassesr   r   typingr   r   r   r	   r
   r   r   r   r  r   r  ultralyticsr   r   	maketransr'   r*   compiler-   r,   r#   r(   r.   r4   r6   r9   rR   r[   rt   r   r   r   r   r   r   r   r   r   r   r   r!  rM  r  r#  r%  r4  r6  rP  rv  rw  r{  r}  r|  rY  r  r  r  r  r  rc   r$   r"   <module>r     s   # " " " " " " 				 				        ( ( ( ( ( ( ( ( G G G G G G G G G G G G G G G G G G 



            mm %#%S%%%)3%04c%#%S%%%)3%04c% 	#% S% % &*3% 15c% 	#	% S	% !$3c	% % %    











       NOO2 2 2 24 4 4 4   & & & &$ $ $ $
2 2 2 2. . . .(
3 
3 
3 
3   < '< #G G G G(; ; ; ;8 8 8 8   P P P P,C C C C C	+ 	+ 	+ 	+ 	+ $ $ $ $ $ $ $ $
g g g g g_ g g g
(b (b (b (b (b (b (b (bV ,0 / / / /Y^
	 	 	 	( ( ( (< . . . . . . . .< # # # # # # # #60 0 0 0n )+ * * * *y~" " " "o	
 o	
 o	
 o	
 o	
 o	
 o	
 o	
lF F F FZW W W WM M M M B B B B B(M M M M8 48&* EI+/'+)X X X X X X X Xr$   