"""Identity normalization helpers for organization source ingestion.""" from __future__ import annotations import re VALID_INN_LENGTHS = frozenset({10, 12}) VALID_KPP_LENGTH = 9 VALID_OGRN_LENGTH = 13 VALID_OGRIP_LENGTH = 15 def digits(value: str | None) -> str: """Return only decimal digits from a source identity value.""" return re.sub(r"\D+", "", str(value or "")) def normalize_identity_fields( *, inn: str | None = "", kpp: str | None = "", ogrn: str | None = "", ogrip: str | None = "", ) -> tuple[str, str, str, str]: """Normalize parser identity values to canonical Organization constraints.""" inn_digits = digits(inn) kpp_digits = digits(kpp) ogrn_digits = digits(ogrn) ogrip_digits = digits(ogrip) has_legal_entity_inn = len(inn_digits) == 10 has_entrepreneur_inn = len(inn_digits) == 12 normalized_ogrip = "" if len(ogrip_digits) == VALID_OGRIP_LENGTH and not has_legal_entity_inn: normalized_ogrip = ogrip_digits elif len(ogrn_digits) == VALID_OGRIP_LENGTH and not has_legal_entity_inn: normalized_ogrip = ogrn_digits normalized_inn = inn_digits if len(inn_digits) in VALID_INN_LENGTHS else "" normalized_kpp = ( "" if normalized_ogrip or has_entrepreneur_inn else kpp_digits if len(kpp_digits) == VALID_KPP_LENGTH else "" ) normalized_ogrn = ( "" if normalized_ogrip or has_entrepreneur_inn else ogrn_digits if len(ogrn_digits) == VALID_OGRN_LENGTH else "" ) return normalized_inn, normalized_kpp, normalized_ogrn, normalized_ogrip