55 lines
1.6 KiB
Python
55 lines
1.6 KiB
Python
"""Identity normalization helpers for organization source ingestion."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import re
|
|
|
|
# Digit counts an identity value must have (after non-digit characters are
# stripped) to be accepted by normalize_identity_fields. An INN may have
# either of two lengths; the other identifiers have exactly one.
VALID_INN_LENGTHS = frozenset((10, 12))
VALID_KPP_LENGTH = 9
VALID_OGRN_LENGTH = 13
VALID_OGRIP_LENGTH = 15
|
|
|
|
|
|
def digits(value: str | None) -> str:
    """Return only the ASCII decimal digits (0-9) of a source identity value.

    Args:
        value: Raw identity text from a source; ``None`` and other falsy
            values are treated as an empty string.

    Returns:
        The characters ``0``-``9`` of ``value`` in their original order, with
        everything else removed. May be an empty string.
    """
    # An explicit 0-9 class is used instead of ``\D``: on str patterns ``\D``
    # keeps every Unicode decimal digit (Arabic-Indic, fullwidth, ...), which
    # would then be counted by length checks as if it were a valid digit.
    return re.sub(r"[^0-9]+", "", str(value or ""))
|
|
|
|
|
|
def normalize_identity_fields(
    *,
    inn: str | None = "",
    kpp: str | None = "",
    ogrn: str | None = "",
    ogrip: str | None = "",
) -> tuple[str, str, str, str]:
    """Reduce raw parser identity values to a mutually consistent set.

    Every argument is first passed through ``digits``; each result is then
    kept or blanked according to its length and to the other values:

    * INN is kept only when its length is in ``VALID_INN_LENGTHS``.
    * OGRIP is the first value of ``VALID_OGRIP_LENGTH`` digits found in
      ``ogrip`` and then ``ogrn``; it is always blank when the INN has
      10 digits (legal entity).
    * KPP and OGRN are blank whenever an OGRIP was selected or the INN has
      12 digits (entrepreneur); otherwise each is kept only at exactly
      ``VALID_KPP_LENGTH`` / ``VALID_OGRN_LENGTH`` digits.

    Returns:
        ``(inn, kpp, ogrn, ogrip)``, where each item is a digit string or
        ``""`` when the value was rejected.
    """
    inn_value, kpp_value, ogrn_value, ogrip_value = (
        digits(raw) for raw in (inn, kpp, ogrn, ogrip)
    )
    inn_length = len(inn_value)
    is_legal_entity = inn_length == 10
    is_entrepreneur = inn_length == 12

    # An OGRIP-length number may arrive in either field; the dedicated
    # ``ogrip`` field wins over ``ogrn``. Legal entities never get an OGRIP.
    clean_ogrip = ""
    if not is_legal_entity:
        for candidate in (ogrip_value, ogrn_value):
            if len(candidate) == VALID_OGRIP_LENGTH:
                clean_ogrip = candidate
                break

    clean_inn = inn_value if inn_length in VALID_INN_LENGTHS else ""

    # KPP and OGRN are only retained when nothing marks the record as an
    # entrepreneur (no OGRIP selected and no 12-digit INN).
    clean_kpp = ""
    clean_ogrn = ""
    if not (clean_ogrip or is_entrepreneur):
        if len(kpp_value) == VALID_KPP_LENGTH:
            clean_kpp = kpp_value
        if len(ogrn_value) == VALID_OGRN_LENGTH:
            clean_ogrn = ogrn_value

    return clean_inn, clean_kpp, clean_ogrn, clean_ogrip
|