Files
mostovik-backend/tests/apps/parsers/factories.py
Aleksandr Meshchriakov 199d871923
Some checks failed
CI/CD Pipeline / Build Docker Images (push) Blocked by required conditions
CI/CD Pipeline / Push to Gitea Registry (push) Blocked by required conditions
CI/CD Pipeline / Code Quality Checks (push) Failing after 3m55s
CI/CD Pipeline / Run Tests (push) Failing after 3h11m38s
feat(parsers): add proverki.gov.ru parser with sync_inspections task
- Add InspectionRecord model with is_federal_law_248, data_year, data_month fields
- Add ProverkiClient with Playwright support for JS-rendered portal
- Add streaming XML parser for large files (>50MB)
- Add sync_inspections task with incremental loading logic
  - Starts from 01.01.2025 if DB is empty
  - Loads both FZ-294 and FZ-248 inspections
  - Stops after 2 consecutive empty months
- Add InspectionService methods: get_last_loaded_period, has_data_for_period
- Add Minpromtorg parsers (certificates, manufacturers)
- Add Django Admin for parser models
- Update README with parsers documentation and changelog
2026-01-21 20:16:25 +01:00

348 lines
12 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Factories for parsers tests."""
import random
from datetime import timedelta
from django.utils import timezone
import factory
from apps.parsers.models import (
IndustrialCertificateRecord,
InspectionRecord,
ManufacturerRecord,
ParserLoadLog,
Proxy,
)
# === Хелперы для генерации реалистичных данных ===
def generate_inn_legal() -> str:
    """Generate a 10-digit legal-entity INN with a valid check digit.

    Layout is ``NNNNXXXXXC``:

    * ``NNNN``  - tax office code (2-digit region + 2-digit inspection),
    * ``XXXXX`` - serial number,
    * ``C``     - check digit.

    The check digit uses the standard algorithm for 10-digit INNs: the
    weighted sum of the first nine digits, modulo 11, then modulo 10.
    The previous implementation used a plain digit sum, so the generated
    values were rejected by any real INN validator.

    Returns:
        The INN as a string of exactly 10 decimal digits.
    """
    region = random.choice(["77", "78", "50", "52", "63", "16", "66", "74", "54", "61"])
    inspection = str(random.randint(1, 99)).zfill(2)
    number = str(random.randint(1, 99999)).zfill(5)
    base = region + inspection + number

    # Positional weights for the nine leading digits of a 10-digit INN.
    weights = (2, 4, 10, 3, 5, 9, 4, 6, 8)
    weighted_sum = sum(weight * int(digit) for weight, digit in zip(weights, base))
    # A remainder of 10 collapses to 0 through the final "% 10".
    control = str(weighted_sum % 11 % 10)
    return base + control
def generate_ogrn() -> str:
    """Build a random 13-digit OGRN of a legal entity.

    Digits, left to right:

    * sign ``1`` (legal entity),
    * 2-digit registration year,
    * 2-digit region code,
    * 2-digit inspection code,
    * 5-digit record number,
    * check digit.

    Returns:
        The OGRN as a string of exactly 13 decimal digits.
    """
    region_codes = ["77", "78", "50", "52", "63", "16", "66", "74", "54", "61"]

    year_part = f"{random.randint(2, 24):02d}"
    region_part = random.choice(region_codes)
    inspection_part = f"{random.randint(1, 99):02d}"
    record_part = f"{random.randint(1, 99999):05d}"

    body = "".join(("1", year_part, region_part, inspection_part, record_part))
    # Check digit: the 12-digit number modulo 11; a remainder of 10 becomes 0.
    check_digit = int(body) % 11 % 10
    return f"{body}{check_digit}"
def generate_certificate_number() -> str:
    """Build a random industrial-production certificate number.

    The result has the shape ``<prefix>-<year>-<serial>``: a prefix picked
    from a fixed list, a year between 2020 and 2025 inclusive, and a serial
    number zero-padded to five digits.
    """
    kind = random.choice(["ПП", "СПП", "ЗППП"])
    issued_in = random.randint(2020, 2025)
    serial = random.randint(1, 99999)
    return "-".join((kind, str(issued_in), str(serial).zfill(5)))
def generate_company_name() -> str:
    """Build a random, realistic-looking company name.

    The result is ``<legal form> "<industry><suffix>[-<city>]"``. The city
    part is appended only when a separate random roll exceeds 0.7.
    """
    legal_forms = ("ООО", "АО", "ПАО", "ЗАО", "ОАО")
    industries = (
        "Металлург",
        "Промтех",
        "Машстрой",
        "Агропром",
        "Нефтегаз",
        "Химпром",
        "Электроника",
        "Автоком",
        "Стройинвест",
        "Техносервис",
        "Приборостроение",
        "Энергомаш",
        "Станкопром",
        "Спецсталь",
        "Трубопрокат",
    )
    suffixes = ("", " Групп", " Холдинг", " Инвест", " Трейд", " Индустрия", " Про")
    cities = (
        "Москва",
        "Санкт-Петербург",
        "Новосибирск",
        "Екатеринбург",
        "Казань",
        "Челябинск",
    )

    legal_form = random.choice(legal_forms)
    industry = random.choice(industries)
    suffix = random.choice(suffixes)
    core = industry + suffix

    # The roll is made first; a city is drawn only when the roll succeeds.
    if random.random() > 0.7:
        core = f"{core}-{random.choice(cities)}"

    return f'{legal_form} "{core}"'
def generate_legal_address() -> str:
    """Build a random legal (registered) address.

    Comma-separated parts, in order: 6-digit postal code, region, city
    (omitted for entries whose city is empty), street with building number
    and, roughly half of the time, an office number.
    """
    localities = [
        ("г. Москва", ""),
        ("г. Санкт-Петербург", ""),
        ("Московская обл.", "г. Подольск"),
        ("Свердловская обл.", "г. Екатеринбург"),
        ("Республика Татарстан", "г. Казань"),
        ("Челябинская обл.", "г. Челябинск"),
        ("Новосибирская обл.", "г. Новосибирск"),
        ("Нижегородская обл.", "г. Нижний Новгород"),
    ]
    street_kinds = ("ул.", "пр-т", "пер.", "наб.", "ш.")
    street_titles = (
        "Ленина",
        "Мира",
        "Советская",
        "Промышленная",
        "Заводская",
        "Первомайская",
        "Октябрьская",
        "Гагарина",
        "Кирова",
        "Строителей",
    )

    region, city = random.choice(localities)
    street_kind = random.choice(street_kinds)
    street_title = random.choice(street_titles)
    building = random.randint(1, 150)
    # The roll is made first; an office number is drawn only on success.
    office = random.randint(1, 500) if random.random() > 0.5 else None
    postal = f"{random.randint(100, 199)}0{random.randint(10, 99)}"

    pieces = [postal, region, city, f"{street_kind} {street_title}, д. {building}"]
    if office is not None:
        pieces.append(f"оф. {office}")
    # Only the city can be empty; dropping empties skips it cleanly.
    return ", ".join(piece for piece in pieces if piece)
def generate_proxy_address() -> str:
    """Build a random proxy URL of the form ``<protocol>://<host>:<port>``.

    The host is either a dotted IPv4-style address or one of two templated
    domain names; protocol and port come from fixed lists.
    """
    # All three host candidates are generated up front; one is picked below.
    octets = [random.randint(1, 255) for _ in range(4)]
    host_candidates = (
        ".".join(map(str, octets)),
        f"proxy{random.randint(1, 50)}.example.com",
        f"ru{random.randint(1, 20)}.proxy-service.net",
    )

    scheme = random.choice(("http", "https", "socks5"))
    host = random.choice(host_candidates)
    port = random.choice((8080, 3128, 8888, 1080, 8000, 9050))
    return f"{scheme}://{host}:{port}"
# === Фабрики ===
class ProxyFactory(factory.django.DjangoModelFactory):
    """Factory for the ``Proxy`` model.

    Produces an active proxy with a zero failure counter, a randomly
    generated address and a description picked from a fixed list.
    """

    class Meta:
        model = Proxy

    address = factory.LazyFunction(generate_proxy_address)
    is_active = True
    fail_count = 0
    # The description does not depend on other fields, so a zero-argument
    # callable is enough.
    description = factory.LazyFunction(
        lambda: random.choice(
            (
                "Datacenter RU",
                "Residential RU",
                "Mobile RU",
                "Datacenter EU",
                "Premium proxy",
                "Backup proxy",
            )
        )
    )
class ParserLoadLogFactory(factory.django.DjangoModelFactory):
    """Factory for the ``ParserLoadLog`` model.

    Each instance gets the next 1-based ``batch_id``, a random source
    (industrial certificates or manufacturers), a random record count and a
    successful status with no error message.
    """

    class Meta:
        model = ParserLoadLog

    # factory_boy sequences start at 0; shift so batch ids start at 1.
    batch_id = factory.Sequence(lambda index: index + 1)
    source = factory.LazyFunction(
        lambda: random.choice(
            (
                ParserLoadLog.Source.INDUSTRIAL,
                ParserLoadLog.Source.MANUFACTURES,
            )
        )
    )
    records_count = factory.LazyFunction(lambda: random.randint(100, 5000))
    status = "success"
    error_message = ""
class IndustrialCertificateRecordFactory(factory.django.DjangoModelFactory):
    """Factory for the ``IndustrialCertificateRecord`` model.

    Dates are produced as ``DD.MM.YYYY`` strings: the issue date lies
    30-365 days in the past and the expiry date 180-730 days in the future,
    both relative to the moment the instance is built.
    """

    class Meta:
        model = IndustrialCertificateRecord

    # factory_boy sequences start at 0; shift so batch numbers start at 1.
    load_batch = factory.Sequence(lambda index: index + 1)
    issue_date = factory.LazyFunction(
        lambda: (
            timezone.now() - timedelta(days=random.randint(30, 365))
        ).strftime("%d.%m.%Y")
    )
    certificate_number = factory.LazyFunction(generate_certificate_number)
    expiry_date = factory.LazyFunction(
        lambda: (
            timezone.now() + timedelta(days=random.randint(180, 730))
        ).strftime("%d.%m.%Y")
    )
    # The URL is derived from the certificate number of the same instance,
    # with dashes replaced by underscores.
    certificate_file_url = factory.LazyAttribute(
        lambda record: "https://minpromtorg.gov.ru/docs/certificates/"
        + record.certificate_number.replace("-", "_")
        + ".pdf"
    )
    organisation_name = factory.LazyFunction(generate_company_name)
    inn = factory.LazyFunction(generate_inn_legal)
    ogrn = factory.LazyFunction(generate_ogrn)
class ManufacturerRecordFactory(factory.django.DjangoModelFactory):
    """Factory for the ``ManufacturerRecord`` model.

    Every field except ``load_batch`` is filled by one of the module's
    random generators at build time.
    """

    class Meta:
        model = ManufacturerRecord

    # factory_boy sequences start at 0; shift so batch numbers start at 1.
    load_batch = factory.Sequence(lambda index: index + 1)
    full_legal_name = factory.LazyFunction(generate_company_name)
    inn = factory.LazyFunction(generate_inn_legal)
    ogrn = factory.LazyFunction(generate_ogrn)
    address = factory.LazyFunction(generate_legal_address)
def generate_registration_number() -> str:
    """Build a random inspection registration number.

    The result is 12 digits: a 2-digit region code, a 4-digit year
    (2020-2025) and a serial number zero-padded to six digits, for example
    ``772020123456``.
    """
    region_code = random.choice(
        ["77", "78", "50", "52", "63", "16", "66", "74", "54", "61"]
    )
    year = random.randint(2020, 2025)
    serial = random.randint(1, 999999)
    return region_code + str(year) + str(serial).zfill(6)
def generate_control_authority() -> str:
    """Build a random name of a control (supervisory) authority.

    The agency name is always present. A unit-type prefix is put in front
    of it when a random roll exceeds 0.3, and a territorial suffix is
    appended when a second roll exceeds 0.4. Parts are joined by single
    spaces.
    """
    agencies = (
        "Роспотребнадзор",
        "Ростехнадзор",
        "Росприроднадзор",
        "МЧС России",
        "Роструд",
        "ФНС России",
        "ФАС России",
        "Россельхознадзор",
        "Роскомнадзор",
        "Росздравнадзор",
    )
    unit_kinds = (
        "Управление",
        "Территориальное управление",
        "Межрегиональное управление",
        "Отдел",
    )
    territories = (
        "по г. Москве",
        "по Санкт-Петербургу",
        "по Московской области",
        "по Свердловской области",
        "по Республике Татарстан",
        "по Челябинской области",
        "по Новосибирской области",
    )

    parts = [random.choice(agencies)]
    # Each optional part has its own roll, made before the matching draw.
    if random.random() > 0.3:
        parts.insert(0, random.choice(unit_kinds))
    if random.random() > 0.4:
        parts.append(random.choice(territories))
    return " ".join(parts)
class InspectionRecordFactory(factory.django.DjangoModelFactory):
    """Factory for the ``InspectionRecord`` model.

    Identifiers, names and the control authority come from the module's
    random generators. Dates are ``YYYY-MM-DD`` strings: the start date
    lies 1-180 days in the past and the end date 1-30 days in the future,
    both relative to build time. Categorical fields are drawn from fixed
    lists of Russian labels.
    """

    class Meta:
        model = InspectionRecord

    # factory_boy sequences start at 0; shift so batch numbers start at 1.
    load_batch = factory.Sequence(lambda index: index + 1)
    registration_number = factory.LazyFunction(generate_registration_number)
    inn = factory.LazyFunction(generate_inn_legal)
    ogrn = factory.LazyFunction(generate_ogrn)
    organisation_name = factory.LazyFunction(generate_company_name)
    control_authority = factory.LazyFunction(generate_control_authority)
    inspection_type = factory.LazyFunction(
        lambda: random.choice(("плановая", "внеплановая"))
    )
    inspection_form = factory.LazyFunction(
        lambda: random.choice(
            ("документарная", "выездная", "документарная и выездная")
        )
    )
    start_date = factory.LazyFunction(
        lambda: (
            timezone.now() - timedelta(days=random.randint(1, 180))
        ).strftime("%Y-%m-%d")
    )
    end_date = factory.LazyFunction(
        lambda: (
            timezone.now() + timedelta(days=random.randint(1, 30))
        ).strftime("%Y-%m-%d")
    )
    status = factory.LazyFunction(
        lambda: random.choice(("завершена", "в процессе", "запланирована"))
    )
    legal_basis = factory.LazyFunction(
        lambda: random.choice(("294-ФЗ", "248-ФЗ", "184-ФЗ"))
    )
    # The roll is made first: at or below 0.3 the result is empty, otherwise
    # it is drawn from a list that itself contains an empty option.
    result = factory.LazyFunction(
        lambda: ""
        if random.random() <= 0.3
        else random.choice(("нарушения не выявлены", "выявлены нарушения", ""))
    )