feat(parsers): add proverki.gov.ru parser with sync_inspections task
Some checks failed
CI/CD Pipeline / Build Docker Images (push) Blocked by required conditions
CI/CD Pipeline / Push to Gitea Registry (push) Blocked by required conditions
CI/CD Pipeline / Code Quality Checks (push) Failing after 3m55s
CI/CD Pipeline / Run Tests (push) Failing after 3h11m38s
Some checks failed
CI/CD Pipeline / Build Docker Images (push) Blocked by required conditions
CI/CD Pipeline / Push to Gitea Registry (push) Blocked by required conditions
CI/CD Pipeline / Code Quality Checks (push) Failing after 3m55s
CI/CD Pipeline / Run Tests (push) Failing after 3h11m38s
- Add InspectionRecord model with is_federal_law_248, data_year, data_month fields
- Add ProverkiClient with Playwright support for JS-rendered portal
- Add streaming XML parser for large files (>50MB)
- Add sync_inspections task with incremental loading logic
  - Starts from 01.01.2025 if DB is empty
  - Loads both FZ-294 and FZ-248 inspections
  - Stops after 2 consecutive empty months
- Add InspectionService methods: get_last_loaded_period, has_data_for_period
- Add Minpromtorg parsers (certificates, manufacturers)
- Add Django Admin for parser models
- Update README with parsers documentation and changelog
This commit is contained in:
347
tests/apps/parsers/factories.py
Normal file
347
tests/apps/parsers/factories.py
Normal file
@@ -0,0 +1,347 @@
|
||||
"""Factories for parsers tests."""
|
||||
|
||||
import random
|
||||
from datetime import timedelta
|
||||
|
||||
from django.utils import timezone
|
||||
|
||||
import factory
|
||||
|
||||
from apps.parsers.models import (
|
||||
IndustrialCertificateRecord,
|
||||
InspectionRecord,
|
||||
ManufacturerRecord,
|
||||
ParserLoadLog,
|
||||
Proxy,
|
||||
)
|
||||
|
||||
# === Хелперы для генерации реалистичных данных ===
|
||||
|
||||
|
||||
def generate_inn_legal() -> str:
    """Generate a 10-digit INN (taxpayer number) of a legal entity.

    Layout is ``NNNNXXXXXC``:

    * ``NNNN``  - tax office code (2-digit region + 2-digit inspection);
    * ``XXXXX`` - serial number of the record;
    * ``C``     - control digit.

    The control digit is computed with the official rule for 10-digit INNs:
    the first nine digits are multiplied by the weights
    ``2, 4, 10, 3, 5, 9, 4, 6, 8``, the products are summed, and the result
    is taken modulo 11 and then modulo 10. Generated values therefore pass
    real INN checksum validation.

    Returns:
        A string of exactly 10 decimal digits with a valid control digit.
    """
    # Official weights for the nine leading digits of a legal-entity INN.
    weights = (2, 4, 10, 3, 5, 9, 4, 6, 8)

    region = random.choice(["77", "78", "50", "52", "63", "16", "66", "74", "54", "61"])
    inspection = f"{random.randint(1, 99):02d}"
    number = f"{random.randint(1, 99999):05d}"
    base = region + inspection + number

    # A remainder of 10 collapses to 0 via the trailing "% 10".
    weighted_sum = sum(weight * int(digit) for weight, digit in zip(weights, base))
    control = weighted_sum % 11 % 10
    return f"{base}{control}"
|
||||
|
||||
|
||||
def generate_ogrn() -> str:
    """Generate a 13-digit OGRN (state registration number) of a legal entity.

    Layout, left to right:

    * 1 digit  - entity sign (always ``1``, a legal entity);
    * 2 digits - registration year (``02``..``24``);
    * 2 digits - region code;
    * 2 digits - inspection code;
    * 5 digits - record number;
    * 1 digit  - control digit.

    The control digit is the 12-digit prefix modulo 11; a remainder of 10
    is written as 0 (hence the trailing ``% 10``).

    Returns:
        A string of exactly 13 decimal digits.
    """
    year_part = f"{random.randint(2, 24):02d}"
    region_part = random.choice(
        ["77", "78", "50", "52", "63", "16", "66", "74", "54", "61"]
    )
    inspection_part = f"{random.randint(1, 99):02d}"
    record_part = f"{random.randint(1, 99999):05d}"

    prefix = "".join(("1", year_part, region_part, inspection_part, record_part))
    check_digit = int(prefix) % 11 % 10
    return f"{prefix}{check_digit}"
|
||||
|
||||
|
||||
def generate_certificate_number() -> str:
    """Generate an industrial-production certificate number.

    The result has the shape ``<prefix>-<year>-<number>``: the prefix is
    one of a fixed set of series codes, the year lies in 2020..2025 and the
    number is zero-padded to five digits.

    Returns:
        The certificate number string.
    """
    series = random.choice(["ПП", "СПП", "ЗППП"])
    issue_year = random.randint(2020, 2025)
    serial = random.randint(1, 99999)
    return "-".join((series, str(issue_year), f"{serial:05d}"))
|
||||
|
||||
|
||||
def generate_company_name() -> str:
    """Generate a plausible company name.

    The name is a legal form followed by a quoted title, for example
    ``ООО "Промтех Групп"``. The title is an industry word plus an optional
    suffix; in roughly 30% of cases a city is appended after a hyphen.

    Returns:
        The company name in the form ``<legal form> "<title>"``.
    """
    legal_forms = ("ООО", "АО", "ПАО", "ЗАО", "ОАО")
    industries = (
        "Металлург",
        "Промтех",
        "Машстрой",
        "Агропром",
        "Нефтегаз",
        "Химпром",
        "Электроника",
        "Автоком",
        "Стройинвест",
        "Техносервис",
        "Приборостроение",
        "Энергомаш",
        "Станкопром",
        "Спецсталь",
        "Трубопрокат",
    )
    suffixes = ("", " Групп", " Холдинг", " Инвест", " Трейд", " Индустрия", " Про")
    cities = (
        "Москва",
        "Санкт-Петербург",
        "Новосибирск",
        "Екатеринбург",
        "Казань",
        "Челябинск",
    )

    legal_form = random.choice(legal_forms)
    title = random.choice(industries) + random.choice(suffixes)

    # The city is only drawn when the probability roll succeeds.
    if random.random() > 0.7:
        title = f"{title}-{random.choice(cities)}"

    return f'{legal_form} "{title}"'
|
||||
|
||||
|
||||
def generate_legal_address() -> str:
    """Generate a legal (registered) address string.

    The parts are joined with ``", "`` in this order: postal code, region,
    city (omitted when the region entry has none), street with building
    number, and - in about half of the cases - an office number.

    The postal code is six digits: a number in 100..199, a literal ``0`` and
    a number in 10..99.

    Returns:
        The formatted address.
    """
    regions = (
        ("г. Москва", ""),
        ("г. Санкт-Петербург", ""),
        ("Московская обл.", "г. Подольск"),
        ("Свердловская обл.", "г. Екатеринбург"),
        ("Республика Татарстан", "г. Казань"),
        ("Челябинская обл.", "г. Челябинск"),
        ("Новосибирская обл.", "г. Новосибирск"),
        ("Нижегородская обл.", "г. Нижний Новгород"),
    )
    street_types = ("ул.", "пр-т", "пер.", "наб.", "ш.")
    street_names = (
        "Ленина",
        "Мира",
        "Советская",
        "Промышленная",
        "Заводская",
        "Первомайская",
        "Октябрьская",
        "Гагарина",
        "Кирова",
        "Строителей",
    )

    region_name, city_name = random.choice(regions)
    street_line = f"{random.choice(street_types)} {random.choice(street_names)}"
    house_no = random.randint(1, 150)

    # The office number is only drawn when the coin flip succeeds.
    office_no = None
    if random.random() > 0.5:
        office_no = random.randint(1, 500)

    postal_code = f"{random.randint(100, 199)}0{random.randint(10, 99)}"

    components = [postal_code, region_name]
    if city_name:
        components.append(city_name)
    components.append(f"{street_line}, д. {house_no}")
    if office_no is not None:
        components.append(f"оф. {office_no}")

    return ", ".join(components)
|
||||
|
||||
|
||||
def generate_proxy_address() -> str:
    """Generate a proxy server address of the form ``scheme://host:port``.

    Three candidate hosts are prepared (a dotted IPv4-style address with
    octets in 1..255 and two generated domain names) and one of them is
    picked at random, together with a scheme and a port from fixed lists.

    Returns:
        The proxy URL string.
    """
    dotted_ip = ".".join(str(random.randint(1, 255)) for _ in range(4))
    candidate_hosts = (
        dotted_ip,
        f"proxy{random.randint(1, 50)}.example.com",
        f"ru{random.randint(1, 20)}.proxy-service.net",
    )

    scheme = random.choice(("http", "https", "socks5"))
    hostname = random.choice(candidate_hosts)
    port_no = random.choice((8080, 3128, 8888, 1080, 8000, 9050))

    return f"{scheme}://{hostname}:{port_no}"
|
||||
|
||||
|
||||
# === Фабрики ===
|
||||
|
||||
|
||||
class ProxyFactory(factory.django.DjangoModelFactory):
    """Factory producing ``Proxy`` model instances for tests."""

    class Meta:
        model = Proxy

    # A freshly generated ``scheme://host:port`` string for every instance.
    address = factory.LazyFunction(generate_proxy_address)
    is_active = True
    fail_count = 0
    # A random human-readable label chosen from a fixed list.
    description = factory.LazyFunction(
        lambda: random.choice(
            (
                "Datacenter RU",
                "Residential RU",
                "Mobile RU",
                "Datacenter EU",
                "Premium proxy",
                "Backup proxy",
            )
        )
    )
|
||||
|
||||
|
||||
class ParserLoadLogFactory(factory.django.DjangoModelFactory):
    """Factory producing ``ParserLoadLog`` model instances for tests."""

    class Meta:
        model = ParserLoadLog

    # Batch ids count up from 1 (the factory sequence itself starts at 0).
    batch_id = factory.Sequence(lambda index: index + 1)
    # One of the two sources listed below, picked at random.
    source = factory.LazyFunction(
        lambda: random.choice(
            (
                ParserLoadLog.Source.INDUSTRIAL,
                ParserLoadLog.Source.MANUFACTURES,
            )
        )
    )
    records_count = factory.LazyFunction(lambda: random.randint(100, 5000))
    status = "success"
    error_message = ""
|
||||
|
||||
|
||||
class IndustrialCertificateRecordFactory(factory.django.DjangoModelFactory):
    """Factory producing ``IndustrialCertificateRecord`` instances for tests."""

    class Meta:
        model = IndustrialCertificateRecord

    # Batch numbers count up from 1.
    load_batch = factory.Sequence(lambda index: index + 1)
    # Issue date: 30..365 days in the past, formatted as DD.MM.YYYY.
    issue_date = factory.LazyFunction(
        lambda: (timezone.now() - timedelta(days=random.randint(30, 365))).strftime(
            "%d.%m.%Y"
        )
    )
    certificate_number = factory.LazyFunction(generate_certificate_number)
    # Expiry date: 180..730 days in the future, formatted as DD.MM.YYYY.
    expiry_date = factory.LazyFunction(
        lambda: (timezone.now() + timedelta(days=random.randint(180, 730))).strftime(
            "%d.%m.%Y"
        )
    )
    # The file URL is derived from this instance's certificate number,
    # with hyphens replaced by underscores.
    certificate_file_url = factory.LazyAttribute(
        lambda record: "https://minpromtorg.gov.ru/docs/certificates/"
        + record.certificate_number.replace("-", "_")
        + ".pdf"
    )
    organisation_name = factory.LazyFunction(generate_company_name)
    inn = factory.LazyFunction(generate_inn_legal)
    ogrn = factory.LazyFunction(generate_ogrn)
|
||||
|
||||
|
||||
class ManufacturerRecordFactory(factory.django.DjangoModelFactory):
    """Factory producing ``ManufacturerRecord`` instances for tests."""

    class Meta:
        model = ManufacturerRecord

    # Batch numbers count up from 1.
    load_batch = factory.Sequence(lambda index: index + 1)
    # Each value below is generated independently by its helper, per instance.
    full_legal_name = factory.LazyFunction(generate_company_name)
    inn = factory.LazyFunction(generate_inn_legal)
    ogrn = factory.LazyFunction(generate_ogrn)
    address = factory.LazyFunction(generate_legal_address)
|
||||
|
||||
|
||||
def generate_registration_number() -> str:
    """Generate an inspection registration number.

    The result is 12 digits: a 2-digit region code, a 4-digit year in
    2020..2025 and a serial number zero-padded to six digits, for example
    ``772020123456``.

    Returns:
        The registration number string.
    """
    region_code = random.choice(
        ["77", "78", "50", "52", "63", "16", "66", "74", "54", "61"]
    )
    year_part = random.randint(2020, 2025)
    serial = random.randint(1, 999999)
    return "".join((region_code, str(year_part), f"{serial:06d}"))
|
||||
|
||||
|
||||
def generate_control_authority() -> str:
    """Generate the name of a control (supervisory) authority.

    The name always contains an authority; a unit prefix is prepended with
    probability of about 0.7 and a territorial qualifier is appended with
    probability of about 0.6. The parts present are joined with single
    spaces.

    Returns:
        The authority name.
    """
    authorities = (
        "Роспотребнадзор",
        "Ростехнадзор",
        "Росприроднадзор",
        "МЧС России",
        "Роструд",
        "ФНС России",
        "ФАС России",
        "Россельхознадзор",
        "Роскомнадзор",
        "Росздравнадзор",
    )
    prefixes = (
        "Управление",
        "Территориальное управление",
        "Межрегиональное управление",
        "Отдел",
    )
    regions = (
        "по г. Москве",
        "по Санкт-Петербургу",
        "по Московской области",
        "по Свердловской области",
        "по Республике Татарстан",
        "по Челябинской области",
        "по Новосибирской области",
    )

    body = random.choice(authorities)

    # Each optional part is drawn only when its probability roll succeeds.
    pieces = []
    if random.random() > 0.3:
        pieces.append(random.choice(prefixes))
    pieces.append(body)
    if random.random() > 0.4:
        pieces.append(random.choice(regions))

    return " ".join(pieces)
|
||||
|
||||
|
||||
class InspectionRecordFactory(factory.django.DjangoModelFactory):
    """Factory producing ``InspectionRecord`` instances for tests."""

    class Meta:
        model = InspectionRecord

    # Batch numbers count up from 1.
    load_batch = factory.Sequence(lambda index: index + 1)
    registration_number = factory.LazyFunction(generate_registration_number)
    inn = factory.LazyFunction(generate_inn_legal)
    ogrn = factory.LazyFunction(generate_ogrn)
    organisation_name = factory.LazyFunction(generate_company_name)
    control_authority = factory.LazyFunction(generate_control_authority)
    inspection_type = factory.LazyFunction(
        lambda: random.choice(("плановая", "внеплановая"))
    )
    inspection_form = factory.LazyFunction(
        lambda: random.choice(
            ("документарная", "выездная", "документарная и выездная")
        )
    )
    # Start date: 1..180 days in the past, formatted as YYYY-MM-DD.
    start_date = factory.LazyFunction(
        lambda: (timezone.now() - timedelta(days=random.randint(1, 180))).strftime(
            "%Y-%m-%d"
        )
    )
    # End date: 1..30 days in the future, formatted as YYYY-MM-DD.
    end_date = factory.LazyFunction(
        lambda: (timezone.now() + timedelta(days=random.randint(1, 30))).strftime(
            "%Y-%m-%d"
        )
    )
    status = factory.LazyFunction(
        lambda: random.choice(("завершена", "в процессе", "запланирована"))
    )
    legal_basis = factory.LazyFunction(
        lambda: random.choice(("294-ФЗ", "248-ФЗ", "184-ФЗ"))
    )
    # The probability roll is evaluated first; the choice (which itself may
    # yield an empty string) is only drawn when the roll exceeds 0.3.
    result = factory.LazyFunction(
        lambda: (
            random.choice(("нарушения не выявлены", "выявлены нарушения", ""))
            if random.random() > 0.3
            else ""
        )
    )
|
||||
Reference in New Issue
Block a user