Some checks failed
CI/CD Pipeline / Build Docker Images (push) Blocked by required conditions
CI/CD Pipeline / Push to Gitea Registry (push) Blocked by required conditions
CI/CD Pipeline / Code Quality Checks (push) Failing after 3m55s
CI/CD Pipeline / Run Tests (push) Failing after 3h11m38s
- Add InspectionRecord model with is_federal_law_248, data_year, data_month fields
- Add ProverkiClient with Playwright support for JS-rendered portal
- Add streaming XML parser for large files (>50MB)
- Add sync_inspections task with incremental loading logic
  - Starts from 01.01.2025 if DB is empty
  - Loads both FZ-294 and FZ-248 inspections
  - Stops after 2 consecutive empty months
- Add InspectionService methods: get_last_loaded_period, has_data_for_period
- Add Minpromtorg parsers (certificates, manufacturers)
- Add Django Admin for parser models
- Update README with parsers documentation and changelog
348 lines
12 KiB
Python
348 lines
12 KiB
Python
"""Factories for parsers tests."""
|
||
|
||
import random
|
||
from datetime import timedelta
|
||
|
||
from django.utils import timezone
|
||
|
||
import factory
|
||
|
||
from apps.parsers.models import (
|
||
IndustrialCertificateRecord,
|
||
InspectionRecord,
|
||
ManufacturerRecord,
|
||
ParserLoadLog,
|
||
Proxy,
|
||
)
|
||
|
||
# === Хелперы для генерации реалистичных данных ===
|
||
|
||
|
||
def generate_inn_legal() -> str:
    """Generate a 10-digit INN of a legal entity with a valid check digit.

    Layout is ``NNNNXXXXXC``:

    * ``NNNN``  - tax authority code (2-digit region + 2-digit inspection);
    * ``XXXXX`` - sequential record number;
    * ``C``     - check digit.

    Returns:
        A string of exactly 10 decimal digits whose last digit satisfies the
        official INN checksum, so the value passes real INN validators.
    """
    region = random.choice(["77", "78", "50", "52", "63", "16", "66", "74", "54", "61"])
    inspection = str(random.randint(1, 99)).zfill(2)
    number = str(random.randint(1, 99999)).zfill(5)
    base = region + inspection + number

    # Official check digit for a 10-digit INN: weighted sum of the first nine
    # digits, taken modulo 11 and then modulo 10 (a remainder of 10 maps to 0).
    weights = (2, 4, 10, 3, 5, 9, 4, 6, 8)
    control = sum(w * int(d) for w, d in zip(weights, base)) % 11 % 10
    return f"{base}{control}"
|
||
|
||
|
||
def generate_ogrn() -> str:
    """Generate a 13-digit OGRN of a legal entity.

    Layout is ``SYYRRIINNNNNC``:

    * ``S``     - entity sign, always ``1`` here;
    * ``YY``    - registration year (02..24);
    * ``RR``    - region code;
    * ``II``    - inspection code;
    * ``NNNNN`` - record number;
    * ``C``     - check digit.

    Returns:
        A string of 13 decimal digits.
    """
    reg_year = random.randint(2, 24)
    region_code = random.choice(
        ("77", "78", "50", "52", "63", "16", "66", "74", "54", "61")
    )
    office_code = random.randint(1, 99)
    record_no = random.randint(1, 99999)

    body = f"1{reg_year:02d}{region_code}{office_code:02d}{record_no:05d}"
    # Check digit: remainder of the 12-digit number divided by 11; a remainder
    # of 10 collapses to 0 via the final modulo 10.
    check_digit = int(body) % 11 % 10
    return f"{body}{check_digit}"
|
||
|
||
|
||
def generate_certificate_number() -> str:
    """Generate an industrial-production certificate number.

    Returns:
        A string shaped ``<prefix>-<year>-<5-digit serial>``, where the year is
        in 2020..2025 and the serial is zero-padded.
    """
    series = random.choice(("ПП", "СПП", "ЗППП"))
    issued_in = random.randint(2020, 2025)
    serial = random.randint(1, 99999)
    return "-".join((series, str(issued_in), str(serial).zfill(5)))
|
||
|
||
|
||
def generate_company_name() -> str:
    """Generate a plausible Russian company name.

    The name is a legal form followed by a quoted brand. The brand is an
    industry stem plus an optional suffix and, in roughly 30% of calls, a
    hyphenated city.

    Returns:
        A string such as ``ООО "Промтех Групп"``.
    """
    legal_forms = ("ООО", "АО", "ПАО", "ЗАО", "ОАО")
    stems = (
        "Металлург",
        "Промтех",
        "Машстрой",
        "Агропром",
        "Нефтегаз",
        "Химпром",
        "Электроника",
        "Автоком",
        "Стройинвест",
        "Техносервис",
        "Приборостроение",
        "Энергомаш",
        "Станкопром",
        "Спецсталь",
        "Трубопрокат",
    )
    tails = ("", " Групп", " Холдинг", " Инвест", " Трейд", " Индустрия", " Про")
    home_cities = (
        "Москва",
        "Санкт-Петербург",
        "Новосибирск",
        "Екатеринбург",
        "Казань",
        "Челябинск",
    )

    legal_form = random.choice(legal_forms)
    stem = random.choice(stems)
    tail = random.choice(tails)
    brand = stem + tail

    # A city is drawn only when the coin flip succeeds, so the RNG is consumed
    # in the same order on every path.
    if random.random() > 0.7:
        brand = f"{brand}-{random.choice(home_cities)}"

    return f'{legal_form} "{brand}"'
|
||
|
||
|
||
def generate_legal_address() -> str:
    """Generate a registered (legal) address.

    Returns:
        Comma-separated parts: 6-digit postal code, region, optional city,
        street with building number and, in about half of the calls, an
        office number.
    """
    localities = (
        ("г. Москва", ""),
        ("г. Санкт-Петербург", ""),
        ("Московская обл.", "г. Подольск"),
        ("Свердловская обл.", "г. Екатеринбург"),
        ("Республика Татарстан", "г. Казань"),
        ("Челябинская обл.", "г. Челябинск"),
        ("Новосибирская обл.", "г. Новосибирск"),
        ("Нижегородская обл.", "г. Нижний Новгород"),
    )
    street_kinds = ("ул.", "пр-т", "пер.", "наб.", "ш.")
    street_titles = (
        "Ленина",
        "Мира",
        "Советская",
        "Промышленная",
        "Заводская",
        "Первомайская",
        "Октябрьская",
        "Гагарина",
        "Кирова",
        "Строителей",
    )

    area, town = random.choice(localities)
    street_kind = random.choice(street_kinds)
    street_title = random.choice(street_titles)
    house_no = random.randint(1, 150)
    office_no = random.randint(1, 500) if random.random() > 0.5 else None
    zip_head = random.randint(100, 199)
    zip_tail = random.randint(10, 99)

    segments = (
        f"{zip_head}0{zip_tail}",
        area,
        town,  # empty for federal cities, dropped by the filter below
        f"{street_kind} {street_title}, д. {house_no}",
        f"оф. {office_no}" if office_no is not None else "",
    )
    return ", ".join(segment for segment in segments if segment)
|
||
|
||
|
||
def generate_proxy_address() -> str:
    """Generate a proxy server URL.

    Returns:
        A string shaped ``<protocol>://<host>:<port>``, where the host is
        either a dotted IPv4-style address or one of two fake host names.
    """
    # All three candidate hosts are generated on every call, before one of
    # them is picked.
    octets = [random.randint(1, 255) for _ in range(4)]
    candidates = (
        ".".join(map(str, octets)),
        f"proxy{random.randint(1, 50)}.example.com",
        f"ru{random.randint(1, 20)}.proxy-service.net",
    )

    scheme = random.choice(("http", "https", "socks5"))
    endpoint = random.choice(candidates)
    port_no = random.choice((8080, 3128, 8888, 1080, 8000, 9050))

    return f"{scheme}://{endpoint}:{port_no}"
|
||
|
||
|
||
# === Фабрики ===
|
||
|
||
|
||
class ProxyFactory(factory.django.DjangoModelFactory):
    """Test factory producing ``Proxy`` instances.

    Defaults: a random proxy URL, ``is_active=True``, ``fail_count=0`` and a
    description picked at random from a fixed list.
    """

    class Meta:
        model = Proxy

    address = factory.LazyFunction(generate_proxy_address)
    is_active = True
    fail_count = 0
    # Evaluated per instance so that each object gets its own random pick.
    description = factory.LazyFunction(
        lambda: random.choice(
            (
                "Datacenter RU",
                "Residential RU",
                "Mobile RU",
                "Datacenter EU",
                "Premium proxy",
                "Backup proxy",
            )
        )
    )
|
||
|
||
|
||
class ParserLoadLogFactory(factory.django.DjangoModelFactory):
    """Test factory producing ``ParserLoadLog`` instances.

    Defaults: a 1-based sequential ``batch_id``, a random source (industrial
    or manufactures), a random record count in 100..5000, status
    ``"success"`` and an empty error message.
    """

    class Meta:
        model = ParserLoadLog

    # factory_boy sequences start at 0; shift so that batch ids start at 1.
    batch_id = factory.Sequence(lambda index: index + 1)
    source = factory.LazyFunction(
        lambda: random.choice(
            (
                ParserLoadLog.Source.INDUSTRIAL,
                ParserLoadLog.Source.MANUFACTURES,
            )
        )
    )
    records_count = factory.LazyFunction(lambda: random.randint(100, 5000))
    status = "success"
    error_message = ""
|
||
|
||
|
||
class IndustrialCertificateRecordFactory(factory.django.DjangoModelFactory):
    """Test factory producing ``IndustrialCertificateRecord`` instances.

    Dates are produced as ``DD.MM.YYYY`` strings: the issue date lies 30..365
    days in the past and the expiry date 180..730 days in the future, both
    relative to the moment the instance is built.
    """

    class Meta:
        model = IndustrialCertificateRecord

    # factory_boy sequences start at 0; shift so that batch numbers start at 1.
    load_batch = factory.Sequence(lambda index: index + 1)
    issue_date = factory.LazyFunction(
        lambda: (timezone.now() - timedelta(days=random.randint(30, 365))).strftime(
            "%d.%m.%Y"
        )
    )
    certificate_number = factory.LazyFunction(generate_certificate_number)
    expiry_date = factory.LazyFunction(
        lambda: (timezone.now() + timedelta(days=random.randint(180, 730))).strftime(
            "%d.%m.%Y"
        )
    )
    # Derived from the already-resolved certificate number, with dashes
    # replaced by underscores to form the file name.
    certificate_file_url = factory.LazyAttribute(
        lambda record: f"https://minpromtorg.gov.ru/docs/certificates/"
        f"{record.certificate_number.replace('-', '_')}.pdf"
    )
    organisation_name = factory.LazyFunction(generate_company_name)
    inn = factory.LazyFunction(generate_inn_legal)
    ogrn = factory.LazyFunction(generate_ogrn)
|
||
|
||
|
||
class ManufacturerRecordFactory(factory.django.DjangoModelFactory):
    """Test factory producing ``ManufacturerRecord`` instances.

    Every text field is filled by the corresponding module-level generator;
    ``load_batch`` is a 1-based sequence.
    """

    class Meta:
        model = ManufacturerRecord

    # factory_boy sequences start at 0; shift so that batch numbers start at 1.
    load_batch = factory.Sequence(lambda index: index + 1)
    full_legal_name = factory.LazyFunction(generate_company_name)
    inn = factory.LazyFunction(generate_inn_legal)
    ogrn = factory.LazyFunction(generate_ogrn)
    address = factory.LazyFunction(generate_legal_address)
|
||
|
||
|
||
def generate_registration_number() -> str:
    """Generate an inspection registration number.

    Returns:
        A 12-digit string: 2-digit region code, 4-digit year (2020..2025) and
        a zero-padded 6-digit serial, e.g. ``772020123456``.
    """
    area_code = random.choice(
        ("77", "78", "50", "52", "63", "16", "66", "74", "54", "61")
    )
    reg_year = random.randint(2020, 2025)
    serial = random.randint(1, 999999)
    return area_code + str(reg_year) + str(serial).zfill(6)
|
||
|
||
|
||
def generate_control_authority() -> str:
    """Generate the name of a supervisory (control) authority.

    The authority name is always present. A department prefix is added in
    about 70% of calls and a territorial suffix in about 60% of calls.

    Returns:
        The non-empty parts joined by single spaces, in the order
        prefix, authority, region.
    """
    agencies = (
        "Роспотребнадзор",
        "Ростехнадзор",
        "Росприроднадзор",
        "МЧС России",
        "Роструд",
        "ФНС России",
        "ФАС России",
        "Россельхознадзор",
        "Роскомнадзор",
        "Росздравнадзор",
    )
    units = (
        "Управление",
        "Территориальное управление",
        "Межрегиональное управление",
        "Отдел",
    )
    territories = (
        "по г. Москве",
        "по Санкт-Петербургу",
        "по Московской области",
        "по Свердловской области",
        "по Республике Татарстан",
        "по Челябинской области",
        "по Новосибирской области",
    )

    agency = random.choice(agencies)
    unit = random.choice(units) if random.random() > 0.3 else ""
    territory = random.choice(territories) if random.random() > 0.4 else ""

    # Empty optional parts are skipped, so no stray spaces appear.
    return " ".join(piece for piece in (unit, agency, territory) if piece)
|
||
|
||
|
||
class InspectionRecordFactory(factory.django.DjangoModelFactory):
    """Test factory producing ``InspectionRecord`` instances.

    Dates are ``YYYY-MM-DD`` strings relative to the moment the instance is
    built: ``start_date`` lies 1..180 days in the past and ``end_date`` 1..30
    days in the future. Categorical fields are picked at random from fixed
    lists.
    """

    class Meta:
        model = InspectionRecord

    # factory_boy sequences start at 0; shift so that batch numbers start at 1.
    load_batch = factory.Sequence(lambda index: index + 1)
    registration_number = factory.LazyFunction(generate_registration_number)
    inn = factory.LazyFunction(generate_inn_legal)
    ogrn = factory.LazyFunction(generate_ogrn)
    organisation_name = factory.LazyFunction(generate_company_name)
    control_authority = factory.LazyFunction(generate_control_authority)
    inspection_type = factory.LazyFunction(
        lambda: random.choice(("плановая", "внеплановая"))
    )
    inspection_form = factory.LazyFunction(
        lambda: random.choice(("документарная", "выездная", "документарная и выездная"))
    )
    start_date = factory.LazyFunction(
        lambda: (timezone.now() - timedelta(days=random.randint(1, 180))).strftime(
            "%Y-%m-%d"
        )
    )
    # NOTE(review): end_date is always in the future, independent of the
    # randomly chosen status - confirm that tests do not rely on consistency.
    end_date = factory.LazyFunction(
        lambda: (timezone.now() + timedelta(days=random.randint(1, 30))).strftime(
            "%Y-%m-%d"
        )
    )
    status = factory.LazyFunction(
        lambda: random.choice(("завершена", "в процессе", "запланирована"))
    )
    legal_basis = factory.LazyFunction(
        lambda: random.choice(("294-ФЗ", "248-ФЗ", "184-ФЗ"))
    )
    # The coin flip is evaluated first; a result is drawn only when it
    # succeeds, and the drawn result may itself be empty.
    result = factory.LazyFunction(
        lambda: (
            random.choice(("нарушения не выявлены", "выявлены нарушения", ""))
            if random.random() > 0.3
            else ""
        )
    )
|