feat(parsers): add proverki.gov.ru parser with sync_inspections task
Some checks failed
CI/CD Pipeline / Code Quality Checks (push) Failing after 1m28s
CI/CD Pipeline / Build Docker Images (push) Has been cancelled
CI/CD Pipeline / Push to Gitea Registry (push) Has been cancelled
CI/CD Pipeline / Run Tests (push) Has been cancelled

- Add InspectionRecord model with is_federal_law_248, data_year, data_month fields
- Add ProverkiClient with Playwright support for JS-rendered portal
- Add streaming XML parser for large files (>50MB)
- Add sync_inspections task with incremental loading logic
  - Starts from 01.01.2025 if DB is empty
  - Loads both FZ-294 and FZ-248 inspections
  - Stops after 2 consecutive empty months
- Add InspectionService methods: get_last_loaded_period, has_data_for_period
- Add Minpromtorg parsers (certificates, manufacturers)
- Add Django Admin for parser models
- Update README with parsers documentation and changelog
This commit is contained in:
2026-01-21 20:16:25 +01:00
parent f121445313
commit 199d871923
45 changed files with 6810 additions and 97 deletions

387
src/apps/parsers/admin.py Normal file
View File

@@ -0,0 +1,387 @@
"""
Admin configuration for parsers app.
"""
from apps.parsers.models import (
IndustrialCertificateRecord,
InspectionRecord,
ManufacturerRecord,
ParserLoadLog,
Proxy,
)
from django.contrib import admin
from django.utils.html import format_html
@admin.register(Proxy)
class ProxyAdmin(admin.ModelAdmin):
    """Admin for proxy servers.

    Lists each proxy with its address, an activity badge and usage
    statistics, and offers bulk actions to activate or deactivate the
    selected proxies and to reset their failure counters.
    """

    list_display = [
        "address",
        "is_active_badge",
        "fail_count",
        "last_used_at",
        "created_at",
    ]
    list_filter = ["is_active", "created_at"]
    search_fields = ["address"]
    readonly_fields = ["created_at", "updated_at", "last_used_at"]
    ordering = ["-is_active", "-last_used_at"]
    list_per_page = 50
    fieldsets = (
        ("Основное", {"fields": ("address", "is_active")}),
        ("Статистика", {"fields": ("fail_count", "last_used_at")}),
        ("Даты", {"fields": ("created_at", "updated_at"), "classes": ("collapse",)}),
    )
    actions = ["activate_proxies", "deactivate_proxies", "reset_fail_count"]

    @admin.display(description="Статус", ordering="is_active")
    def is_active_badge(self, obj):
        """Return a coloured HTML badge reflecting ``obj.is_active``.

        The markup is handed to ``format_html`` as a template plus
        arguments. Calling ``format_html`` with a bare string and no
        arguments is deprecated since Django 5.0 and raises ``TypeError``
        from Django 6.0, so the colour and label are passed as arguments.
        """
        if obj.is_active:
            color, label = "#28a745", "Активен"
        else:
            color, label = "#dc3545", "Неактивен"
        return format_html(
            '<span style="color: white; background: {}; padding: 3px 10px; '
            'border-radius: 3px;">{}</span>',
            color,
            label,
        )

    @admin.action(description="Активировать выбранные прокси")
    def activate_proxies(self, request, queryset):
        """Mark every selected proxy as active and report how many changed."""
        updated = queryset.update(is_active=True)
        self.message_user(request, f"Активировано {updated} прокси")

    @admin.action(description="Деактивировать выбранные прокси")
    def deactivate_proxies(self, request, queryset):
        """Mark every selected proxy as inactive and report how many changed."""
        updated = queryset.update(is_active=False)
        self.message_user(request, f"Деактивировано {updated} прокси")

    @admin.action(description="Сбросить счётчик ошибок")
    def reset_fail_count(self, request, queryset):
        """Set ``fail_count`` to zero on the selected proxies and report the count."""
        updated = queryset.update(fail_count=0)
        self.message_user(request, f"Сброшен счётчик для {updated} прокси")
@admin.register(ParserLoadLog)
class ParserLoadLogAdmin(admin.ModelAdmin):
    """Admin for parser load logs.

    Entries cannot be added through the admin (see ``has_add_permission``).
    """

    list_display = [
        "id",
        "source",
        "batch_id",
        "status_badge",
        "records_count",
        "created_at",
    ]
    list_filter = ["source", "status", "created_at"]
    search_fields = ["batch_id", "error_message"]
    readonly_fields = ["created_at", "updated_at"]
    ordering = ["-created_at"]
    list_per_page = 50
    date_hierarchy = "created_at"
    fieldsets = (
        ("Основное", {"fields": ("source", "batch_id", "status")}),
        ("Результат", {"fields": ("records_count", "error_message")}),
        ("Даты", {"fields": ("created_at", "updated_at"), "classes": ("collapse",)}),
    )

    @admin.display(description="Статус", ordering="status")
    def status_badge(self, obj):
        """Return a coloured HTML badge for the log entry's status.

        Statuses missing from the palette are rendered with the grey
        fallback colour.
        """
        palette = {
            "success": "#28a745",
            "failed": "#dc3545",
            "in_progress": "#ffc107",
            "pending": "#6c757d",
        }
        background = palette.get(obj.status, "#6c757d")
        # Use the display label when the model provides one, else the raw value.
        if hasattr(obj, "get_status_display"):
            label = obj.get_status_display()
        else:
            label = obj.status
        return format_html(
            '<span style="color: white; background: {}; padding: 3px 10px; '
            'border-radius: 3px;">{}</span>',
            background,
            label,
        )

    def has_add_permission(self, request):
        """Disallow creating log entries by hand."""
        return False
class HasCertificateNumberFilter(admin.SimpleListFilter):
    """List filter on whether a record has a certificate number.

    A ``certificate_number`` equal to "-" or to the empty string is
    treated as "no number".
    """

    title = "Номер сертификата"
    parameter_name = "has_cert_number"

    def lookups(self, request, model_admin):
        """Return the two filter options as (value, label) pairs."""
        return [("yes", "С номером"), ("no", "Без номера")]

    def queryset(self, request, queryset):
        """Narrow ``queryset`` by the selected option; leave it as is otherwise."""
        placeholder_numbers = ["-", ""]
        selected = self.value()
        if selected == "yes":
            return queryset.exclude(certificate_number__in=placeholder_numbers)
        if selected == "no":
            return queryset.filter(certificate_number__in=placeholder_numbers)
        return queryset
@admin.register(IndustrialCertificateRecord)
class IndustrialCertificateRecordAdmin(admin.ModelAdmin):
    """Admin for industrial production certificates.

    Adding and changing records through the admin is disabled by the
    permission hooks below.
    """

    list_display = [
        "certificate_number",
        "organisation_name_short",
        "inn",
        "ogrn",
        "issue_date",
        "expiry_date",
        "load_batch",
    ]
    list_filter = [HasCertificateNumberFilter, "load_batch", "created_at"]
    search_fields = ["certificate_number", "organisation_name", "inn", "ogrn"]
    readonly_fields = ["created_at", "updated_at", "load_batch"]
    ordering = ["-created_at"]
    list_per_page = 100
    date_hierarchy = "created_at"
    raw_id_fields = []
    fieldsets = (
        ("Сертификат", {"fields": ("certificate_number", "issue_date", "expiry_date")}),
        ("Организация", {"fields": ("organisation_name", "inn", "ogrn")}),
        ("Документ", {"fields": ("certificate_file_url",), "classes": ("collapse",)}),
        (
            "Системное",
            {
                "fields": ("load_batch", "created_at", "updated_at"),
                "classes": ("collapse",),
            },
        ),
    )

    @admin.display(description="Организация", ordering="organisation_name")
    def organisation_name_short(self, obj):
        """Return the organisation name, cut to 60 characters plus "..." if longer."""
        full_name = obj.organisation_name or ""
        if len(full_name) > 60:
            return full_name[:60] + "..."
        return full_name

    def has_add_permission(self, request):
        """Disallow creating records by hand."""
        return False

    def has_change_permission(self, request, obj=None):
        """Disallow editing records."""
        return False
@admin.register(ManufacturerRecord)
class ManufacturerRecordAdmin(admin.ModelAdmin):
    """Admin for the manufacturers registry.

    Adding and changing records through the admin is disabled by the
    permission hooks below.
    """

    list_display = [
        "full_legal_name_short",
        "inn",
        "ogrn",
        "address_short",
        "load_batch",
        "created_at",
    ]
    list_filter = ["load_batch", "created_at"]
    search_fields = ["full_legal_name", "inn", "ogrn", "address"]
    readonly_fields = ["created_at", "updated_at", "load_batch"]
    ordering = ["-created_at"]
    list_per_page = 100
    date_hierarchy = "created_at"
    fieldsets = (
        ("Организация", {"fields": ("full_legal_name", "inn", "ogrn")}),
        ("Адрес", {"fields": ("address",)}),
        (
            "Системное",
            {
                "fields": ("load_batch", "created_at", "updated_at"),
                "classes": ("collapse",),
            },
        ),
    )

    @admin.display(description="Название", ordering="full_legal_name")
    def full_legal_name_short(self, obj):
        """Return the legal name, cut to 60 characters plus "..." if longer."""
        legal_name = obj.full_legal_name or ""
        if len(legal_name) > 60:
            return legal_name[:60] + "..."
        return legal_name

    @admin.display(description="Адрес", ordering="address")
    def address_short(self, obj):
        """Return the address, cut to 40 characters plus "..." if longer."""
        full_address = obj.address or ""
        if len(full_address) > 40:
            return full_address[:40] + "..."
        return full_address

    def has_add_permission(self, request):
        """Disallow creating records by hand."""
        return False

    def has_change_permission(self, request, obj=None):
        """Disallow editing records."""
        return False
@admin.register(InspectionRecord)
class InspectionRecordAdmin(admin.ModelAdmin):
    """Admin for inspections from the Unified Register of Inspections.

    Adding and changing records through the admin is disabled by the
    permission hooks below.
    """

    list_display = [
        "registration_number",
        "organisation_name_short",
        "inn",
        "control_authority_short",
        "inspection_type",
        "status_badge",
        "start_date",
        "load_batch",
    ]
    list_filter = [
        "inspection_type",
        "inspection_form",
        "status",
        "load_batch",
        "created_at",
    ]
    search_fields = [
        "registration_number",
        "organisation_name",
        "inn",
        "ogrn",
        "control_authority",
    ]
    readonly_fields = ["created_at", "updated_at", "load_batch"]
    ordering = ["-created_at"]
    list_per_page = 100
    date_hierarchy = "created_at"
    fieldsets = (
        (
            "Проверка",
            {
                "fields": (
                    "registration_number",
                    "inspection_type",
                    "inspection_form",
                    "status",
                )
            },
        ),
        (
            "Организация",
            {"fields": ("organisation_name", "inn", "ogrn")},
        ),
        (
            "Контрольный орган",
            {"fields": ("control_authority", "legal_basis")},
        ),
        (
            "Сроки и результат",
            {"fields": ("start_date", "end_date", "result")},
        ),
        (
            "Системное",
            {
                "fields": ("load_batch", "created_at", "updated_at"),
                "classes": ("collapse",),
            },
        ),
    )

    @admin.display(description="Организация", ordering="organisation_name")
    def organisation_name_short(self, obj):
        """Return the organisation name, cut to 50 characters plus "..." if longer."""
        name = obj.organisation_name or ""
        return name[:50] + "..." if len(name) > 50 else name

    @admin.display(description="Контр. орган", ordering="control_authority")
    def control_authority_short(self, obj):
        """Return the control authority name, cut to 30 characters plus "..." if longer."""
        name = obj.control_authority or ""
        return name[:30] + "..." if len(name) > 30 else name

    @admin.display(description="Статус", ordering="status")
    def status_badge(self, obj):
        """Return a coloured HTML badge for the inspection status.

        The colour is chosen by keyword stems found in the lower-cased
        status text; anything unrecognised (including an empty status) is
        grey. The label shows at most the first 20 characters of the status.
        """
        status = obj.status or ""
        # Fold "ё" to "е" so spellings such as "завершён" or "отменён" still
        # match the "е"-based stems below instead of falling through to grey.
        normalized = status.lower().replace("ё", "е")
        if "завершен" in normalized:
            color = "#28a745"
        elif "процесс" in normalized or "проведен" in normalized:
            color = "#ffc107"
        elif "отменен" in normalized or "прекращ" in normalized:
            color = "#dc3545"
        else:
            color = "#6c757d"
        return format_html(
            '<span style="color: white; background: {}; padding: 2px 8px; '
            'border-radius: 3px; font-size: 11px;">{}</span>',
            color,
            # Slicing already leaves shorter strings untouched.
            status[:20],
        )

    def has_add_permission(self, request):
        """Disallow creating records by hand."""
        return False

    def has_change_permission(self, request, obj=None):
        """Disallow editing records."""
        return False