feat(parsers): add proverki.gov.ru parser with sync_inspections task
Some checks failed
CI/CD Pipeline / Code Quality Checks (push) Failing after 1m28s
CI/CD Pipeline / Build Docker Images (push) Has been cancelled
CI/CD Pipeline / Push to Gitea Registry (push) Has been cancelled
CI/CD Pipeline / Run Tests (push) Has been cancelled

- Add InspectionRecord model with is_federal_law_248, data_year, data_month fields
- Add ProverkiClient with Playwright support for JS-rendered portal
- Add streaming XML parser for large files (>50MB)
- Add sync_inspections task with incremental loading logic
  - Starts from 01.01.2025 if DB is empty
  - Loads both FZ-294 and FZ-248 inspections
  - Stops after 2 consecutive empty months
- Add InspectionService methods: get_last_loaded_period, has_data_for_period
- Add Minpromtorg parsers (certificates, manufacturers)
- Add Django Admin for parser models
- Update README with parsers documentation and changelog
This commit is contained in:
2026-01-21 20:16:25 +01:00
parent f121445313
commit 199d871923
45 changed files with 6810 additions and 97 deletions

View File

@@ -16,5 +16,6 @@ jobs_urlpatterns = [
# Top-level route table: each prefix delegates to an included URL list.
urlpatterns = [
    # User-related endpoints, defined in apps.user.urls.
    path("users/", include("apps.user.urls")),
    # Parser endpoints, defined in apps.parsers.urls.
    path("parsers/", include("apps.parsers.urls")),
    # Job endpoints from the local jobs_urlpatterns list, included
    # together with the "jobs" label.
    path("jobs/", include((jobs_urlpatterns, "jobs"))),
]

View File

@@ -24,17 +24,19 @@ app.autodiscover_tasks()
# Configure Celery Beat schedule
app.conf.beat_schedule = {
"check-pending-scraping-jobs": {
"task": "apps.scraping.tasks.check_pending_jobs",
"schedule": 300.0, # Every 5 minutes
# Parse industrial production certificates once a day (interval schedule: every 24 h counted from beat start, not pinned to 03:00)
"parse-industrial-production-daily": {
"task": "apps.parsers.tasks.parse_industrial_production",
"schedule": 86400.0, # Every 24 hours
},
"process-extracted-data": {
"task": "apps.data_processor.tasks.process_extracted_data",
"schedule": 600.0, # Every 10 minutes
# Parse the manufacturers registry once a day (interval schedule: every 24 h counted from beat start, not pinned to 04:00)
"parse-manufactures-daily": {
"task": "apps.parsers.tasks.parse_manufactures",
"schedule": 86400.0, # Every 24 hours
},
}
app.conf.timezone = "UTC"
app.conf.timezone = "Europe/Moscow"
@app.task(bind=True)

View File

@@ -45,6 +45,7 @@ if isinstance(ALLOWED_HOSTS, str):
# Application definition
INSTALLED_APPS = [
"jazzmin", # Django Jazzmin - modern admin theme (must be before admin)
"django.contrib.admin",
"django.contrib.auth",
"django.contrib.contenttypes",
@@ -61,8 +62,106 @@ INSTALLED_APPS = [
# Local apps
"apps.core",
"apps.user",
"apps.parsers",
]
# Settings dictionary for the Jazzmin admin theme.
JAZZMIN_SETTINGS = {
    # --- Branding: titles, logos, greeting and footer text ---
    "site_title": "Mostovik Admin",
    "site_header": "Mostovik",
    "site_brand": "Mostovik",
    "site_logo": None,
    "login_logo": None,
    "login_logo_dark": None,
    "site_logo_classes": "img-circle",
    "site_icon": None,
    "welcome_sign": "Добро пожаловать в панель управления",
    "copyright": "Mostovik Backend",
    # --- Search: models listed for the admin search ---
    "search_model": [
        "user.User",
        "parsers.IndustrialCertificateRecord",
    ],
    # --- Top menu links ---
    "topmenu_links": [
        {
            "name": "Главная",
            "url": "admin:index",
            "permissions": ["auth.view_user"],
        },
        {
            "name": "API Docs",
            "url": "/api/docs/",
            "new_window": True,
        },
        {"model": "user.User"},
    ],
    # --- Side menu: visibility, hidden entries and app ordering ---
    "show_sidebar": True,
    "navigation_expanded": True,
    "hide_apps": ["django_celery_results"],
    "hide_models": [],
    "order_with_respect_to": [
        "user",
        "parsers",
        "core",
        "django_celery_beat",
    ],
    # --- Icons: Font Awesome classes keyed by app or app.Model ---
    "icons": {
        "auth": "fas fa-users-cog",
        "auth.Group": "fas fa-users",
        "user.User": "fas fa-user",
        "user.Profile": "fas fa-id-card",
        "parsers.Proxy": "fas fa-shield-alt",
        "parsers.ParserLoadLog": "fas fa-history",
        "parsers.IndustrialCertificateRecord": "fas fa-certificate",
        "parsers.ManufacturerRecord": "fas fa-industry",
        "core.BackgroundJob": "fas fa-tasks",
        "django_celery_beat.PeriodicTask": "fas fa-clock",
        "django_celery_beat.CrontabSchedule": "fas fa-calendar-alt",
        "django_celery_beat.IntervalSchedule": "fas fa-stopwatch",
        "django_celery_results.TaskResult": "fas fa-clipboard-check",
    },
    # Fallback icons for entries without an explicit mapping above.
    "default_icon_parents": "fas fa-chevron-circle-right",
    "default_icon_children": "fas fa-circle",
    # --- Related-object modal ---
    "related_modal_active": True,
    # --- UI tweaks: no custom assets, UI builder switched off ---
    "custom_css": None,
    "custom_js": None,
    "use_google_fonts_cdn": True,
    "show_ui_builder": False,
    # --- Change form layout: default format plus per-model overrides ---
    "changeform_format": "horizontal_tabs",
    "changeform_format_overrides": {
        "user.User": "collapsible",
        "parsers.IndustrialCertificateRecord": "vertical_tabs",
    },
}
# Look-and-feel options for the Jazzmin admin theme.
JAZZMIN_UI_TWEAKS = {
    # Text sizing toggles (all left at normal size).
    "navbar_small_text": False,
    "footer_small_text": False,
    "body_small_text": False,
    "brand_small_text": False,
    # Colour scheme for brand area, accents and the navbar.
    "brand_colour": "navbar-primary",
    "accent": "accent-primary",
    "navbar": "navbar-dark",
    "no_navbar_border": False,
    # Layout: which page regions are fixed.
    "navbar_fixed": True,
    "layout_boxed": False,
    "footer_fixed": False,
    "sidebar_fixed": True,
    # Sidebar appearance and navigation style flags.
    "sidebar": "sidebar-dark-primary",
    "sidebar_nav_small_text": False,
    "sidebar_disable_expand": False,
    "sidebar_nav_child_indent": False,
    "sidebar_nav_compact_style": False,
    "sidebar_nav_legacy_style": False,
    "sidebar_nav_flat_style": False,
    # Theme names for the regular and dark-mode variants.
    "theme": "default",
    "dark_mode_theme": "darkly",
    # CSS classes applied to each button role.
    "button_classes": {
        "primary": "btn-primary",
        "secondary": "btn-secondary",
        "info": "btn-info",
        "warning": "btn-warning",
        "danger": "btn-danger",
        "success": "btn-success",
    },
}
MIDDLEWARE = [
"apps.core.middleware.RequestIDMiddleware",
"corsheaders.middleware.CorsMiddleware",

View File

@@ -1,44 +1,49 @@
import os
from .base import *
# Development settings - unsuitable for production
# See https://docs.djangoproject.com/en/3.2/howto/deployment/checklist/
# SECURITY WARNING: keep the secret key used in production secret!
SECRET_KEY = "django-insecure-development-key-change-in-production"
SECRET_KEY = os.getenv(
"SECRET_KEY", "django-insecure-development-key-change-in-production"
)
# SECURITY WARNING: don't run with debug turned on in production!
DEBUG = True
DEBUG = os.getenv("DEBUG", "True").lower() in ("true", "1", "yes")
ALLOWED_HOSTS = ["localhost", "127.0.0.1", "0.0.0.0", "testserver"]
ALLOWED_HOSTS = ["localhost", "127.0.0.1", "0.0.0.0", "testserver", "*"] # noqa: S104
# Database for development
DATABASES = {
"default": {
"ENGINE": "django.db.backends.postgresql",
"NAME": "project_dev",
"USER": "postgres",
"PASSWORD": "postgres",
"HOST": "localhost",
"PORT": "5432",
"NAME": os.getenv("POSTGRES_DB", "project_dev"),
"USER": os.getenv("POSTGRES_USER", "postgres"),
"PASSWORD": os.getenv("POSTGRES_PASSWORD", "postgres"),
"HOST": os.getenv("POSTGRES_HOST", "localhost"),
"PORT": os.getenv("POSTGRES_PORT", "5432"),
}
}
# Celery Configuration for Development
CELERY_BROKER_URL = "redis://localhost:6379/0"
CELERY_RESULT_BACKEND = "redis://localhost:6379/0"
CELERY_BROKER_URL = os.getenv("CELERY_BROKER_URL", "redis://localhost:6379/0")
CELERY_RESULT_BACKEND = os.getenv("CELERY_RESULT_BACKEND", "redis://localhost:6379/0")
CELERY_ACCEPT_CONTENT = ["json"]
CELERY_TASK_SERIALIZER = "json"
CELERY_RESULT_SERIALIZER = "json"
CELERY_TIMEZONE = "UTC"
CELERY_TIMEZONE = "Europe/Moscow"
# Email backend for development
EMAIL_BACKEND = "django.core.mail.backends.console.EmailBackend"
# Cache configuration for development
REDIS_URL = os.getenv("REDIS_URL", "redis://localhost:6379/1")
CACHES = {
"default": {
"BACKEND": "django_redis.cache.RedisCache",
"LOCATION": "redis://127.0.0.1:6379/1",
"LOCATION": REDIS_URL,
"OPTIONS": {
"CLIENT_CLASS": "django_redis.client.DefaultClient",
},