feat(parsers): add proverki.gov.ru parser with sync_inspections task
- Add InspectionRecord model with is_federal_law_248, data_year, data_month fields
- Add ProverkiClient with Playwright support for JS-rendered portal
- Add streaming XML parser for large files (>50MB)
- Add sync_inspections task with incremental loading logic
  - Starts from 01.01.2025 if DB is empty
  - Loads both FZ-294 and FZ-248 inspections
  - Stops after 2 consecutive empty months
- Add InspectionService methods: get_last_loaded_period, has_data_for_period
- Add Minpromtorg parsers (certificates, manufacturers)
- Add Django Admin for parser models
- Update README with parsers documentation and changelog
This commit is contained in:
@@ -16,5 +16,6 @@ jobs_urlpatterns = [
|
||||
|
||||
urlpatterns = [
|
||||
path("users/", include("apps.user.urls")),
|
||||
path("parsers/", include("apps.parsers.urls")),
|
||||
path("jobs/", include((jobs_urlpatterns, "jobs"))),
|
||||
]
|
||||
|
||||
@@ -24,17 +24,19 @@ app.autodiscover_tasks()
|
||||
|
||||
# Configure Celery Beat schedule
|
||||
app.conf.beat_schedule = {
|
||||
"check-pending-scraping-jobs": {
|
||||
"task": "apps.scraping.tasks.check_pending_jobs",
|
||||
"schedule": 300.0, # Every 5 minutes
|
||||
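# Parse industrial production certificates - intended daily at 03:00. NOTE(review): the 86400.0 schedule below is a 24-hour interval, not a fixed 03:00 run; use a crontab schedule to pin the time.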
|
||||
"parse-industrial-production-daily": {
|
||||
"task": "apps.parsers.tasks.parse_industrial_production",
|
||||
"schedule": 86400.0, # Every 24 hours
|
||||
},
|
||||
"process-extracted-data": {
|
||||
"task": "apps.data_processor.tasks.process_extracted_data",
|
||||
"schedule": 600.0, # Every 10 minutes
|
||||
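# Parse the manufacturers registry - intended daily at 04:00. NOTE(review): the 86400.0 schedule below is a 24-hour interval, not a fixed 04:00 run; use a crontab schedule to pin the time.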
|
||||
"parse-manufactures-daily": {
|
||||
"task": "apps.parsers.tasks.parse_manufactures",
|
||||
"schedule": 86400.0, # Every 24 hours
|
||||
},
|
||||
}
|
||||
|
||||
app.conf.timezone = "UTC"
|
||||
app.conf.timezone = "Europe/Moscow"
|
||||
|
||||
|
||||
@app.task(bind=True)
|
||||
|
||||
@@ -45,6 +45,7 @@ if isinstance(ALLOWED_HOSTS, str):
|
||||
|
||||
# Application definition
|
||||
INSTALLED_APPS = [
|
||||
"jazzmin", # Django Jazzmin - modern admin theme (must be before admin)
|
||||
"django.contrib.admin",
|
||||
"django.contrib.auth",
|
||||
"django.contrib.contenttypes",
|
||||
@@ -61,8 +62,106 @@ INSTALLED_APPS = [
|
||||
# Local apps
|
||||
"apps.core",
|
||||
"apps.user",
|
||||
"apps.parsers",
|
||||
]
|
||||
|
||||
# Jazzmin Admin Configuration
|
||||
JAZZMIN_SETTINGS = {
|
||||
# Title
|
||||
"site_title": "Mostovik Admin",
|
||||
"site_header": "Mostovik",
|
||||
"site_brand": "Mostovik",
|
||||
"site_logo": None,
|
||||
"login_logo": None,
|
||||
"login_logo_dark": None,
|
||||
"site_logo_classes": "img-circle",
|
||||
"site_icon": None,
|
||||
"welcome_sign": "Добро пожаловать в панель управления",
|
||||
"copyright": "Mostovik Backend",
|
||||
# Search
|
||||
"search_model": ["user.User", "parsers.IndustrialCertificateRecord"],
|
||||
# User menu
|
||||
"topmenu_links": [
|
||||
{"name": "Главная", "url": "admin:index", "permissions": ["auth.view_user"]},
|
||||
{"name": "API Docs", "url": "/api/docs/", "new_window": True},
|
||||
{"model": "user.User"},
|
||||
],
|
||||
# Side menu
|
||||
"show_sidebar": True,
|
||||
"navigation_expanded": True,
|
||||
"hide_apps": ["django_celery_results"],
|
||||
"hide_models": [],
|
||||
"order_with_respect_to": [
|
||||
"user",
|
||||
"parsers",
|
||||
"core",
|
||||
"django_celery_beat",
|
||||
],
|
||||
# Icons (Font Awesome)
|
||||
"icons": {
|
||||
"auth": "fas fa-users-cog",
|
||||
"auth.Group": "fas fa-users",
|
||||
"user.User": "fas fa-user",
|
||||
"user.Profile": "fas fa-id-card",
|
||||
"parsers.Proxy": "fas fa-shield-alt",
|
||||
"parsers.ParserLoadLog": "fas fa-history",
|
||||
"parsers.IndustrialCertificateRecord": "fas fa-certificate",
|
||||
"parsers.ManufacturerRecord": "fas fa-industry",
|
||||
"core.BackgroundJob": "fas fa-tasks",
|
||||
"django_celery_beat.PeriodicTask": "fas fa-clock",
|
||||
"django_celery_beat.CrontabSchedule": "fas fa-calendar-alt",
|
||||
"django_celery_beat.IntervalSchedule": "fas fa-stopwatch",
|
||||
"django_celery_results.TaskResult": "fas fa-clipboard-check",
|
||||
},
|
||||
"default_icon_parents": "fas fa-chevron-circle-right",
|
||||
"default_icon_children": "fas fa-circle",
|
||||
# Related modal
|
||||
"related_modal_active": True,
|
||||
# UI Tweaks
|
||||
"custom_css": None,
|
||||
"custom_js": None,
|
||||
"use_google_fonts_cdn": True,
|
||||
"show_ui_builder": False,
|
||||
# Change view
|
||||
"changeform_format": "horizontal_tabs",
|
||||
"changeform_format_overrides": {
|
||||
"user.User": "collapsible",
|
||||
"parsers.IndustrialCertificateRecord": "vertical_tabs",
|
||||
},
|
||||
}
|
||||
|
||||
JAZZMIN_UI_TWEAKS = {
|
||||
"navbar_small_text": False,
|
||||
"footer_small_text": False,
|
||||
"body_small_text": False,
|
||||
"brand_small_text": False,
|
||||
"brand_colour": "navbar-primary",
|
||||
"accent": "accent-primary",
|
||||
"navbar": "navbar-dark",
|
||||
"no_navbar_border": False,
|
||||
"navbar_fixed": True,
|
||||
"layout_boxed": False,
|
||||
"footer_fixed": False,
|
||||
"sidebar_fixed": True,
|
||||
"sidebar": "sidebar-dark-primary",
|
||||
"sidebar_nav_small_text": False,
|
||||
"sidebar_disable_expand": False,
|
||||
"sidebar_nav_child_indent": False,
|
||||
"sidebar_nav_compact_style": False,
|
||||
"sidebar_nav_legacy_style": False,
|
||||
"sidebar_nav_flat_style": False,
|
||||
"theme": "default",
|
||||
"dark_mode_theme": "darkly",
|
||||
"button_classes": {
|
||||
"primary": "btn-primary",
|
||||
"secondary": "btn-secondary",
|
||||
"info": "btn-info",
|
||||
"warning": "btn-warning",
|
||||
"danger": "btn-danger",
|
||||
"success": "btn-success",
|
||||
},
|
||||
}
|
||||
|
||||
MIDDLEWARE = [
|
||||
"apps.core.middleware.RequestIDMiddleware",
|
||||
"corsheaders.middleware.CorsMiddleware",
|
||||
|
||||
@@ -1,44 +1,49 @@
|
||||
import os
|
||||
|
||||
from .base import *
|
||||
|
||||
# Development settings - unsuitable for production
|
||||
# See https://docs.djangoproject.com/en/3.2/howto/deployment/checklist/
|
||||
|
||||
# SECURITY WARNING: keep the secret key used in production secret!
|
||||
SECRET_KEY = "django-insecure-development-key-change-in-production"
|
||||
SECRET_KEY = os.getenv(
|
||||
"SECRET_KEY", "django-insecure-development-key-change-in-production"
|
||||
)
|
||||
|
||||
# SECURITY WARNING: don't run with debug turned on in production!
|
||||
DEBUG = True
|
||||
DEBUG = os.getenv("DEBUG", "True").lower() in ("true", "1", "yes")
|
||||
|
||||
ALLOWED_HOSTS = ["localhost", "127.0.0.1", "0.0.0.0", "testserver"]
|
||||
ALLOWED_HOSTS = ["localhost", "127.0.0.1", "0.0.0.0", "testserver", "*"] # noqa: S104
|
||||
|
||||
# Database for development
|
||||
DATABASES = {
|
||||
"default": {
|
||||
"ENGINE": "django.db.backends.postgresql",
|
||||
"NAME": "project_dev",
|
||||
"USER": "postgres",
|
||||
"PASSWORD": "postgres",
|
||||
"HOST": "localhost",
|
||||
"PORT": "5432",
|
||||
"NAME": os.getenv("POSTGRES_DB", "project_dev"),
|
||||
"USER": os.getenv("POSTGRES_USER", "postgres"),
|
||||
"PASSWORD": os.getenv("POSTGRES_PASSWORD", "postgres"),
|
||||
"HOST": os.getenv("POSTGRES_HOST", "localhost"),
|
||||
"PORT": os.getenv("POSTGRES_PORT", "5432"),
|
||||
}
|
||||
}
|
||||
|
||||
# Celery Configuration for Development
|
||||
CELERY_BROKER_URL = "redis://localhost:6379/0"
|
||||
CELERY_RESULT_BACKEND = "redis://localhost:6379/0"
|
||||
CELERY_BROKER_URL = os.getenv("CELERY_BROKER_URL", "redis://localhost:6379/0")
|
||||
CELERY_RESULT_BACKEND = os.getenv("CELERY_RESULT_BACKEND", "redis://localhost:6379/0")
|
||||
CELERY_ACCEPT_CONTENT = ["json"]
|
||||
CELERY_TASK_SERIALIZER = "json"
|
||||
CELERY_RESULT_SERIALIZER = "json"
|
||||
CELERY_TIMEZONE = "UTC"
|
||||
CELERY_TIMEZONE = "Europe/Moscow"
|
||||
|
||||
# Email backend for development
|
||||
EMAIL_BACKEND = "django.core.mail.backends.console.EmailBackend"
|
||||
|
||||
# Cache configuration for development
|
||||
REDIS_URL = os.getenv("REDIS_URL", "redis://localhost:6379/1")
|
||||
CACHES = {
|
||||
"default": {
|
||||
"BACKEND": "django_redis.cache.RedisCache",
|
||||
"LOCATION": "redis://127.0.0.1:6379/1",
|
||||
"LOCATION": REDIS_URL,
|
||||
"OPTIONS": {
|
||||
"CLIENT_CLASS": "django_redis.client.DefaultClient",
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user