From c4b5b7f2c2c614417117c2cf06e9aeae5059608c Mon Sep 17 00:00:00 2001 From: Aleksandr Meshchriakov Date: Fri, 20 Mar 2026 11:43:12 +0100 Subject: [PATCH] fix(parsers): support proxy fallback from env for external sources --- .env.prod.example | 3 +++ src/apps/parsers/tasks.py | 48 +++++++++++++++++++++++---------------- src/settings/base.py | 4 +++- 3 files changed, 34 insertions(+), 21 deletions(-) diff --git a/.env.prod.example b/.env.prod.example index be3eecf..fd06fa3 100644 --- a/.env.prod.example +++ b/.env.prod.example @@ -25,6 +25,9 @@ CELERY_WORKER_CONCURRENCY=4 # Parsers API keys CHECKO_API_KEY=CHANGE_ME_CHECKO_API_KEY ZAKUPKI_TOKEN=CHANGE_ME_ZAKUPKI_TOKEN +# Optional: comma-separated HTTP(S) proxies for parser tasks +# Example: PARSER_PROXIES=http://user:pass@proxy1:8080,http://user:pass@proxy2:8080 +PARSER_PROXIES= # 1 to collect static files during migrate service, 0 to skip COLLECTSTATIC_ON_MIGRATE=1 diff --git a/src/apps/parsers/tasks.py b/src/apps/parsers/tasks.py index 39f7432..436d49b 100644 --- a/src/apps/parsers/tasks.py +++ b/src/apps/parsers/tasks.py @@ -32,6 +32,7 @@ from apps.parsers.services import ( ProxyService, ) from celery import shared_task +from django.conf import settings from requests.adapters import BaseAdapter logger = logging.getLogger(__name__) @@ -41,6 +42,26 @@ DEFAULT_START_YEAR = 2025 DEFAULT_START_MONTH = 1 +def _resolve_proxies(proxies: list[str] | None) -> list[str] | None: + """ + Разрешить итоговый список прокси. + + Приоритет: + 1. Явно переданные в задачу `proxies` + 2. Активные прокси из БД + 3. `settings.PARSER_PROXIES` (например, из ENV) + """ + if proxies is not None: + return proxies + + db_proxies = ProxyService.get_active_proxies_or_none() + if db_proxies: + return db_proxies + + configured_proxies = getattr(settings, "PARSER_PROXIES", []) or [] + return configured_proxies or None + + def _get_or_create_background_job( *, task_id: str, @@ -301,9 +322,7 @@ def parse_industrial_production( ) task_id = self.request.id or str(uuid.uuid4()) - # Если прокси не переданы, берём из БД - if proxies is None: - proxies = ProxyService.get_active_proxies_or_none() + proxies = _resolve_proxies(proxies) logger.info( "Starting industrial production parsing (task_id=%s, batch_id=%d, proxies=%d)", @@ -395,9 +414,7 @@ def parse_manufactures( ) task_id = self.request.id or str(uuid.uuid4()) - # Если прокси не переданы, берём из БД - if proxies is None: - proxies = ProxyService.get_active_proxies_or_none() + proxies = _resolve_proxies(proxies) logger.info( "Starting manufactures parsing (task_id=%s, batch_id=%d, proxies=%d)", @@ -488,8 +505,7 @@ def parse_industrial_products( ) task_id = self.request.id or str(uuid.uuid4()) - if proxies is None: - proxies = ProxyService.get_active_proxies_or_none() + proxies = _resolve_proxies(proxies) logger.info( "Starting industrial products parsing (task_id=%s, batch_id=%d, proxies=%d)", @@ -634,9 +650,7 @@ def parse_inspections( ) task_id = self.request.id or str(uuid.uuid4()) - # Если прокси не переданы, берём из БД - if proxies is None: - proxies = ProxyService.get_active_proxies_or_none() + proxies = _resolve_proxies(proxies) logger.info( "Starting inspections parsing (task_id=%s, batch_id=%d, year=%s, month=%s, proxies=%d)", @@ -828,9 +842,7 @@ def sync_inspections( # noqa: C901 ) task_id = self.request.id or str(uuid.uuid4()) - # Если прокси не переданы, берём из БД - if proxies is None: - proxies = ProxyService.get_active_proxies_or_none() + proxies = _resolve_proxies(proxies) logger.info( "Starting inspections sync (task_id=%s, batch_id=%d)", task_id, batch_id @@ -1039,9 +1051,7 @@ def parse_procurements( ) task_id = self.request.id or str(uuid.uuid4()) - # Если прокси не переданы, берём из БД - if proxies is None: - proxies = ProxyService.get_active_proxies_or_none() + proxies = _resolve_proxies(proxies) logger.info( "Starting procurements parsing " @@ -1174,9 +1184,7 @@ def sync_procurements( # noqa: C901 ) task_id = self.request.id or str(uuid.uuid4()) - # Если прокси не переданы, берём из БД - if proxies is None: - proxies = ProxyService.get_active_proxies_or_none() + proxies = _resolve_proxies(proxies) logger.info( "Starting procurements sync (task_id=%s, batch_id=%d, region=%s, law=%s-FZ)", diff --git a/src/settings/base.py b/src/settings/base.py index 7da16a3..a2cf6c9 100644 --- a/src/settings/base.py +++ b/src/settings/base.py @@ -197,7 +197,9 @@ WSGI_APPLICATION = "core.wsgi.application" ZAKUPKI_TOKEN = os.getenv("ZAKUPKI_TOKEN", "") FNS_LOCK_TTL_SECONDS = 3600 -PARSER_PROXIES = [] +PARSER_PROXIES = [ + item.strip() for item in os.getenv("PARSER_PROXIES", "").split(",") if item.strip() +] BACKUP_ENCRYPTION_KEY = os.getenv("BACKUP_ENCRYPTION_KEY", "") BACKUP_KEY_ID = os.getenv("BACKUP_KEY_ID", "default") BACKUP_EXPORT_DIRECTORY = os.getenv(