From 7d4c54636b87d19f5c0a822d4730fb0cce52e310 Mon Sep 17 00:00:00 2001 From: Aleksandr Meshchriakov Date: Mon, 23 Mar 2026 10:47:34 +0100 Subject: [PATCH] feat(parsers): sync RU proxies from proxy-tools --- src/apps/parsers/admin.py | 11 +- src/apps/parsers/clients/proxy_tools.py | 83 ++++ .../migrations/0015_add_proxy_metadata.py | 35 ++ src/apps/parsers/models.py | 14 + src/apps/parsers/serializers.py | 2 + src/apps/parsers/services.py | 368 +++++++++++++++++- src/apps/parsers/tasks.py | 12 +- src/core/celery.py | 4 + src/settings/base.py | 10 + tests/apps/core/test_celery_module.py | 1 + tests/apps/parsers/factories.py | 2 + tests/apps/parsers/test_services.py | 125 +++++- tests/apps/parsers/test_tasks.py | 53 +++ 13 files changed, 705 insertions(+), 15 deletions(-) create mode 100644 src/apps/parsers/clients/proxy_tools.py create mode 100644 src/apps/parsers/migrations/0015_add_proxy_metadata.py diff --git a/src/apps/parsers/admin.py b/src/apps/parsers/admin.py index 5e3523b..d96d04f 100644 --- a/src/apps/parsers/admin.py +++ b/src/apps/parsers/admin.py @@ -28,19 +28,24 @@ class ProxyAdmin(admin.ModelAdmin): list_display = [ "address", + "country_code", + "source", "is_active_badge", "fail_count", "last_used_at", "created_at", ] - list_filter = ["is_active", "created_at"] - search_fields = ["address"] + list_filter = ["is_active", "country_code", "source", "created_at"] + search_fields = ["address", "country_code", "source", "description"] readonly_fields = ["created_at", "updated_at", "last_used_at"] ordering = ["-is_active", "-last_used_at"] list_per_page = 50 fieldsets = ( - ("Основное", {"fields": ("address", "is_active")}), + ( + "Основное", + {"fields": ("address", "country_code", "source", "description", "is_active")}, + ), ("Статистика", {"fields": ("fail_count", "last_used_at")}), ("Даты", {"fields": ("created_at", "updated_at"), "classes": ("collapse",)}), ) diff --git a/src/apps/parsers/clients/proxy_tools.py b/src/apps/parsers/clients/proxy_tools.py new file mode 100644 index 0000000..97b58b0 --- /dev/null +++ b/src/apps/parsers/clients/proxy_tools.py @@ -0,0 +1,83 @@ +""" +Клиент Proxy-Tools JSON API. + +Документация: +https://proxy-tools.com/pages/proxy-api +""" + +from __future__ import annotations + +import logging +from dataclasses import dataclass, field +from typing import Any + +from apps.parsers.clients.base import BaseHTTPClient, HTTPClientError +from requests.adapters import BaseAdapter + +logger = logging.getLogger(__name__) + +DEFAULT_API_URL = "https://proxy-tools.com/api/v1/proxies" + + +class ProxyToolsClientError(HTTPClientError): + """Ошибка клиента Proxy-Tools.""" + + +@dataclass +class ProxyToolsClient: + """ + Клиент для загрузки списка прокси из Proxy-Tools. + + Использует Bearer token и возвращает сырой JSON payload, потому что + внешний сервис документирует фильтры, но не фиксирует shape ответа. + """ + + api_key: str + api_url: str = DEFAULT_API_URL + timeout: int = 30 + http_adapter: BaseAdapter | None = None + _http_client: BaseHTTPClient | None = field(default=None, repr=False) + + @property + def http_client(self) -> BaseHTTPClient: + """Ленивая инициализация HTTP клиента.""" + if self._http_client is None: + self._http_client = BaseHTTPClient( + base_url="https://proxy-tools.com", + timeout=self.timeout, + adapter=self.http_adapter, + headers={ + "Accept": "application/json", + "Authorization": f"Bearer {self.api_key}", + }, + ) + return self._http_client + + def fetch_proxies( + self, + *, + country_code: str, + page: int = 1, + limit: int = 100, + ) -> Any: + """Получить страницу прокси по коду страны.""" + params = { + "geo": country_code.lower(), + "page": str(page), + "limit": str(limit), + } + logger.info( + "Fetching proxies from Proxy-Tools (country=%s, page=%s, limit=%s)", + country_code, + page, + limit, + ) + try: + response = self.http_client.get(self.api_url, params=params) + return response.json() + except HTTPClientError: + raise + except Exception as exc: # noqa: BLE001 + raise ProxyToolsClientError( + f"Failed to fetch proxies from Proxy-Tools: {exc}" + ) from exc diff --git a/src/apps/parsers/migrations/0015_add_proxy_metadata.py b/src/apps/parsers/migrations/0015_add_proxy_metadata.py new file mode 100644 index 0000000..883dba6 --- /dev/null +++ b/src/apps/parsers/migrations/0015_add_proxy_metadata.py @@ -0,0 +1,35 @@ +# Generated by Django 3.2.25 on 2026-03-23 10:30 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("parsers", "0014_parsingsettings"), + ] + + operations = [ + migrations.AddField( + model_name="proxy", + name="country_code", + field=models.CharField( + db_index=True, + default="RU", + help_text="ISO-3166 код страны прокси, например RU", + max_length=2, + verbose_name="код страны", + ), + ), + migrations.AddField( + model_name="proxy", + name="source", + field=models.CharField( + db_index=True, + default="manual", + help_text="Источник прокси (например: manual, proxy-tools)", + max_length=50, + verbose_name="источник", + ), + ), + ] diff --git a/src/apps/parsers/models.py b/src/apps/parsers/models.py index d14f01f..1a20815 100644 --- a/src/apps/parsers/models.py +++ b/src/apps/parsers/models.py @@ -351,6 +351,20 @@ class Proxy(TimestampMixin, models.Model): blank=True, help_text=_("Описание прокси (провайдер, локация и т.д.)"), ) + source = models.CharField( + _("источник"), + max_length=50, + default="manual", + db_index=True, + help_text=_("Источник прокси (например: manual, proxy-tools)"), + ) + country_code = models.CharField( + _("код страны"), + max_length=2, + default="RU", + db_index=True, + help_text=_("ISO-3166 код страны прокси, например RU"), + ) class Meta: db_table = "parsers_proxy" diff --git a/src/apps/parsers/serializers.py b/src/apps/parsers/serializers.py index e27fd5f..afe2d90 100644 --- a/src/apps/parsers/serializers.py +++ b/src/apps/parsers/serializers.py @@ -443,6 +443,8 @@ class ProxySerializer(serializers.ModelSerializer): "id", "address", "is_active", + "country_code", + "source", "last_used_at", "fail_count", "description", diff --git a/src/apps/parsers/services.py b/src/apps/parsers/services.py index 6f689b5..8c3992b 100644 --- a/src/apps/parsers/services.py +++ b/src/apps/parsers/services.py @@ -11,6 +11,8 @@ from contextlib import suppress from dataclasses import dataclass from datetime import date, datetime from decimal import Decimal, InvalidOperation +from typing import Any +from urllib.parse import urlparse from apps.core.services import BaseService, BulkOperationsMixin from apps.parsers.clients.minpromtorg.schemas import ( @@ -19,6 +21,7 @@ from apps.parsers.clients.minpromtorg.schemas import ( Manufacturer, ) from apps.parsers.clients.proverki.schemas import Inspection +from apps.parsers.clients.proxy_tools import ProxyToolsClient, ProxyToolsClientError from apps.parsers.clients.zakupki.schemas import Procurement from apps.parsers.models import ( FinancialReport, @@ -32,6 +35,7 @@ from apps.parsers.models import ( Proxy, ) from apps.registers.models import Organization +from django.conf import settings from django.db import IntegrityError, transaction from django.db.models import Q from django.utils import timezone @@ -639,29 +643,68 @@ class ProxyService(BaseService[Proxy]): """ model = Proxy + RUNTIME_COUNTRY_CODE = "RU" + MANUAL_SOURCE = "manual" + PROXY_TOOLS_SOURCE = "proxy-tools" @classmethod - def get_active_proxies(cls) -> list[str]: + def get_active_proxies( + cls, + *, + country_code: str | None = None, + source: str | None = None, + ) -> list[str]: """ Получить список адресов активных прокси. Returns: Список адресов прокси (может быть пустым) """ - proxies = cls.model.objects.filter(is_active=True).values_list( - "address", flat=True - ) + proxies = cls.model.objects.filter(is_active=True) + if country_code: + proxies = proxies.filter(country_code=country_code.upper()) + if source: + proxies = proxies.filter(source=source) + proxies = proxies.values_list("address", flat=True) return list(proxies) @classmethod - def get_active_proxies_or_none(cls) -> list[str] | None: + def get_active_proxies_or_none( + cls, + *, + country_code: str | None = None, + source: str | None = None, + ) -> list[str] | None: """ Получить список активных прокси или None, если их нет. Returns: Список адресов прокси или None """ - proxies = cls.get_active_proxies() + proxies = cls.get_active_proxies(country_code=country_code, source=source) + return proxies if proxies else None + + @classmethod + def get_runtime_proxies(cls) -> list[str]: + """ + Получить прокси для рантайма парсеров. + + Приоритет: + 1. RU прокси, загруженные из Proxy-Tools + 2. Любые активные RU прокси + """ + proxies = cls.get_active_proxies( + country_code=cls.RUNTIME_COUNTRY_CODE, + source=cls.PROXY_TOOLS_SOURCE, + ) + if proxies: + return proxies + return cls.get_active_proxies(country_code=cls.RUNTIME_COUNTRY_CODE) + + @classmethod + def get_runtime_proxies_or_none(cls) -> list[str] | None: + """Получить runtime-прокси или None, если их нет.""" + proxies = cls.get_runtime_proxies() return proxies if proxies else None @classmethod @@ -698,31 +741,53 @@ class ProxyService(BaseService[Proxy]): @classmethod @transaction.atomic - def add_proxy(cls, address: str, description: str = "") -> Proxy: + def add_proxy( + cls, + address: str, + description: str = "", + *, + source: str = MANUAL_SOURCE, + country_code: str = RUNTIME_COUNTRY_CODE, + ) -> Proxy: """ Добавить новый прокси. Args: address: Адрес прокси (например: http://proxy:8080) description: Описание прокси + source: Источник прокси + country_code: ISO-код страны Returns: Созданный объект Proxy """ proxy, _ = cls.model.objects.get_or_create( address=address, - defaults={"description": description, "is_active": True}, + defaults={ + "description": description, + "is_active": True, + "source": source, + "country_code": country_code.upper(), + }, ) return proxy @classmethod @transaction.atomic - def add_proxies(cls, addresses: list[str]) -> int: + def add_proxies( + cls, + addresses: list[str], + *, + source: str = MANUAL_SOURCE, + country_code: str = RUNTIME_COUNTRY_CODE, + ) -> int: """ Добавить список прокси. Args: addresses: Список адресов прокси + source: Источник прокси + country_code: ISO-код страны Returns: Количество добавленных прокси @@ -731,13 +796,296 @@ class ProxyService(BaseService[Proxy]): for address in addresses: _, created = cls.model.objects.get_or_create( address=address, - defaults={"is_active": True}, + defaults={ + "is_active": True, + "source": source, + "country_code": country_code.upper(), + }, ) if created: created_count += 1 return created_count +class ProxyToolsSyncError(Exception): + """Ошибка синхронизации прокси из Proxy-Tools.""" + + +class ProxyToolsSyncService: + """Сервис синхронизации RU-прокси из Proxy-Tools.""" + + COUNTRY_CODE = ProxyService.RUNTIME_COUNTRY_CODE + SOURCE = ProxyService.PROXY_TOOLS_SOURCE + + @classmethod + def sync_ru_proxies(cls) -> dict[str, int | str]: + """Загрузить RU-прокси из Proxy-Tools и синхронизировать таблицу.""" + api_key = getattr(settings, "PROXY_TOOLS_API_KEY", "").strip() + if not api_key: + logger.warning("Proxy-Tools sync skipped: PROXY_TOOLS_API_KEY is empty") + return { + "status": "skipped", + "reason": "missing_api_key", + "fetched": 0, + "created": 0, + "updated": 0, + "deactivated": 0, + } + + client = ProxyToolsClient( + api_key=api_key, + api_url=settings.PROXY_TOOLS_API_URL, + timeout=int(getattr(settings, "PROXY_TOOLS_TIMEOUT_SECONDS", 30)), + ) + limit = int(getattr(settings, "PROXY_TOOLS_LIMIT", 100)) + max_pages = max(int(getattr(settings, "PROXY_TOOLS_MAX_PAGES", 3)), 1) + + try: + items = cls._fetch_all_pages(client=client, limit=limit, max_pages=max_pages) + addresses = cls._extract_addresses(items) + except ProxyToolsClientError as exc: + raise ProxyToolsSyncError(str(exc)) from exc + + result = cls._sync_addresses(addresses) + return { + "status": "success", + "fetched": len(addresses), + **result, + } + + @classmethod + def _fetch_all_pages( + cls, + *, + client: ProxyToolsClient, + limit: int, + max_pages: int, + ) -> list[Any]: + """Собрать прокси с нескольких страниц, если API их отдаёт.""" + items: list[Any] = [] + for page in range(1, max_pages + 1): + payload = client.fetch_proxies( + country_code=cls.COUNTRY_CODE, + page=page, + limit=limit, + ) + batch = cls._extract_items(payload) + items.extend(batch) + if not cls._has_more_pages(payload, page=page, batch_size=len(batch), limit=limit): + break + return items + + @classmethod + def _extract_items(cls, payload: Any) -> list[Any]: + """Извлечь список элементов прокси из внешнего payload.""" + if isinstance(payload, list): + return payload + if isinstance(payload, dict): + for key in ("proxies", "data", "results", "items"): + value = payload.get(key) + if isinstance(value, list): + return value + if isinstance(value, dict): + with suppress(ProxyToolsSyncError): + return cls._extract_items(value) + for value in payload.values(): + if isinstance(value, list): + return value + raise ProxyToolsSyncError("Unexpected Proxy-Tools response shape") + + @classmethod + def _has_more_pages( + cls, + payload: Any, + *, + page: int, + batch_size: int, + limit: int, + ) -> bool: + """Определить, нужно ли запросить следующую страницу.""" + if batch_size == 0: + return False + if isinstance(payload, dict): + next_value = payload.get("next") or payload.get("next_page") + if next_value not in (None, "", False): + return True + meta = payload.get("meta") + if isinstance(meta, dict): + pagination = meta.get("pagination") + if isinstance(pagination, dict): + current_page = cls._to_int( + pagination.get("current_page") or pagination.get("page") + ) + total_pages = cls._to_int(pagination.get("total_pages")) + if current_page is not None and total_pages is not None: + return current_page < total_pages + total_pages = cls._to_int(meta.get("total_pages")) + if total_pages is not None: + return page < total_pages + return batch_size >= limit + + @classmethod + def _extract_addresses(cls, items: list[Any]) -> list[str]: + """Нормализовать и дедуплицировать адреса прокси.""" + addresses: list[str] = [] + seen: set[str] = set() + for item in items: + address = cls._extract_address(item) + if address and address not in seen: + seen.add(address) + addresses.append(address) + return addresses + + @classmethod + def _extract_address(cls, item: Any) -> str | None: + """Извлечь адрес прокси из одного элемента payload.""" + if isinstance(item, str): + return cls._normalize_address(item) + if not isinstance(item, dict): + return None + + for key in ("proxy", "proxy_url", "url", "address", "addr"): + value = item.get(key) + if isinstance(value, str): + normalized = cls._normalize_address( + value, + scheme_hint=item.get("scheme") + or item.get("protocol") + or item.get("type") + or item.get("proxy_type"), + ) + if normalized: + return normalized + + host = item.get("host") or item.get("ip") + port = item.get("port") + if host and port: + return cls._normalize_address( + f"{host}:{port}", + scheme_hint=item.get("scheme") + or item.get("protocol") + or item.get("type") + or item.get("proxy_type"), + ) + + return None + + @classmethod + def _normalize_address( + cls, + value: str, + *, + scheme_hint: Any = None, + ) -> str | None: + """Привести адрес прокси к нормализованному URL.""" + candidate = str(value).strip() + if not candidate: + return None + + scheme = cls._normalize_scheme(scheme_hint) + if "://" not in candidate: + candidate = f"{scheme or 'http'}://{candidate}" + + parsed = urlparse(candidate) + if not parsed.hostname or parsed.port is None: + return None + + final_scheme = cls._normalize_scheme(parsed.scheme) or scheme or "http" + credentials = "" + if parsed.username: + credentials = parsed.username + if parsed.password: + credentials = f"{credentials}:{parsed.password}" + credentials = f"{credentials}@" + + host = parsed.hostname + if ":" in host and not host.startswith("["): + host = f"[{host}]" + return f"{final_scheme}://{credentials}{host}:{parsed.port}" + + @classmethod + def _normalize_scheme(cls, value: Any) -> str | None: + """Нормализовать схему прокси.""" + if value is None: + return None + mapping = { + "1": "socks4", + "2": "socks5", + "3": "https", + "4": "http", + 1: "socks4", + 2: "socks5", + 3: "https", + 4: "http", + } + candidate = mapping.get(value, str(value).strip().lower()) + if candidate in {"http", "https", "socks4", "socks5"}: + return candidate + return None + + @classmethod + def _to_int(cls, value: Any) -> int | None: + """Безопасно привести значение к int.""" + try: + return int(value) + except (TypeError, ValueError): + return None + + @classmethod + @transaction.atomic + def _sync_addresses(cls, addresses: list[str]) -> dict[str, int]: + """Синхронизировать импортированные адреса с таблицей Proxy.""" + existing_qs = Proxy.objects.filter( + source=cls.SOURCE, + country_code=cls.COUNTRY_CODE, + ) + existing_by_address = { + proxy.address: proxy + for proxy in existing_qs.only("id", "address", "is_active") + } + + created = 0 + updated = 0 + for address in addresses: + proxy = existing_by_address.get(address) + if proxy is None: + Proxy.objects.create( + address=address, + is_active=True, + description="Imported from Proxy-Tools", + source=cls.SOURCE, + country_code=cls.COUNTRY_CODE, + ) + created += 1 + continue + + changed_fields: list[str] = [] + if not proxy.is_active: + proxy.is_active = True + changed_fields.append("is_active") + if proxy.description != "Imported from Proxy-Tools": + proxy.description = "Imported from Proxy-Tools" + changed_fields.append("description") + if changed_fields: + proxy.save(update_fields=[*changed_fields, "updated_at"]) + updated += 1 + + deactivated = 0 + active_imported = existing_qs.filter(is_active=True) + if addresses: + deactivated = active_imported.exclude(address__in=addresses).update( + is_active=False + ) + else: + deactivated = active_imported.update(is_active=False) + + return { + "created": created, + "updated": updated, + "deactivated": deactivated, + } + + class InspectionService(BulkOperationsMixin, BaseService[InspectionRecord]): """ Сервис для управления данными о проверках. diff --git a/src/apps/parsers/tasks.py b/src/apps/parsers/tasks.py index 436d49b..eb69387 100644 --- a/src/apps/parsers/tasks.py +++ b/src/apps/parsers/tasks.py @@ -13,6 +13,7 @@ from datetime import datetime from pathlib import Path from apps.core.services import BackgroundJobService +from apps.core.tasks import PeriodicTask as CorePeriodicTask from apps.parsers.clients.minpromtorg import ( IndustrialProductionClient, IndustrialProductsClient, @@ -30,6 +31,7 @@ from apps.parsers.services import ( ParserLoadLogService, ProcurementService, ProxyService, + ProxyToolsSyncService, ) from celery import shared_task from django.conf import settings @@ -54,7 +56,7 @@ def _resolve_proxies(proxies: list[str] | None) -> list[str] | None: if proxies is not None: return proxies - db_proxies = ProxyService.get_active_proxies_or_none() + db_proxies = ProxyService.get_runtime_proxies_or_none() if db_proxies: return db_proxies @@ -89,6 +91,14 @@ def _get_or_create_background_job( return job +@shared_task(bind=True, base=CorePeriodicTask) +def sync_ru_proxies(self) -> dict[str, int | str]: # noqa: ARG001 + """Периодически загружать RU-прокси из Proxy-Tools.""" + result = ProxyToolsSyncService.sync_ru_proxies() + logger.info("RU proxy sync finished: %s", result) + return result + + def _lock_path_for(file_path: Path) -> Path: return Path(f"{file_path}.lock") diff --git a/src/core/celery.py b/src/core/celery.py index 1cc45eb..7f477b6 100644 --- a/src/core/celery.py +++ b/src/core/celery.py @@ -74,6 +74,10 @@ app.conf.beat_schedule = { "task": "apps.parsers.tasks.parse_inspections", "schedule": 7 * 24 * 60 * 60, # Every 7 days }, + "sync-ru-proxies-hourly": { + "task": "apps.parsers.tasks.sync_ru_proxies", + "schedule": getattr(settings, "PROXY_TOOLS_SYNC_INTERVAL_SECONDS", 3600), + }, # Сканирование папки FNS - каждые 5 минут "scan-fns-directory": { "task": "apps.parsers.tasks.scan_fns_directory", diff --git a/src/settings/base.py b/src/settings/base.py index 28e0348..9423df1 100644 --- a/src/settings/base.py +++ b/src/settings/base.py @@ -200,6 +200,16 @@ FNS_LOCK_TTL_SECONDS = 3600 PARSER_PROXIES = [ item.strip() for item in os.getenv("PARSER_PROXIES", "").split(",") if item.strip() ] +PROXY_TOOLS_API_KEY = os.getenv("PROXY_TOOLS_API_KEY", "").strip() +PROXY_TOOLS_API_URL = os.getenv( + "PROXY_TOOLS_API_URL", "https://proxy-tools.com/api/v1/proxies" +).strip() +PROXY_TOOLS_TIMEOUT_SECONDS = int(os.getenv("PROXY_TOOLS_TIMEOUT_SECONDS", "30")) +PROXY_TOOLS_LIMIT = int(os.getenv("PROXY_TOOLS_LIMIT", "100")) +PROXY_TOOLS_MAX_PAGES = int(os.getenv("PROXY_TOOLS_MAX_PAGES", "3")) +PROXY_TOOLS_SYNC_INTERVAL_SECONDS = int( + os.getenv("PROXY_TOOLS_SYNC_INTERVAL_SECONDS", "3600") +) BACKUP_ENCRYPTION_KEY = os.getenv("BACKUP_ENCRYPTION_KEY", "") BACKUP_KEY_ID = os.getenv("BACKUP_KEY_ID", "default") BACKUP_EXPORT_DIRECTORY = os.getenv( diff --git a/tests/apps/core/test_celery_module.py b/tests/apps/core/test_celery_module.py index 8671906..23eebf3 100644 --- a/tests/apps/core/test_celery_module.py +++ b/tests/apps/core/test_celery_module.py @@ -52,6 +52,7 @@ class CeleryModuleTest(SimpleTestCase): self.assertIn("parse-manufactures-daily", module.app.conf.beat_schedule) self.assertIn("parse-industrial-products-daily", module.app.conf.beat_schedule) self.assertIn("parse-inspections-weekly", module.app.conf.beat_schedule) + self.assertIn("sync-ru-proxies-hourly", module.app.conf.beat_schedule) def test_startup_refresh_queues_when_lock_acquired(self): with patch.dict( diff --git a/tests/apps/parsers/factories.py b/tests/apps/parsers/factories.py index 3238667..1fb5788 100644 --- a/tests/apps/parsers/factories.py +++ b/tests/apps/parsers/factories.py @@ -83,6 +83,8 @@ class ProxyFactory(factory.django.DjangoModelFactory): address = factory.LazyFunction(generate_proxy_address) description = factory.LazyAttribute(lambda _: fake.sentence(nb_words=3)) + source = "manual" + country_code = "RU" is_active = True fail_count = 0 last_used_at = factory.LazyAttribute( diff --git a/tests/apps/parsers/test_services.py b/tests/apps/parsers/test_services.py index f91235e..69e773c 100644 --- a/tests/apps/parsers/test_services.py +++ b/tests/apps/parsers/test_services.py @@ -1,5 +1,6 @@ """Tests for parsers services.""" +from unittest.mock import patch from urllib.parse import urlparse from apps.parsers.clients.minpromtorg.industrial import IndustrialProductionClient @@ -27,9 +28,10 @@ from apps.parsers.services import ( ParserLoadLogService, ProcurementService, ProxyService, + ProxyToolsSyncService, ) from apps.registers.models import Organization -from django.test import TestCase, tag +from django.test import TestCase, override_settings, tag from tests.utils import TestHTTPServer from tests.utils.fixtures import build_minpromtorg_certificates_excel, fake @@ -173,6 +175,127 @@ class ProxyServiceTest(TestCase): self.assertEqual(created, 1) self.assertEqual(Proxy.objects.count(), 2) + def test_get_runtime_proxies_prefers_proxy_tools_ru(self): + """Runtime should prefer RU proxies imported from Proxy-Tools.""" + manual_ru = ProxyFactory( + source=ProxyService.MANUAL_SOURCE, + country_code="RU", + ) + imported_ru = ProxyFactory( + source=ProxyService.PROXY_TOOLS_SOURCE, + country_code="RU", + ) + ProxyFactory( + source=ProxyService.PROXY_TOOLS_SOURCE, + country_code="US", + ) + + result = ProxyService.get_runtime_proxies() + + self.assertEqual(result, [imported_ru.address]) + self.assertNotIn(manual_ru.address, result) + + def test_get_runtime_proxies_falls_back_to_any_ru_proxy(self): + """Runtime should fall back to any RU proxy when imported list is empty.""" + manual_ru = ProxyFactory( + source=ProxyService.MANUAL_SOURCE, + country_code="RU", + ) + ProxyFactory( + source=ProxyService.MANUAL_SOURCE, + country_code="US", + ) + + result = ProxyService.get_runtime_proxies() + + self.assertEqual(result, [manual_ru.address]) + + +class ProxyToolsSyncServiceTest(TestCase): + """Tests for ProxyToolsSyncService.""" + + def test_sync_ru_proxies_skips_without_api_key(self): + """Sync should be skipped when API key is missing.""" + result = ProxyToolsSyncService.sync_ru_proxies() + + self.assertEqual(result["status"], "skipped") + self.assertEqual(result["reason"], "missing_api_key") + + @override_settings( + PROXY_TOOLS_API_KEY="test-token", + PROXY_TOOLS_LIMIT=2, + PROXY_TOOLS_MAX_PAGES=2, + ) + @patch("apps.parsers.services.ProxyToolsClient.fetch_proxies") + def test_sync_ru_proxies_upserts_and_deactivates(self, fetch_proxies_mock): + """Sync should create, reactivate and deactivate imported proxies.""" + active_stale = ProxyFactory( + address="http://10.0.0.10:8000", + source=ProxyService.PROXY_TOOLS_SOURCE, + country_code="RU", + is_active=True, + ) + inactive_existing = ProxyFactory( + address="http://10.0.0.20:8000", + source=ProxyService.PROXY_TOOLS_SOURCE, + country_code="RU", + is_active=False, + ) + manual_ru = ProxyFactory( + address="http://10.0.0.30:8000", + source=ProxyService.MANUAL_SOURCE, + country_code="RU", + is_active=True, + ) + + fetch_proxies_mock.side_effect = [ + { + "data": [ + {"host": "10.0.0.20", "port": 8000, "type": "4"}, + {"proxy": "socks5://10.0.0.40:1080"}, + ], + "meta": {"total_pages": 2}, + }, + { + "data": [ + "https://10.0.0.50:8443", + ], + "meta": {"total_pages": 2}, + }, + ] + + result = ProxyToolsSyncService.sync_ru_proxies() + + self.assertEqual(result["status"], "success") + self.assertEqual(result["fetched"], 3) + self.assertEqual(result["created"], 2) + self.assertEqual(result["updated"], 1) + self.assertEqual(result["deactivated"], 1) + + active_stale.refresh_from_db() + inactive_existing.refresh_from_db() + manual_ru.refresh_from_db() + + self.assertFalse(active_stale.is_active) + self.assertTrue(inactive_existing.is_active) + self.assertTrue(manual_ru.is_active) + + imported_addresses = set( + Proxy.objects.filter( + source=ProxyService.PROXY_TOOLS_SOURCE, + country_code="RU", + is_active=True, + ).values_list("address", flat=True) + ) + self.assertSetEqual( + imported_addresses, + { + "http://10.0.0.20:8000", + "socks5://10.0.0.40:1080", + "https://10.0.0.50:8443", + }, + ) + class ParserLoadLogServiceTest(TestCase): """Tests for ParserLoadLogService.""" diff --git a/tests/apps/parsers/test_tasks.py b/tests/apps/parsers/test_tasks.py index bf60634..11fc666 100644 --- a/tests/apps/parsers/test_tasks.py +++ b/tests/apps/parsers/test_tasks.py @@ -9,6 +9,7 @@ import tempfile import threading from pathlib import Path from types import SimpleNamespace +from unittest.mock import patch from urllib.parse import urlparse from apps.parsers import tasks as parser_tasks @@ -39,6 +40,7 @@ from apps.parsers.tasks import ( _move_to_dir, _process_fns_file_sync, _remove_lock, + _resolve_proxies, _try_create_lock, parse_all_minpromtorg, parse_all_sources, @@ -51,6 +53,7 @@ from apps.parsers.tasks import ( scan_fns_directory, sync_inspections, sync_procurements, + sync_ru_proxies, ) from django.test import TestCase, override_settings from openpyxl import Workbook @@ -59,6 +62,7 @@ from tests.apps.parsers.factories import ( InspectionRecordFactory, ParserLoadLogFactory, ProcurementRecordFactory, + ProxyFactory, ) from tests.utils import TestHTTPServer from tests.utils.fixtures import ( @@ -102,6 +106,55 @@ def _portal_path(year: int, month: int) -> str: return f"/portal/public-open-data/check/{year}/{month}" +class ProxyResolutionTestCase(TestCase): + """Tests for proxy resolution in parser tasks.""" + + @override_settings(PARSER_PROXIES=["http://env-proxy:8080"]) + def test_resolve_proxies_prefers_runtime_db_proxies(self): + imported_proxy = ProxyFactory( + address="http://10.0.0.2:8000", + source="proxy-tools", + country_code="RU", + is_active=True, + ) + ProxyFactory( + address="http://10.0.0.3:8000", + source="manual", + country_code="RU", + is_active=True, + ) + + result = _resolve_proxies(None) + + self.assertEqual(result, [imported_proxy.address]) + + @override_settings(PARSER_PROXIES=["http://env-proxy:8080"]) + def test_resolve_proxies_falls_back_to_settings_when_db_empty(self): + result = _resolve_proxies(None) + + self.assertEqual(result, ["http://env-proxy:8080"]) + + +class SyncRuProxiesTaskTestCase(TestCase): + """Tests for periodic RU proxy sync task.""" + + @patch("apps.parsers.tasks.ProxyToolsSyncService.sync_ru_proxies") + def test_sync_ru_proxies_returns_service_payload(self, sync_mock): + sync_mock.return_value = { + "status": "success", + "fetched": 3, + "created": 2, + "updated": 1, + "deactivated": 0, + } + + result = sync_ru_proxies.run() + + self.assertEqual(result["status"], "success") + self.assertEqual(result["fetched"], 3) + sync_mock.assert_called_once_with() + + @override_settings( CELERY_TASK_ALWAYS_EAGER=True, CELERY_TASK_EAGER_PROPAGATES=True,