feat(parsers): sync RU proxies from proxy-tools
This commit is contained in:
@@ -28,19 +28,24 @@ class ProxyAdmin(admin.ModelAdmin):
|
||||
|
||||
list_display = [
|
||||
"address",
|
||||
"country_code",
|
||||
"source",
|
||||
"is_active_badge",
|
||||
"fail_count",
|
||||
"last_used_at",
|
||||
"created_at",
|
||||
]
|
||||
list_filter = ["is_active", "created_at"]
|
||||
search_fields = ["address"]
|
||||
list_filter = ["is_active", "country_code", "source", "created_at"]
|
||||
search_fields = ["address", "country_code", "source", "description"]
|
||||
readonly_fields = ["created_at", "updated_at", "last_used_at"]
|
||||
ordering = ["-is_active", "-last_used_at"]
|
||||
list_per_page = 50
|
||||
|
||||
fieldsets = (
|
||||
("Основное", {"fields": ("address", "is_active")}),
|
||||
(
|
||||
"Основное",
|
||||
{"fields": ("address", "country_code", "source", "description", "is_active")},
|
||||
),
|
||||
("Статистика", {"fields": ("fail_count", "last_used_at")}),
|
||||
("Даты", {"fields": ("created_at", "updated_at"), "classes": ("collapse",)}),
|
||||
)
|
||||
|
||||
83
src/apps/parsers/clients/proxy_tools.py
Normal file
83
src/apps/parsers/clients/proxy_tools.py
Normal file
@@ -0,0 +1,83 @@
|
||||
"""
|
||||
Клиент Proxy-Tools JSON API.
|
||||
|
||||
Документация:
|
||||
https://proxy-tools.com/pages/proxy-api
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
from apps.parsers.clients.base import BaseHTTPClient, HTTPClientError
|
||||
from requests.adapters import BaseAdapter
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
DEFAULT_API_URL = "https://proxy-tools.com/api/v1/proxies"
|
||||
|
||||
|
||||
class ProxyToolsClientError(HTTPClientError):
    """Error raised by the Proxy-Tools client."""
|
||||
|
||||
|
||||
@dataclass
class ProxyToolsClient:
    """
    Client that downloads proxy lists from the Proxy-Tools JSON API.

    Authenticates with a Bearer token and returns the raw decoded JSON
    payload, because the external service documents its filters but does
    not pin down the shape of the response.
    """

    # Bearer token. Kept out of the dataclass-generated repr so that printing
    # or logging a client instance cannot leak the secret.
    api_key: str = field(repr=False)
    # Endpoint that is queried for proxies.
    api_url: str = DEFAULT_API_URL
    # Timeout handed to the underlying HTTP client.
    timeout: int = 30
    # Optional transport adapter forwarded to the HTTP client.
    http_adapter: BaseAdapter | None = None
    # Lazily created HTTP client; see the ``http_client`` property.
    _http_client: BaseHTTPClient | None = field(default=None, repr=False)

    @property
    def http_client(self) -> BaseHTTPClient:
        """Return the HTTP client, creating it on first access."""
        if self._http_client is None:
            self._http_client = BaseHTTPClient(
                base_url="https://proxy-tools.com",
                timeout=self.timeout,
                adapter=self.http_adapter,
                headers={
                    "Accept": "application/json",
                    "Authorization": f"Bearer {self.api_key}",
                },
            )
        return self._http_client

    def fetch_proxies(
        self,
        *,
        country_code: str,
        page: int = 1,
        limit: int = 100,
    ) -> Any:
        """
        Fetch one page of proxies for the given country code.

        Args:
            country_code: Country code; sent lower-cased as the ``geo`` filter.
            page: Page number to request.
            limit: Page size to request.

        Returns:
            The decoded JSON payload, exactly as returned by the service.

        Raises:
            HTTPClientError: Propagated unchanged when raised by the HTTP layer.
            ProxyToolsClientError: For any other failure (for example a body
                that cannot be decoded as JSON).
        """
        params = {
            "geo": country_code.lower(),
            "page": str(page),
            "limit": str(limit),
        }
        logger.info(
            "Fetching proxies from Proxy-Tools (country=%s, page=%s, limit=%s)",
            country_code,
            page,
            limit,
        )
        try:
            response = self.http_client.get(self.api_url, params=params)
            return response.json()
        except HTTPClientError:
            # Errors already typed by the shared HTTP layer are passed through as-is.
            raise
        except Exception as exc:  # noqa: BLE001
            raise ProxyToolsClientError(
                f"Failed to fetch proxies from Proxy-Tools: {exc}"
            ) from exc
|
||||
35
src/apps/parsers/migrations/0015_add_proxy_metadata.py
Normal file
35
src/apps/parsers/migrations/0015_add_proxy_metadata.py
Normal file
@@ -0,0 +1,35 @@
|
||||
# Generated by Django 3.2.25 on 2026-03-23 10:30
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Add the ``country_code`` and ``source`` columns to the ``proxy`` model."""

    dependencies = [("parsers", "0014_parsingsettings")]

    operations = [
        # Country of the proxy; indexed, defaults to "RU".
        migrations.AddField(
            model_name="proxy",
            name="country_code",
            field=models.CharField(
                verbose_name="код страны",
                help_text="ISO-3166 код страны прокси, например RU",
                max_length=2,
                default="RU",
                db_index=True,
            ),
        ),
        # Origin of the proxy record; indexed, defaults to "manual".
        migrations.AddField(
            model_name="proxy",
            name="source",
            field=models.CharField(
                verbose_name="источник",
                help_text="Источник прокси (например: manual, proxy-tools)",
                max_length=50,
                default="manual",
                db_index=True,
            ),
        ),
    ]
|
||||
@@ -351,6 +351,20 @@ class Proxy(TimestampMixin, models.Model):
|
||||
blank=True,
|
||||
help_text=_("Описание прокси (провайдер, локация и т.д.)"),
|
||||
)
|
||||
source = models.CharField(
|
||||
_("источник"),
|
||||
max_length=50,
|
||||
default="manual",
|
||||
db_index=True,
|
||||
help_text=_("Источник прокси (например: manual, proxy-tools)"),
|
||||
)
|
||||
country_code = models.CharField(
|
||||
_("код страны"),
|
||||
max_length=2,
|
||||
default="RU",
|
||||
db_index=True,
|
||||
help_text=_("ISO-3166 код страны прокси, например RU"),
|
||||
)
|
||||
|
||||
class Meta:
|
||||
db_table = "parsers_proxy"
|
||||
|
||||
@@ -443,6 +443,8 @@ class ProxySerializer(serializers.ModelSerializer):
|
||||
"id",
|
||||
"address",
|
||||
"is_active",
|
||||
"country_code",
|
||||
"source",
|
||||
"last_used_at",
|
||||
"fail_count",
|
||||
"description",
|
||||
|
||||
@@ -11,6 +11,8 @@ from contextlib import suppress
|
||||
from dataclasses import dataclass
|
||||
from datetime import date, datetime
|
||||
from decimal import Decimal, InvalidOperation
|
||||
from typing import Any
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from apps.core.services import BaseService, BulkOperationsMixin
|
||||
from apps.parsers.clients.minpromtorg.schemas import (
|
||||
@@ -19,6 +21,7 @@ from apps.parsers.clients.minpromtorg.schemas import (
|
||||
Manufacturer,
|
||||
)
|
||||
from apps.parsers.clients.proverki.schemas import Inspection
|
||||
from apps.parsers.clients.proxy_tools import ProxyToolsClient, ProxyToolsClientError
|
||||
from apps.parsers.clients.zakupki.schemas import Procurement
|
||||
from apps.parsers.models import (
|
||||
FinancialReport,
|
||||
@@ -32,6 +35,7 @@ from apps.parsers.models import (
|
||||
Proxy,
|
||||
)
|
||||
from apps.registers.models import Organization
|
||||
from django.conf import settings
|
||||
from django.db import IntegrityError, transaction
|
||||
from django.db.models import Q
|
||||
from django.utils import timezone
|
||||
@@ -639,29 +643,68 @@ class ProxyService(BaseService[Proxy]):
|
||||
"""
|
||||
|
||||
model = Proxy
|
||||
RUNTIME_COUNTRY_CODE = "RU"
|
||||
MANUAL_SOURCE = "manual"
|
||||
PROXY_TOOLS_SOURCE = "proxy-tools"
|
||||
|
||||
@classmethod
|
||||
def get_active_proxies(cls) -> list[str]:
|
||||
def get_active_proxies(
|
||||
cls,
|
||||
*,
|
||||
country_code: str | None = None,
|
||||
source: str | None = None,
|
||||
) -> list[str]:
|
||||
"""
|
||||
Получить список адресов активных прокси.
|
||||
|
||||
Returns:
|
||||
Список адресов прокси (может быть пустым)
|
||||
"""
|
||||
proxies = cls.model.objects.filter(is_active=True).values_list(
|
||||
"address", flat=True
|
||||
)
|
||||
proxies = cls.model.objects.filter(is_active=True)
|
||||
if country_code:
|
||||
proxies = proxies.filter(country_code=country_code.upper())
|
||||
if source:
|
||||
proxies = proxies.filter(source=source)
|
||||
proxies = proxies.values_list("address", flat=True)
|
||||
return list(proxies)
|
||||
|
||||
@classmethod
|
||||
def get_active_proxies_or_none(cls) -> list[str] | None:
|
||||
def get_active_proxies_or_none(
|
||||
cls,
|
||||
*,
|
||||
country_code: str | None = None,
|
||||
source: str | None = None,
|
||||
) -> list[str] | None:
|
||||
"""
|
||||
Получить список активных прокси или None, если их нет.
|
||||
|
||||
Returns:
|
||||
Список адресов прокси или None
|
||||
"""
|
||||
proxies = cls.get_active_proxies()
|
||||
proxies = cls.get_active_proxies(country_code=country_code, source=source)
|
||||
return proxies if proxies else None
|
||||
|
||||
@classmethod
def get_runtime_proxies(cls) -> list[str]:
    """
    Return the proxy addresses that parsers should use at runtime.

    Lookup order:
        1. Active proxies for the runtime country imported from Proxy-Tools.
        2. Any active proxies for the runtime country, whatever their source.
    """
    country = cls.RUNTIME_COUNTRY_CODE
    imported = cls.get_active_proxies(
        country_code=country,
        source=cls.PROXY_TOOLS_SOURCE,
    )
    if not imported:
        # Nothing imported: fall back to every active proxy of that country.
        return cls.get_active_proxies(country_code=country)
    return imported
|
||||
|
||||
@classmethod
def get_runtime_proxies_or_none(cls) -> list[str] | None:
    """Return the runtime proxy addresses, or None when the list is empty."""
    return cls.get_runtime_proxies() or None
|
||||
|
||||
@classmethod
|
||||
@@ -698,31 +741,53 @@ class ProxyService(BaseService[Proxy]):
|
||||
|
||||
@classmethod
|
||||
@transaction.atomic
|
||||
def add_proxy(cls, address: str, description: str = "") -> Proxy:
|
||||
def add_proxy(
|
||||
cls,
|
||||
address: str,
|
||||
description: str = "",
|
||||
*,
|
||||
source: str = MANUAL_SOURCE,
|
||||
country_code: str = RUNTIME_COUNTRY_CODE,
|
||||
) -> Proxy:
|
||||
"""
|
||||
Добавить новый прокси.
|
||||
|
||||
Args:
|
||||
address: Адрес прокси (например: http://proxy:8080)
|
||||
description: Описание прокси
|
||||
source: Источник прокси
|
||||
country_code: ISO-код страны
|
||||
|
||||
Returns:
|
||||
Созданный объект Proxy
|
||||
"""
|
||||
proxy, _ = cls.model.objects.get_or_create(
|
||||
address=address,
|
||||
defaults={"description": description, "is_active": True},
|
||||
defaults={
|
||||
"description": description,
|
||||
"is_active": True,
|
||||
"source": source,
|
||||
"country_code": country_code.upper(),
|
||||
},
|
||||
)
|
||||
return proxy
|
||||
|
||||
@classmethod
|
||||
@transaction.atomic
|
||||
def add_proxies(cls, addresses: list[str]) -> int:
|
||||
def add_proxies(
|
||||
cls,
|
||||
addresses: list[str],
|
||||
*,
|
||||
source: str = MANUAL_SOURCE,
|
||||
country_code: str = RUNTIME_COUNTRY_CODE,
|
||||
) -> int:
|
||||
"""
|
||||
Добавить список прокси.
|
||||
|
||||
Args:
|
||||
addresses: Список адресов прокси
|
||||
source: Источник прокси
|
||||
country_code: ISO-код страны
|
||||
|
||||
Returns:
|
||||
Количество добавленных прокси
|
||||
@@ -731,13 +796,296 @@ class ProxyService(BaseService[Proxy]):
|
||||
for address in addresses:
|
||||
_, created = cls.model.objects.get_or_create(
|
||||
address=address,
|
||||
defaults={"is_active": True},
|
||||
defaults={
|
||||
"is_active": True,
|
||||
"source": source,
|
||||
"country_code": country_code.upper(),
|
||||
},
|
||||
)
|
||||
if created:
|
||||
created_count += 1
|
||||
return created_count
|
||||
|
||||
|
||||
class ProxyToolsSyncError(Exception):
    """Error raised while synchronising proxies from Proxy-Tools."""
|
||||
|
||||
|
||||
class ProxyToolsSyncService:
    """Synchronise RU proxies fetched from Proxy-Tools with the ``Proxy`` table."""

    COUNTRY_CODE = ProxyService.RUNTIME_COUNTRY_CODE
    SOURCE = ProxyService.PROXY_TOOLS_SOURCE
    # Description stamped on every row managed by this service.
    IMPORT_DESCRIPTION = "Imported from Proxy-Tools"

    @classmethod
    def sync_ru_proxies(cls) -> dict[str, int | str]:
        """
        Fetch RU proxies from Proxy-Tools and reconcile the ``Proxy`` table.

        Returns:
            A summary dict. When no API key is configured the status is
            ``"skipped"`` with zeroed counters; otherwise it is ``"success"``
            with the ``fetched``, ``created``, ``updated`` and ``deactivated``
            counts.

        Raises:
            ProxyToolsSyncError: If the client reports a failure or the
                payload has an unrecognised shape.
        """
        # ``or ""`` keeps a setting explicitly set to None from failing on .strip().
        api_key = (getattr(settings, "PROXY_TOOLS_API_KEY", "") or "").strip()
        if not api_key:
            logger.warning("Proxy-Tools sync skipped: PROXY_TOOLS_API_KEY is empty")
            return {
                "status": "skipped",
                "reason": "missing_api_key",
                "fetched": 0,
                "created": 0,
                "updated": 0,
                "deactivated": 0,
            }

        client = ProxyToolsClient(
            api_key=api_key,
            api_url=settings.PROXY_TOOLS_API_URL,
            timeout=int(getattr(settings, "PROXY_TOOLS_TIMEOUT_SECONDS", 30)),
        )
        limit = int(getattr(settings, "PROXY_TOOLS_LIMIT", 100))
        # Always request at least one page, even if the setting is zero or negative.
        max_pages = max(int(getattr(settings, "PROXY_TOOLS_MAX_PAGES", 3)), 1)

        try:
            items = cls._fetch_all_pages(client=client, limit=limit, max_pages=max_pages)
            addresses = cls._extract_addresses(items)
        except ProxyToolsClientError as exc:
            raise ProxyToolsSyncError(str(exc)) from exc

        result = cls._sync_addresses(addresses)
        return {
            "status": "success",
            "fetched": len(addresses),
            **result,
        }

    @classmethod
    def _fetch_all_pages(
        cls,
        *,
        client: ProxyToolsClient,
        limit: int,
        max_pages: int,
    ) -> list[Any]:
        """Collect proxy items from up to ``max_pages`` pages of the API."""
        items: list[Any] = []
        for page in range(1, max_pages + 1):
            payload = client.fetch_proxies(
                country_code=cls.COUNTRY_CODE,
                page=page,
                limit=limit,
            )
            batch = cls._extract_items(payload)
            items.extend(batch)
            if not cls._has_more_pages(payload, page=page, batch_size=len(batch), limit=limit):
                break
        return items

    @classmethod
    def _extract_items(cls, payload: Any) -> list[Any]:
        """
        Pull the list of proxy items out of an external payload.

        Accepts a bare list, or a dict that holds the list under one of the
        well-known keys (possibly nested one dict deeper), or under any key.

        Raises:
            ProxyToolsSyncError: If no list can be located.
        """
        if isinstance(payload, list):
            return payload
        if isinstance(payload, dict):
            for key in ("proxies", "data", "results", "items"):
                value = payload.get(key)
                if isinstance(value, list):
                    return value
                if isinstance(value, dict):
                    # A nested dict without a list is not fatal: try the next key.
                    with suppress(ProxyToolsSyncError):
                        return cls._extract_items(value)
            # Last resort: the first list found under any key.
            for value in payload.values():
                if isinstance(value, list):
                    return value
        raise ProxyToolsSyncError("Unexpected Proxy-Tools response shape")

    @classmethod
    def _has_more_pages(
        cls,
        payload: Any,
        *,
        page: int,
        batch_size: int,
        limit: int,
    ) -> bool:
        """
        Decide whether the next page should be requested.

        Checks, in order: an empty batch (stop), ``next`` / ``next_page``
        markers, ``meta.pagination`` counters, ``meta.total_pages``, and
        finally whether the batch filled the requested ``limit``.
        """
        if batch_size == 0:
            return False
        if isinstance(payload, dict):
            next_value = payload.get("next") or payload.get("next_page")
            if next_value not in (None, "", False):
                return True
            meta = payload.get("meta")
            if isinstance(meta, dict):
                pagination = meta.get("pagination")
                if isinstance(pagination, dict):
                    current_page = cls._to_int(
                        pagination.get("current_page") or pagination.get("page")
                    )
                    total_pages = cls._to_int(pagination.get("total_pages"))
                    if current_page is not None and total_pages is not None:
                        return current_page < total_pages
                total_pages = cls._to_int(meta.get("total_pages"))
                if total_pages is not None:
                    return page < total_pages
        # No pagination metadata: a full page suggests there may be more.
        return batch_size >= limit

    @classmethod
    def _extract_addresses(cls, items: list[Any]) -> list[str]:
        """Normalise and de-duplicate proxy addresses, preserving first-seen order."""
        addresses: list[str] = []
        seen: set[str] = set()
        for item in items:
            address = cls._extract_address(item)
            if address and address not in seen:
                seen.add(address)
                addresses.append(address)
        return addresses

    @classmethod
    def _extract_address(cls, item: Any) -> str | None:
        """
        Extract a normalised proxy address from a single payload item.

        Supports plain strings and dicts carrying either a full address under
        one of several keys or a ``host``/``ip`` + ``port`` pair. Returns None
        when nothing usable is found.
        """
        if isinstance(item, str):
            return cls._normalize_address(item)
        if not isinstance(item, dict):
            return None

        # Computed once; used when the address itself carries no scheme.
        scheme_hint = (
            item.get("scheme")
            or item.get("protocol")
            or item.get("type")
            or item.get("proxy_type")
        )

        for key in ("proxy", "proxy_url", "url", "address", "addr"):
            value = item.get(key)
            if isinstance(value, str):
                normalized = cls._normalize_address(value, scheme_hint=scheme_hint)
                if normalized:
                    return normalized

        host = item.get("host") or item.get("ip")
        port = item.get("port")
        if host and port:
            return cls._normalize_address(f"{host}:{port}", scheme_hint=scheme_hint)

        return None

    @classmethod
    def _normalize_address(
        cls,
        value: str,
        *,
        scheme_hint: Any = None,
    ) -> str | None:
        """
        Convert a proxy address into a normalised ``scheme://[creds@]host:port`` URL.

        Returns None for empty input, for addresses without a host or port,
        and for addresses that cannot be parsed at all.
        """
        candidate = str(value).strip()
        if not candidate:
            return None

        scheme = cls._normalize_scheme(scheme_hint)
        if "://" not in candidate:
            candidate = f"{scheme or 'http'}://{candidate}"

        try:
            parsed = urlparse(candidate)
            hostname = parsed.hostname
            port = parsed.port
        except ValueError:
            # urlparse rejects malformed bracketed hosts and ``.port`` rejects
            # non-numeric or out-of-range ports. One bad item from the external
            # payload must not abort the whole sync, so treat it as unusable.
            return None
        if not hostname or port is None:
            return None

        final_scheme = cls._normalize_scheme(parsed.scheme) or scheme or "http"
        credentials = ""
        if parsed.username:
            credentials = parsed.username
            if parsed.password:
                credentials = f"{credentials}:{parsed.password}"
            credentials = f"{credentials}@"

        host = hostname
        # ``hostname`` strips the brackets of IPv6 literals; put them back.
        if ":" in host and not host.startswith("["):
            host = f"[{host}]"
        return f"{final_scheme}://{credentials}{host}:{port}"

    @classmethod
    def _normalize_scheme(cls, value: Any) -> str | None:
        """
        Normalise a proxy scheme hint.

        Returns one of ``http``, ``https``, ``socks4``, ``socks5``, or None
        when the hint is missing or not recognised.
        """
        if value is None:
            return None
        # Numeric codes are accepted both as int and as str.
        mapping = {
            "1": "socks4",
            "2": "socks5",
            "3": "https",
            "4": "http",
            1: "socks4",
            2: "socks5",
            3: "https",
            4: "http",
        }
        fallback = str(value).strip().lower()
        try:
            candidate = mapping.get(value, fallback)
        except TypeError:
            # Unhashable hints (e.g. a list of protocols) cannot be dict keys.
            candidate = fallback
        if candidate in {"http", "https", "socks4", "socks5"}:
            return candidate
        return None

    @classmethod
    def _to_int(cls, value: Any) -> int | None:
        """Convert ``value`` to int, returning None when that is not possible."""
        try:
            return int(value)
        except (TypeError, ValueError):
            return None

    @classmethod
    @transaction.atomic
    def _sync_addresses(cls, addresses: list[str]) -> dict[str, int]:
        """
        Reconcile imported addresses with the ``Proxy`` table.

        Rows with this service's source and country are created, re-activated
        or deactivated so that the active set matches ``addresses``.

        Returns:
            Counts under the keys ``created``, ``updated`` and ``deactivated``.
        """
        existing_qs = Proxy.objects.filter(
            source=cls.SOURCE,
            country_code=cls.COUNTRY_CODE,
        )
        # ``description`` is loaded up front: it is compared for every existing
        # row below, and a deferred field would cost one extra query per row.
        existing_by_address = {
            proxy.address: proxy
            for proxy in existing_qs.only("id", "address", "is_active", "description")
        }

        # Addresses that already exist under another source or country are left
        # to their owner instead of being inserted a second time.
        # NOTE(review): presumably ``Proxy.address`` is unique (other services
        # use get_or_create on it), in which case a blind create() would raise
        # IntegrityError and roll back the whole sync - confirm against the model.
        new_addresses = [
            address for address in addresses if address not in existing_by_address
        ]
        foreign_addresses: set[str] = set()
        if new_addresses:
            foreign_addresses = set(
                Proxy.objects.filter(address__in=new_addresses).values_list(
                    "address", flat=True
                )
            )

        created = 0
        updated = 0
        skipped = 0
        for address in addresses:
            proxy = existing_by_address.get(address)
            if proxy is None:
                if address in foreign_addresses:
                    skipped += 1
                    continue
                Proxy.objects.create(
                    address=address,
                    is_active=True,
                    description=cls.IMPORT_DESCRIPTION,
                    source=cls.SOURCE,
                    country_code=cls.COUNTRY_CODE,
                )
                created += 1
                continue

            changed_fields: list[str] = []
            if not proxy.is_active:
                proxy.is_active = True
                changed_fields.append("is_active")
            if proxy.description != cls.IMPORT_DESCRIPTION:
                proxy.description = cls.IMPORT_DESCRIPTION
                changed_fields.append("description")
            if changed_fields:
                proxy.save(update_fields=[*changed_fields, "updated_at"])
                updated += 1

        if skipped:
            # Only the count is logged: addresses may embed credentials.
            logger.info(
                "Proxy-Tools sync: skipped %s address(es) already stored under "
                "another source or country",
                skipped,
            )

        # Imported proxies that the API no longer returns are switched off.
        active_imported = existing_qs.filter(is_active=True)
        if addresses:
            deactivated = active_imported.exclude(address__in=addresses).update(
                is_active=False
            )
        else:
            deactivated = active_imported.update(is_active=False)

        return {
            "created": created,
            "updated": updated,
            "deactivated": deactivated,
        }
|
||||
|
||||
|
||||
class InspectionService(BulkOperationsMixin, BaseService[InspectionRecord]):
|
||||
"""
|
||||
Сервис для управления данными о проверках.
|
||||
|
||||
@@ -13,6 +13,7 @@ from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
from apps.core.services import BackgroundJobService
|
||||
from apps.core.tasks import PeriodicTask as CorePeriodicTask
|
||||
from apps.parsers.clients.minpromtorg import (
|
||||
IndustrialProductionClient,
|
||||
IndustrialProductsClient,
|
||||
@@ -30,6 +31,7 @@ from apps.parsers.services import (
|
||||
ParserLoadLogService,
|
||||
ProcurementService,
|
||||
ProxyService,
|
||||
ProxyToolsSyncService,
|
||||
)
|
||||
from celery import shared_task
|
||||
from django.conf import settings
|
||||
@@ -54,7 +56,7 @@ def _resolve_proxies(proxies: list[str] | None) -> list[str] | None:
|
||||
if proxies is not None:
|
||||
return proxies
|
||||
|
||||
db_proxies = ProxyService.get_active_proxies_or_none()
|
||||
db_proxies = ProxyService.get_runtime_proxies_or_none()
|
||||
if db_proxies:
|
||||
return db_proxies
|
||||
|
||||
@@ -89,6 +91,14 @@ def _get_or_create_background_job(
|
||||
return job
|
||||
|
||||
|
||||
@shared_task(bind=True, base=CorePeriodicTask)
def sync_ru_proxies(self) -> dict[str, int | str]:  # noqa: ARG001
    """Run the Proxy-Tools RU proxy sync, log its summary and return it."""
    summary = ProxyToolsSyncService.sync_ru_proxies()
    logger.info("RU proxy sync finished: %s", summary)
    return summary
|
||||
|
||||
|
||||
def _lock_path_for(file_path: Path) -> Path:
|
||||
return Path(f"{file_path}.lock")
|
||||
|
||||
|
||||
@@ -74,6 +74,10 @@ app.conf.beat_schedule = {
|
||||
"task": "apps.parsers.tasks.parse_inspections",
|
||||
"schedule": 7 * 24 * 60 * 60, # Every 7 days
|
||||
},
|
||||
"sync-ru-proxies-hourly": {
|
||||
"task": "apps.parsers.tasks.sync_ru_proxies",
|
||||
"schedule": getattr(settings, "PROXY_TOOLS_SYNC_INTERVAL_SECONDS", 3600),
|
||||
},
|
||||
# Сканирование папки FNS - каждые 5 минут
|
||||
"scan-fns-directory": {
|
||||
"task": "apps.parsers.tasks.scan_fns_directory",
|
||||
|
||||
@@ -200,6 +200,16 @@ FNS_LOCK_TTL_SECONDS = 3600
|
||||
PARSER_PROXIES = [
|
||||
item.strip() for item in os.getenv("PARSER_PROXIES", "").split(",") if item.strip()
|
||||
]
|
||||
PROXY_TOOLS_API_KEY = os.getenv("PROXY_TOOLS_API_KEY", "").strip()
|
||||
PROXY_TOOLS_API_URL = os.getenv(
|
||||
"PROXY_TOOLS_API_URL", "https://proxy-tools.com/api/v1/proxies"
|
||||
).strip()
|
||||
PROXY_TOOLS_TIMEOUT_SECONDS = int(os.getenv("PROXY_TOOLS_TIMEOUT_SECONDS", "30"))
|
||||
PROXY_TOOLS_LIMIT = int(os.getenv("PROXY_TOOLS_LIMIT", "100"))
|
||||
PROXY_TOOLS_MAX_PAGES = int(os.getenv("PROXY_TOOLS_MAX_PAGES", "3"))
|
||||
PROXY_TOOLS_SYNC_INTERVAL_SECONDS = int(
|
||||
os.getenv("PROXY_TOOLS_SYNC_INTERVAL_SECONDS", "3600")
|
||||
)
|
||||
BACKUP_ENCRYPTION_KEY = os.getenv("BACKUP_ENCRYPTION_KEY", "")
|
||||
BACKUP_KEY_ID = os.getenv("BACKUP_KEY_ID", "default")
|
||||
BACKUP_EXPORT_DIRECTORY = os.getenv(
|
||||
|
||||
@@ -52,6 +52,7 @@ class CeleryModuleTest(SimpleTestCase):
|
||||
self.assertIn("parse-manufactures-daily", module.app.conf.beat_schedule)
|
||||
self.assertIn("parse-industrial-products-daily", module.app.conf.beat_schedule)
|
||||
self.assertIn("parse-inspections-weekly", module.app.conf.beat_schedule)
|
||||
self.assertIn("sync-ru-proxies-hourly", module.app.conf.beat_schedule)
|
||||
|
||||
def test_startup_refresh_queues_when_lock_acquired(self):
|
||||
with patch.dict(
|
||||
|
||||
@@ -83,6 +83,8 @@ class ProxyFactory(factory.django.DjangoModelFactory):
|
||||
|
||||
address = factory.LazyFunction(generate_proxy_address)
|
||||
description = factory.LazyAttribute(lambda _: fake.sentence(nb_words=3))
|
||||
source = "manual"
|
||||
country_code = "RU"
|
||||
is_active = True
|
||||
fail_count = 0
|
||||
last_used_at = factory.LazyAttribute(
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
"""Tests for parsers services."""
|
||||
|
||||
from unittest.mock import patch
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from apps.parsers.clients.minpromtorg.industrial import IndustrialProductionClient
|
||||
@@ -27,9 +28,10 @@ from apps.parsers.services import (
|
||||
ParserLoadLogService,
|
||||
ProcurementService,
|
||||
ProxyService,
|
||||
ProxyToolsSyncService,
|
||||
)
|
||||
from apps.registers.models import Organization
|
||||
from django.test import TestCase, tag
|
||||
from django.test import TestCase, override_settings, tag
|
||||
|
||||
from tests.utils import TestHTTPServer
|
||||
from tests.utils.fixtures import build_minpromtorg_certificates_excel, fake
|
||||
@@ -173,6 +175,127 @@ class ProxyServiceTest(TestCase):
|
||||
self.assertEqual(created, 1)
|
||||
self.assertEqual(Proxy.objects.count(), 2)
|
||||
|
||||
def test_get_runtime_proxies_prefers_proxy_tools_ru(self):
    """Imported RU proxies win over manual RU and imported non-RU ones."""
    manual_ru = ProxyFactory(source=ProxyService.MANUAL_SOURCE, country_code="RU")
    imported_ru = ProxyFactory(source=ProxyService.PROXY_TOOLS_SOURCE, country_code="RU")
    # Imported, but for another country: must be ignored.
    ProxyFactory(source=ProxyService.PROXY_TOOLS_SOURCE, country_code="US")

    runtime = ProxyService.get_runtime_proxies()

    self.assertEqual(runtime, [imported_ru.address])
    self.assertNotIn(manual_ru.address, runtime)
|
||||
|
||||
def test_get_runtime_proxies_falls_back_to_any_ru_proxy(self):
    """With no imported proxies, any active RU proxy is returned."""
    manual_ru = ProxyFactory(source=ProxyService.MANUAL_SOURCE, country_code="RU")
    # Manual proxy for another country: must not be picked up by the fallback.
    ProxyFactory(source=ProxyService.MANUAL_SOURCE, country_code="US")

    runtime = ProxyService.get_runtime_proxies()

    self.assertEqual(runtime, [manual_ru.address])
|
||||
|
||||
|
||||
class ProxyToolsSyncServiceTest(TestCase):
    """Tests for ProxyToolsSyncService."""

    # The key is pinned to empty so the test does not depend on the environment
    # (a configured key would otherwise trigger a real request).
    @override_settings(PROXY_TOOLS_API_KEY="")
    def test_sync_ru_proxies_skips_without_api_key(self):
        """Sync should be skipped when API key is missing."""
        result = ProxyToolsSyncService.sync_ru_proxies()

        self.assertEqual(result["status"], "skipped")
        self.assertEqual(result["reason"], "missing_api_key")

    @override_settings(
        PROXY_TOOLS_API_KEY="test-token",
        PROXY_TOOLS_LIMIT=2,
        PROXY_TOOLS_MAX_PAGES=2,
    )
    @patch("apps.parsers.services.ProxyToolsClient.fetch_proxies")
    def test_sync_ru_proxies_upserts_and_deactivates(self, fetch_proxies_mock):
        """Sync should create, reactivate and deactivate imported proxies."""
        # Imported and active, but absent from the API response: gets deactivated.
        active_stale = ProxyFactory(
            address="http://10.0.0.10:8000",
            source=ProxyService.PROXY_TOOLS_SOURCE,
            country_code="RU",
            is_active=True,
        )
        # Imported and inactive, present in the API response: gets reactivated.
        inactive_existing = ProxyFactory(
            address="http://10.0.0.20:8000",
            source=ProxyService.PROXY_TOOLS_SOURCE,
            country_code="RU",
            is_active=False,
        )
        # Manual proxy: must be left untouched by the sync.
        manual_ru = ProxyFactory(
            address="http://10.0.0.30:8000",
            source=ProxyService.MANUAL_SOURCE,
            country_code="RU",
            is_active=True,
        )

        # Two pages covering the three supported item shapes.
        fetch_proxies_mock.side_effect = [
            {
                "data": [
                    {"host": "10.0.0.20", "port": 8000, "type": "4"},
                    {"proxy": "socks5://10.0.0.40:1080"},
                ],
                "meta": {"total_pages": 2},
            },
            {
                "data": [
                    "https://10.0.0.50:8443",
                ],
                "meta": {"total_pages": 2},
            },
        ]

        result = ProxyToolsSyncService.sync_ru_proxies()

        self.assertEqual(result["status"], "success")
        self.assertEqual(result["fetched"], 3)
        self.assertEqual(result["created"], 2)
        self.assertEqual(result["updated"], 1)
        self.assertEqual(result["deactivated"], 1)

        active_stale.refresh_from_db()
        inactive_existing.refresh_from_db()
        manual_ru.refresh_from_db()

        self.assertFalse(active_stale.is_active)
        self.assertTrue(inactive_existing.is_active)
        self.assertTrue(manual_ru.is_active)

        imported_addresses = set(
            Proxy.objects.filter(
                source=ProxyService.PROXY_TOOLS_SOURCE,
                country_code="RU",
                is_active=True,
            ).values_list("address", flat=True)
        )
        self.assertSetEqual(
            imported_addresses,
            {
                "http://10.0.0.20:8000",
                "socks5://10.0.0.40:1080",
                "https://10.0.0.50:8443",
            },
        )
|
||||
|
||||
|
||||
class ParserLoadLogServiceTest(TestCase):
|
||||
"""Tests for ParserLoadLogService."""
|
||||
|
||||
@@ -9,6 +9,7 @@ import tempfile
|
||||
import threading
|
||||
from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import patch
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from apps.parsers import tasks as parser_tasks
|
||||
@@ -39,6 +40,7 @@ from apps.parsers.tasks import (
|
||||
_move_to_dir,
|
||||
_process_fns_file_sync,
|
||||
_remove_lock,
|
||||
_resolve_proxies,
|
||||
_try_create_lock,
|
||||
parse_all_minpromtorg,
|
||||
parse_all_sources,
|
||||
@@ -51,6 +53,7 @@ from apps.parsers.tasks import (
|
||||
scan_fns_directory,
|
||||
sync_inspections,
|
||||
sync_procurements,
|
||||
sync_ru_proxies,
|
||||
)
|
||||
from django.test import TestCase, override_settings
|
||||
from openpyxl import Workbook
|
||||
@@ -59,6 +62,7 @@ from tests.apps.parsers.factories import (
|
||||
InspectionRecordFactory,
|
||||
ParserLoadLogFactory,
|
||||
ProcurementRecordFactory,
|
||||
ProxyFactory,
|
||||
)
|
||||
from tests.utils import TestHTTPServer
|
||||
from tests.utils.fixtures import (
|
||||
@@ -102,6 +106,55 @@ def _portal_path(year: int, month: int) -> str:
|
||||
return f"/portal/public-open-data/check/{year}/{month}"
|
||||
|
||||
|
||||
class ProxyResolutionTestCase(TestCase):
    """Checks how parser tasks resolve the proxy list to use."""

    @override_settings(PARSER_PROXIES=["http://env-proxy:8080"])
    def test_resolve_proxies_prefers_runtime_db_proxies(self):
        """Imported DB proxies win over manual ones and over settings."""
        imported_proxy = ProxyFactory(
            address="http://10.0.0.2:8000",
            source="proxy-tools",
            country_code="RU",
            is_active=True,
        )
        # Manual proxy for the same country: must not appear in the result.
        ProxyFactory(
            address="http://10.0.0.3:8000",
            source="manual",
            country_code="RU",
            is_active=True,
        )

        resolved = _resolve_proxies(None)

        self.assertEqual(resolved, [imported_proxy.address])

    @override_settings(PARSER_PROXIES=["http://env-proxy:8080"])
    def test_resolve_proxies_falls_back_to_settings_when_db_empty(self):
        """With no proxies in the DB, the PARSER_PROXIES setting is used."""
        resolved = _resolve_proxies(None)

        self.assertEqual(resolved, ["http://env-proxy:8080"])
|
||||
|
||||
|
||||
class SyncRuProxiesTaskTestCase(TestCase):
    """Checks the periodic RU proxy sync task."""

    @patch("apps.parsers.tasks.ProxyToolsSyncService.sync_ru_proxies")
    def test_sync_ru_proxies_returns_service_payload(self, sync_mock):
        """The task returns what the service returned and calls it exactly once."""
        service_payload = {
            "status": "success",
            "fetched": 3,
            "created": 2,
            "updated": 1,
            "deactivated": 0,
        }
        sync_mock.return_value = service_payload

        outcome = sync_ru_proxies.run()

        self.assertEqual(outcome["status"], "success")
        self.assertEqual(outcome["fetched"], 3)
        sync_mock.assert_called_once_with()
|
||||
|
||||
|
||||
@override_settings(
|
||||
CELERY_TASK_ALWAYS_EAGER=True,
|
||||
CELERY_TASK_EAGER_PROPAGATES=True,
|
||||
|
||||
Reference in New Issue
Block a user