feat(parsers): add SOAP API support for zakupki.gov.ru
- Add post() method to BaseHTTPClient for SOAP requests
- Update download_file() to support custom headers (for the token)
- Add ZAKUPKI_TOKEN and PARSER_PROXIES settings
- Improve SOAP error parsing to show EIS error messages
- Update E2E tests to use the token from settings
- Add data/ and .zed/ to .gitignore
This commit is contained in:
@@ -29,3 +29,7 @@ LOG_LEVEL=INFO
|
||||
|
||||
# Scrapy Settings
|
||||
SCRAPY_LOG_LEVEL=INFO
|
||||
|
||||
# Parsers API Tokens
|
||||
# Токен для zakupki.gov.ru (получить через Госуслуги на https://zakupki.gov.ru/pmd/auth/welcome)
|
||||
ZAKUPKI_TOKEN=
|
||||
2
.gitignore
vendored
2
.gitignore
vendored
@@ -39,3 +39,5 @@ Thumbs.db
|
||||
# Backup files
|
||||
*.bak
|
||||
*.backupdata/
|
||||
data/
|
||||
.zed/
|
||||
|
||||
@@ -189,6 +189,65 @@ class BaseHTTPClient:
|
||||
logger.debug("Response %d from %s", response.status_code, url)
|
||||
return response
|
||||
|
||||
def post(
    self,
    endpoint: str,
    data: bytes | str | None = None,
    json: dict[str, Any] | None = None,
    headers: dict[str, str] | None = None,
) -> bytes:
    """
    Send a POST request and return the raw response body.

    Args:
        endpoint: Path or full URL (resolved through ``_build_url``).
        data: Request body (bytes or str).
        json: JSON request body.
        headers: Additional headers for this request.

    Returns:
        Response content as bytes.

    Raises:
        ConnectionError: On a connection failure or a timeout.
        HTTPClientError: On any other request-level failure.
        HTTPError: When the response status is not OK (4xx, 5xx).
    """
    target_url = self._build_url(endpoint)
    logger.info("POST %s (proxy: %s)", target_url, self._current_proxy)

    # Always hand the session a fresh dict, never the caller's own mapping.
    extra_headers = dict(headers) if headers else {}

    try:
        reply = self.session.post(
            target_url,
            data=data,
            json=json,
            headers=extra_headers,
            timeout=self.timeout,
        )
    except requests.exceptions.ConnectionError as exc:
        logger.error("Connection error: %s - %s", target_url, exc)
        raise ConnectionError(
            f"Failed to connect to {target_url}", url=target_url
        ) from exc
    except requests.exceptions.Timeout as exc:
        logger.error("Timeout: %s", target_url)
        raise ConnectionError(
            f"Request timeout for {target_url}", url=target_url
        ) from exc
    except requests.exceptions.RequestException as exc:
        logger.error("Request error: %s - %s", target_url, exc)
        raise HTTPClientError(f"Request failed: {exc}", url=target_url) from exc

    # Success path first; anything else falls through to the HTTP error.
    if reply.ok:
        logger.debug("POST Response %d from %s", reply.status_code, target_url)
        return reply.content

    logger.error("HTTP error %d: %s", reply.status_code, target_url)
    raise HTTPError(
        f"HTTP {reply.status_code} for {target_url}",
        status_code=reply.status_code,
        url=target_url,
    )
|
||||
|
||||
def get_json(self, endpoint: str, params: dict[str, Any] | None = None) -> dict:
|
||||
"""
|
||||
Выполнить GET запрос и вернуть JSON.
|
||||
@@ -203,12 +262,15 @@ class BaseHTTPClient:
|
||||
response = self.get(endpoint, params=params)
|
||||
return response.json()
|
||||
|
||||
def download_file(self, endpoint: str) -> bytes:
|
||||
def download_file(
|
||||
self, endpoint: str, headers: dict[str, str] | None = None
|
||||
) -> bytes:
|
||||
"""
|
||||
Скачать файл.
|
||||
|
||||
Args:
|
||||
endpoint: Путь или полный URL файла
|
||||
headers: Дополнительные заголовки
|
||||
|
||||
Returns:
|
||||
Содержимое файла как bytes
|
||||
@@ -216,9 +278,34 @@ class BaseHTTPClient:
|
||||
url = self._build_url(endpoint)
|
||||
logger.info("Downloading file: %s", url)
|
||||
|
||||
response = self.get(endpoint)
|
||||
content = response.content
|
||||
# Выполняем GET с дополнительными заголовками
|
||||
request_headers = {}
|
||||
if headers:
|
||||
request_headers.update(headers)
|
||||
|
||||
try:
|
||||
response = self.session.get(
|
||||
url, headers=request_headers, timeout=self.timeout
|
||||
)
|
||||
except requests.exceptions.ConnectionError as e:
|
||||
logger.error("Connection error: %s - %s", url, e)
|
||||
raise ConnectionError(f"Failed to connect to {url}", url=url) from e
|
||||
except requests.exceptions.Timeout as e:
|
||||
logger.error("Timeout: %s", url)
|
||||
raise ConnectionError(f"Request timeout for {url}", url=url) from e
|
||||
except requests.exceptions.RequestException as e:
|
||||
logger.error("Request error: %s - %s", url, e)
|
||||
raise HTTPClientError(f"Request failed: {e}", url=url) from e
|
||||
|
||||
if not response.ok:
|
||||
logger.error("HTTP error %d: %s", response.status_code, url)
|
||||
raise HTTPError(
|
||||
f"HTTP {response.status_code} for {url}",
|
||||
status_code=response.status_code,
|
||||
url=url,
|
||||
)
|
||||
|
||||
content = response.content
|
||||
logger.info("Downloaded %d bytes from %s", len(content), url)
|
||||
return content
|
||||
|
||||
|
||||
@@ -370,7 +370,9 @@ class ZakupkiClient:
|
||||
</soapenv:Body>
|
||||
</soapenv:Envelope>"""
|
||||
|
||||
def _parse_soap_response(self, response_content: bytes) -> str | None:
|
||||
def _parse_soap_response( # noqa: C901
|
||||
self, response_content: bytes
|
||||
) -> str | None:
|
||||
"""Извлечь URL архива из SOAP ответа."""
|
||||
try:
|
||||
xml_str = response_content.decode("utf-8")
|
||||
@@ -383,12 +385,27 @@ class ZakupkiClient:
|
||||
logger.info("Found archive URL: %s", elem.text)
|
||||
return elem.text.strip()
|
||||
|
||||
# Проверяем на ошибки
|
||||
# Проверяем на errorInfo (структурированная ошибка ЕИС)
|
||||
for elem in root.iter():
|
||||
if "fault" in elem.tag.lower() or "error" in elem.tag.lower():
|
||||
error_text = elem.text or ET.tostring(elem, encoding="unicode")
|
||||
logger.error("SOAP error: %s", error_text)
|
||||
raise ZakupkiClientError(f"SOAP error: {error_text}")
|
||||
if elem.tag.endswith("errorInfo"):
|
||||
code = ""
|
||||
message = ""
|
||||
for child in elem:
|
||||
if child.tag.endswith("code") and child.text:
|
||||
code = child.text.strip()
|
||||
if child.tag.endswith("message") and child.text:
|
||||
message = child.text.strip()
|
||||
if message:
|
||||
error_msg = f"[{code}] {message}" if code else message
|
||||
logger.error("EIS error: %s", error_msg)
|
||||
raise ZakupkiClientError(f"EIS error: {error_msg}")
|
||||
|
||||
# Проверяем на fault (SOAP fault)
|
||||
for elem in root.iter():
|
||||
if "fault" in elem.tag.lower():
|
||||
error_text = ET.tostring(elem, encoding="unicode")
|
||||
logger.error("SOAP fault: %s", error_text)
|
||||
raise ZakupkiClientError(f"SOAP fault: {error_text}")
|
||||
|
||||
logger.warning("No archiveUrl found in SOAP response")
|
||||
return None
|
||||
@@ -432,13 +449,17 @@ class ZakupkiClient:
|
||||
procurements = self._download_and_parse_http(plan.file_url, None)
|
||||
all_procurements.extend(procurements)
|
||||
logger.info(
|
||||
"Parsed %d procurements from %s", len(procurements), plan.file_name
|
||||
"Parsed %d procurements from %s",
|
||||
len(procurements),
|
||||
plan.file_name,
|
||||
)
|
||||
|
||||
if progress_callback:
|
||||
progress_callback(95, f"Загружено {len(all_procurements)} закупок")
|
||||
|
||||
logger.info("Total fetched %d procurements via HTTP", len(all_procurements))
|
||||
logger.info(
|
||||
"Total fetched %d procurements via HTTP", len(all_procurements)
|
||||
)
|
||||
return all_procurements
|
||||
|
||||
def _discover_data_files(
|
||||
|
||||
@@ -16,11 +16,17 @@ import unittest
|
||||
from apps.parsers.clients.zakupki import ZakupkiClient
|
||||
from apps.parsers.models import ParserLoadLog, ProcurementRecord
|
||||
from apps.parsers.services import ParserLoadLogService, ProcurementService
|
||||
from django.conf import settings
|
||||
from django.test import TestCase, override_settings
|
||||
|
||||
# Флаг для запуска E2E тестов
|
||||
RUN_E2E_TESTS = os.getenv("RUN_E2E_TESTS", "").lower() in ("1", "true", "yes")

# Token: the Django setting wins; the environment variable is the fallback
# when the setting is missing or empty.
ZAKUPKI_TOKEN = (
    getattr(settings, "ZAKUPKI_TOKEN", "") or os.getenv("ZAKUPKI_TOKEN", "")
)
|
||||
|
||||
|
||||
@unittest.skipUnless(RUN_E2E_TESTS, "E2E tests disabled. Set RUN_E2E_TESTS=1 to enable")
|
||||
class ZakupkiClientE2ETestCase(TestCase):
|
||||
@@ -32,7 +38,7 @@ class ZakupkiClientE2ETestCase(TestCase):
|
||||
|
||||
def setUp(self):
|
||||
"""Подготовка."""
|
||||
self.client = ZakupkiClient(timeout=60)
|
||||
self.client = ZakupkiClient(token=ZAKUPKI_TOKEN, timeout=60)
|
||||
|
||||
def tearDown(self):
|
||||
"""Очистка."""
|
||||
@@ -103,7 +109,7 @@ class ProcurementServiceE2ETestCase(TestCase):
|
||||
)
|
||||
|
||||
# Загрузка данных
|
||||
with ZakupkiClient(timeout=60) as client:
|
||||
with ZakupkiClient(token=ZAKUPKI_TOKEN, timeout=60) as client:
|
||||
procurements = client.fetch_procurements(
|
||||
region_code="77",
|
||||
year=2025,
|
||||
|
||||
@@ -221,6 +221,21 @@ CACHES = {
|
||||
}
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# PARSERS SETTINGS
|
||||
# =============================================================================
|
||||
|
||||
# Zakupki.gov.ru API Token (получить через Госуслуги)
|
||||
ZAKUPKI_TOKEN = get_env("ZAKUPKI_TOKEN", "")

# Proxy list for parsers: read as a comma-separated string, stored as a list.
# Blank entries are dropped; an empty or non-string value yields an empty list.
PARSER_PROXIES = get_env("PARSER_PROXIES", "")
PARSER_PROXIES = (
    [proxy for proxy in map(str.strip, PARSER_PROXIES.split(",")) if proxy]
    if isinstance(PARSER_PROXIES, str) and PARSER_PROXIES
    else []
)
|
||||
|
||||
|
||||
# Password validation
|
||||
AUTH_PASSWORD_VALIDATORS = [
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user