feat(parsers): add SOAP API support for zakupki.gov.ru
- Add post() method to BaseHTTPClient for SOAP requests
- Update download_file() to support custom headers (for token)
- Add ZAKUPKI_TOKEN and PARSER_PROXIES settings
- Improve SOAP error parsing to show EIS error messages
- Update E2E tests to use token from settings
- Add data/ and .zed/ to gitignore
This commit is contained in:
@@ -29,3 +29,7 @@ LOG_LEVEL=INFO
|
|||||||
|
|
||||||
# Scrapy Settings
|
# Scrapy Settings
|
||||||
SCRAPY_LOG_LEVEL=INFO
|
SCRAPY_LOG_LEVEL=INFO
|
||||||
|
|
||||||
|
# Parsers API Tokens
|
||||||
|
# Токен для zakupki.gov.ru (получить через Госуслуги на https://zakupki.gov.ru/pmd/auth/welcome)
|
||||||
|
ZAKUPKI_TOKEN=
|
||||||
2
.gitignore
vendored
2
.gitignore
vendored
@@ -39,3 +39,5 @@ Thumbs.db
|
|||||||
# Backup files
|
# Backup files
|
||||||
*.bak
|
*.bak
|
||||||
*.backupdata/
|
*.backupdata/
|
||||||
|
data/
|
||||||
|
.zed/
|
||||||
|
|||||||
@@ -189,6 +189,65 @@ class BaseHTTPClient:
|
|||||||
logger.debug("Response %d from %s", response.status_code, url)
|
logger.debug("Response %d from %s", response.status_code, url)
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
def post(
    self,
    endpoint: str,
    data: bytes | str | None = None,
    json: dict[str, Any] | None = None,
    headers: dict[str, str] | None = None,
) -> bytes:
    """
    Send a POST request and return the raw response body.

    Args:
        endpoint: Path or full URL; resolved through ``self._build_url``.
        data: Request body (bytes or str).
        json: JSON request body.
        headers: Extra headers passed along with this request.

    Returns:
        The response content as bytes.

    Raises:
        ConnectionError: On a connection failure or a request timeout.
        HTTPClientError: On any other ``requests`` request failure.
        HTTPError: When the response status is not OK (4xx, 5xx).
    """
    url = self._build_url(endpoint)
    logger.info("POST %s (proxy: %s)", url, self._current_proxy)

    # Copy the caller's headers so the dict handed to the session is ours.
    extra_headers = dict(headers) if headers else {}

    try:
        reply = self.session.post(
            url,
            data=data,
            json=json,
            headers=extra_headers,
            timeout=self.timeout,
        )
    except requests.exceptions.ConnectionError as err:
        logger.error("Connection error: %s - %s", url, err)
        raise ConnectionError(f"Failed to connect to {url}", url=url) from err
    except requests.exceptions.Timeout as err:
        logger.error("Timeout: %s", url)
        raise ConnectionError(f"Request timeout for {url}", url=url) from err
    except requests.exceptions.RequestException as err:
        logger.error("Request error: %s - %s", url, err)
        raise HTTPClientError(f"Request failed: {err}", url=url) from err

    # Success path first; everything below it is the HTTP-error case.
    if reply.ok:
        logger.debug("POST Response %d from %s", reply.status_code, url)
        return reply.content

    status = reply.status_code
    logger.error("HTTP error %d: %s", status, url)
    raise HTTPError(
        f"HTTP {status} for {url}",
        status_code=status,
        url=url,
    )
|
||||||
|
|
||||||
def get_json(self, endpoint: str, params: dict[str, Any] | None = None) -> dict:
|
def get_json(self, endpoint: str, params: dict[str, Any] | None = None) -> dict:
|
||||||
"""
|
"""
|
||||||
Выполнить GET запрос и вернуть JSON.
|
Выполнить GET запрос и вернуть JSON.
|
||||||
@@ -203,12 +262,15 @@ class BaseHTTPClient:
|
|||||||
response = self.get(endpoint, params=params)
|
response = self.get(endpoint, params=params)
|
||||||
return response.json()
|
return response.json()
|
||||||
|
|
||||||
def download_file(self, endpoint: str) -> bytes:
|
def download_file(
|
||||||
|
self, endpoint: str, headers: dict[str, str] | None = None
|
||||||
|
) -> bytes:
|
||||||
"""
|
"""
|
||||||
Скачать файл.
|
Скачать файл.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
endpoint: Путь или полный URL файла
|
endpoint: Путь или полный URL файла
|
||||||
|
headers: Дополнительные заголовки
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Содержимое файла как bytes
|
Содержимое файла как bytes
|
||||||
@@ -216,9 +278,34 @@ class BaseHTTPClient:
|
|||||||
url = self._build_url(endpoint)
|
url = self._build_url(endpoint)
|
||||||
logger.info("Downloading file: %s", url)
|
logger.info("Downloading file: %s", url)
|
||||||
|
|
||||||
response = self.get(endpoint)
|
# Выполняем GET с дополнительными заголовками
|
||||||
content = response.content
|
request_headers = {}
|
||||||
|
if headers:
|
||||||
|
request_headers.update(headers)
|
||||||
|
|
||||||
|
try:
|
||||||
|
response = self.session.get(
|
||||||
|
url, headers=request_headers, timeout=self.timeout
|
||||||
|
)
|
||||||
|
except requests.exceptions.ConnectionError as e:
|
||||||
|
logger.error("Connection error: %s - %s", url, e)
|
||||||
|
raise ConnectionError(f"Failed to connect to {url}", url=url) from e
|
||||||
|
except requests.exceptions.Timeout as e:
|
||||||
|
logger.error("Timeout: %s", url)
|
||||||
|
raise ConnectionError(f"Request timeout for {url}", url=url) from e
|
||||||
|
except requests.exceptions.RequestException as e:
|
||||||
|
logger.error("Request error: %s - %s", url, e)
|
||||||
|
raise HTTPClientError(f"Request failed: {e}", url=url) from e
|
||||||
|
|
||||||
|
if not response.ok:
|
||||||
|
logger.error("HTTP error %d: %s", response.status_code, url)
|
||||||
|
raise HTTPError(
|
||||||
|
f"HTTP {response.status_code} for {url}",
|
||||||
|
status_code=response.status_code,
|
||||||
|
url=url,
|
||||||
|
)
|
||||||
|
|
||||||
|
content = response.content
|
||||||
logger.info("Downloaded %d bytes from %s", len(content), url)
|
logger.info("Downloaded %d bytes from %s", len(content), url)
|
||||||
return content
|
return content
|
||||||
|
|
||||||
|
|||||||
@@ -370,7 +370,9 @@ class ZakupkiClient:
|
|||||||
</soapenv:Body>
|
</soapenv:Body>
|
||||||
</soapenv:Envelope>"""
|
</soapenv:Envelope>"""
|
||||||
|
|
||||||
def _parse_soap_response(self, response_content: bytes) -> str | None:
|
def _parse_soap_response( # noqa: C901
|
||||||
|
self, response_content: bytes
|
||||||
|
) -> str | None:
|
||||||
"""Извлечь URL архива из SOAP ответа."""
|
"""Извлечь URL архива из SOAP ответа."""
|
||||||
try:
|
try:
|
||||||
xml_str = response_content.decode("utf-8")
|
xml_str = response_content.decode("utf-8")
|
||||||
@@ -383,12 +385,27 @@ class ZakupkiClient:
|
|||||||
logger.info("Found archive URL: %s", elem.text)
|
logger.info("Found archive URL: %s", elem.text)
|
||||||
return elem.text.strip()
|
return elem.text.strip()
|
||||||
|
|
||||||
# Проверяем на ошибки
|
# Проверяем на errorInfo (структурированная ошибка ЕИС)
|
||||||
for elem in root.iter():
|
for elem in root.iter():
|
||||||
if "fault" in elem.tag.lower() or "error" in elem.tag.lower():
|
if elem.tag.endswith("errorInfo"):
|
||||||
error_text = elem.text or ET.tostring(elem, encoding="unicode")
|
code = ""
|
||||||
logger.error("SOAP error: %s", error_text)
|
message = ""
|
||||||
raise ZakupkiClientError(f"SOAP error: {error_text}")
|
for child in elem:
|
||||||
|
if child.tag.endswith("code") and child.text:
|
||||||
|
code = child.text.strip()
|
||||||
|
if child.tag.endswith("message") and child.text:
|
||||||
|
message = child.text.strip()
|
||||||
|
if message:
|
||||||
|
error_msg = f"[{code}] {message}" if code else message
|
||||||
|
logger.error("EIS error: %s", error_msg)
|
||||||
|
raise ZakupkiClientError(f"EIS error: {error_msg}")
|
||||||
|
|
||||||
|
# Проверяем на fault (SOAP fault)
|
||||||
|
for elem in root.iter():
|
||||||
|
if "fault" in elem.tag.lower():
|
||||||
|
error_text = ET.tostring(elem, encoding="unicode")
|
||||||
|
logger.error("SOAP fault: %s", error_text)
|
||||||
|
raise ZakupkiClientError(f"SOAP fault: {error_text}")
|
||||||
|
|
||||||
logger.warning("No archiveUrl found in SOAP response")
|
logger.warning("No archiveUrl found in SOAP response")
|
||||||
return None
|
return None
|
||||||
@@ -432,13 +449,17 @@ class ZakupkiClient:
|
|||||||
procurements = self._download_and_parse_http(plan.file_url, None)
|
procurements = self._download_and_parse_http(plan.file_url, None)
|
||||||
all_procurements.extend(procurements)
|
all_procurements.extend(procurements)
|
||||||
logger.info(
|
logger.info(
|
||||||
"Parsed %d procurements from %s", len(procurements), plan.file_name
|
"Parsed %d procurements from %s",
|
||||||
|
len(procurements),
|
||||||
|
plan.file_name,
|
||||||
)
|
)
|
||||||
|
|
||||||
if progress_callback:
|
if progress_callback:
|
||||||
progress_callback(95, f"Загружено {len(all_procurements)} закупок")
|
progress_callback(95, f"Загружено {len(all_procurements)} закупок")
|
||||||
|
|
||||||
logger.info("Total fetched %d procurements via HTTP", len(all_procurements))
|
logger.info(
|
||||||
|
"Total fetched %d procurements via HTTP", len(all_procurements)
|
||||||
|
)
|
||||||
return all_procurements
|
return all_procurements
|
||||||
|
|
||||||
def _discover_data_files(
|
def _discover_data_files(
|
||||||
|
|||||||
@@ -16,11 +16,17 @@ import unittest
|
|||||||
from apps.parsers.clients.zakupki import ZakupkiClient
|
from apps.parsers.clients.zakupki import ZakupkiClient
|
||||||
from apps.parsers.models import ParserLoadLog, ProcurementRecord
|
from apps.parsers.models import ParserLoadLog, ProcurementRecord
|
||||||
from apps.parsers.services import ParserLoadLogService, ProcurementService
|
from apps.parsers.services import ParserLoadLogService, ProcurementService
|
||||||
|
from django.conf import settings
|
||||||
from django.test import TestCase, override_settings
|
from django.test import TestCase, override_settings
|
||||||
|
|
||||||
# Флаг для запуска E2E тестов
|
# Флаг для запуска E2E тестов
|
||||||
RUN_E2E_TESTS = os.environ.get("RUN_E2E_TESTS", "").lower() in ("1", "true", "yes")
|
RUN_E2E_TESTS = os.environ.get("RUN_E2E_TESTS", "").lower() in ("1", "true", "yes")
|
||||||
|
|
||||||
|
# Token taken from Django settings, falling back to the environment variable
# when the setting is missing or empty.
ZAKUPKI_TOKEN = (
    getattr(settings, "ZAKUPKI_TOKEN", "")
    or os.environ.get("ZAKUPKI_TOKEN", "")
)
|
||||||
|
|
||||||
|
|
||||||
@unittest.skipUnless(RUN_E2E_TESTS, "E2E tests disabled. Set RUN_E2E_TESTS=1 to enable")
|
@unittest.skipUnless(RUN_E2E_TESTS, "E2E tests disabled. Set RUN_E2E_TESTS=1 to enable")
|
||||||
class ZakupkiClientE2ETestCase(TestCase):
|
class ZakupkiClientE2ETestCase(TestCase):
|
||||||
@@ -32,7 +38,7 @@ class ZakupkiClientE2ETestCase(TestCase):
|
|||||||
|
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
"""Подготовка."""
|
"""Подготовка."""
|
||||||
self.client = ZakupkiClient(timeout=60)
|
self.client = ZakupkiClient(token=ZAKUPKI_TOKEN, timeout=60)
|
||||||
|
|
||||||
def tearDown(self):
|
def tearDown(self):
|
||||||
"""Очистка."""
|
"""Очистка."""
|
||||||
@@ -103,7 +109,7 @@ class ProcurementServiceE2ETestCase(TestCase):
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Загрузка данных
|
# Загрузка данных
|
||||||
with ZakupkiClient(timeout=60) as client:
|
with ZakupkiClient(token=ZAKUPKI_TOKEN, timeout=60) as client:
|
||||||
procurements = client.fetch_procurements(
|
procurements = client.fetch_procurements(
|
||||||
region_code="77",
|
region_code="77",
|
||||||
year=2025,
|
year=2025,
|
||||||
|
|||||||
@@ -221,6 +221,21 @@ CACHES = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# PARSERS SETTINGS
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# Zakupki.gov.ru API token (obtained through Gosuslugi).
ZAKUPKI_TOKEN = get_env("ZAKUPKI_TOKEN", "")

# Proxy list for parsers. A non-empty comma-separated string is split into a
# list of trimmed, non-empty entries; any other value yields an empty list.
PARSER_PROXIES = get_env("PARSER_PROXIES", "")
PARSER_PROXIES = (
    [proxy.strip() for proxy in PARSER_PROXIES.split(",") if proxy.strip()]
    if isinstance(PARSER_PROXIES, str) and PARSER_PROXIES
    else []
)
|
||||||
|
|
||||||
|
|
||||||
# Password validation
|
# Password validation
|
||||||
AUTH_PASSWORD_VALIDATORS = [
|
AUTH_PASSWORD_VALIDATORS = [
|
||||||
{
|
{
|
||||||
|
|||||||
Reference in New Issue
Block a user