feat(parsers): добавлен парсер zakupki.gov.ru с SOAP API интеграцией

Реализована полная интеграция с ЕИС Закупки через SOAP API
(FTP доступ закрыт с 01.01.2025).

Добавлено:
- ZakupkiClient с поддержкой SOAP методов getDocsByOrgRegionRequest
  и getDocsByReestrNumberRequest
- Модель ProcurementRecord (18 полей, 3 индекса)
- ProcurementService и ParserLoadLogService для бизнес-логики
- Celery задачи parse_procurements и sync_procurements
- Админка с цветовой индикацией статусов и фильтрами
- 71 тест (unit + E2E с RUN_E2E_TESTS=1)

Требования: токен SOAP API через Госуслуги

🤖 Generated with [Qoder](https://qoder.com)
This commit is contained in:
2026-01-27 16:01:28 +01:00
parent 199d871923
commit c6483d8427
16 changed files with 3405 additions and 0 deletions

View File

@@ -0,0 +1,404 @@
"""
Unit-тесты для ZakupkiClient.
Тестирует клиент для парсинга данных с zakupki.gov.ru.
Использует моки для HTTP запросов.
"""
import io
import zipfile
from unittest.mock import patch
from apps.parsers.clients.zakupki import ZakupkiClient, ZakupkiClientError
from apps.parsers.clients.zakupki.schemas import Procurement, ProcurementPlan
from django.test import SimpleTestCase
class ZakupkiClientInitTestCase(SimpleTestCase):
    """Construction-time behaviour of ``ZakupkiClient``."""

    def test_init_default(self):
        """A client built without arguments exposes the default settings."""
        default_client = ZakupkiClient()

        self.assertEqual(default_client.host, "zakupki.gov.ru")
        self.assertEqual(default_client.timeout, 120)
        self.assertIsNone(default_client.proxies)

    def test_init_with_proxies(self):
        """The ``proxies`` attribute equals the list passed to the constructor."""
        proxy_urls = ["http://proxy1:8080", "http://proxy2:8080"]

        proxied_client = ZakupkiClient(proxies=proxy_urls)

        self.assertEqual(proxied_client.proxies, proxy_urls)

    def test_init_with_custom_timeout(self):
        """The ``timeout`` attribute equals the value passed to the constructor."""
        self.assertEqual(ZakupkiClient(timeout=60).timeout, 60)

    def test_context_manager(self):
        """The client can be used in a ``with`` statement and yields a client."""
        with ZakupkiClient() as entered:
            self.assertIsInstance(entered, ZakupkiClient)
class ZakupkiClientDiscoverFilesTestCase(SimpleTestCase):
    """Tests for ``ZakupkiClient._discover_data_files``."""

    def setUp(self):
        """Create a fresh default client for every test."""
        self.client = ZakupkiClient()

    def test_discover_files_with_region_and_year(self):
        """Region plus year yields exactly one plan carrying both values."""
        found = self.client._discover_data_files(region_code="77", year=2025)

        self.assertEqual(len(found), 1)
        plan = found[0]
        self.assertIsInstance(plan, ProcurementPlan)
        self.assertEqual(plan.region_code, "77")
        self.assertEqual(plan.year, 2025)
        self.assertIsNone(plan.month)

    def test_discover_files_with_month(self):
        """A month argument is stored on the plan and reflected in its URL."""
        found = self.client._discover_data_files(
            region_code="77", year=2025, month=3
        )

        self.assertEqual(len(found), 1)
        plan = found[0]
        self.assertEqual(plan.month, 3)
        # The URL must contain both the year and the zero-padded month.
        for fragment in ("2025", "03"):
            self.assertIn(fragment, plan.file_url)

    def test_discover_files_empty_without_region(self):
        """Omitting the region produces an empty list."""
        self.assertEqual(self.client._discover_data_files(year=2025), [])

    def test_discover_files_empty_without_year(self):
        """Omitting the year produces an empty list."""
        self.assertEqual(self.client._discover_data_files(region_code="77"), [])

    def test_discover_files_law_type_44(self):
        """Law type "44" yields one plan whose URL contains ``fz44``."""
        found = self.client._discover_data_files(
            region_code="77", year=2025, law_type="44"
        )

        self.assertEqual(len(found), 1)
        self.assertIn("fz44", found[0].file_url)

    def test_discover_files_law_type_223(self):
        """Law type "223" yields one plan whose URL contains ``fz223``."""
        found = self.client._discover_data_files(
            region_code="77", year=2025, law_type="223"
        )

        self.assertEqual(len(found), 1)
        self.assertIn("fz223", found[0].file_url)
class ZakupkiClientParseXMLTestCase(SimpleTestCase):
    """Tests for ``ZakupkiClient._parse_xml_content``."""

    def setUp(self):
        """Prepare the XML payloads shared by the tests and the client."""
        # Bytes that are not an XML document at all.
        self.invalid_xml = b"not xml content"
        # Well-formed document that contains no notification.
        self.empty_xml = b"""<?xml version="1.0" encoding="UTF-8"?>
<export></export>
"""
        # Minimal well-formed document describing a single procurement.
        self.valid_xml = b"""<?xml version="1.0" encoding="UTF-8"?>
<export>
<notification>
<purchaseNumber>0123456789012345678</purchaseNumber>
<purchaseObjectInfo>Test procurement</purchaseObjectInfo>
<customer>
<INN>1234567890</INN>
<KPP>123456789</KPP>
<OGRN>1234567890123</OGRN>
<fullName>Test Organization</fullName>
</customer>
<lot>
<maxPrice>1000000</maxPrice>
<currency>
<code>RUB</code>
</currency>
</lot>
<placingWay>
<name>Electronic auction</name>
</placingWay>
<publishDate>2025-01-15</publishDate>
<endDate>2025-02-15</endDate>
<state>Published</state>
</notification>
</export>
"""
        self.client = ZakupkiClient()

    def test_parse_xml_valid(self):
        """A valid document produces one ``Procurement`` with the XML values."""
        parsed = self.client._parse_xml_content(self.valid_xml, None)

        self.assertEqual(len(parsed), 1)
        record = parsed[0]
        self.assertIsInstance(record, Procurement)
        expected_fields = {
            "purchase_number": "0123456789012345678",
            "customer_inn": "1234567890",
            "customer_name": "Test Organization",
            "max_price": "1000000",
        }
        for attribute, expected in expected_fields.items():
            self.assertEqual(getattr(record, attribute), expected)

    def test_parse_xml_empty(self):
        """A document without notifications parses to an empty list."""
        parsed = self.client._parse_xml_content(self.empty_xml, None)
        self.assertEqual(parsed, [])

    def test_parse_xml_invalid(self):
        """Non-XML input raises ``ZakupkiClientError``."""
        with self.assertRaises(ZakupkiClientError):
            self.client._parse_xml_content(self.invalid_xml, None)

    def test_parse_xml_with_namespace(self):
        """A document with a default namespace is parsed without raising."""
        namespaced_xml = b"""<?xml version="1.0" encoding="UTF-8"?>
<export xmlns="http://zakupki.gov.ru/export">
<notification>
<purchaseNumber>9876543210123456789</purchaseNumber>
<customer>
<INN>9876543210</INN>
<fullName>NS Organization</fullName>
</customer>
</notification>
</export>
"""
        parsed = self.client._parse_xml_content(namespaced_xml, None)

        # The parser may either extract the record or return an empty list,
        # depending on how it handles namespaces; only the type is pinned.
        self.assertIsInstance(parsed, list)

    def test_parse_xml_windows1251_encoding(self):
        """A Windows-1251 encoded document keeps its Cyrillic text intact."""
        xml_text = """<?xml version="1.0" encoding="windows-1251"?>
<export>
<notification>
<purchaseNumber>1111111111111111111</purchaseNumber>
<customer>
<INN>1111111111</INN>
<fullName>Тестовая Организация</fullName>
</customer>
</notification>
</export>
"""
        encoded_xml = xml_text.encode("windows-1251")

        parsed = self.client._parse_xml_content(encoded_xml, None)

        self.assertEqual(len(parsed), 1)
        self.assertEqual(parsed[0].customer_name, "Тестовая Организация")
class ZakupkiClientParseZIPTestCase(SimpleTestCase):
    """Tests for ``ZakupkiClient._parse_zip_archive``."""

    def setUp(self):
        """Create a fresh default client for every test."""
        self.client = ZakupkiClient()

    def _create_zip_with_xml(self, xml_content: bytes, filename: str = "data.xml"):
        """Return the bytes of a deflated ZIP archive holding one XML member."""
        in_memory = io.BytesIO()
        with zipfile.ZipFile(in_memory, "w", zipfile.ZIP_DEFLATED) as archive:
            archive.writestr(filename, xml_content)
        return in_memory.getvalue()

    def test_parse_zip_with_xml(self):
        """An archive with a single XML member yields its one procurement."""
        xml_payload = b"""<?xml version="1.0" encoding="UTF-8"?>
<export>
<notification>
<purchaseNumber>1234567890123456789</purchaseNumber>
<customer>
<INN>1234567890</INN>
<fullName>ZIP Test Org</fullName>
</customer>
</notification>
</export>
"""
        archive_bytes = self._create_zip_with_xml(xml_payload)

        parsed = self.client._parse_zip_archive(archive_bytes, None)

        self.assertEqual(len(parsed), 1)
        self.assertEqual(parsed[0].purchase_number, "1234567890123456789")

    def test_parse_zip_empty(self):
        """An archive without members parses to an empty list."""
        in_memory = io.BytesIO()
        # Opening for writing and closing immediately leaves an empty archive.
        zipfile.ZipFile(in_memory, "w").close()

        parsed = self.client._parse_zip_archive(in_memory.getvalue(), None)

        self.assertEqual(parsed, [])

    def test_parse_zip_multiple_xml_files(self):
        """Procurements from every XML member of the archive are returned."""
        first_xml = b"""<?xml version="1.0"?><export>
<notification>
<purchaseNumber>1111111111111111111</purchaseNumber>
<customer><INN>1111111111</INN><fullName>Org1</fullName></customer>
</notification>
</export>"""
        second_xml = b"""<?xml version="1.0"?><export>
<notification>
<purchaseNumber>2222222222222222222</purchaseNumber>
<customer><INN>2222222222</INN><fullName>Org2</fullName></customer>
</notification>
</export>"""
        members = (("file1.xml", first_xml), ("file2.xml", second_xml))

        in_memory = io.BytesIO()
        with zipfile.ZipFile(in_memory, "w") as archive:
            for member_name, member_body in members:
                archive.writestr(member_name, member_body)

        parsed = self.client._parse_zip_archive(in_memory.getvalue(), None)

        self.assertEqual(len(parsed), 2)
        seen_numbers = {item.purchase_number for item in parsed}
        for expected_number in ("1111111111111111111", "2222222222222222222"):
            self.assertIn(expected_number, seen_numbers)
class ZakupkiClientFetchTestCase(SimpleTestCase):
    """Tests for ``ZakupkiClient.fetch_procurements`` with helpers mocked."""

    def setUp(self):
        """Create the client under test."""
        # The client is built with no arguments (so no token is supplied).
        # NOTE(review): the original comment states that without a token the
        # client takes its HTTP fallback path - confirm against ZakupkiClient.
        self.client = ZakupkiClient()

    @patch.object(ZakupkiClient, "_download_and_parse_http")
    @patch.object(ZakupkiClient, "_discover_data_files")
    def test_fetch_with_region_and_year(self, mock_discover, mock_download):
        """Region plus year: files are discovered once and downloaded once."""
        discovered_plan = ProcurementPlan(
            region_code="77",
            year=2025,
            month=None,
            file_url="http://test.url/data.zip",
            file_name="data.zip",
        )
        procurement_fields = {
            "purchase_number": "1234567890123456789",
            "purchase_name": "Test",
            "customer_inn": "1234567890",
            "customer_kpp": "123456789",
            "customer_ogrn": "1234567890123",
            "customer_name": "Test Org",
            "max_price": "1000000",
            "currency_code": "RUB",
            "placement_method": "Auction",
            "publish_date": "2025-01-01",
            "end_date": "2025-02-01",
            "status": "Published",
            "law_type": "44-FZ",
        }
        mock_discover.return_value = [discovered_plan]
        mock_download.return_value = [Procurement(**procurement_fields)]

        fetched = self.client.fetch_procurements(region_code="77", year=2025)

        self.assertEqual(len(fetched), 1)
        mock_discover.assert_called_once()
        mock_download.assert_called_once()

    @patch.object(ZakupkiClient, "_download_and_parse_http")
    def test_fetch_with_direct_url(self, mock_download):
        """A direct ``file_url`` returns what the mocked download produced."""
        procurement_fields = {
            "purchase_number": "9999999999999999999",
            "purchase_name": "Direct URL Test",
            "customer_inn": "9999999999",
            "customer_kpp": "",
            "customer_ogrn": "",
            "customer_name": "Direct Org",
            "max_price": "500000",
            "currency_code": "RUB",
            "placement_method": "",
            "publish_date": "2025-01-01",
            "end_date": "",
            "status": "",
            "law_type": "",
        }
        mock_download.return_value = [Procurement(**procurement_fields)]

        fetched = self.client.fetch_procurements(
            file_url="http://direct.url/data.xml"
        )

        self.assertEqual(len(fetched), 1)
        self.assertEqual(fetched[0].purchase_number, "9999999999999999999")
        mock_download.assert_called_once()

    @patch.object(ZakupkiClient, "_discover_data_files")
    def test_fetch_no_files_found(self, mock_discover):
        """An empty discovery result makes the fetch return an empty list."""
        mock_discover.return_value = []

        fetched = self.client.fetch_procurements(region_code="77", year=2025)

        self.assertEqual(fetched, [])

    def test_fetch_progress_callback(self):
        """The progress callback is invoked, and its first report is 0%."""
        reported = []

        def record_progress(percent, message):
            reported.append((percent, message))

        with patch.object(ZakupkiClient, "_discover_data_files", return_value=[]):
            self.client.fetch_procurements(
                region_code="77", year=2025, progress_callback=record_progress
            )

        # At least one progress report is expected.
        self.assertGreater(len(reported), 0)
        first_percent = reported[0][0]
        self.assertEqual(first_percent, 0)  # reporting starts at 0%
class ZakupkiClientSanitizeXMLTestCase(SimpleTestCase):
    """Tests for ``ZakupkiClient._sanitize_xml``."""

    def setUp(self):
        """Create a fresh default client for every test."""
        self.client = ZakupkiClient()

    def test_sanitize_removes_control_chars(self):
        """Control characters are removed while ordinary text is kept."""
        dirty_xml = "<?xml version='1.0'?><root>Test\x00\x01\x02</root>"

        clean_xml = self.client._sanitize_xml(dirty_xml)

        for control_char in ("\x00", "\x01", "\x02"):
            self.assertNotIn(control_char, clean_xml)
        # Without this check a sanitizer that discards its whole input
        # (e.g. returns "") would satisfy the assertions above.
        self.assertIn("Test", clean_xml)

    def test_sanitize_escapes_ampersands(self):
        """A bare ampersand is turned into the ``&amp;`` entity."""
        dirty_xml = "<root>Test & Company</root>"

        clean_xml = self.client._sanitize_xml(dirty_xml)

        self.assertIn("&amp;", clean_xml)

    def test_sanitize_keeps_valid_entities(self):
        """Entities that are already valid are kept and not escaped again."""
        valid_xml = "<root>&amp; &lt; &gt; &quot;</root>"

        clean_xml = self.client._sanitize_xml(valid_xml)

        for entity in ("&amp;", "&lt;", "&gt;", "&quot;"):
            self.assertIn(entity, clean_xml)
        # "&amp;amp;" still contains "&amp;", so the membership check above
        # cannot detect double-escaping of that entity; rule it out here.
        self.assertNotIn("&amp;amp;", clean_xml)