"""Tests for parsers clients."""
from io import BytesIO
from unittest.mock import patch
from django.test import TestCase, tag
from faker import Faker
from openpyxl import Workbook
from apps.parsers.clients.base import BaseHTTPClient, HTTPClientError
from apps.parsers.clients.minpromtorg.industrial import IndustrialProductionClient
from apps.parsers.clients.minpromtorg.manufactures import ManufacturesClient
from apps.parsers.clients.minpromtorg.schemas import IndustrialCertificate, Manufacturer
from apps.parsers.clients.proverki import ProverkiClient
from apps.parsers.clients.proverki.schemas import Inspection
# Module-level Faker instance configured with the "ru_RU" locale.
fake = Faker("ru_RU")
class BaseHTTPClientTest(TestCase):
    """Unit tests for construction and proxy handling of BaseHTTPClient."""

    # Base URL shared by every client built in this test case.
    BASE_URL = "https://example.com"

    def test_client_initialization(self):
        """A client built from a base URL alone exposes the expected defaults."""
        http_client = BaseHTTPClient(base_url=self.BASE_URL)

        self.assertEqual(http_client.base_url, "https://example.com")
        self.assertIsNone(http_client.proxies)
        self.assertEqual(http_client.timeout, 30)

    def test_client_with_proxies(self):
        """The proxy list passed to the constructor is exposed unchanged."""
        proxy_pool = ["http://proxy1:8080", "http://proxy2:8080"]

        http_client = BaseHTTPClient(base_url=self.BASE_URL, proxies=proxy_pool)

        self.assertEqual(http_client.proxies, proxy_pool)

    def test_select_proxy_returns_none_without_proxies(self):
        """_select_proxy yields None when the client has no proxies."""
        http_client = BaseHTTPClient(base_url=self.BASE_URL)

        self.assertIsNone(http_client._select_proxy())

    def test_select_proxy_returns_random_from_list(self):
        """_select_proxy yields one of the configured proxies."""
        proxy_pool = ["http://proxy1:8080", "http://proxy2:8080"]
        http_client = BaseHTTPClient(base_url=self.BASE_URL, proxies=proxy_pool)

        self.assertIn(http_client._select_proxy(), proxy_pool)

    def test_current_proxy_property(self):
        """current_proxy is None until the session attribute has been accessed."""
        http_client = BaseHTTPClient(
            base_url=self.BASE_URL,
            proxies=["http://proxy:8080"],
        )

        # Nothing has touched the session yet, so no proxy is reported.
        self.assertIsNone(http_client.current_proxy)

        # Accessing the session is what makes the proxy visible.
        _ = http_client.session
        self.assertEqual(http_client.current_proxy, "http://proxy:8080")
def _create_test_excel_certificates() -> bytes:
    """Build an in-memory Excel workbook with certificate rows.

    The active sheet receives one header row followed by five data rows
    (certificate numbers ``CERT-0000`` through ``CERT-0004``).

    Returns:
        The serialized workbook as raw bytes.
    """
    header = [
        "issue_date",
        "certificate_number",
        "expiry_date",
        "certificate_file_url",
        "organisation_name",
        "inn",
        "ogrn",
    ]

    workbook = Workbook()
    sheet = workbook.active
    sheet.append(header)

    for i in range(5):
        data_row = [
            "2024-01-01",
            f"CERT-{i:04d}",
            "2025-01-01",
            f"https://example.com/cert{i}.pdf",
            f"Company {i} LLC",
            f"123456789{i}",
            f"123456789012{i}",
        ]
        sheet.append(data_row)

    buffer = BytesIO()
    workbook.save(buffer)
    # getvalue() returns the whole buffer regardless of the stream position.
    return buffer.getvalue()
def _create_test_excel_manufacturers() -> bytes:
    """Build an in-memory Excel workbook with manufacturer rows.

    The active sheet receives one header row followed by five data rows
    (``Manufacturer 0 LLC`` through ``Manufacturer 4 LLC``).

    Returns:
        The serialized workbook as raw bytes.
    """
    workbook = Workbook()
    sheet = workbook.active

    # Header row first, then the generated data rows.
    sheet.append(["full_legal_name", "inn", "ogrn", "address"])
    data_rows = [
        [
            f"Manufacturer {i} LLC",
            f"123456789{i}",
            f"123456789012{i}",
            f"Address {i}, City",
        ]
        for i in range(5)
    ]
    for data_row in data_rows:
        sheet.append(data_row)

    buffer = BytesIO()
    workbook.save(buffer)
    # getvalue() returns the whole buffer regardless of the stream position.
    return buffer.getvalue()
class IndustrialProductionClientTest(TestCase):
    """Unit tests for IndustrialProductionClient with HTTP access mocked out."""

    # Registry title placed into the mocked API payloads.
    REGISTRY_NAME = "Заключения о подтверждении производства промышленной продукции на территории Российской Федерации"

    @staticmethod
    def _api_payload(*files):
        """Wrap (name, url) pairs into the payload handed to the mocked get_json."""
        return {
            "data": [
                {
                    "name": IndustrialProductionClientTest.REGISTRY_NAME,
                    "files": [{"name": name, "url": url} for name, url in files],
                }
            ]
        }

    def test_client_initialization(self):
        """A default client has no proxies and targets the expected host."""
        industrial_client = IndustrialProductionClient()

        self.assertIsNone(industrial_client.proxies)
        self.assertEqual(industrial_client.host, "minpromtorg.gov.ru")

    def test_client_with_proxies(self):
        """The proxy list passed to the constructor is exposed unchanged."""
        proxy_pool = ["http://proxy1:8080", "http://proxy2:8080"]

        industrial_client = IndustrialProductionClient(proxies=proxy_pool)

        self.assertEqual(industrial_client.proxies, proxy_pool)

    def test_context_manager(self):
        """Entering the client as a context manager yields the client type."""
        with IndustrialProductionClient() as industrial_client:
            self.assertIsInstance(industrial_client, IndustrialProductionClient)

    @patch.object(BaseHTTPClient, "get_json")
    @patch.object(BaseHTTPClient, "download_file")
    def test_fetch_certificates_success(self, mock_download, mock_get_json):
        """fetch_certificates parses every data row of the downloaded workbook."""
        # The API lists a single file; the download returns a generated workbook.
        mock_get_json.return_value = self._api_payload(
            ("data_resolutions_20240101.xlsx", "/files/test.xlsx"),
        )
        mock_download.return_value = _create_test_excel_certificates()

        with IndustrialProductionClient() as industrial_client:
            parsed = industrial_client.fetch_certificates()

        self.assertEqual(len(parsed), 5)
        first = parsed[0]
        self.assertIsInstance(first, IndustrialCertificate)
        self.assertEqual(first.certificate_number, "CERT-0000")

    @patch.object(BaseHTTPClient, "get_json")
    def test_fetch_certificates_no_files(self, mock_get_json):
        """fetch_certificates returns an empty list when the API lists nothing."""
        mock_get_json.return_value = {"data": []}

        with IndustrialProductionClient() as industrial_client:
            parsed = industrial_client.fetch_certificates()

        self.assertEqual(parsed, [])

    @patch.object(BaseHTTPClient, "get_json")
    def test_get_latest_file_url_selects_newest(self, mock_get_json):
        """The file whose name carries the latest date is selected."""
        mock_get_json.return_value = self._api_payload(
            ("data_resolutions_20240101.xlsx", "/files/old.xlsx"),
            ("data_resolutions_20240315.xlsx", "/files/new.xlsx"),
            ("data_resolutions_20240201.xlsx", "/files/mid.xlsx"),
        )
        industrial_client = IndustrialProductionClient()

        listing = industrial_client._fetch_files_list()
        latest_url = industrial_client._get_latest_file_url(listing)

        self.assertIn("new.xlsx", latest_url)

    def test_parse_row_valid(self):
        """A complete seven-column row is parsed into an IndustrialCertificate."""
        complete_row = (
            "2024-01-01",
            "CERT-123",
            "2025-01-01",
            "https://example.com/cert.pdf",
            "Test Company",
            "1234567890",
            "1234567890123",
        )

        parsed = IndustrialProductionClient()._parse_row(complete_row)

        self.assertIsInstance(parsed, IndustrialCertificate)
        self.assertEqual(parsed.certificate_number, "CERT-123")
        self.assertEqual(parsed.inn, "1234567890")

    def test_parse_row_invalid(self):
        """A row with too few columns is parsed to None."""
        short_row = ("only", "two")

        parsed = IndustrialProductionClient()._parse_row(short_row)

        self.assertIsNone(parsed)
class ManufacturesClientTest(TestCase):
    """Unit tests for ManufacturesClient with HTTP access mocked out."""

    # Registry title placed into the mocked API payloads.
    REGISTRY_NAME = "Производители промышленной продукции"

    @staticmethod
    def _api_payload(*files):
        """Wrap (name, url) pairs into the payload handed to the mocked get_json."""
        return {
            "data": [
                {
                    "name": ManufacturesClientTest.REGISTRY_NAME,
                    "files": [{"name": name, "url": url} for name, url in files],
                }
            ]
        }

    def test_client_initialization(self):
        """A default client has no proxies and targets the expected host."""
        manufactures_client = ManufacturesClient()

        self.assertIsNone(manufactures_client.proxies)
        self.assertEqual(manufactures_client.host, "minpromtorg.gov.ru")

    def test_client_with_proxies(self):
        """The proxy list passed to the constructor is exposed unchanged."""
        proxy_pool = ["http://proxy1:8080", "http://proxy2:8080"]

        manufactures_client = ManufacturesClient(proxies=proxy_pool)

        self.assertEqual(manufactures_client.proxies, proxy_pool)

    def test_context_manager(self):
        """Entering the client as a context manager yields the client type."""
        with ManufacturesClient() as manufactures_client:
            self.assertIsInstance(manufactures_client, ManufacturesClient)

    @patch.object(BaseHTTPClient, "get_json")
    @patch.object(BaseHTTPClient, "download_file")
    def test_fetch_manufacturers_success(self, mock_download, mock_get_json):
        """fetch_manufacturers parses every data row of the downloaded workbook."""
        # The API lists a single file; the download returns a generated workbook.
        mock_get_json.return_value = self._api_payload(
            ("data_orgs_20240101.xlsx", "/files/test.xlsx"),
        )
        mock_download.return_value = _create_test_excel_manufacturers()

        with ManufacturesClient() as manufactures_client:
            parsed = manufactures_client.fetch_manufacturers()

        self.assertEqual(len(parsed), 5)
        first = parsed[0]
        self.assertIsInstance(first, Manufacturer)
        self.assertEqual(first.full_legal_name, "Manufacturer 0 LLC")

    @patch.object(BaseHTTPClient, "get_json")
    def test_fetch_manufacturers_no_files(self, mock_get_json):
        """fetch_manufacturers returns an empty list when the API lists nothing."""
        mock_get_json.return_value = {"data": []}

        with ManufacturesClient() as manufactures_client:
            parsed = manufactures_client.fetch_manufacturers()

        self.assertEqual(parsed, [])

    @patch.object(BaseHTTPClient, "get_json")
    def test_get_latest_file_url_selects_newest(self, mock_get_json):
        """The file whose name carries the latest date is selected."""
        mock_get_json.return_value = self._api_payload(
            ("data_orgs_20240101.xlsx", "/files/old.xlsx"),
            ("data_orgs_20240315.xlsx", "/files/new.xlsx"),
            ("data_orgs_20240201.xlsx", "/files/mid.xlsx"),
        )
        manufactures_client = ManufacturesClient()

        listing = manufactures_client._fetch_files_list()
        latest_url = manufactures_client._get_latest_file_url(listing)

        self.assertIn("new.xlsx", latest_url)

    def test_parse_row_valid(self):
        """A four-column row is parsed into a Manufacturer."""
        complete_row = ("Test Company LLC", "1234567890", "1234567890123", "Test Address")

        parsed = ManufacturesClient()._parse_row(complete_row)

        self.assertIsInstance(parsed, Manufacturer)
        self.assertEqual(parsed.full_legal_name, "Test Company LLC")
        self.assertEqual(parsed.inn, "1234567890")

    def test_parse_row_without_address(self):
        """A row lacking the address column is parsed with an empty address."""
        row_without_address = ("Test Company LLC", "1234567890", "1234567890123")

        parsed = ManufacturesClient()._parse_row(row_without_address)

        self.assertIsInstance(parsed, Manufacturer)
        self.assertEqual(parsed.address, "")
@tag("integration", "slow", "network")
class IndustrialProductionClientIntegrationTest(TestCase):
    """
    Integration tests that download real data.

    WARNING: these tests issue real HTTP requests to external servers.
    Run them by tag: python manage.py test --tag=integration
    """

    def test_fetch_certificates_real_data(self):
        """
        Integration test: download real certificates from gisp.gov.ru.

        The test:
        1. Connects to the real API
        2. Downloads the Excel file
        3. Parses the data
        4. Checks the structure of the result

        It may take a while and depends on the external server being reachable.
        """
        try:
            with IndustrialProductionClient(timeout=120) as client:
                certificates = client.fetch_certificates()

            # A list must come back, even if it is empty.
            self.assertIsInstance(certificates, list)

            if not certificates:
                print("\n[INTEGRATION] No certificates found (API may be unavailable)")
                return

            # Data is present: verify the structure of the first record.
            cert = certificates[0]
            self.assertIsInstance(cert, IndustrialCertificate)
            for value in (cert.certificate_number, cert.inn, cert.organisation_name):
                self.assertIsNotNone(value)

            # Informational output only.
            print(f"\n[INTEGRATION] Loaded {len(certificates)} certificates")
            print(f"[INTEGRATION] First certificate: {cert.certificate_number}")
            print(f"[INTEGRATION] Organisation: {cert.organisation_name}")
        except HTTPClientError as e:
            # The API may be down; for an integration test that means skip, not fail.
            self.skipTest(f"External API unavailable: {e}")
@tag("integration", "slow", "network")
class ManufacturesClientIntegrationTest(TestCase):
    """
    Integration tests for the manufacturers client.

    WARNING: these tests issue real HTTP requests to external servers.
    Run them by tag: python manage.py test --tag=integration
    """

    def test_fetch_manufacturers_real_data(self):
        """
        Integration test: download real manufacturers from gisp.gov.ru.
        """
        try:
            with ManufacturesClient(timeout=120) as client:
                manufacturers = client.fetch_manufacturers()

            # A list must come back, even if it is empty.
            self.assertIsInstance(manufacturers, list)

            if not manufacturers:
                print("\n[INTEGRATION] No manufacturers found (API may be unavailable)")
                return

            # Data is present: verify the structure of the first record.
            m = manufacturers[0]
            self.assertIsInstance(m, Manufacturer)
            for value in (m.full_legal_name, m.inn):
                self.assertIsNotNone(value)

            # Informational output only.
            print(f"\n[INTEGRATION] Loaded {len(manufacturers)} manufacturers")
            print(f"[INTEGRATION] First manufacturer: {m.full_legal_name}")
            print(f"[INTEGRATION] INN: {m.inn}")
        except HTTPClientError as e:
            # The API may be down; for an integration test that means skip, not fail.
            self.skipTest(f"External API unavailable: {e}")
def _create_test_xml_inspections() -> bytes:
    """Create a test XML document with two inspection records (English tags).

    The previous payload consisted of bare values with no element markup, so it
    was not an XML document and could not yield the two records the tests
    expect. The values are unchanged; each one is now wrapped in an element.

    NOTE(review): the element names were reconstructed. They mirror the
    ``Inspection`` attribute names asserted by the tests in this module
    (``registration_number``, ``inn``, ``organisation_name``,
    ``control_authority``, ``inspection_type``, ``legal_basis``); the remaining
    names (``ogrn``, ``inspection_form``, ``start_date``, ``end_date``,
    ``status``, ``result``) and the ``inspections``/``inspection`` wrappers are
    assumptions. Confirm all of them against the tag mapping that
    ProverkiClient actually accepts.

    Returns:
        The document encoded as UTF-8 bytes.
    """
    xml_content = """
<inspections>
<inspection>
<registration_number>772024000001</registration_number>
<inn>7701234567</inn>
<ogrn>1027700000001</ogrn>
<organisation_name>ООО "Тест Компания 1"</organisation_name>
<control_authority>Роспотребнадзор</control_authority>
<inspection_type>плановая</inspection_type>
<inspection_form>документарная</inspection_form>
<start_date>2024-01-15</start_date>
<end_date>2024-01-30</end_date>
<status>завершена</status>
<legal_basis>294-ФЗ</legal_basis>
<result>нарушения не выявлены</result>
</inspection>
<inspection>
<registration_number>772024000002</registration_number>
<inn>7702345678</inn>
<ogrn>1027700000002</ogrn>
<organisation_name>АО "Тест Компания 2"</organisation_name>
<control_authority>Ростехнадзор</control_authority>
<inspection_type>внеплановая</inspection_type>
<inspection_form>выездная</inspection_form>
<start_date>2024-02-01</start_date>
<end_date>2024-02-15</end_date>
<status>завершена</status>
<legal_basis>248-ФЗ</legal_basis>
<result>выявлены нарушения</result>
</inspection>
</inspections>
"""
    return xml_content.encode("utf-8")
def _create_test_xml_inspections_russian_tags() -> bytes:
    """Create a test XML document with one inspection record (Russian tags).

    Every closing tag in the previous payload had lost its ``</`` prefix
    (for example ``УчетныйНомер>``), which left the document not well-formed.
    The closing tags are restored; element names and values are unchanged.

    Returns:
        The document encoded as UTF-8 bytes.
    """
    xml_content = """
<Проверки>
<КНМ>
<УчетныйНомер>772024000003</УчетныйНомер>
<ИНН>7703456789</ИНН>
<ОГРН>1027700000003</ОГРН>
<Наименование>ПАО "Тест Компания 3"</Наименование>
<КонтрольныйОрган>МЧС России</КонтрольныйОрган>
<ТипПроверки>плановая</ТипПроверки>
<ФормаПроверки>документарная и выездная</ФормаПроверки>
<ДатаНачала>2024-03-01</ДатаНачала>
<ДатаОкончания>2024-03-20</ДатаОкончания>
<Статус>в процессе</Статус>
<ПравовоеОснование>294-ФЗ</ПравовоеОснование>
</КНМ>
</Проверки>"""
    return xml_content.encode("utf-8")
class ProverkiClientTest(TestCase):
    """Tests for ProverkiClient.

    NOTE(review): three inline XML fixtures in this class had lost their
    markup (two were empty strings, which ``ET.fromstring`` rejects with a
    ParseError before the client is ever called). They are reconstructed
    here. The element and attribute names mirror the ``Inspection`` attribute
    names asserted below and are assumptions; confirm them against the tag
    mapping that ProverkiClient actually accepts.
    """

    def test_client_initialization(self):
        """Test client initializes correctly."""
        client = ProverkiClient()

        self.assertIsNone(client.proxies)
        self.assertEqual(client.host, "proverki.gov.ru")

    def test_client_with_proxies(self):
        """Test client accepts proxy list."""
        proxies = ["http://proxy1:8080", "http://proxy2:8080"]
        client = ProverkiClient(proxies=proxies)

        self.assertEqual(client.proxies, proxies)

    def test_context_manager(self):
        """Test client works as context manager."""
        with ProverkiClient() as client:
            self.assertIsInstance(client, ProverkiClient)

    def test_parse_xml_content_english_tags(self):
        """Test parsing XML with English tag names."""
        client = ProverkiClient()
        xml_content = _create_test_xml_inspections()

        inspections = client._parse_xml_content(xml_content, None)

        self.assertEqual(len(inspections), 2)
        self.assertIsInstance(inspections[0], Inspection)
        self.assertEqual(inspections[0].registration_number, "772024000001")
        self.assertEqual(inspections[0].inn, "7701234567")
        self.assertEqual(inspections[0].organisation_name, 'ООО "Тест Компания 1"')
        self.assertEqual(inspections[0].control_authority, "Роспотребнадзор")
        self.assertEqual(inspections[0].inspection_type, "плановая")
        self.assertEqual(inspections[0].legal_basis, "294-ФЗ")

    def test_parse_xml_content_russian_tags(self):
        """Test parsing XML with Russian tag names."""
        client = ProverkiClient()
        xml_content = _create_test_xml_inspections_russian_tags()

        inspections = client._parse_xml_content(xml_content, None)

        self.assertEqual(len(inspections), 1)
        self.assertIsInstance(inspections[0], Inspection)
        self.assertEqual(inspections[0].registration_number, "772024000003")
        self.assertEqual(inspections[0].inn, "7703456789")
        self.assertEqual(inspections[0].control_authority, "МЧС России")

    def test_parse_xml_record_with_attributes(self):
        """Test parsing XML record with attributes instead of child elements."""
        from xml.etree import ElementTree as ET

        client = ProverkiClient()
        # NOTE(review): attribute names are reconstructed from the assertions
        # below - confirm against the names the parser reads.
        xml_str = '<inspection inn="1234567890" registration_number="TEST123"/>'
        element = ET.fromstring(xml_str)

        result = client._parse_xml_record(element)

        self.assertIsNotNone(result)
        self.assertEqual(result.inn, "1234567890")
        self.assertEqual(result.registration_number, "TEST123")

    def test_parse_xml_record_invalid(self):
        """Test parsing invalid XML record returns None."""
        from xml.etree import ElementTree as ET

        client = ProverkiClient()
        # A well-formed element carrying no inspection data at all; an empty
        # string cannot be used because it is not parseable XML.
        xml_str = "<inspection/>"
        element = ET.fromstring(xml_str)

        result = client._parse_xml_record(element)

        self.assertIsNone(result)

    def test_parse_windows_1251_encoding(self):
        """Test parsing XML with Windows-1251 encoding."""
        client = ProverkiClient()
        # The declaration must be the very first bytes of the document so the
        # declared encoding matches the bytes produced by encode().
        # NOTE(review): element names are reconstructed - confirm.
        xml_content = """<?xml version="1.0" encoding="windows-1251"?>
<inspections>
<inspection>
<inn>1234567890</inn>
<registration_number>TEST001</registration_number>
<organisation_name>Компания</organisation_name>
</inspection>
</inspections>
""".encode("windows-1251")

        inspections = client._parse_xml_content(xml_content, None)

        self.assertEqual(len(inspections), 1)
        self.assertEqual(inspections[0].organisation_name, "Компания")

    @patch.object(BaseHTTPClient, "download_file")
    @patch.object(ProverkiClient, "_discover_data_files")
    def test_fetch_inspections_with_file_url(self, mock_discover, mock_download):
        """Test fetching inspections with direct file URL."""
        mock_download.return_value = _create_test_xml_inspections()

        with ProverkiClient() as client:
            inspections = client.fetch_inspections(
                file_url="https://proverki.gov.ru/opendata/test.xml"
            )

        self.assertEqual(len(inspections), 2)
        # With an explicit URL the discovery step must be bypassed entirely.
        mock_discover.assert_not_called()

    @patch.object(ProverkiClient, "_discover_data_files")
    def test_fetch_inspections_no_files(self, mock_discover):
        """Test returns empty list when no files found."""
        mock_discover.return_value = []

        with ProverkiClient() as client:
            inspections = client.fetch_inspections(year=2025)

        self.assertEqual(inspections, [])
@tag("integration", "slow", "network")
class ProverkiClientIntegrationTest(TestCase):
    """
    Integration tests for the proverki.gov.ru client.

    WARNING: these tests issue real HTTP requests to external servers.
    Run them by tag: python manage.py test --tag=integration
    """

    def test_fetch_inspections_real_data(self):
        """
        Integration test: download real inspections from proverki.gov.ru.
        """
        try:
            with ProverkiClient(timeout=120) as client:
                inspections = client.fetch_inspections(year=2025)

            # A list must come back, even if it is empty.
            self.assertIsInstance(inspections, list)

            if not inspections:
                print(
                    "\n[INTEGRATION] No inspections found "
                    "(API may be unavailable or data format changed)"
                )
                return

            # Data is present: verify the structure of the first record.
            insp = inspections[0]
            self.assertIsInstance(insp, Inspection)
            for value in (insp.registration_number, insp.inn):
                self.assertIsNotNone(value)

            # Informational output only.
            print(f"\n[INTEGRATION] Loaded {len(inspections)} inspections")
            print(f"[INTEGRATION] First inspection: {insp.registration_number}")
            print(f"[INTEGRATION] Organisation: {insp.organisation_name}")
            print(f"[INTEGRATION] Control authority: {insp.control_authority}")
        except HTTPClientError as e:
            # The API may be down; skip instead of failing.
            self.skipTest(f"External API unavailable: {e}")