"""Tests for parsers clients.""" from io import BytesIO from unittest.mock import patch from django.test import TestCase, tag from faker import Faker from openpyxl import Workbook from apps.parsers.clients.base import BaseHTTPClient, HTTPClientError from apps.parsers.clients.minpromtorg.industrial import IndustrialProductionClient from apps.parsers.clients.minpromtorg.manufactures import ManufacturesClient from apps.parsers.clients.minpromtorg.schemas import IndustrialCertificate, Manufacturer from apps.parsers.clients.proverki import ProverkiClient from apps.parsers.clients.proverki.schemas import Inspection fake = Faker("ru_RU") class BaseHTTPClientTest(TestCase): """Tests for BaseHTTPClient.""" def test_client_initialization(self): """Test client initializes with defaults.""" client = BaseHTTPClient(base_url="https://example.com") self.assertEqual(client.base_url, "https://example.com") self.assertIsNone(client.proxies) self.assertEqual(client.timeout, 30) def test_client_with_proxies(self): """Test client initializes with proxy list.""" proxies = ["http://proxy1:8080", "http://proxy2:8080"] client = BaseHTTPClient(base_url="https://example.com", proxies=proxies) self.assertEqual(client.proxies, proxies) def test_select_proxy_returns_none_without_proxies(self): """Test _select_proxy returns None when no proxies.""" client = BaseHTTPClient(base_url="https://example.com") self.assertIsNone(client._select_proxy()) def test_select_proxy_returns_random_from_list(self): """Test _select_proxy returns proxy from list.""" proxies = ["http://proxy1:8080", "http://proxy2:8080"] client = BaseHTTPClient(base_url="https://example.com", proxies=proxies) selected = client._select_proxy() self.assertIn(selected, proxies) def test_current_proxy_property(self): """Test current_proxy property is None before session creation.""" proxies = ["http://proxy:8080"] client = BaseHTTPClient(base_url="https://example.com", proxies=proxies) # current_proxy is None until session is created proxy = client.current_proxy self.assertIsNone(proxy) # After accessing session, proxy should be set _ = client.session proxy = client.current_proxy self.assertEqual(proxy, "http://proxy:8080") def _create_test_excel_certificates() -> bytes: """Create test Excel file with certificate data.""" wb = Workbook() ws = wb.active # Header ws.append( [ "issue_date", "certificate_number", "expiry_date", "certificate_file_url", "organisation_name", "inn", "ogrn", ] ) # Data rows for i in range(5): ws.append( [ "2024-01-01", f"CERT-{i:04d}", "2025-01-01", f"https://example.com/cert{i}.pdf", f"Company {i} LLC", f"123456789{i}", f"123456789012{i}", ] ) output = BytesIO() wb.save(output) output.seek(0) return output.read() def _create_test_excel_manufacturers() -> bytes: """Create test Excel file with manufacturer data.""" wb = Workbook() ws = wb.active # Header ws.append(["full_legal_name", "inn", "ogrn", "address"]) # Data rows for i in range(5): ws.append( [ f"Manufacturer {i} LLC", f"123456789{i}", f"123456789012{i}", f"Address {i}, City", ] ) output = BytesIO() wb.save(output) output.seek(0) return output.read() class IndustrialProductionClientTest(TestCase): """Tests for IndustrialProductionClient.""" def test_client_initialization(self): """Test client initializes correctly.""" client = IndustrialProductionClient() self.assertIsNone(client.proxies) self.assertEqual(client.host, "minpromtorg.gov.ru") def test_client_with_proxies(self): """Test client accepts proxy list.""" proxies = ["http://proxy1:8080", "http://proxy2:8080"] client = IndustrialProductionClient(proxies=proxies) self.assertEqual(client.proxies, proxies) def test_context_manager(self): """Test client works as context manager.""" with IndustrialProductionClient() as client: self.assertIsInstance(client, IndustrialProductionClient) @patch.object(BaseHTTPClient, "get_json") @patch.object(BaseHTTPClient, "download_file") def test_fetch_certificates_success(self, mock_download, mock_get_json): """Test successful certificate fetching.""" # Mock API response mock_get_json.return_value = { "data": [ { "name": "Заключения о подтверждении производства промышленной продукции на территории Российской Федерации", "files": [ {"name": "data_resolutions_20240101.xlsx", "url": "/files/test.xlsx"}, ], } ] } # Mock Excel download mock_download.return_value = _create_test_excel_certificates() with IndustrialProductionClient() as client: certificates = client.fetch_certificates() self.assertEqual(len(certificates), 5) self.assertIsInstance(certificates[0], IndustrialCertificate) self.assertEqual(certificates[0].certificate_number, "CERT-0000") @patch.object(BaseHTTPClient, "get_json") def test_fetch_certificates_no_files(self, mock_get_json): """Test returns empty list when no files found.""" mock_get_json.return_value = {"data": []} with IndustrialProductionClient() as client: certificates = client.fetch_certificates() self.assertEqual(certificates, []) @patch.object(BaseHTTPClient, "get_json") def test_get_latest_file_url_selects_newest(self, mock_get_json): """Test selects file with latest date.""" mock_get_json.return_value = { "data": [ { "name": "Заключения о подтверждении производства промышленной продукции на территории Российской Федерации", "files": [ {"name": "data_resolutions_20240101.xlsx", "url": "/files/old.xlsx"}, {"name": "data_resolutions_20240315.xlsx", "url": "/files/new.xlsx"}, {"name": "data_resolutions_20240201.xlsx", "url": "/files/mid.xlsx"}, ], } ] } client = IndustrialProductionClient() files_data = client._fetch_files_list() url = client._get_latest_file_url(files_data) self.assertIn("new.xlsx", url) def test_parse_row_valid(self): """Test parsing valid row.""" client = IndustrialProductionClient() row = ( "2024-01-01", "CERT-123", "2025-01-01", "https://example.com/cert.pdf", "Test Company", "1234567890", "1234567890123", ) result = client._parse_row(row) self.assertIsInstance(result, IndustrialCertificate) self.assertEqual(result.certificate_number, "CERT-123") self.assertEqual(result.inn, "1234567890") def test_parse_row_invalid(self): """Test parsing invalid row returns None.""" client = IndustrialProductionClient() row = ("only", "two") # Not enough columns result = client._parse_row(row) self.assertIsNone(result) class ManufacturesClientTest(TestCase): """Tests for ManufacturesClient.""" def test_client_initialization(self): """Test client initializes correctly.""" client = ManufacturesClient() self.assertIsNone(client.proxies) self.assertEqual(client.host, "minpromtorg.gov.ru") def test_client_with_proxies(self): """Test client accepts proxy list.""" proxies = ["http://proxy1:8080", "http://proxy2:8080"] client = ManufacturesClient(proxies=proxies) self.assertEqual(client.proxies, proxies) def test_context_manager(self): """Test client works as context manager.""" with ManufacturesClient() as client: self.assertIsInstance(client, ManufacturesClient) @patch.object(BaseHTTPClient, "get_json") @patch.object(BaseHTTPClient, "download_file") def test_fetch_manufacturers_success(self, mock_download, mock_get_json): """Test successful manufacturer fetching.""" # Mock API response mock_get_json.return_value = { "data": [ { "name": "Производители промышленной продукции", "files": [ {"name": "data_orgs_20240101.xlsx", "url": "/files/test.xlsx"}, ], } ] } # Mock Excel download mock_download.return_value = _create_test_excel_manufacturers() with ManufacturesClient() as client: manufacturers = client.fetch_manufacturers() self.assertEqual(len(manufacturers), 5) self.assertIsInstance(manufacturers[0], Manufacturer) self.assertEqual(manufacturers[0].full_legal_name, "Manufacturer 0 LLC") @patch.object(BaseHTTPClient, "get_json") def test_fetch_manufacturers_no_files(self, mock_get_json): """Test returns empty list when no files found.""" mock_get_json.return_value = {"data": []} with ManufacturesClient() as client: manufacturers = client.fetch_manufacturers() self.assertEqual(manufacturers, []) @patch.object(BaseHTTPClient, "get_json") def test_get_latest_file_url_selects_newest(self, mock_get_json): """Test selects file with latest date.""" mock_get_json.return_value = { "data": [ { "name": "Производители промышленной продукции", "files": [ {"name": "data_orgs_20240101.xlsx", "url": "/files/old.xlsx"}, {"name": "data_orgs_20240315.xlsx", "url": "/files/new.xlsx"}, {"name": "data_orgs_20240201.xlsx", "url": "/files/mid.xlsx"}, ], } ] } client = ManufacturesClient() files_data = client._fetch_files_list() url = client._get_latest_file_url(files_data) self.assertIn("new.xlsx", url) def test_parse_row_valid(self): """Test parsing valid row.""" client = ManufacturesClient() row = ("Test Company LLC", "1234567890", "1234567890123", "Test Address") result = client._parse_row(row) self.assertIsInstance(result, Manufacturer) self.assertEqual(result.full_legal_name, "Test Company LLC") self.assertEqual(result.inn, "1234567890") def test_parse_row_without_address(self): """Test parsing row without address.""" client = ManufacturesClient() row = ("Test Company LLC", "1234567890", "1234567890123") result = client._parse_row(row) self.assertIsInstance(result, Manufacturer) self.assertEqual(result.address, "") @tag("integration", "slow", "network") class IndustrialProductionClientIntegrationTest(TestCase): """ Интеграционные тесты с реальной загрузкой данных. ВНИМАНИЕ: Эти тесты делают реальные HTTP запросы к внешним серверам. Запускать с тегом: python manage.py test --tag=integration """ def test_fetch_certificates_real_data(self): """ Интеграционный тест: реальная загрузка сертификатов с gisp.gov.ru. Этот тест: 1. Подключается к реальному API 2. Скачивает Excel файл 3. Парсит данные 4. Проверяет структуру результата Тест может занять время и зависит от доступности внешнего сервера. """ try: with IndustrialProductionClient(timeout=120) as client: certificates = client.fetch_certificates() # Проверяем что данные получены self.assertIsInstance(certificates, list) # Если данные есть - проверяем структуру if certificates: cert = certificates[0] self.assertIsInstance(cert, IndustrialCertificate) self.assertIsNotNone(cert.certificate_number) self.assertIsNotNone(cert.inn) self.assertIsNotNone(cert.organisation_name) # Логируем для информации print(f"\n[INTEGRATION] Loaded {len(certificates)} certificates") print(f"[INTEGRATION] First certificate: {cert.certificate_number}") print(f"[INTEGRATION] Organisation: {cert.organisation_name}") else: print("\n[INTEGRATION] No certificates found (API may be unavailable)") except HTTPClientError as e: # API может быть недоступен - это ожидаемое поведение для интеграционных тестов self.skipTest(f"External API unavailable: {e}") @tag("integration", "slow", "network") class ManufacturesClientIntegrationTest(TestCase): """ Интеграционные тесты для клиента производителей. ВНИМАНИЕ: Эти тесты делают реальные HTTP запросы к внешним серверам. Запускать с тегом: python manage.py test --tag=integration """ def test_fetch_manufacturers_real_data(self): """ Интеграционный тест: реальная загрузка производителей с gisp.gov.ru. """ try: with ManufacturesClient(timeout=120) as client: manufacturers = client.fetch_manufacturers() # Проверяем что данные получены self.assertIsInstance(manufacturers, list) # Если данные есть - проверяем структуру if manufacturers: m = manufacturers[0] self.assertIsInstance(m, Manufacturer) self.assertIsNotNone(m.full_legal_name) self.assertIsNotNone(m.inn) # Логируем для информации print(f"\n[INTEGRATION] Loaded {len(manufacturers)} manufacturers") print(f"[INTEGRATION] First manufacturer: {m.full_legal_name}") print(f"[INTEGRATION] INN: {m.inn}") else: print("\n[INTEGRATION] No manufacturers found (API may be unavailable)") except HTTPClientError as e: # API может быть недоступен - это ожидаемое поведение для интеграционных тестов self.skipTest(f"External API unavailable: {e}") def _create_test_xml_inspections() -> bytes: """Create test XML file with inspection data.""" xml_content = """ 772024000001 7701234567 1027700000001 ООО "Тест Компания 1" Роспотребнадзор плановая документарная 2024-01-15 2024-01-30 завершена 294-ФЗ нарушения не выявлены 772024000002 7702345678 1027700000002 АО "Тест Компания 2" Ростехнадзор внеплановая выездная 2024-02-01 2024-02-15 завершена 248-ФЗ выявлены нарушения """ return xml_content.encode("utf-8") def _create_test_xml_inspections_russian_tags() -> bytes: """Create test XML with Russian tag names.""" xml_content = """ <Проверки> <КНМ> <УчетныйНомер>772024000003 <ИНН>7703456789 <ОГРН>1027700000003 <Наименование>ПАО "Тест Компания 3" <КонтрольныйОрган>МЧС России <ТипПроверки>плановая <ФормаПроверки>документарная и выездная <ДатаНачала>2024-03-01 <ДатаОкончания>2024-03-20 <Статус>в процессе <ПравовоеОснование>294-ФЗ """ return xml_content.encode("utf-8") class ProverkiClientTest(TestCase): """Tests for ProverkiClient.""" def test_client_initialization(self): """Test client initializes correctly.""" client = ProverkiClient() self.assertIsNone(client.proxies) self.assertEqual(client.host, "proverki.gov.ru") def test_client_with_proxies(self): """Test client accepts proxy list.""" proxies = ["http://proxy1:8080", "http://proxy2:8080"] client = ProverkiClient(proxies=proxies) self.assertEqual(client.proxies, proxies) def test_context_manager(self): """Test client works as context manager.""" with ProverkiClient() as client: self.assertIsInstance(client, ProverkiClient) def test_parse_xml_content_english_tags(self): """Test parsing XML with English tag names.""" client = ProverkiClient() xml_content = _create_test_xml_inspections() inspections = client._parse_xml_content(xml_content, None) self.assertEqual(len(inspections), 2) self.assertIsInstance(inspections[0], Inspection) self.assertEqual(inspections[0].registration_number, "772024000001") self.assertEqual(inspections[0].inn, "7701234567") self.assertEqual(inspections[0].organisation_name, 'ООО "Тест Компания 1"') self.assertEqual(inspections[0].control_authority, "Роспотребнадзор") self.assertEqual(inspections[0].inspection_type, "плановая") self.assertEqual(inspections[0].legal_basis, "294-ФЗ") def test_parse_xml_content_russian_tags(self): """Test parsing XML with Russian tag names.""" client = ProverkiClient() xml_content = _create_test_xml_inspections_russian_tags() inspections = client._parse_xml_content(xml_content, None) self.assertEqual(len(inspections), 1) self.assertIsInstance(inspections[0], Inspection) self.assertEqual(inspections[0].registration_number, "772024000003") self.assertEqual(inspections[0].inn, "7703456789") self.assertEqual(inspections[0].control_authority, "МЧС России") def test_parse_xml_record_with_attributes(self): """Test parsing XML record with attributes instead of child elements.""" from xml.etree import ElementTree as ET client = ProverkiClient() xml_str = '' element = ET.fromstring(xml_str) result = client._parse_xml_record(element) self.assertIsNotNone(result) self.assertEqual(result.inn, "1234567890") self.assertEqual(result.registration_number, "TEST123") def test_parse_xml_record_invalid(self): """Test parsing invalid XML record returns None.""" from xml.etree import ElementTree as ET client = ProverkiClient() xml_str = "" element = ET.fromstring(xml_str) result = client._parse_xml_record(element) self.assertIsNone(result) def test_parse_windows_1251_encoding(self): """Test parsing XML with Windows-1251 encoding.""" client = ProverkiClient() xml_content = """ 1234567890 TEST001 Компания """.encode( "windows-1251" ) inspections = client._parse_xml_content(xml_content, None) self.assertEqual(len(inspections), 1) self.assertEqual(inspections[0].organisation_name, "Компания") @patch.object(BaseHTTPClient, "download_file") @patch.object(ProverkiClient, "_discover_data_files") def test_fetch_inspections_with_file_url(self, mock_discover, mock_download): """Test fetching inspections with direct file URL.""" mock_download.return_value = _create_test_xml_inspections() with ProverkiClient() as client: inspections = client.fetch_inspections( file_url="https://proverki.gov.ru/opendata/test.xml" ) self.assertEqual(len(inspections), 2) mock_discover.assert_not_called() # Should not discover files when URL provided @patch.object(ProverkiClient, "_discover_data_files") def test_fetch_inspections_no_files(self, mock_discover): """Test returns empty list when no files found.""" mock_discover.return_value = [] with ProverkiClient() as client: inspections = client.fetch_inspections(year=2025) self.assertEqual(inspections, []) @tag("integration", "slow", "network") class ProverkiClientIntegrationTest(TestCase): """ Интеграционные тесты для клиента proverki.gov.ru. ВНИМАНИЕ: Эти тесты делают реальные HTTP запросы к внешним серверам. Запускать с тегом: python manage.py test --tag=integration """ def test_fetch_inspections_real_data(self): """ Интеграционный тест: реальная загрузка проверок с proverki.gov.ru. """ try: with ProverkiClient(timeout=120) as client: inspections = client.fetch_inspections(year=2025) # Проверяем что данные получены self.assertIsInstance(inspections, list) # Если данные есть - проверяем структуру if inspections: insp = inspections[0] self.assertIsInstance(insp, Inspection) self.assertIsNotNone(insp.registration_number) self.assertIsNotNone(insp.inn) # Логируем для информации print(f"\n[INTEGRATION] Loaded {len(inspections)} inspections") print(f"[INTEGRATION] First inspection: {insp.registration_number}") print(f"[INTEGRATION] Organisation: {insp.organisation_name}") print(f"[INTEGRATION] Control authority: {insp.control_authority}") else: print( "\n[INTEGRATION] No inspections found " "(API may be unavailable or data format changed)" ) except HTTPClientError as e: # API может быть недоступен self.skipTest(f"External API unavailable: {e}")