"""Tests for parsers clients."""

import json
import zipfile
from io import BytesIO
from unittest.mock import Mock, patch
from xml.etree import ElementTree as ET

from apps.parsers.clients.base import BaseHTTPClient, HTTPClientError
from apps.parsers.clients.common import (
    GenericParserItem,
    StructuredDataClient,
    StructuredDataClientError,
)
from apps.parsers.clients.minpromtorg.industrial import IndustrialProductionClient
from apps.parsers.clients.minpromtorg.manufactures import ManufacturesClient
from apps.parsers.clients.minpromtorg.schemas import IndustrialCertificate, Manufacturer
from apps.parsers.clients.proverki import ProverkiClient
from apps.parsers.clients.proverki.schemas import Inspection
from apps.parsers.clients.trudvsem import TrudvsemClient
from apps.parsers.models import ParserLoadLog
from django.test import TestCase, tag
from faker import Faker
from openpyxl import Workbook

fake = Faker("ru_RU")

# NOTE(review): several HTML/XML fixture literals below (marked "stripped
# fixture") appear to have lost their markup in the reviewed copy: the
# assertions expect records parsed from tags/table cells that the literals no
# longer contain. Their values are kept exactly as reviewed -- restore the
# original markup from version control before relying on these tests.

# Dataset names used to build the mocked minpromtorg file listings.
_CERTIFICATES_DATASET_NAME = (
    "Заключения о подтверждении производства промышленной продукции "
    "на территории Российской Федерации"
)
_MANUFACTURERS_DATASET_NAME = "Производители промышленной продукции"


def _mock_stream_response(chunks, content_length):
    """Build a mocked successful streaming HTTP response.

    Args:
        chunks: Value returned by ``response.iter_content()``.
        content_length: Value of the ``Content-Length`` response header.

    Returns:
        A ``Mock`` with ``ok``, ``headers``, ``iter_content`` and ``close`` set.
    """
    response = Mock()
    response.ok = True
    response.headers = {"Content-Length": content_length}
    response.iter_content.return_value = chunks
    response.close = Mock()
    return response


class BaseHTTPClientTest(TestCase):
    """Tests for BaseHTTPClient."""

    def test_client_initialization(self):
        """Test client initializes with defaults."""
        client = BaseHTTPClient(base_url="https://example.com")

        self.assertEqual(client.base_url, "https://example.com")
        self.assertIsNone(client.proxies)
        self.assertEqual(client.timeout, 30)

    def test_client_with_proxies(self):
        """Test client initializes with proxy list."""
        proxies = ["http://proxy1:8080", "http://proxy2:8080"]
        client = BaseHTTPClient(base_url="https://example.com", proxies=proxies)

        self.assertEqual(client.proxies, proxies)

    def test_select_proxy_returns_none_without_proxies(self):
        """Test _select_proxy returns None when no proxies."""
        client = BaseHTTPClient(base_url="https://example.com")

        self.assertIsNone(client._select_proxy())

    def test_select_proxy_returns_random_from_list(self):
        """Test _select_proxy returns proxy from list."""
        proxies = ["http://proxy1:8080", "http://proxy2:8080"]
        client = BaseHTTPClient(base_url="https://example.com", proxies=proxies)

        selected = client._select_proxy()

        self.assertIn(selected, proxies)

    def test_current_proxy_property(self):
        """Test current_proxy property is None before session creation."""
        proxies = ["http://proxy:8080"]
        client = BaseHTTPClient(base_url="https://example.com", proxies=proxies)

        # current_proxy is None until session is created
        proxy = client.current_proxy
        self.assertIsNone(proxy)

        # After accessing session, proxy should be set
        _ = client.session
        proxy = client.current_proxy
        self.assertEqual(proxy, "http://proxy:8080")

    def test_download_file_rejects_large_content_length_before_body_read(self):
        """Test download_file checks Content-Length before reading response body."""
        client = BaseHTTPClient(base_url="https://example.com")
        response = _mock_stream_response([b"too-large"], "10")
        client.session.get = Mock(return_value=response)

        with self.assertRaises(HTTPClientError):
            client.download_file("/data.csv", max_size_bytes=5)

        response.iter_content.assert_not_called()
        response.close.assert_called_once()

    def test_download_file_passes_ssl_verification_flag(self):
        """Test download_file can disable SSL verification for broken upstream TLS."""
        client = BaseHTTPClient(base_url="https://example.com", verify_ssl=False)
        response = _mock_stream_response([b"data"], "4")
        client.session.get = Mock(return_value=response)

        content = client.download_file("/data.csv")

        self.assertEqual(content, b"data")
        client.session.get.assert_called_once_with(
            "https://example.com/data.csv",
            stream=True,
            timeout=30,
            verify=False,
        )


class StructuredDataClientTest(TestCase):
    """Tests for StructuredDataClient."""

    def test_parse_json_records(self):
        """Test JSON parsing and normalization."""
        payload = {
            "data": [
                {
                    "id": "FIN-1",
                    "inn": "1234567890",
                    "ogrn": "1234567890123",
                    "name": "Test Company",
                    "amount": "10 500,50",
                    "date": "2024",
                }
            ]
        }
        client = StructuredDataClient(source=ParserLoadLog.Source.FNS_FINANCIAL)

        records = client.fetch_records(
            content=json.dumps(payload).encode("utf-8"),
            file_name="data.json",
        )

        self.assertEqual(len(records), 1)
        self.assertIsInstance(records[0], GenericParserItem)
        self.assertEqual(records[0].external_id, "FIN-1")
        self.assertEqual(records[0].inn, "1234567890")
        self.assertEqual(str(records[0].amount), "10500.50")

    def test_parse_csv_records(self):
        """Test CSV parsing with Russian headers."""
        content = (
            "реестровый номер;ИНН;наименование;сумма\n"
            "RN-1;1234567890;ООО Тест;1000.00\n"
        ).encode("cp1251")
        client = StructuredDataClient(source=ParserLoadLog.Source.UNFAIR_SUPPLIERS)

        records = client.fetch_records(content=content, file_name="data.csv")

        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].external_id, "RN-1")
        self.assertEqual(records[0].organisation_name, "ООО Тест")

    def test_parse_xml_records_under_wrapper(self):
        """Test XML parser selects repeated nested record elements."""
        # NOTE(review): stripped fixture -- XML tags are missing; restore from VCS.
        content = (
            ""
            "XML-1123ООО А"
            "XML-2456ООО Б"
            ""
        ).encode()
        client = StructuredDataClient(source=ParserLoadLog.Source.FNS_FINANCIAL)

        records = client.fetch_records(content=content, file_name="data.xml")

        self.assertEqual(len(records), 2)
        self.assertEqual(records[0].external_id, "XML-1")
        self.assertEqual(records[1].external_id, "XML-2")

    def test_json_payload_preserves_nested_objects(self):
        """Test payload keeps nested JSON structures machine-readable."""
        payload = {
            "data": [
                {
                    "id": "NESTED-1",
                    "company": {"inn": "123", "name": "ООО А"},
                    "amounts": [1, 2],
                }
            ]
        }
        client = StructuredDataClient(source=ParserLoadLog.Source.FNS_FINANCIAL)

        record = client.fetch_records(
            content=json.dumps(payload).encode("utf-8"),
            file_name="data.json",
        )[0]

        self.assertEqual(record.payload["company"], {"inn": "123", "name": "ООО А"})
        self.assertEqual(record.payload["amounts"], [1, 2])

    def test_fallback_external_id_is_stable_after_reordering(self):
        """Test generated external_id does not depend on row position."""
        client = StructuredDataClient(source=ParserLoadLog.Source.FNS_FINANCIAL)

        first = client.fetch_records(
            content=("name;amount\nООО А;10\nООО Б;20\n").encode(),
            file_name="data.csv",
        )
        second = client.fetch_records(
            content=("name;amount\nООО Б;20\nООО А;10\n").encode(),
            file_name="data.csv",
        )

        # The same rows in swapped order must produce the same generated ids.
        self.assertEqual(first[0].external_id, second[1].external_id)
        self.assertEqual(first[1].external_id, second[0].external_id)

    def test_zip_rejects_too_many_supported_files(self):
        """Test ZIP parser refuses archives with too many supported files."""
        archive_content = BytesIO()
        with zipfile.ZipFile(archive_content, "w") as archive:
            archive.writestr("one.csv", "id\n1\n")
            archive.writestr("two.csv", "id\n2\n")
        client = StructuredDataClient(
            source=ParserLoadLog.Source.FNS_FINANCIAL,
            max_zip_entries=1,
        )

        with self.assertRaises(StructuredDataClientError):
            client.fetch_records(
                content=archive_content.getvalue(),
                file_name="data.zip",
            )

    def test_html_without_table_returns_empty_records(self):
        """Test generic parser does not treat HTML pages as malformed XML."""
        # NOTE(review): stripped fixture -- HTML tags are missing; restore from VCS.
        content = b"\nNo table\n"
        client = StructuredDataClient(source=ParserLoadLog.Source.ARBITRATION)

        records = client.fetch_records(content=content, file_name="")

        self.assertEqual(records, [])

    def test_html_table_after_long_head_is_detected(self):
        """Test HTML detection scans beyond the first kilobyte."""
        # NOTE(review): stripped fixture -- HTML tags are missing; restore from VCS.
        content = (
            ""
            + (" " * 1500)
            + ""
            ""
            ""
            "\nidinn\nHTML-11234567890\n"
        ).encode()
        client = StructuredDataClient(source=ParserLoadLog.Source.ARBITRATION)

        records = client.fetch_records(content=content, file_name="")

        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].external_id, "HTML-1")
        self.assertEqual(records[0].inn, "1234567890")

    def test_html_layout_table_without_headers_is_ignored(self):
        """Test layout/navigation tables are not imported as records."""
        # NOTE(review): stripped fixture -- HTML tags are missing; restore from VCS.
        content = (
            ""
            ""
            ""
            "\nКартотекаСтраж\nКалендарьМой Арбитр\n"
        ).encode()
        client = StructuredDataClient(source=ParserLoadLog.Source.ARBITRATION)

        records = client.fetch_records(content=content, file_name="")

        self.assertEqual(records, [])

    @patch.object(BaseHTTPClient, "post_json")
    def test_mpt_products_page_uses_official_search_api(self, mock_post_json):
        """Test GISP product page uses the official paginated UI API."""
        mock_post_json.return_value = {
            "ok": True,
            "total_count": 1,
            "items": [
                {
                    "org_name": "ООО Производитель",
                    "org_inn": "7701000000",
                    "org_ogrn": "1027700000000",
                    "product_reg_number_2023": "10165413",
                    "product_name": "Средство дезинфицирующее",
                    "res_date": "2026-04-25",
                    "product_gisp_url": "https://gisp.gov.ru/goods/#/product/1",
                }
            ],
        }
        client = StructuredDataClient(source=ParserLoadLog.Source.MPT_PRODUCTS)

        records = client.fetch_records(file_url="https://gisp.gov.ru/pp719v2/pub/prod/")

        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].external_id, "10165413")
        self.assertEqual(records[0].inn, "7701000000")
        self.assertEqual(records[0].organisation_name, "ООО Производитель")
        self.assertEqual(records[0].title, "Средство дезинфицирующее")
        # The first positional argument of the POST is the requested URL.
        self.assertEqual(
            mock_post_json.call_args.args[0],
            "https://gisp.gov.ru/pp719v2/pub/prod/b/",
        )

    def test_zakupki_cards_are_parsed_as_records(self):
        """Test EIS search cards are parsed when there is no HTML table."""
        # NOTE(review): stripped fixture -- HTML tags are missing; restore from VCS.
        content = """
№ 0331
Работа комиссии
Объект закупки
Поставка оборудования
Заказчик
ГКУ Тест
Начальная цена
649 989,52 ₽
Размещено
20.04.2026
""".encode()
        client = StructuredDataClient(source=ParserLoadLog.Source.PROCUREMENTS_44FZ)

        records = client.fetch_records(content=content, file_name="search.html")

        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].external_id, "0331")
        self.assertEqual(records[0].title, "Поставка оборудования")
        self.assertEqual(records[0].organisation_name, "ГКУ Тест")
        self.assertEqual(str(records[0].amount), "649989.52")
        self.assertEqual(records[0].record_date, "20.04.2026")

    def test_html_table_with_td_header_row_is_parsed(self):
        """Test registry tables without th still parse when first row is a header."""
        # NOTE(review): stripped fixture -- HTML tags are missing; restore from VCS.
        content = """
Номер реестровой записиИнформация о лицеИНН
ГОЗ-1ООО Оборона7701000000
""".encode()
        client = StructuredDataClient(source=ParserLoadLog.Source.ARBITRATION)

        records = client.fetch_records(content=content, file_name="fas.html")

        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].external_id, "ГОЗ-1")
        self.assertEqual(records[0].inn, "7701000000")
        self.assertEqual(records[0].organisation_name, "ООО Оборона")

    def test_fas_goz_multirow_header_table_is_parsed(self):
        """Test FAS GOZ table skips multirow headers and column-number rows."""
        # NOTE(review): stripped fixture -- HTML tags are missing; restore from VCS.
        content = """
Номер реестровой записиОрган ПостановлениеЛицо
номердатаисполнение полное наименованиеадресИНН
12345678
1Нижегородское УФАС России № 052/04/7.29.2-2965/2023 от 22.01.2024 28.10.2025В стадии исполнения АО УАПОАО УАПОг. Уфа0275074279
""".encode()
        client = StructuredDataClient(source=ParserLoadLog.Source.FAS_GOZ)

        records = client.fetch_records(content=content, file_name="fas.html")

        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].external_id, "1")
        self.assertEqual(records[0].inn, "0275074279")
        self.assertEqual(records[0].organisation_name, "АО УАПО")
        self.assertEqual(records[0].record_date, "28.10.2025")
        self.assertEqual(records[0].status, "В стадии исполнения")

    def test_fns_nested_bfo_fields_are_normalized(self):
        """Test FNS JSON keeps nested payload and maps useful BFO fields."""
        payload = {
            "content": [
                {
                    "id": 6622458,
                    "inn": "7736050003",
                    "shortName": 'ПАО "ГАЗПРОМ"',
                    "ogrn": "1027700070518",
                    "statusCode": "ACTIVE",
                    "bfo": {
                        "period": "2025",
                        "actualBfoDate": "2026-03-16",
                        "gainSum": 5846351786,
                    },
                }
            ]
        }
        client = StructuredDataClient(source=ParserLoadLog.Source.FNS_FINANCIAL)

        records = client.fetch_records(
            content=json.dumps(payload).encode("utf-8"),
            file_name="fns.json",
        )

        self.assertEqual(records[0].external_id, "6622458")
        self.assertEqual(records[0].inn, "7736050003")
        self.assertEqual(records[0].organisation_name, 'ПАО "ГАЗПРОМ"')
        self.assertEqual(records[0].record_date, "2026-03-16")
        self.assertEqual(str(records[0].amount), "5846351786")
        self.assertEqual(records[0].status, "ACTIVE")

    @patch.object(BaseHTTPClient, "download_file")
    def test_fstec_page_discovers_csv_download(self, mock_download):
        """Test FSTEC registry page follows the official CSV download link."""
        # First download is the registry page, second is the CSV it links to.
        mock_download.side_effect = [
            # NOTE(review): stripped fixture -- the link markup is missing;
            # restore from VCS.
            (
                'Государственный '
                "реестр ССЗИ"
            ).encode(),
            (
                '"№ сертификата","Дата внесения в реестр","Срок действия сертификата",'
                '"Наименование средства (шифр)","Заявитель"\n'
                '"17/1","2002-07-26","2020-08-01","ФСПК-100","ООО НПП ЭЛКОМ"\n'
            ).encode(),
        ]
        client = StructuredDataClient(source=ParserLoadLog.Source.FSTEC)

        records = client.fetch_records(file_url="https://reestr.fstec.ru/reg3")

        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].external_id, "17/1")
        self.assertEqual(records[0].organisation_name, "ООО НПП ЭЛКОМ")
        self.assertEqual(records[0].title, "ФСПК-100")
        self.assertEqual(records[0].record_date, "2002-07-26")
        self.assertEqual(records[0].status, "2020-08-01")


class TrudvsemClientTest(TestCase):
    """Tests for TrudvsemClient."""

    @patch.object(BaseHTTPClient, "get_json")
    def test_fetch_vacancies_success(self, mock_get_json):
        """Test successful vacancies fetching."""
        mock_get_json.return_value = {
            "results": {
                "vacancies": [
                    {
                        "vacancy": {
                            "id": "VAC-1",
                            "job-name": "Инженер",
                            "creation-date": "2026-01-01",
                            "salary": {"from": 120000},
                            "company": {
                                "name": "ООО Тест",
                                "inn": "1234567890",
                                "ogrn": "1234567890123",
                            },
                            "vac_url": "https://trudvsem.ru/vacancy/VAC-1",
                        }
                    }
                ]
            }
        }

        with TrudvsemClient() as client:
            records = client.fetch_vacancies(limit=1)

        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].external_id, "VAC-1")
        self.assertEqual(records[0].source, ParserLoadLog.Source.TRUDVSEM)
        self.assertEqual(records[0].inn, "1234567890")

    @patch.object(BaseHTTPClient, "get_json")
    def test_fetch_vacancies_by_company_inn_scans_pages(self, mock_get_json):
        """Test company_inn search scans next pages instead of false empty result."""
        # Page 1 holds only a non-matching company; the match is on page 2.
        mock_get_json.side_effect = [
            {
                "results": {
                    "vacancies": [
                        {
                            "vacancy": {
                                "id": "VAC-OTHER",
                                "company": {"inn": "0000000000"},
                            }
                        }
                    ]
                }
            },
            {
                "results": {
                    "vacancies": [
                        {
                            "vacancy": {
                                "id": "VAC-MATCH",
                                "company": {"inn": "1234567890"},
                            }
                        }
                    ]
                }
            },
        ]

        with TrudvsemClient(company_search_max_pages=2) as client:
            records = client.fetch_vacancies(limit=1, company_inn="1234567890")

        self.assertEqual(len(records), 1)
        self.assertEqual(records[0].external_id, "VAC-MATCH")
        self.assertEqual(mock_get_json.call_args_list[0].kwargs["params"]["offset"], 0)
        self.assertEqual(mock_get_json.call_args_list[1].kwargs["params"]["offset"], 1)


def _workbook_bytes(header, rows) -> bytes:
    """Serialize a single-sheet workbook to XLSX bytes.

    Args:
        header: Cell values of the first (header) row.
        rows: Iterable of data rows appended after the header.

    Returns:
        The saved workbook as raw bytes.
    """
    wb = Workbook()
    ws = wb.active
    ws.append(header)
    for row in rows:
        ws.append(row)

    output = BytesIO()
    wb.save(output)
    # getvalue() returns the whole buffer regardless of the stream position.
    return output.getvalue()


def _create_test_excel_certificates() -> bytes:
    """Create test Excel file with certificate data (header + 5 rows)."""
    header = [
        "issue_date",
        "certificate_number",
        "expiry_date",
        "certificate_file_url",
        "organisation_name",
        "inn",
        "ogrn",
    ]
    rows = [
        [
            "2024-01-01",
            f"CERT-{i:04d}",
            "2025-01-01",
            f"https://example.com/cert{i}.pdf",
            f"Company {i} LLC",
            f"123456789{i}",
            f"123456789012{i}",
        ]
        for i in range(5)
    ]
    return _workbook_bytes(header, rows)


def _create_test_excel_manufacturers() -> bytes:
    """Create test Excel file with manufacturer data (header + 5 rows)."""
    header = ["full_legal_name", "inn", "ogrn", "address"]
    rows = [
        [
            f"Manufacturer {i} LLC",
            f"123456789{i}",
            f"123456789012{i}",
            f"Address {i}, City",
        ]
        for i in range(5)
    ]
    return _workbook_bytes(header, rows)


class IndustrialProductionClientTest(TestCase):
    """Tests for IndustrialProductionClient."""

    def test_client_initialization(self):
        """Test client initializes correctly."""
        client = IndustrialProductionClient()

        self.assertIsNone(client.proxies)
        self.assertEqual(client.host, "minpromtorg.gov.ru")

    def test_client_with_proxies(self):
        """Test client accepts proxy list."""
        proxies = ["http://proxy1:8080", "http://proxy2:8080"]
        client = IndustrialProductionClient(proxies=proxies)

        self.assertEqual(client.proxies, proxies)

    def test_context_manager(self):
        """Test client works as context manager."""
        with IndustrialProductionClient() as client:
            self.assertIsInstance(client, IndustrialProductionClient)

    @patch.object(BaseHTTPClient, "get_json")
    @patch.object(BaseHTTPClient, "download_file")
    def test_fetch_certificates_success(self, mock_download, mock_get_json):
        """Test successful certificate fetching."""
        # Mock API response
        mock_get_json.return_value = {
            "data": [
                {
                    "name": _CERTIFICATES_DATASET_NAME,
                    "files": [
                        {
                            "name": "data_resolutions_20240101.xlsx",
                            "url": "/files/test.xlsx",
                        },
                    ],
                }
            ]
        }
        # Mock Excel download
        mock_download.return_value = _create_test_excel_certificates()

        with IndustrialProductionClient() as client:
            certificates = client.fetch_certificates()

        self.assertEqual(len(certificates), 5)
        self.assertIsInstance(certificates[0], IndustrialCertificate)
        self.assertEqual(certificates[0].certificate_number, "CERT-0000")

    @patch.object(BaseHTTPClient, "get_json")
    def test_fetch_certificates_no_files(self, mock_get_json):
        """Test returns empty list when no files found."""
        mock_get_json.return_value = {"data": []}

        with IndustrialProductionClient() as client:
            certificates = client.fetch_certificates()

        self.assertEqual(certificates, [])

    @patch.object(BaseHTTPClient, "get_json")
    def test_get_latest_file_url_selects_newest(self, mock_get_json):
        """Test selects file with latest date."""
        # The newest file is deliberately not the last entry in the listing.
        mock_get_json.return_value = {
            "data": [
                {
                    "name": _CERTIFICATES_DATASET_NAME,
                    "files": [
                        {
                            "name": "data_resolutions_20240101.xlsx",
                            "url": "/files/old.xlsx",
                        },
                        {
                            "name": "data_resolutions_20240315.xlsx",
                            "url": "/files/new.xlsx",
                        },
                        {
                            "name": "data_resolutions_20240201.xlsx",
                            "url": "/files/mid.xlsx",
                        },
                    ],
                }
            ]
        }
        client = IndustrialProductionClient()

        files_data = client._fetch_files_list()
        url = client._get_latest_file_url(files_data)

        self.assertIn("new.xlsx", url)

    def test_parse_row_valid(self):
        """Test parsing valid row."""
        client = IndustrialProductionClient()
        row = (
            "2024-01-01",
            "CERT-123",
            "2025-01-01",
            "https://example.com/cert.pdf",
            "Test Company",
            "1234567890",
            "1234567890123",
        )

        result = client._parse_row(row)

        self.assertIsInstance(result, IndustrialCertificate)
        self.assertEqual(result.certificate_number, "CERT-123")
        self.assertEqual(result.inn, "1234567890")

    def test_parse_row_invalid(self):
        """Test parsing invalid row returns None."""
        client = IndustrialProductionClient()
        row = ("only", "two")  # Not enough columns

        result = client._parse_row(row)

        self.assertIsNone(result)


class ManufacturesClientTest(TestCase):
    """Tests for ManufacturesClient."""

    def test_client_initialization(self):
        """Test client initializes correctly."""
        client = ManufacturesClient()

        self.assertIsNone(client.proxies)
        self.assertEqual(client.host, "minpromtorg.gov.ru")

    def test_client_with_proxies(self):
        """Test client accepts proxy list."""
        proxies = ["http://proxy1:8080", "http://proxy2:8080"]
        client = ManufacturesClient(proxies=proxies)

        self.assertEqual(client.proxies, proxies)

    def test_context_manager(self):
        """Test client works as context manager."""
        with ManufacturesClient() as client:
            self.assertIsInstance(client, ManufacturesClient)

    @patch.object(BaseHTTPClient, "get_json")
    @patch.object(BaseHTTPClient, "download_file")
    def test_fetch_manufacturers_success(self, mock_download, mock_get_json):
        """Test successful manufacturer fetching."""
        # Mock API response
        mock_get_json.return_value = {
            "data": [
                {
                    "name": _MANUFACTURERS_DATASET_NAME,
                    "files": [
                        {"name": "data_orgs_20240101.xlsx", "url": "/files/test.xlsx"},
                    ],
                }
            ]
        }
        # Mock Excel download
        mock_download.return_value = _create_test_excel_manufacturers()

        with ManufacturesClient() as client:
            manufacturers = client.fetch_manufacturers()

        self.assertEqual(len(manufacturers), 5)
        self.assertIsInstance(manufacturers[0], Manufacturer)
        self.assertEqual(manufacturers[0].full_legal_name, "Manufacturer 0 LLC")

    @patch.object(BaseHTTPClient, "get_json")
    def test_fetch_manufacturers_no_files(self, mock_get_json):
        """Test returns empty list when no files found."""
        mock_get_json.return_value = {"data": []}

        with ManufacturesClient() as client:
            manufacturers = client.fetch_manufacturers()

        self.assertEqual(manufacturers, [])

    @patch.object(BaseHTTPClient, "get_json")
    def test_get_latest_file_url_selects_newest(self, mock_get_json):
        """Test selects file with latest date."""
        # The newest file is deliberately not the last entry in the listing.
        mock_get_json.return_value = {
            "data": [
                {
                    "name": _MANUFACTURERS_DATASET_NAME,
                    "files": [
                        {"name": "data_orgs_20240101.xlsx", "url": "/files/old.xlsx"},
                        {"name": "data_orgs_20240315.xlsx", "url": "/files/new.xlsx"},
                        {"name": "data_orgs_20240201.xlsx", "url": "/files/mid.xlsx"},
                    ],
                }
            ]
        }
        client = ManufacturesClient()

        files_data = client._fetch_files_list()
        url = client._get_latest_file_url(files_data)

        self.assertIn("new.xlsx", url)

    def test_parse_row_valid(self):
        """Test parsing valid row."""
        client = ManufacturesClient()
        row = ("Test Company LLC", "1234567890", "1234567890123", "Test Address")

        result = client._parse_row(row)

        self.assertIsInstance(result, Manufacturer)
        self.assertEqual(result.full_legal_name, "Test Company LLC")
        self.assertEqual(result.inn, "1234567890")

    def test_parse_row_without_address(self):
        """Test parsing row without address."""
        client = ManufacturesClient()
        row = ("Test Company LLC", "1234567890", "1234567890123")

        result = client._parse_row(row)

        self.assertIsInstance(result, Manufacturer)
        self.assertEqual(result.address, "")


@tag("integration", "slow", "network")
class IndustrialProductionClientIntegrationTest(TestCase):
    """
    Integration tests that load real data.

    WARNING: these tests make real HTTP requests to external servers.
    Run with the tag: python manage.py test --tag=integration
    """

    def test_fetch_certificates_real_data(self):
        """
        Integration test: load real certificates from gisp.gov.ru.

        This test:
        1. Connects to the real API
        2. Downloads the Excel file
        3. Parses the data
        4. Checks the structure of the result

        The test may take a while and depends on the external server being
        available.
        """
        try:
            with IndustrialProductionClient(timeout=120) as client:
                certificates = client.fetch_certificates()
        except HTTPClientError as e:
            # The API may be unavailable -- expected for integration tests.
            self.skipTest(f"External API unavailable: {e}")

        # Check that data was received
        self.assertIsInstance(certificates, list)

        # If there is data, check its structure
        if certificates:
            cert = certificates[0]
            self.assertIsInstance(cert, IndustrialCertificate)
            self.assertIsNotNone(cert.certificate_number)
            self.assertIsNotNone(cert.inn)
            self.assertIsNotNone(cert.organisation_name)

            # Log for information
            print(f"\n[INTEGRATION] Loaded {len(certificates)} certificates")
            print(f"[INTEGRATION] First certificate: {cert.certificate_number}")
            print(f"[INTEGRATION] Organisation: {cert.organisation_name}")
        else:
            print("\n[INTEGRATION] No certificates found (API may be unavailable)")


@tag("integration", "slow", "network")
class ManufacturesClientIntegrationTest(TestCase):
    """
    Integration tests for the manufacturers client.

    WARNING: these tests make real HTTP requests to external servers.
    Run with the tag: python manage.py test --tag=integration
    """

    def test_fetch_manufacturers_real_data(self):
        """
        Integration test: load real manufacturers from gisp.gov.ru.
        """
        try:
            with ManufacturesClient(timeout=120) as client:
                manufacturers = client.fetch_manufacturers()
        except HTTPClientError as e:
            # The API may be unavailable -- expected for integration tests.
            self.skipTest(f"External API unavailable: {e}")

        # Check that data was received
        self.assertIsInstance(manufacturers, list)

        # If there is data, check its structure
        if manufacturers:
            m = manufacturers[0]
            self.assertIsInstance(m, Manufacturer)
            self.assertIsNotNone(m.full_legal_name)
            self.assertIsNotNone(m.inn)

            # Log for information
            print(f"\n[INTEGRATION] Loaded {len(manufacturers)} manufacturers")
            print(f"[INTEGRATION] First manufacturer: {m.full_legal_name}")
            print(f"[INTEGRATION] INN: {m.inn}")
        else:
            print("\n[INTEGRATION] No manufacturers found (API may be unavailable)")


def _create_test_xml_inspections() -> bytes:
    """Create test XML file with inspection data."""
    # NOTE(review): stripped fixture -- XML tags are missing; restore from VCS.
    xml_content = (
        " 772024000001 7701234567 1027700000001"
        ' ООО "Тест Компания 1" Роспотребнадзор плановая документарная'
        " 2024-01-15 2024-01-30 завершена 294-ФЗ нарушения не выявлены"
        " 772024000002 7702345678 1027700000002"
        ' АО "Тест Компания 2" Ростехнадзор внеплановая выездная'
        " 2024-02-01 2024-02-15 завершена 248-ФЗ выявлены нарушения "
    )
    return xml_content.encode("utf-8")


def _create_test_xml_inspections_russian_tags() -> bytes:
    """Create test XML with Russian tag names."""
    # NOTE(review): stripped fixture -- closing tags (and presumably the XML
    # declaration) are missing; restore from VCS.
    xml_content = (
        " <Проверки> <КНМ> <УчетныйНомер>772024000003 <ИНН>7703456789"
        " <ОГРН>1027700000003"
        ' <Наименование>ПАО "Тест Компания 3" <КонтрольныйОрган>МЧС России'
        " <ТипПроверки>плановая <ФормаПроверки>документарная и выездная"
        " <ДатаНачала>2024-03-01 <ДатаОкончания>2024-03-20"
        " <Статус>в процессе <ПравовоеОснование>294-ФЗ "
    )
    return xml_content.encode("utf-8")


class ProverkiClientTest(TestCase):
    """Tests for ProverkiClient."""

    def test_client_initialization(self):
        """Test client initializes correctly."""
        client = ProverkiClient()

        self.assertIsNone(client.proxies)
        self.assertEqual(client.host, "proverki.gov.ru")

    def test_client_with_proxies(self):
        """Test client accepts proxy list."""
        proxies = ["http://proxy1:8080", "http://proxy2:8080"]
        client = ProverkiClient(proxies=proxies)

        self.assertEqual(client.proxies, proxies)

    def test_context_manager(self):
        """Test client works as context manager."""
        with ProverkiClient() as client:
            self.assertIsInstance(client, ProverkiClient)

    def test_parse_xml_content_english_tags(self):
        """Test parsing XML with English tag names."""
        client = ProverkiClient()
        xml_content = _create_test_xml_inspections()

        inspections = client._parse_xml_content(xml_content, None)

        self.assertEqual(len(inspections), 2)
        self.assertIsInstance(inspections[0], Inspection)
        self.assertEqual(inspections[0].registration_number, "772024000001")
        self.assertEqual(inspections[0].inn, "7701234567")
        self.assertEqual(inspections[0].organisation_name, 'ООО "Тест Компания 1"')
        self.assertEqual(inspections[0].control_authority, "Роспотребнадзор")
        self.assertEqual(inspections[0].inspection_type, "плановая")
        self.assertEqual(inspections[0].legal_basis, "294-ФЗ")

    def test_parse_xml_content_russian_tags(self):
        """Test parsing XML with Russian tag names."""
        client = ProverkiClient()
        xml_content = _create_test_xml_inspections_russian_tags()

        inspections = client._parse_xml_content(xml_content, None)

        self.assertEqual(len(inspections), 1)
        self.assertIsInstance(inspections[0], Inspection)
        self.assertEqual(inspections[0].registration_number, "772024000003")
        self.assertEqual(inspections[0].inn, "7703456789")
        self.assertEqual(inspections[0].control_authority, "МЧС России")

    def test_parse_xml_record_with_attributes(self):
        """Test parsing XML record with attributes instead of child elements."""
        client = ProverkiClient()
        # NOTE(review): stripped fixture -- the element markup is missing;
        # restore from VCS.
        xml_str = ''
        element = ET.fromstring(xml_str)  # noqa: S314

        result = client._parse_xml_record(element)

        self.assertIsNotNone(result)
        self.assertEqual(result.inn, "1234567890")
        self.assertEqual(result.registration_number, "TEST123")

    def test_parse_xml_record_invalid(self):
        """Test parsing invalid XML record returns None."""
        client = ProverkiClient()
        # NOTE(review): stripped fixture -- the element markup is missing;
        # restore from VCS.
        xml_str = ""
        element = ET.fromstring(xml_str)  # noqa: S314

        result = client._parse_xml_record(element)

        self.assertIsNone(result)

    def test_parse_windows_1251_encoding(self):
        """Test parsing XML with Windows-1251 encoding."""
        client = ProverkiClient()
        # NOTE(review): stripped fixture -- XML tags are missing; restore from VCS.
        xml_content = " 1234567890 TEST001 Компания ".encode("windows-1251")

        inspections = client._parse_xml_content(xml_content, None)

        self.assertEqual(len(inspections), 1)
        self.assertEqual(inspections[0].organisation_name, "Компания")

    @patch.object(BaseHTTPClient, "download_file")
    @patch.object(ProverkiClient, "_discover_data_files")
    def test_fetch_inspections_with_file_url(self, mock_discover, mock_download):
        """Test fetching inspections with direct file URL."""
        mock_download.return_value = _create_test_xml_inspections()

        with ProverkiClient() as client:
            inspections = client.fetch_inspections(
                file_url="https://proverki.gov.ru/opendata/test.xml"
            )

        self.assertEqual(len(inspections), 2)
        mock_discover.assert_not_called()  # Should not discover files when URL provided

    @patch.object(ProverkiClient, "_discover_data_files")
    def test_fetch_inspections_no_files(self, mock_discover):
        """Test returns empty list when no files found."""
        mock_discover.return_value = []

        with ProverkiClient() as client:
            inspections = client.fetch_inspections(year=2025)

        self.assertEqual(inspections, [])


@tag("integration", "slow", "network")
class ProverkiClientIntegrationTest(TestCase):
    """
    Integration tests for the proverki.gov.ru client.

    WARNING: these tests make real HTTP requests to external servers.
    Run with the tag: python manage.py test --tag=integration
    """

    def test_fetch_inspections_real_data(self):
        """
        Integration test: load real inspections from proverki.gov.ru.
        """
        try:
            with ProverkiClient(timeout=120) as client:
                inspections = client.fetch_inspections(year=2025)
        except HTTPClientError as e:
            # The API may be unavailable
            self.skipTest(f"External API unavailable: {e}")

        # Check that data was received
        self.assertIsInstance(inspections, list)

        # If there is data, check its structure
        if inspections:
            insp = inspections[0]
            self.assertIsInstance(insp, Inspection)
            self.assertIsNotNone(insp.registration_number)
            self.assertIsNotNone(insp.inn)

            # Log for information
            print(f"\n[INTEGRATION] Loaded {len(inspections)} inspections")
            print(f"[INTEGRATION] First inspection: {insp.registration_number}")
            print(f"[INTEGRATION] Organisation: {insp.organisation_name}")
            print(f"[INTEGRATION] Control authority: {insp.control_authority}")
        else:
            print(
                "\n[INTEGRATION] No inspections found "
                "(API may be unavailable or data format changed)"
            )