Files
mostovik-backend/tests/apps/parsers/test_clients.py
Aleksandr Meshchriakov 44355deeb3
All checks were successful
CI/CD Pipeline / Code Quality Checks (pull_request) Successful in 1m6s
CI/CD Pipeline / Run Tests (pull_request) Successful in 1m18s
CI/CD Pipeline / Build Docker Images (pull_request) Has been skipped
CI/CD Pipeline / Push to Gitea Registry (pull_request) Has been skipped
feat: add parser source dashboard and scheduling
2026-04-28 00:20:08 +02:00

1086 lines
43 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Tests for parsers clients."""
import json
import zipfile
from io import BytesIO
from unittest.mock import Mock, patch
from apps.parsers.clients.base import BaseHTTPClient, HTTPClientError
from apps.parsers.clients.common import (
GenericParserItem,
StructuredDataClient,
StructuredDataClientError,
)
from apps.parsers.clients.minpromtorg.industrial import IndustrialProductionClient
from apps.parsers.clients.minpromtorg.manufactures import ManufacturesClient
from apps.parsers.clients.minpromtorg.schemas import IndustrialCertificate, Manufacturer
from apps.parsers.clients.proverki import ProverkiClient
from apps.parsers.clients.proverki.schemas import Inspection
from apps.parsers.clients.trudvsem import TrudvsemClient
from apps.parsers.models import ParserLoadLog
from django.test import TestCase, tag
from faker import Faker
from openpyxl import Workbook
# Shared Faker instance configured with the Russian ("ru_RU") locale.
# NOTE(review): none of the tests visible in this file reference it - confirm it is still needed.
fake = Faker("ru_RU")
class BaseHTTPClientTest(TestCase):
    """Unit tests for BaseHTTPClient construction, proxy handling and downloads."""

    def test_client_initialization(self):
        """A client created with only a base URL carries the default settings."""
        http_client = BaseHTTPClient(base_url="https://example.com")

        self.assertEqual(http_client.base_url, "https://example.com")
        self.assertIsNone(http_client.proxies)
        self.assertEqual(http_client.timeout, 30)

    def test_client_with_proxies(self):
        """The proxy list handed to the constructor is stored on the client."""
        proxy_pool = ["http://proxy1:8080", "http://proxy2:8080"]
        http_client = BaseHTTPClient(base_url="https://example.com", proxies=proxy_pool)

        self.assertEqual(http_client.proxies, proxy_pool)

    def test_select_proxy_returns_none_without_proxies(self):
        """_select_proxy yields None when the client has no proxies."""
        http_client = BaseHTTPClient(base_url="https://example.com")

        self.assertIsNone(http_client._select_proxy())

    def test_select_proxy_returns_random_from_list(self):
        """_select_proxy picks one of the configured proxies."""
        proxy_pool = ["http://proxy1:8080", "http://proxy2:8080"]
        http_client = BaseHTTPClient(base_url="https://example.com", proxies=proxy_pool)

        chosen = http_client._select_proxy()

        self.assertIn(chosen, proxy_pool)

    def test_current_proxy_property(self):
        """current_proxy is None until the session has been accessed."""
        http_client = BaseHTTPClient(
            base_url="https://example.com",
            proxies=["http://proxy:8080"],
        )

        # No session has been requested yet, so no proxy is reported.
        self.assertIsNone(http_client.current_proxy)

        # Touching the session property is what makes the proxy visible.
        _ = http_client.session
        self.assertEqual(http_client.current_proxy, "http://proxy:8080")

    def test_download_file_rejects_large_content_length_before_body_read(self):
        """download_file rejects an oversized Content-Length without reading the body."""
        http_client = BaseHTTPClient(base_url="https://example.com")
        # The advertised size (10) is above the limit passed below (5).
        fake_response = Mock(ok=True, headers={"Content-Length": "10"})
        fake_response.iter_content.return_value = [b"too-large"]
        fake_response.close = Mock()
        http_client.session.get = Mock(return_value=fake_response)

        with self.assertRaises(HTTPClientError):
            http_client.download_file("/data.csv", max_size_bytes=5)

        fake_response.iter_content.assert_not_called()
        fake_response.close.assert_called_once()

    def test_download_file_passes_ssl_verification_flag(self):
        """verify_ssl=False on the client is forwarded as verify=False to session.get."""
        http_client = BaseHTTPClient(base_url="https://example.com", verify_ssl=False)
        fake_response = Mock(ok=True, headers={"Content-Length": "4"})
        fake_response.iter_content.return_value = [b"data"]
        fake_response.close = Mock()
        http_client.session.get = Mock(return_value=fake_response)

        downloaded = http_client.download_file("/data.csv")

        self.assertEqual(downloaded, b"data")
        http_client.session.get.assert_called_once_with(
            "https://example.com/data.csv",
            stream=True,
            timeout=30,
            verify=False,
        )
class StructuredDataClientTest(TestCase):
    """Format-by-format parsing tests for StructuredDataClient."""

    def test_parse_json_records(self):
        """A JSON document with a "data" list is normalized into GenericParserItem objects."""
        source_payload = {
            "data": [
                {
                    "id": "FIN-1",
                    "inn": "1234567890",
                    "ogrn": "1234567890123",
                    "name": "Test Company",
                    "amount": "10 500,50",
                    "date": "2024",
                }
            ]
        }
        raw_bytes = json.dumps(source_payload).encode("utf-8")
        parser = StructuredDataClient(source=ParserLoadLog.Source.FNS_FINANCIAL)

        parsed = parser.fetch_records(content=raw_bytes, file_name="data.json")

        self.assertEqual(len(parsed), 1)
        item = parsed[0]
        self.assertIsInstance(item, GenericParserItem)
        self.assertEqual(item.external_id, "FIN-1")
        self.assertEqual(item.inn, "1234567890")
        self.assertEqual(str(item.amount), "10500.50")

    def test_parse_csv_records(self):
        """A cp1251 CSV with Russian column headers is mapped onto record fields."""
        raw_bytes = (
            "реестровый номер;ИНН;наименование;сумма\n"
            "RN-1;1234567890;ООО Тест;1000.00\n"
        ).encode("cp1251")
        parser = StructuredDataClient(source=ParserLoadLog.Source.UNFAIR_SUPPLIERS)

        parsed = parser.fetch_records(content=raw_bytes, file_name="data.csv")

        self.assertEqual(len(parsed), 1)
        item = parsed[0]
        self.assertEqual(item.external_id, "RN-1")
        self.assertEqual(item.organisation_name, "ООО Тест")

    def test_parse_xml_records_under_wrapper(self):
        """Repeated <item> elements nested under a wrapper become separate records."""
        raw_bytes = (
            "<root><items>"
            "<item><id>XML-1</id><inn>123</inn><name>ООО А</name></item>"
            "<item><id>XML-2</id><inn>456</inn><name>ООО Б</name></item>"
            "</items></root>"
        ).encode()
        parser = StructuredDataClient(source=ParserLoadLog.Source.FNS_FINANCIAL)

        parsed = parser.fetch_records(content=raw_bytes, file_name="data.xml")

        self.assertEqual(len(parsed), 2)
        self.assertEqual(parsed[0].external_id, "XML-1")
        self.assertEqual(parsed[1].external_id, "XML-2")

    def test_json_payload_preserves_nested_objects(self):
        """Nested dicts and lists from the JSON input stay intact in record.payload."""
        source_payload = {
            "data": [
                {
                    "id": "NESTED-1",
                    "company": {"inn": "123", "name": "ООО А"},
                    "amounts": [1, 2],
                }
            ]
        }
        raw_bytes = json.dumps(source_payload).encode("utf-8")
        parser = StructuredDataClient(source=ParserLoadLog.Source.FNS_FINANCIAL)

        parsed = parser.fetch_records(content=raw_bytes, file_name="data.json")
        item = parsed[0]

        self.assertEqual(item.payload["company"], {"inn": "123", "name": "ООО А"})
        self.assertEqual(item.payload["amounts"], [1, 2])

    def test_fallback_external_id_is_stable_after_reordering(self):
        """Rows without an id column get the same external_id regardless of row order."""
        parser = StructuredDataClient(source=ParserLoadLog.Source.FNS_FINANCIAL)

        original_order = parser.fetch_records(
            content=("name;amount\nООО А;10\nООО Б;20\n").encode(),
            file_name="data.csv",
        )
        swapped_order = parser.fetch_records(
            content=("name;amount\nООО Б;20\nООО А;10\n").encode(),
            file_name="data.csv",
        )

        # The same row sits at opposite positions in the two inputs.
        self.assertEqual(original_order[0].external_id, swapped_order[1].external_id)
        self.assertEqual(original_order[1].external_id, swapped_order[0].external_id)

    def test_zip_rejects_too_many_supported_files(self):
        """An archive holding more supported files than max_zip_entries is refused."""
        zip_buffer = BytesIO()
        with zipfile.ZipFile(zip_buffer, "w") as bundle:
            bundle.writestr("one.csv", "id\n1\n")
            bundle.writestr("two.csv", "id\n2\n")
        parser = StructuredDataClient(
            source=ParserLoadLog.Source.FNS_FINANCIAL,
            max_zip_entries=1,
        )

        with self.assertRaises(StructuredDataClientError):
            parser.fetch_records(
                content=zip_buffer.getvalue(),
                file_name="data.zip",
            )

    def test_html_without_table_returns_empty_records(self):
        """An HTML page that contains no table produces an empty record list."""
        page = b"<!doctype html><html><body><main>No table</main></body></html>"
        parser = StructuredDataClient(source=ParserLoadLog.Source.ARBITRATION)

        parsed = parser.fetch_records(content=page, file_name="")

        self.assertEqual(parsed, [])

    def test_html_table_after_long_head_is_detected(self):
        """A table placed after 1500 characters of <head> padding is still parsed."""
        head_padding = " " * 1500
        page = (
            "<!doctype html><html><head>" + head_padding + "</head><body><table>"
            "<tr><th>id</th><th>inn</th></tr>"
            "<tr><td>HTML-1</td><td>1234567890</td></tr>"
            "</table></body></html>"
        ).encode()
        parser = StructuredDataClient(source=ParserLoadLog.Source.ARBITRATION)

        parsed = parser.fetch_records(content=page, file_name="")

        self.assertEqual(len(parsed), 1)
        item = parsed[0]
        self.assertEqual(item.external_id, "HTML-1")
        self.assertEqual(item.inn, "1234567890")

    def test_html_layout_table_without_headers_is_ignored(self):
        """A table made only of navigation-like cells yields no records."""
        page = (
            "<html><body><table>"
            "<tr><td>Картотека</td><td>Страж</td></tr>"
            "<tr><td>Календарь</td><td>Мой Арбитр</td></tr>"
            "</table></body></html>"
        ).encode()
        parser = StructuredDataClient(source=ParserLoadLog.Source.ARBITRATION)

        parsed = parser.fetch_records(content=page, file_name="")

        self.assertEqual(parsed, [])

    @patch.object(BaseHTTPClient, "post_json")
    def test_mpt_products_page_uses_official_search_api(self, mock_post_json):
        """Fetching the GISP product page posts to the ".../prod/b/" endpoint and maps its items."""
        mock_post_json.return_value = {
            "ok": True,
            "total_count": 1,
            "items": [
                {
                    "org_name": "ООО Производитель",
                    "org_inn": "7701000000",
                    "org_ogrn": "1027700000000",
                    "product_reg_number_2023": "10165413",
                    "product_name": "Средство дезинфицирующее",
                    "res_date": "2026-04-25",
                    "product_gisp_url": "https://gisp.gov.ru/goods/#/product/1",
                }
            ],
        }
        parser = StructuredDataClient(source=ParserLoadLog.Source.MPT_PRODUCTS)

        parsed = parser.fetch_records(file_url="https://gisp.gov.ru/pp719v2/pub/prod/")

        self.assertEqual(len(parsed), 1)
        item = parsed[0]
        self.assertEqual(item.external_id, "10165413")
        self.assertEqual(item.inn, "7701000000")
        self.assertEqual(item.organisation_name, "ООО Производитель")
        self.assertEqual(item.title, "Средство дезинфицирующее")
        # First positional argument of the last post_json call is the endpoint URL.
        self.assertEqual(
            mock_post_json.call_args.args[0],
            "https://gisp.gov.ru/pp719v2/pub/prod/b/",
        )

    def test_zakupki_cards_are_parsed_as_records(self):
        """Procurement search cards (div blocks, no HTML table) are turned into records."""
        page = """
<html><body>
<div class="search-registry-entry-block">
<a href="/epz/order/notice/ea20/view/common-info.html?regNumber=0331">№ 0331</a>
<div>Работа комиссии</div>
<div>Объект закупки</div><div>Поставка оборудования</div>
<div>Заказчик</div><div>ГКУ Тест</div>
<div>Начальная цена</div><div>649 989,52 ₽</div>
<div>Размещено</div><div>20.04.2026</div>
</div>
</body></html>
""".encode()
        parser = StructuredDataClient(source=ParserLoadLog.Source.PROCUREMENTS_44FZ)

        parsed = parser.fetch_records(content=page, file_name="search.html")

        self.assertEqual(len(parsed), 1)
        item = parsed[0]
        self.assertEqual(item.external_id, "0331")
        self.assertEqual(item.title, "Поставка оборудования")
        self.assertEqual(item.organisation_name, "ГКУ Тест")
        self.assertEqual(str(item.amount), "649989.52")
        self.assertEqual(item.record_date, "20.04.2026")

    def test_html_table_with_td_header_row_is_parsed(self):
        """A table whose header row uses <td> instead of <th> is still parsed."""
        page = """
<html><body><table>
<tr><td>Номер реестровой записи</td><td>Информация о лице</td><td>ИНН</td></tr>
<tr><td>ГОЗ-1</td><td>ООО Оборона</td><td>7701000000</td></tr>
</table></body></html>
""".encode()
        parser = StructuredDataClient(source=ParserLoadLog.Source.ARBITRATION)

        parsed = parser.fetch_records(content=page, file_name="fas.html")

        self.assertEqual(len(parsed), 1)
        item = parsed[0]
        self.assertEqual(item.external_id, "ГОЗ-1")
        self.assertEqual(item.inn, "7701000000")
        self.assertEqual(item.organisation_name, "ООО Оборона")

    def test_fas_goz_multirow_header_table_is_parsed(self):
        """A FAS GOZ table with a two-row header and a column-number row gives one record."""
        page = """
<html><body><table>
<tr><td rowspan="2">Номер реестровой записи</td><td rowspan="2">Орган</td>
<td colspan="3">Постановление</td><td colspan="3">Лицо</td></tr>
<tr><td>номер</td><td>дата</td><td>исполнение</td>
<td>полное наименование</td><td>адрес</td><td>ИНН</td></tr>
<tr><td>1</td><td>2</td><td>3</td><td>4</td><td>5</td><td>6</td><td>7</td><td>8</td></tr>
<tr><td>1</td><td>Нижегородское УФАС России</td>
<td>№ 052/04/7.29.2-2965/2023 от 22.01.2024</td>
<td>28.10.2025</td><td>В стадии исполнения</td>
<td>АО УАПО</td><td>АО УАПО</td><td>г. Уфа</td><td>0275074279</td></tr>
</table></body></html>
""".encode()
        parser = StructuredDataClient(source=ParserLoadLog.Source.FAS_GOZ)

        parsed = parser.fetch_records(content=page, file_name="fas.html")

        self.assertEqual(len(parsed), 1)
        item = parsed[0]
        self.assertEqual(item.external_id, "1")
        self.assertEqual(item.inn, "0275074279")
        self.assertEqual(item.organisation_name, "АО УАПО")
        self.assertEqual(item.record_date, "28.10.2025")
        self.assertEqual(item.status, "В стадии исполнения")

    def test_fns_nested_bfo_fields_are_normalized(self):
        """FNS JSON under "content" maps top-level and nested "bfo" values onto record fields."""
        source_payload = {
            "content": [
                {
                    "id": 6622458,
                    "inn": "<strong>7736050003</strong>",
                    "shortName": 'ПАО "ГАЗПРОМ"',
                    "ogrn": "1027700070518",
                    "statusCode": "ACTIVE",
                    "bfo": {
                        "period": "2025",
                        "actualBfoDate": "2026-03-16",
                        "gainSum": 5846351786,
                    },
                }
            ]
        }
        raw_bytes = json.dumps(source_payload).encode("utf-8")
        parser = StructuredDataClient(source=ParserLoadLog.Source.FNS_FINANCIAL)

        parsed = parser.fetch_records(content=raw_bytes, file_name="fns.json")
        item = parsed[0]

        self.assertEqual(item.external_id, "6622458")
        # The <strong> markup around the INN is expected to be stripped.
        self.assertEqual(item.inn, "7736050003")
        self.assertEqual(item.organisation_name, 'ПАО "ГАЗПРОМ"')
        self.assertEqual(item.record_date, "2026-03-16")
        self.assertEqual(str(item.amount), "5846351786")
        self.assertEqual(item.status, "ACTIVE")

    @patch.object(BaseHTTPClient, "download_file")
    def test_fstec_page_discovers_csv_download(self, mock_download):
        """The FSTEC page's download link is followed and the resulting CSV is parsed."""
        # First download: the registry HTML page; second download: the CSV it links to.
        registry_page = (
            '<html><a href="/reg3?option=com_rajax&module=rfiles&'
            'method=download&format=file&mod=209&file=1">Государственный '
            "реестр ССЗИ</a></html>"
        ).encode()
        registry_csv = (
            '"№ сертификата","Дата внесения в реестр","Срок действия сертификата",'
            '"Наименование средства (шифр)","Заявитель"\n'
            '"17/1","2002-07-26","2020-08-01","ФСПК-100","ООО НПП ЭЛКОМ"\n'
        ).encode()
        mock_download.side_effect = [registry_page, registry_csv]
        parser = StructuredDataClient(source=ParserLoadLog.Source.FSTEC)

        parsed = parser.fetch_records(file_url="https://reestr.fstec.ru/reg3")

        self.assertEqual(len(parsed), 1)
        item = parsed[0]
        self.assertEqual(item.external_id, "17/1")
        self.assertEqual(item.organisation_name, "ООО НПП ЭЛКОМ")
        self.assertEqual(item.title, "ФСПК-100")
        self.assertEqual(item.record_date, "2002-07-26")
        self.assertEqual(item.status, "2020-08-01")
class TrudvsemClientTest(TestCase):
    """Unit tests for TrudvsemClient with the HTTP layer mocked out."""

    @patch.object(BaseHTTPClient, "get_json")
    def test_fetch_vacancies_success(self, mock_get_json):
        """A single vacancy in the API response is returned as one record."""
        vacancy = {
            "id": "VAC-1",
            "job-name": "Инженер",
            "creation-date": "2026-01-01",
            "salary": {"from": 120000},
            "company": {
                "name": "ООО Тест",
                "inn": "1234567890",
                "ogrn": "1234567890123",
            },
            "vac_url": "https://trudvsem.ru/vacancy/VAC-1",
        }
        mock_get_json.return_value = {"results": {"vacancies": [{"vacancy": vacancy}]}}

        with TrudvsemClient() as api:
            found = api.fetch_vacancies(limit=1)

        self.assertEqual(len(found), 1)
        item = found[0]
        self.assertEqual(item.external_id, "VAC-1")
        self.assertEqual(item.source, ParserLoadLog.Source.TRUDVSEM)
        self.assertEqual(item.inn, "1234567890")

    @patch.object(BaseHTTPClient, "get_json")
    def test_fetch_vacancies_by_company_inn_scans_pages(self, mock_get_json):
        """Filtering by company_inn keeps paging until a matching vacancy shows up."""
        page_without_match = {
            "results": {
                "vacancies": [
                    {
                        "vacancy": {
                            "id": "VAC-OTHER",
                            "company": {"inn": "0000000000"},
                        }
                    }
                ]
            }
        }
        page_with_match = {
            "results": {
                "vacancies": [
                    {
                        "vacancy": {
                            "id": "VAC-MATCH",
                            "company": {"inn": "1234567890"},
                        }
                    }
                ]
            }
        }
        mock_get_json.side_effect = [page_without_match, page_with_match]

        with TrudvsemClient(company_search_max_pages=2) as api:
            found = api.fetch_vacancies(limit=1, company_inn="1234567890")

        self.assertEqual(len(found), 1)
        self.assertEqual(found[0].external_id, "VAC-MATCH")
        # The two requests must have asked for consecutive offsets.
        first_call, second_call = mock_get_json.call_args_list[0], mock_get_json.call_args_list[1]
        self.assertEqual(first_call.kwargs["params"]["offset"], 0)
        self.assertEqual(second_call.kwargs["params"]["offset"], 1)
def _create_test_excel_certificates() -> bytes:
    """Build an in-memory workbook with a header row and five certificate rows.

    Returns:
        The saved workbook as raw bytes.
    """
    workbook = Workbook()
    sheet = workbook.active

    # Column titles
    sheet.append(
        [
            "issue_date",
            "certificate_number",
            "expiry_date",
            "certificate_file_url",
            "organisation_name",
            "inn",
            "ogrn",
        ]
    )

    # Five synthetic certificates, distinguished only by the index digit
    for i in range(5):
        certificate_row = [
            "2024-01-01",
            f"CERT-{i:04d}",
            "2025-01-01",
            f"https://example.com/cert{i}.pdf",
            f"Company {i} LLC",
            f"123456789{i}",
            f"123456789012{i}",
        ]
        sheet.append(certificate_row)

    buffer = BytesIO()
    workbook.save(buffer)
    return buffer.getvalue()
def _create_test_excel_manufacturers() -> bytes:
    """Build an in-memory workbook with a header row and five manufacturer rows.

    Returns:
        The saved workbook as raw bytes.
    """
    workbook = Workbook()
    sheet = workbook.active

    # Column titles
    sheet.append(["full_legal_name", "inn", "ogrn", "address"])

    # Five synthetic manufacturers, distinguished only by the index digit
    for i in range(5):
        manufacturer_row = [
            f"Manufacturer {i} LLC",
            f"123456789{i}",
            f"123456789012{i}",
            f"Address {i}, City",
        ]
        sheet.append(manufacturer_row)

    buffer = BytesIO()
    workbook.save(buffer)
    return buffer.getvalue()
class IndustrialProductionClientTest(TestCase):
    """Unit tests for IndustrialProductionClient with network calls mocked."""

    def test_client_initialization(self):
        """A default client has no proxies and targets minpromtorg.gov.ru."""
        api = IndustrialProductionClient()

        self.assertIsNone(api.proxies)
        self.assertEqual(api.host, "minpromtorg.gov.ru")

    def test_client_with_proxies(self):
        """The proxy list given to the constructor is kept on the client."""
        proxy_pool = ["http://proxy1:8080", "http://proxy2:8080"]

        api = IndustrialProductionClient(proxies=proxy_pool)

        self.assertEqual(api.proxies, proxy_pool)

    def test_context_manager(self):
        """Entering the client as a context manager yields the client itself."""
        with IndustrialProductionClient() as api:
            self.assertIsInstance(api, IndustrialProductionClient)

    @patch.object(BaseHTTPClient, "get_json")
    @patch.object(BaseHTTPClient, "download_file")
    def test_fetch_certificates_success(self, mock_download, mock_get_json):
        """fetch_certificates parses every data row of the downloaded workbook."""
        # File listing returned by the mocked API
        listed_file = {
            "name": "data_resolutions_20240101.xlsx",
            "url": "/files/test.xlsx",
        }
        mock_get_json.return_value = {
            "data": [
                {
                    "name": "Заключения о подтверждении производства промышленной продукции на территории Российской Федерации",
                    "files": [
                        listed_file,
                    ],
                }
            ]
        }
        # Workbook bytes returned by the mocked download
        mock_download.return_value = _create_test_excel_certificates()

        with IndustrialProductionClient() as api:
            loaded = api.fetch_certificates()

        self.assertEqual(len(loaded), 5)
        self.assertIsInstance(loaded[0], IndustrialCertificate)
        self.assertEqual(loaded[0].certificate_number, "CERT-0000")

    @patch.object(BaseHTTPClient, "get_json")
    def test_fetch_certificates_no_files(self, mock_get_json):
        """An empty file listing results in an empty certificate list."""
        mock_get_json.return_value = {"data": []}

        with IndustrialProductionClient() as api:
            loaded = api.fetch_certificates()

        self.assertEqual(loaded, [])

    @patch.object(BaseHTTPClient, "get_json")
    def test_get_latest_file_url_selects_newest(self, mock_get_json):
        """Among several dated files the one with the latest date in its name wins."""
        listed_files = [
            {
                "name": "data_resolutions_20240101.xlsx",
                "url": "/files/old.xlsx",
            },
            {
                "name": "data_resolutions_20240315.xlsx",
                "url": "/files/new.xlsx",
            },
            {
                "name": "data_resolutions_20240201.xlsx",
                "url": "/files/mid.xlsx",
            },
        ]
        mock_get_json.return_value = {
            "data": [
                {
                    "name": "Заключения о подтверждении производства промышленной продукции на территории Российской Федерации",
                    "files": listed_files,
                }
            ]
        }
        api = IndustrialProductionClient()

        listing = api._fetch_files_list()
        latest_url = api._get_latest_file_url(listing)

        self.assertIn("new.xlsx", latest_url)

    def test_parse_row_valid(self):
        """A complete seven-column row becomes an IndustrialCertificate."""
        api = IndustrialProductionClient()
        complete_row = (
            "2024-01-01",
            "CERT-123",
            "2025-01-01",
            "https://example.com/cert.pdf",
            "Test Company",
            "1234567890",
            "1234567890123",
        )

        certificate = api._parse_row(complete_row)

        self.assertIsInstance(certificate, IndustrialCertificate)
        self.assertEqual(certificate.certificate_number, "CERT-123")
        self.assertEqual(certificate.inn, "1234567890")

    def test_parse_row_invalid(self):
        """A row with too few columns is rejected with None."""
        api = IndustrialProductionClient()
        short_row = ("only", "two")  # fewer columns than a certificate needs

        certificate = api._parse_row(short_row)

        self.assertIsNone(certificate)
class ManufacturesClientTest(TestCase):
    """Unit tests for ManufacturesClient with network calls mocked."""

    def test_client_initialization(self):
        """A default client has no proxies and targets minpromtorg.gov.ru."""
        api = ManufacturesClient()

        self.assertIsNone(api.proxies)
        self.assertEqual(api.host, "minpromtorg.gov.ru")

    def test_client_with_proxies(self):
        """The proxy list given to the constructor is kept on the client."""
        proxy_pool = ["http://proxy1:8080", "http://proxy2:8080"]

        api = ManufacturesClient(proxies=proxy_pool)

        self.assertEqual(api.proxies, proxy_pool)

    def test_context_manager(self):
        """Entering the client as a context manager yields the client itself."""
        with ManufacturesClient() as api:
            self.assertIsInstance(api, ManufacturesClient)

    @patch.object(BaseHTTPClient, "get_json")
    @patch.object(BaseHTTPClient, "download_file")
    def test_fetch_manufacturers_success(self, mock_download, mock_get_json):
        """fetch_manufacturers parses every data row of the downloaded workbook."""
        # File listing returned by the mocked API
        mock_get_json.return_value = {
            "data": [
                {
                    "name": "Производители промышленной продукции",
                    "files": [
                        {"name": "data_orgs_20240101.xlsx", "url": "/files/test.xlsx"},
                    ],
                }
            ]
        }
        # Workbook bytes returned by the mocked download
        mock_download.return_value = _create_test_excel_manufacturers()

        with ManufacturesClient() as api:
            loaded = api.fetch_manufacturers()

        self.assertEqual(len(loaded), 5)
        self.assertIsInstance(loaded[0], Manufacturer)
        self.assertEqual(loaded[0].full_legal_name, "Manufacturer 0 LLC")

    @patch.object(BaseHTTPClient, "get_json")
    def test_fetch_manufacturers_no_files(self, mock_get_json):
        """An empty file listing results in an empty manufacturer list."""
        mock_get_json.return_value = {"data": []}

        with ManufacturesClient() as api:
            loaded = api.fetch_manufacturers()

        self.assertEqual(loaded, [])

    @patch.object(BaseHTTPClient, "get_json")
    def test_get_latest_file_url_selects_newest(self, mock_get_json):
        """Among several dated files the one with the latest date in its name wins."""
        listed_files = [
            {"name": "data_orgs_20240101.xlsx", "url": "/files/old.xlsx"},
            {"name": "data_orgs_20240315.xlsx", "url": "/files/new.xlsx"},
            {"name": "data_orgs_20240201.xlsx", "url": "/files/mid.xlsx"},
        ]
        mock_get_json.return_value = {
            "data": [
                {
                    "name": "Производители промышленной продукции",
                    "files": listed_files,
                }
            ]
        }
        api = ManufacturesClient()

        listing = api._fetch_files_list()
        latest_url = api._get_latest_file_url(listing)

        self.assertIn("new.xlsx", latest_url)

    def test_parse_row_valid(self):
        """A four-column row becomes a Manufacturer."""
        api = ManufacturesClient()
        complete_row = ("Test Company LLC", "1234567890", "1234567890123", "Test Address")

        manufacturer = api._parse_row(complete_row)

        self.assertIsInstance(manufacturer, Manufacturer)
        self.assertEqual(manufacturer.full_legal_name, "Test Company LLC")
        self.assertEqual(manufacturer.inn, "1234567890")

    def test_parse_row_without_address(self):
        """A row lacking the address column parses with an empty address."""
        api = ManufacturesClient()
        row_without_address = ("Test Company LLC", "1234567890", "1234567890123")

        manufacturer = api._parse_row(row_without_address)

        self.assertIsInstance(manufacturer, Manufacturer)
        self.assertEqual(manufacturer.address, "")
@tag("integration", "slow", "network")
class IndustrialProductionClientIntegrationTest(TestCase):
    """
    Integration tests that load real data.

    WARNING: these tests send real HTTP requests to external servers.
    Run them with the tag: python manage.py test --tag=integration
    """

    def test_fetch_certificates_real_data(self):
        """
        Integration test: load real certificates through IndustrialProductionClient.

        The test:
        1. Connects to the real API
        2. Downloads the Excel file
        3. Parses the data
        4. Checks the structure of the result

        It may take a while and depends on the availability of the external
        server. The test is skipped when the client raises HTTPClientError or
        returns no certificates, so a run that verified nothing is reported
        as a skip rather than as a pass.
        """
        # Only the network call is guarded; assertion failures must not be
        # confused with an unavailable API.
        try:
            with IndustrialProductionClient(timeout=120) as client:
                certificates = client.fetch_certificates()
        except HTTPClientError as e:
            # The API may be unavailable - expected for integration tests.
            self.skipTest(f"External API unavailable: {e}")

        # Check that data was received
        self.assertIsInstance(certificates, list)
        if not certificates:
            # Nothing to validate: treat it like the unavailable-API case.
            self.skipTest("No certificates found (API may be unavailable)")

        # Data is present - check the structure of the first record
        cert = certificates[0]
        self.assertIsInstance(cert, IndustrialCertificate)
        self.assertIsNotNone(cert.certificate_number)
        self.assertIsNotNone(cert.inn)
        self.assertIsNotNone(cert.organisation_name)

        # Informational output
        print(f"\n[INTEGRATION] Loaded {len(certificates)} certificates")
        print(f"[INTEGRATION] First certificate: {cert.certificate_number}")
        print(f"[INTEGRATION] Organisation: {cert.organisation_name}")
@tag("integration", "slow", "network")
class ManufacturesClientIntegrationTest(TestCase):
    """
    Integration tests for the manufacturers client.

    WARNING: these tests send real HTTP requests to external servers.
    Run them with the tag: python manage.py test --tag=integration
    """

    def test_fetch_manufacturers_real_data(self):
        """
        Integration test: load real manufacturers through ManufacturesClient.

        The test is skipped when the client raises HTTPClientError or returns
        no manufacturers, so a run that verified nothing is reported as a
        skip rather than as a pass.
        """
        # Only the network call is guarded; assertion failures must not be
        # confused with an unavailable API.
        try:
            with ManufacturesClient(timeout=120) as client:
                manufacturers = client.fetch_manufacturers()
        except HTTPClientError as e:
            # The API may be unavailable - expected for integration tests.
            self.skipTest(f"External API unavailable: {e}")

        # Check that data was received
        self.assertIsInstance(manufacturers, list)
        if not manufacturers:
            # Nothing to validate: treat it like the unavailable-API case.
            self.skipTest("No manufacturers found (API may be unavailable)")

        # Data is present - check the structure of the first record
        m = manufacturers[0]
        self.assertIsInstance(m, Manufacturer)
        self.assertIsNotNone(m.full_legal_name)
        self.assertIsNotNone(m.inn)

        # Informational output
        print(f"\n[INTEGRATION] Loaded {len(manufacturers)} manufacturers")
        print(f"[INTEGRATION] First manufacturer: {m.full_legal_name}")
        print(f"[INTEGRATION] INN: {m.inn}")
def _create_test_xml_inspections() -> bytes:
    """Return a UTF-8 encoded XML fixture with two <inspection> records (English tags)."""
    document = """<?xml version="1.0" encoding="UTF-8"?>
<inspections>
<inspection>
<registration_number>772024000001</registration_number>
<inn>7701234567</inn>
<ogrn>1027700000001</ogrn>
<organisation_name>ООО "Тест Компания 1"</organisation_name>
<control_authority>Роспотребнадзор</control_authority>
<inspection_type>плановая</inspection_type>
<inspection_form>документарная</inspection_form>
<start_date>2024-01-15</start_date>
<end_date>2024-01-30</end_date>
<status>завершена</status>
<legal_basis>294-ФЗ</legal_basis>
<result>нарушения не выявлены</result>
</inspection>
<inspection>
<registration_number>772024000002</registration_number>
<inn>7702345678</inn>
<ogrn>1027700000002</ogrn>
<organisation_name>АО "Тест Компания 2"</organisation_name>
<control_authority>Ростехнадзор</control_authority>
<inspection_type>внеплановая</inspection_type>
<inspection_form>выездная</inspection_form>
<start_date>2024-02-01</start_date>
<end_date>2024-02-15</end_date>
<status>завершена</status>
<legal_basis>248-ФЗ</legal_basis>
<result>выявлены нарушения</result>
</inspection>
</inspections>"""
    return bytes(document, "utf-8")
def _create_test_xml_inspections_russian_tags() -> bytes:
    """Return a UTF-8 encoded XML fixture with one record that uses Russian tag names."""
    document = """<?xml version="1.0" encoding="UTF-8"?>
<Проверки>
<КНМ>
<УчетныйНомер>772024000003</УчетныйНомер>
<ИНН>7703456789</ИНН>
<ОГРН>1027700000003</ОГРН>
<Наименование>ПАО "Тест Компания 3"</Наименование>
<КонтрольныйОрган>МЧС России</КонтрольныйОрган>
<ТипПроверки>плановая</ТипПроверки>
<ФормаПроверки>документарная и выездная</ФормаПроверки>
<ДатаНачала>2024-03-01</ДатаНачала>
<ДатаОкончания>2024-03-20</ДатаОкончания>
<Статус>в процессе</Статус>
<ПравовоеОснование>294-ФЗ</ПравовоеОснование>
</КНМ>
</Проверки>"""
    return bytes(document, "utf-8")
class ProverkiClientTest(TestCase):
    """Unit tests for ProverkiClient XML parsing and fetch orchestration."""

    def test_client_initialization(self):
        """A default client has no proxies and targets proverki.gov.ru."""
        api = ProverkiClient()

        self.assertIsNone(api.proxies)
        self.assertEqual(api.host, "proverki.gov.ru")

    def test_client_with_proxies(self):
        """The proxy list given to the constructor is kept on the client."""
        proxy_pool = ["http://proxy1:8080", "http://proxy2:8080"]

        api = ProverkiClient(proxies=proxy_pool)

        self.assertEqual(api.proxies, proxy_pool)

    def test_context_manager(self):
        """Entering the client as a context manager yields the client itself."""
        with ProverkiClient() as api:
            self.assertIsInstance(api, ProverkiClient)

    def test_parse_xml_content_english_tags(self):
        """Records described with English tag names are parsed into Inspection objects."""
        api = ProverkiClient()

        parsed = api._parse_xml_content(_create_test_xml_inspections(), None)

        self.assertEqual(len(parsed), 2)
        first = parsed[0]
        self.assertIsInstance(first, Inspection)
        self.assertEqual(first.registration_number, "772024000001")
        self.assertEqual(first.inn, "7701234567")
        self.assertEqual(first.organisation_name, 'ООО "Тест Компания 1"')
        self.assertEqual(first.control_authority, "Роспотребнадзор")
        self.assertEqual(first.inspection_type, "плановая")
        self.assertEqual(first.legal_basis, "294-ФЗ")

    def test_parse_xml_content_russian_tags(self):
        """Records described with Russian tag names are parsed into Inspection objects."""
        api = ProverkiClient()

        parsed = api._parse_xml_content(_create_test_xml_inspections_russian_tags(), None)

        self.assertEqual(len(parsed), 1)
        first = parsed[0]
        self.assertIsInstance(first, Inspection)
        self.assertEqual(first.registration_number, "772024000003")
        self.assertEqual(first.inn, "7703456789")
        self.assertEqual(first.control_authority, "МЧС России")

    def test_parse_xml_record_with_attributes(self):
        """Values supplied as XML attributes (not child elements) are picked up."""
        from xml.etree import ElementTree as ET

        api = ProverkiClient()
        markup = '<inspection inn="1234567890" registration_number="TEST123" organisation_name="Test Co"/>'
        node = ET.fromstring(markup)  # noqa: S314

        inspection = api._parse_xml_record(node)

        self.assertIsNotNone(inspection)
        self.assertEqual(inspection.inn, "1234567890")
        self.assertEqual(inspection.registration_number, "TEST123")

    def test_parse_xml_record_invalid(self):
        """An element carrying no inspection data is parsed to None."""
        from xml.etree import ElementTree as ET

        api = ProverkiClient()
        markup = "<empty_record></empty_record>"
        node = ET.fromstring(markup)  # noqa: S314

        inspection = api._parse_xml_record(node)

        self.assertIsNone(inspection)

    def test_parse_windows_1251_encoding(self):
        """XML declared and encoded as windows-1251 keeps its Cyrillic text."""
        api = ProverkiClient()
        raw_bytes = """<?xml version="1.0" encoding="windows-1251"?>
<inspections>
<inspection>
<inn>1234567890</inn>
<registration_number>TEST001</registration_number>
<organisation_name>Компания</organisation_name>
</inspection>
</inspections>""".encode("windows-1251")

        parsed = api._parse_xml_content(raw_bytes, None)

        self.assertEqual(len(parsed), 1)
        self.assertEqual(parsed[0].organisation_name, "Компания")

    @patch.object(BaseHTTPClient, "download_file")
    @patch.object(ProverkiClient, "_discover_data_files")
    def test_fetch_inspections_with_file_url(self, mock_discover, mock_download):
        """An explicit file_url is downloaded directly, without file discovery."""
        mock_download.return_value = _create_test_xml_inspections()

        with ProverkiClient() as api:
            loaded = api.fetch_inspections(
                file_url="https://proverki.gov.ru/opendata/test.xml"
            )

        self.assertEqual(len(loaded), 2)
        # Discovery must be bypassed when the caller names the file.
        mock_discover.assert_not_called()

    @patch.object(ProverkiClient, "_discover_data_files")
    def test_fetch_inspections_no_files(self, mock_discover):
        """When discovery finds no files the result is an empty list."""
        mock_discover.return_value = []

        with ProverkiClient() as api:
            loaded = api.fetch_inspections(year=2025)

        self.assertEqual(loaded, [])
@tag("integration", "slow", "network")
class ProverkiClientIntegrationTest(TestCase):
    """
    Integration tests for the proverki.gov.ru client.

    WARNING: these tests send real HTTP requests to external servers.
    Run them with the tag: python manage.py test --tag=integration
    """

    def test_fetch_inspections_real_data(self):
        """
        Integration test: load real inspections through ProverkiClient.

        The test is skipped when the client raises HTTPClientError or returns
        no inspections, so a run that verified nothing is reported as a skip
        rather than as a pass.
        """
        # Only the network call is guarded; assertion failures must not be
        # confused with an unavailable API.
        try:
            with ProverkiClient(timeout=120) as client:
                inspections = client.fetch_inspections(year=2025)
        except HTTPClientError as e:
            # The API may be unavailable.
            self.skipTest(f"External API unavailable: {e}")

        # Check that data was received
        self.assertIsInstance(inspections, list)
        if not inspections:
            # Nothing to validate: treat it like the unavailable-API case.
            self.skipTest(
                "No inspections found "
                "(API may be unavailable or data format changed)"
            )

        # Data is present - check the structure of the first record
        insp = inspections[0]
        self.assertIsInstance(insp, Inspection)
        self.assertIsNotNone(insp.registration_number)
        self.assertIsNotNone(insp.inn)

        # Informational output
        print(f"\n[INTEGRATION] Loaded {len(inspections)} inspections")
        print(f"[INTEGRATION] First inspection: {insp.registration_number}")
        print(f"[INTEGRATION] Organisation: {insp.organisation_name}")
        print(f"[INTEGRATION] Control authority: {insp.control_authority}")