Add organizations v2 API and registry enrichment
All checks were successful
CI/CD Pipeline / Quality Gate (push) Successful in 26s
CI/CD Pipeline / Build and Push Images (push) Successful in 6s
CI/CD Pipeline / Internal Notify (push) Successful in 0s
CI/CD Pipeline / Deploy Dev in Dokploy (push) Successful in 1s

This commit is contained in:
2026-05-06 19:04:46 +02:00
parent f54aa4cb0b
commit 0f17ff6773
62 changed files with 10311 additions and 430 deletions

View File

@@ -3,6 +3,7 @@
from __future__ import annotations
import asyncio
import os
import sys
import tempfile
import types
@@ -331,6 +332,93 @@ class ProverkiDownloadParseTest(SimpleTestCase):
)
self.assertEqual(len(inspections), 1)
def test_download_from_portal_does_not_wait_for_networkidle(self):
    """Portal download navigates with ``wait_until="domcontentloaded"``.

    A stub browser stack (browser -> context -> page -> download) is
    injected through ``_get_browser``; the stub download points at a
    temporary file holding the expected archive bytes.
    """
    inspection_xml = _xml_with_tag("INSPECTION", _inspection_attrs())
    archive = build_zip([("data.xml", inspection_xml)])

    # The stub download hands out this file's path, so it must exist on
    # disk until the client has finished reading it.
    temp_file = tempfile.NamedTemporaryFile(delete=False)
    temp_file.write(archive)
    temp_file.close()

    # Every ``goto`` invocation on the stub page is recorded here.
    goto_calls = []

    class _Element:
        def __init__(self, href: str | None = None):
            self.href = href

        def click(self):
            return

        def get_attribute(self, name: str):
            if name == "href":
                return self.href
            return None

    class _Download:
        def path(self):
            return temp_file.name

    class _DownloadContext:
        value = _Download()

        def __enter__(self):
            return self

        def __exit__(self, exc_type, exc_val, exc_tb):
            # Falsy return: exceptions raised inside the block propagate.
            return

    class _Page:
        def __init__(self):
            self.query_count = 0

        def goto(self, url, wait_until=None, timeout=None):
            goto_calls.append(
                dict(url=url, wait_until=wait_until, timeout=timeout)
            )

        def wait_for_selector(self, *args, **kwargs):
            return

        def wait_for_timeout(self, *args, **kwargs):
            return

        def content(self):
            return "<html><a href='/data.zip'>Набор данных</a></html>"

        def title(self):
            return "dataset"

        def query_selector(self, selector):
            # First lookup yields an element without href; every later
            # lookup yields the zip link.
            self.query_count += 1
            href = None if self.query_count == 1 else "/data.zip"
            return _Element(href)

        def expect_download(self, timeout=None):
            return _DownloadContext()

    class _Context:
        def new_page(self):
            return _Page()

        def close(self):
            return

    class _Browser:
        def new_context(self, **kwargs):
            return _Context()

    class _PortalClient(ProverkiClient):
        def _get_browser(self):  # type: ignore[override]
            return _Browser()

    portal_client = _PortalClient(use_playwright=True)
    try:
        content = portal_client._download_from_portal(
            "https://proverki.gov.ru/portal/public-open-data/check/2026/5?isFederalLaw248=true"
        )
    finally:
        os.unlink(temp_file.name)

    self.assertEqual(content, archive)
    first_navigation = goto_calls[0]
    self.assertEqual(first_navigation["wait_until"], "domcontentloaded")
def test_download_and_parse_html_switches_to_playwright(self):
xml = _xml_with_tag("INSPECTION", _inspection_attrs())
@@ -713,8 +801,10 @@ class _FakeBrowser:
def __init__(self, page: _FakePage):
    """Create a fake browser bound to ``page``.

    Args:
        page: The fake page stored on the instance and later passed to
            the fake context.
    """
    # Keyword-argument dicts received by ``new_context``, in call order.
    self.context_kwargs = []
    self.closed = False
    self._page = page
def new_context(self, **kwargs):
    """Record the requested context options and return a fake context.

    Args:
        **kwargs: Options the caller passes when creating a browser
            context. They are appended, as one dict per call, to
            ``self.context_kwargs`` so tests can assert on them.

    Returns:
        A ``_FakeContext`` built from the page given at construction.
    """
    self.context_kwargs.append(kwargs)
    return _FakeContext(self._page)
def close(self):
@@ -753,6 +843,25 @@ class ProverkiPlaywrightStubTest(SimpleTestCase):
result = client._download_with_playwright("http://example.com")
self.assertIn(b"<xml", result)
def test_download_with_playwright_uses_configured_proxy(self):
    """A configured proxy shows up in the browser context options."""
    proxy_url = "http://192.168.1.108:3130"
    xml_path = _temp_file(b"<xml></xml>")
    fake_browser = _FakeBrowser(
        _FakePage(
            content_type="application/xml",
            content="<xml></xml>",
            download_path=xml_path,
        )
    )
    client = ProverkiClient(proxies=[proxy_url])
    client._browser = fake_browser

    payload = client._download_with_playwright("http://example.com")

    self.assertIn(b"<xml", payload)
    # The fake browser keeps the kwargs of each ``new_context`` call.
    first_context_options = fake_browser.context_kwargs[0]
    self.assertEqual(first_context_options["proxy"], {"server": proxy_url})
def test_download_with_playwright_download_link(self):
download_path = _temp_file(b"zip-data")
page = _FakePage(
@@ -839,6 +948,68 @@ class ProverkiPlaywrightStubTest(SimpleTestCase):
result = client._download_from_portal("http://portal.example.com")
self.assertEqual(result, b"zip-content")
def test_download_from_portal_downloads_zip_href_without_browser_download(self):
    """A zip href on the portal page is fetched without a browser download.

    The fake page raises if ``expect_download`` is ever called, so the
    test only passes when the archive is obtained another way; here the
    bytes are served by the local test HTTP server.
    """
    zip_path = "/blob/opendata/data.zip"
    inspection_xml = _xml_with_tag("INSPECTION", _inspection_attrs())
    archive = build_zip([("data.xml", inspection_xml)])

    class _NoBrowserDownloadPage(_FakePage):
        def expect_download(self, timeout=None):
            raise AssertionError("browser download should not be used for zip href")

    page = _NoBrowserDownloadPage(
        content_type="text/html",
        content="content",
        download_path=None,
        zip_link=_FakeLink(href=zip_path),
        download_tab=_FakeLink(),
    )

    with TestHTTPServer() as server:
        server.add_bytes(zip_path, archive, content_type="application/zip")
        client = ProverkiClient(
            host="testserver",
            scheme="http",
            http_adapter=server.adapter,
            use_playwright=True,
        )
        client._browser = _FakeBrowser(page)
        portal_url = f"{server.base_url}/portal/public-open-data/check/2026/5"
        result = client._download_from_portal(portal_url)

    self.assertEqual(result, archive)
def test_download_portal_href_sends_download_headers(self):
    """``_download_portal_href`` resolves the href and sends download headers.

    A recording stand-in replaces the HTTP client and captures the
    endpoint and headers passed to ``download_file``.
    """

    class _RecordingHTTPClient:
        # Both stay ``None`` until ``download_file`` is invoked.
        endpoint = None
        headers = None

        def download_file(self, endpoint, headers=None):
            self.endpoint, self.headers = endpoint, headers
            return b"zip-content"

    portal_url = "https://proverki.gov.ru/portal/public-open-data/check/2026/5"
    recorder = _RecordingHTTPClient()
    client = ProverkiClient()
    client._http_client = recorder

    result = client._download_portal_href(portal_url, "/blob/opendata/data.zip")

    self.assertEqual(result, b"zip-content")
    # The relative href is expected to be resolved against the portal origin.
    self.assertEqual(
        recorder.endpoint,
        "https://proverki.gov.ru/blob/opendata/data.zip",
    )
    sent_headers = recorder.headers
    self.assertEqual(sent_headers["Referer"], portal_url)
    self.assertIn("application/zip", sent_headers["Accept"])
def test_download_from_portal_zip_link_without_download_path(self):
page = _FakePage(
content_type="text/html",