Add organizations v2 API and registry enrichment
This commit is contained in:
@@ -3,6 +3,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
import tempfile
|
||||
import types
|
||||
@@ -331,6 +332,93 @@ class ProverkiDownloadParseTest(SimpleTestCase):
|
||||
)
|
||||
self.assertEqual(len(inspections), 1)
|
||||
|
||||
def test_download_from_portal_does_not_wait_for_networkidle(self):
    """Check that the portal download navigates with ``domcontentloaded``.

    A stub browser/context/page chain is returned from ``_get_browser``.
    The stub page records every ``goto`` call, and its download object
    points at a temporary file holding a pre-built zip archive. The test
    asserts that the returned bytes equal that archive and that the first
    navigation used ``wait_until="domcontentloaded"``.
    """
    archive = build_zip(
        [("data.xml", _xml_with_tag("INSPECTION", _inspection_attrs()))]
    )
    # delete=False keeps the file on disk after the handle is closed so the
    # stub download below can hand its path to the client.
    temp_file = tempfile.NamedTemporaryFile(delete=False)
    # Register removal immediately so the file cannot leak if anything
    # between here and the download call raises.
    self.addCleanup(os.unlink, temp_file.name)
    with temp_file:
        temp_file.write(archive)
    goto_calls = []

    class _Element:
        """Stub element handle exposing ``click`` and an optional ``href``."""

        def __init__(self, href: str | None = None):
            self.href = href

        def click(self):
            return None

        def get_attribute(self, name: str):
            return self.href if name == "href" else None

    class _Download:
        """Stub download whose path is the temporary archive file."""

        def path(self):
            return temp_file.name

    class _DownloadContext:
        """Stub for the context manager returned by ``expect_download``."""

        value = _Download()

        def __enter__(self):
            return self

        def __exit__(self, exc_type, exc_val, exc_tb):
            return None

    class _Page:
        """Stub page that records ``goto`` calls and serves canned answers."""

        def __init__(self):
            self.query_count = 0

        def goto(self, url, wait_until=None, timeout=None):
            goto_calls.append(
                {"url": url, "wait_until": wait_until, "timeout": timeout}
            )

        def wait_for_selector(self, *args, **kwargs):
            return None

        def wait_for_timeout(self, *args, **kwargs):
            return None

        def content(self):
            return "<html><a href='/data.zip'>Набор данных</a></html>"

        def title(self):
            return "dataset"

        def query_selector(self, selector):
            # First lookup yields an element without href; every later
            # lookup yields one pointing at "/data.zip".
            # NOTE(review): presumably the first is the tab that gets
            # clicked and the later ones the archive link - confirm
            # against _download_from_portal.
            self.query_count += 1
            if self.query_count == 1:
                return _Element()
            return _Element("/data.zip")

        def expect_download(self, timeout=None):
            return _DownloadContext()

    class _Context:
        def new_page(self):
            return _Page()

        def close(self):
            return None

    class _Browser:
        def new_context(self, **kwargs):
            return _Context()

    class _PortalClient(ProverkiClient):
        def _get_browser(self):  # type: ignore[override]
            return _Browser()

    content = _PortalClient(use_playwright=True)._download_from_portal(
        "https://proverki.gov.ru/portal/public-open-data/check/2026/5?isFederalLaw248=true"
    )

    self.assertEqual(content, archive)
    # Fail with a readable message instead of an IndexError when the client
    # never navigated at all.
    self.assertTrue(goto_calls, "page.goto was never called")
    self.assertEqual(goto_calls[0]["wait_until"], "domcontentloaded")
|
||||
|
||||
def test_download_and_parse_html_switches_to_playwright(self):
|
||||
xml = _xml_with_tag("INSPECTION", _inspection_attrs())
|
||||
|
||||
@@ -713,8 +801,10 @@ class _FakeBrowser:
|
||||
def __init__(self, page: _FakePage):
    """Keep the fake page and start with no recorded context kwargs.

    Args:
        page: Fake page stored on the instance as ``_page``.
    """
    # One entry is expected per new_context() call; starts empty.
    self.context_kwargs = []
    self.closed = False
    self._page = page
|
||||
|
||||
def new_context(self, **kwargs):
    """Record the context keyword arguments and return a fake context.

    Args:
        **kwargs: Whatever the caller passes when creating a context;
            appended unchanged to ``self.context_kwargs`` so tests can
            inspect it afterwards.

    Returns:
        A ``_FakeContext`` built around the stored fake page.
    """
    self.context_kwargs.append(kwargs)
    return _FakeContext(self._page)
|
||||
|
||||
def close(self):
|
||||
@@ -753,6 +843,25 @@ class ProverkiPlaywrightStubTest(SimpleTestCase):
|
||||
result = client._download_with_playwright("http://example.com")
|
||||
self.assertIn(b"<xml", result)
|
||||
|
||||
def test_download_with_playwright_uses_configured_proxy(self):
    """Check that the configured proxy reaches the browser context.

    The client is built with a single proxy URL and a fake browser. After
    the download, the first set of context keyword arguments captured in
    ``context_kwargs`` must carry ``proxy={"server": <that URL>}``.
    """
    proxy_url = "http://192.168.1.108:3130"
    fake_page = _FakePage(
        content_type="application/xml",
        content="<xml></xml>",
        download_path=_temp_file(b"<xml></xml>"),
    )
    fake_browser = _FakeBrowser(fake_page)
    client = ProverkiClient(proxies=[proxy_url])
    client._browser = fake_browser

    payload = client._download_with_playwright("http://example.com")

    self.assertIn(b"<xml", payload)
    first_context_kwargs = fake_browser.context_kwargs[0]
    self.assertEqual(first_context_kwargs["proxy"], {"server": proxy_url})
|
||||
|
||||
def test_download_with_playwright_download_link(self):
|
||||
download_path = _temp_file(b"zip-data")
|
||||
page = _FakePage(
|
||||
@@ -839,6 +948,68 @@ class ProverkiPlaywrightStubTest(SimpleTestCase):
|
||||
result = client._download_from_portal("http://portal.example.com")
|
||||
self.assertEqual(result, b"zip-content")
|
||||
|
||||
def test_download_from_portal_downloads_zip_href_without_browser_download(self):
    """Check that a zip ``href`` is fetched without a browser download.

    The fake page raises from ``expect_download``, so the test fails if
    the client takes the browser-download route. The archive is served by
    the test HTTP server at the path the zip link points to, and the
    result must equal that archive.
    """
    zip_path = "/blob/opendata/data.zip"
    inspection_xml = _xml_with_tag("INSPECTION", _inspection_attrs())
    expected_archive = build_zip([("data.xml", inspection_xml)])

    class _NoBrowserDownloadPage(_FakePage):
        """Fake page that rejects any attempt at a browser download."""

        def expect_download(self, timeout=None):
            raise AssertionError("browser download should not be used for zip href")

    fake_page = _NoBrowserDownloadPage(
        content_type="text/html",
        content="content",
        download_path=None,
        zip_link=_FakeLink(href=zip_path),
        download_tab=_FakeLink(),
    )
    with TestHTTPServer() as server:
        server.add_bytes(zip_path, expected_archive, content_type="application/zip")
        client = ProverkiClient(
            host="testserver",
            scheme="http",
            http_adapter=server.adapter,
            use_playwright=True,
        )
        client._browser = _FakeBrowser(fake_page)
        portal_url = f"{server.base_url}/portal/public-open-data/check/2026/5"

        downloaded = client._download_from_portal(portal_url)

    self.assertEqual(downloaded, expected_archive)
|
||||
|
||||
def test_download_portal_href_sends_download_headers(self):
    """Check the endpoint and headers used for a portal ``href`` download.

    A recording stand-in replaces the client's HTTP client. The test
    asserts that the relative href is turned into an absolute URL on the
    portal host, that ``Referer`` equals the portal page URL, and that
    ``Accept`` mentions ``application/zip``.
    """

    class _HeaderSpy:
        """Remembers the arguments of the last ``download_file`` call."""

        # Class-level defaults stand in for "not called yet".
        endpoint = None
        headers = None

        def download_file(self, endpoint, headers=None):
            self.endpoint, self.headers = endpoint, headers
            return b"zip-content"

    spy = _HeaderSpy()
    client = ProverkiClient()
    client._http_client = spy
    portal_url = "https://proverki.gov.ru/portal/public-open-data/check/2026/5"

    payload = client._download_portal_href(portal_url, "/blob/opendata/data.zip")

    self.assertEqual(payload, b"zip-content")
    self.assertEqual(spy.endpoint, "https://proverki.gov.ru/blob/opendata/data.zip")
    sent_headers = spy.headers
    self.assertEqual(sent_headers["Referer"], portal_url)
    self.assertIn("application/zip", sent_headers["Accept"])
|
||||
|
||||
def test_download_from_portal_zip_link_without_download_path(self):
|
||||
page = _FakePage(
|
||||
content_type="text/html",
|
||||
|
||||
Reference in New Issue
Block a user