fix(parsers): search registry vacancies across job boards

This commit is contained in:
2026-05-14 17:07:58 +02:00
parent 5fdd23ecc0
commit df89e498cc
3 changed files with 327 additions and 18 deletions

View File

@@ -2191,6 +2191,160 @@ class ParseVacanciesTaskTestCase(TestCase):
{"trudvsem:7701000102"},
)
@override_settings(SUPERJOB_APP_ID="test-superjob-app-id")
def test_parse_trudvsem_vacancies_matches_job_boards_by_employer_name(self):
    """Job-board vacancies are attached to a registry member by employer name.

    The stubbed client exposes three sources: ``trudvsem`` (flagged as
    supporting INN lookup) plus ``hh`` and ``superjob`` (flagged as not
    supporting it). The test expects the two boards to be queried with the
    text "ромашка", and only the vacancies whose employer is 'ООО "Ромашка"'
    to be stored and linked to the organization.
    """
    member = OrganizationFactory(
        pn_name='Общество с ограниченной ответственностью "Ромашка"',
        mn_inn=7701000301,
        mn_ogrn=1027700000301,
    )
    RegistryMembershipPeriodFactory(organization=member, ended_at=None)
    client_init_kwargs = {}
    text_queries_by_source = {}

    def _inn_lookup_item(company_inn):
        # The single vacancy returned by the INN-based (trudvsem) lookup.
        return GenericParserItem(
            source=ParserLoadLog.Source.TRUDVSEM,
            external_id="trudvsem:romashka",
            inn=company_inn,
            title="Работа России",
            payload={"vacancy_source": "trudvsem"},
        )

    class _Provider:
        """Per-source stub; text-search sources record the query they get."""

        def __init__(self, source_name, *, supports_company_inn):
            self.source_name = source_name
            self.supports_company_inn = supports_company_inn

        def fetch_vacancies(self, **kwargs):
            board = self.source_name
            if board == "trudvsem":
                return [_inn_lookup_item(kwargs["company_inn"])]

            text_queries_by_source[board] = kwargs["text"]
            # One vacancy from the registry member, one from an unrelated
            # employer that the test expects not to be saved.
            return [
                GenericParserItem(
                    source=ParserLoadLog.Source.TRUDVSEM,
                    external_id=f"{board}:{slug}",
                    organisation_name=employer,
                    title=f"{board} {kind} vacancy",
                    payload={"vacancy_source": board},
                )
                for slug, employer, kind in (
                    ("romashka", 'ООО "Ромашка"', "matching"),
                    ("other", 'ООО "Лютик"', "unrelated"),
                )
            ]

    class _VacanciesClient:
        """Stand-in for ``parser_tasks.VacanciesClient``; records init kwargs."""

        def __init__(self, **kwargs):
            captured = dict(kwargs)
            client_init_kwargs.update(captured)

        def __enter__(self):
            return self

        def __exit__(self, exc_type, exc_val, exc_tb):
            # Implicit None: exceptions are never suppressed.
            return

        def fetch_vacancies(self, **kwargs):
            return [_inn_lookup_item(kwargs["company_inn"])]

        def iter_source_clients(self):
            return [
                (name, _Provider(name, supports_company_inn=by_inn))
                for name, by_inn in (
                    ("trudvsem", True),
                    ("hh", False),
                    ("superjob", False),
                )
            ]

    # Swap the client on the tasks module for the duration of the call only.
    real_client = parser_tasks.VacanciesClient
    parser_tasks.VacanciesClient = _VacanciesClient
    try:
        result = parse_trudvsem_vacancies(limit=50, proxies=[])
    finally:
        parser_tasks.VacanciesClient = real_client

    self.assertEqual(result["status"], "success")
    self.assertIsNone(client_init_kwargs["sources"])

    expected_queries = {
        "hh": "ромашка",
        "superjob": "ромашка",
    }
    self.assertEqual(text_queries_by_source, expected_queries)
    self.assertEqual(result["saved"], 3)

    stored_rows = set(
        GenericParserRecord.objects.values_list(
            "external_id",
            "source",
            "registry_organization_id",
        )
    )
    expected_rows = {
        ("trudvsem:romashka", "trudvsem", member.id),
        ("hh:romashka", "hh", member.id),
        ("superjob:romashka", "superjob", member.id),
    }
    self.assertEqual(stored_rows, expected_rows)
def test_registry_job_board_matching_fetches_only_first_text_search_page(self):
    """Text search on a job board is requested once, at offset 0.

    The stub provider always returns one matching vacancy whatever the
    offset, so any further page request would show up as an extra entry in
    the recorded offsets.
    """
    member = OrganizationFactory(
        pn_name='Общество с ограниченной ответственностью "Ромашка"',
        mn_inn=7701000302,
        mn_ogrn=1027700000302,
    )
    RegistryMembershipPeriodFactory(organization=member, ended_at=None)
    requested_offsets = []

    class _Provider:
        """Text-search-only source stub that records each requested offset."""

        supports_company_inn = False

        def fetch_vacancies(self, **kwargs):
            offset = kwargs["offset"]
            requested_offsets.append(offset)
            vacancy = GenericParserItem(
                source=ParserLoadLog.Source.TRUDVSEM,
                external_id=f"hh:romashka:{offset}",
                organisation_name='ООО "Ромашка"',
                title="HeadHunter",
                payload={"vacancy_source": "hh"},
            )
            return [vacancy]

    class _VacanciesClient:
        """Stand-in for ``parser_tasks.VacanciesClient`` with one ``hh`` source."""

        def __init__(self, **kwargs):
            # Constructor arguments are irrelevant to this test.
            return

        def __enter__(self):
            return self

        def __exit__(self, exc_type, exc_val, exc_tb):
            # Implicit None: exceptions are never suppressed.
            return

        def iter_source_clients(self):
            provider = _Provider()
            return [("hh", provider)]

    # Swap the client on the tasks module for the duration of the call only.
    real_client = parser_tasks.VacanciesClient
    parser_tasks.VacanciesClient = _VacanciesClient
    try:
        result = parse_trudvsem_vacancies(limit=1, proxies=[])
    finally:
        parser_tasks.VacanciesClient = real_client

    self.assertEqual(result["status"], "success")
    self.assertEqual(result["saved"], 1)
    self.assertEqual(requested_offsets, [0])
@override_settings(SUPERJOB_APP_ID="test-superjob-app-id")
def test_parse_trudvsem_vacancies_uses_combined_vacancies_client(self):
captured_kwargs = {}