perf(organizations): cache and instrument API responses

This commit is contained in:
2026-05-14 16:14:45 +02:00
parent 6d1ec2e55c
commit 5fdd23ecc0
13 changed files with 603 additions and 33 deletions

View File

@@ -7,3 +7,6 @@ class OrganizationsConfig(AppConfig):
default_auto_field = "django.db.models.BigAutoField"
name = "organizations"
verbose_name = "Организации"
def ready(self) -> None:
import organizations.signals # noqa: F401

View File

@@ -0,0 +1,59 @@
"""Cache helpers for organizations API responses."""
from __future__ import annotations
import time
from django.core.cache import cache
ORGANIZATION_API_CACHE_PREFIX = "api:v2:organizations"
ORGANIZATION_API_CACHE_VERSION_KEY = f"{ORGANIZATION_API_CACHE_PREFIX}:version"
DEFAULT_ORGANIZATION_API_CACHE_VERSION = 1
DEFAULT_ORGANIZATION_API_CACHE_TIMEOUT_SECONDS = 24 * 60 * 60
def get_organization_api_cache_version() -> int:
"""Return the current organizations API cache version."""
version = cache.get(ORGANIZATION_API_CACHE_VERSION_KEY)
if version is None:
cache.add(
ORGANIZATION_API_CACHE_VERSION_KEY,
DEFAULT_ORGANIZATION_API_CACHE_VERSION,
timeout=None,
)
version = cache.get(
ORGANIZATION_API_CACHE_VERSION_KEY,
DEFAULT_ORGANIZATION_API_CACHE_VERSION,
)
try:
return int(version)
except (TypeError, ValueError):
cache.set(
ORGANIZATION_API_CACHE_VERSION_KEY,
DEFAULT_ORGANIZATION_API_CACHE_VERSION,
timeout=None,
)
return DEFAULT_ORGANIZATION_API_CACHE_VERSION
def invalidate_organization_api_cache() -> int:
"""
Invalidate organizations API responses by moving them to a new key version.
Versioning avoids broad cache scans and works with cache backends that do not
support pattern deletion.
"""
new_version = _new_cache_version()
if cache.add(ORGANIZATION_API_CACHE_VERSION_KEY, new_version, timeout=None):
return new_version
try:
return int(cache.incr(ORGANIZATION_API_CACHE_VERSION_KEY))
except (TypeError, ValueError, NotImplementedError):
cache.set(ORGANIZATION_API_CACHE_VERSION_KEY, new_version, timeout=None)
return new_version
def _new_cache_version() -> int:
return time.time_ns()

View File

@@ -6,6 +6,7 @@ import json
from apps.core.management.commands.base import BaseAppCommand
from organizations.cache import invalidate_organization_api_cache
from organizations.services import OrganizationDataSnapshotRefreshService
@@ -36,6 +37,7 @@ class Command(BaseAppCommand):
organization_uids=options.get("uids"),
batch_size=options["batch_size"],
)
invalidate_organization_api_cache()
rendered = json.dumps(
{
"processed": result.processed,

View File

@@ -49,6 +49,13 @@ class OrganizationSerializer(serializers.ModelSerializer):
enrichment = self.context.get("enrichment", {}).get(str(obj.uid))
if enrichment is None:
return {}
data_sources = self.context.get("data_sources")
if data_sources is not None:
return {
source: enrichment.data_presence.get(source, [])
for source in data_sources
if source in enrichment.data_presence
}
return enrichment.data_presence
def get_data_sources(self, obj) -> list[dict[str, int | str]]:

View File

@@ -0,0 +1,89 @@
"""Invalidation hooks for organizations API cache."""
from __future__ import annotations
from apps.parsers.models import ParserLoadLog
from django.db import transaction
from django.db.models.signals import post_delete, post_save
from django.dispatch import receiver
from registers.models import (
Organization as RegistryOrganization,
)
from registers.models import (
Register,
RegisterUpload,
RegistryMembershipPeriod,
)
from organizations.cache import invalidate_organization_api_cache
from organizations.models import OrganizationDataSnapshot
SOURCE_UPDATE_STATUSES = {
ParserLoadLog.Status.SUCCESS,
ParserLoadLog.Status.SKIPPED,
}
def _invalidate_on_commit() -> None:
transaction.on_commit(invalidate_organization_api_cache)
@receiver(
post_save, sender=ParserLoadLog, dispatch_uid="organizations_parser_load_save"
)
def invalidate_for_parser_load(sender, instance: ParserLoadLog, **kwargs) -> None:
"""Invalidate when a parser source reaches a visible terminal state."""
if instance.status in SOURCE_UPDATE_STATUSES:
_invalidate_on_commit()
@receiver(post_save, sender=Register, dispatch_uid="organizations_register_save")
@receiver(post_delete, sender=Register, dispatch_uid="organizations_register_delete")
@receiver(
post_save,
sender=RegistryOrganization,
dispatch_uid="organizations_registry_organization_save",
)
@receiver(
post_delete,
sender=RegistryOrganization,
dispatch_uid="organizations_registry_organization_delete",
)
@receiver(
post_save,
sender=RegistryMembershipPeriod,
dispatch_uid="organizations_registry_membership_save",
)
@receiver(
post_delete,
sender=RegistryMembershipPeriod,
dispatch_uid="organizations_registry_membership_delete",
)
@receiver(
post_save,
sender=OrganizationDataSnapshot,
dispatch_uid="organizations_data_snapshot_save",
)
@receiver(
post_delete,
sender=OrganizationDataSnapshot,
dispatch_uid="organizations_data_snapshot_delete",
)
def invalidate_for_registry_or_snapshot_change(sender, **kwargs) -> None:
"""Invalidate for direct registry and snapshot writes."""
_invalidate_on_commit()
@receiver(
post_save, sender=RegisterUpload, dispatch_uid="organizations_register_upload_save"
)
def invalidate_for_successful_register_upload(
sender,
instance: RegisterUpload,
created: bool,
**kwargs,
) -> None:
"""Invalidate once a registry import has completed successfully."""
if created or instance.import_status != RegisterUpload.ImportStatus.SUCCESS:
return
_invalidate_on_commit()

View File

@@ -5,8 +5,8 @@ from __future__ import annotations
import logging
from celery import shared_task
from django.core.cache import cache
from organizations.cache import invalidate_organization_api_cache
from organizations.services import OrganizationDataSnapshotRefreshService
logger = logging.getLogger(__name__)
@@ -16,7 +16,7 @@ logger = logging.getLogger(__name__)
def refresh_all_organization_data_snapshots(batch_size: int = 100) -> dict:
"""Refresh all organization data snapshots for API v2."""
result = OrganizationDataSnapshotRefreshService.refresh(batch_size=batch_size)
cache.clear()
invalidate_organization_api_cache()
payload = {
"processed": result.processed,
"created": result.created,
@@ -39,7 +39,7 @@ def refresh_organization_data_snapshots_for_parser_batch(
batch_id=batch_id,
batch_size=batch_size,
)
cache.clear()
invalidate_organization_api_cache()
payload = {
"source": source,
"batch_id": batch_id,

View File

@@ -23,11 +23,15 @@ from organizations.api_enrichment import (
to_api_data_source,
to_internal_data_source,
)
from organizations.cache import (
DEFAULT_ORGANIZATION_API_CACHE_TIMEOUT_SECONDS,
ORGANIZATION_API_CACHE_PREFIX,
get_organization_api_cache_version,
)
from organizations.filters import OrganizationFilter
from organizations.models import Organization
from organizations.serializers import OrganizationSerializer
ORGANIZATIONS_API_CACHE_TIMEOUT_SECONDS = 300
ORGANIZATIONS_TAG = swagger_tag("Организации", "Organizations")
ORGANIZATION_DATA_SOURCE_KEYS = ", ".join(sorted(API_DATA_SOURCE_KEY_SET))
@@ -59,7 +63,9 @@ ORGANIZATION_DATA_PARAMS = [
description=(
"Ограничить блок data одним или несколькими источниками. "
f"Допустимые значения: {ORGANIZATION_DATA_SOURCE_KEYS}. "
"Можно передать несколько параметров или CSV-строку."
"Можно передать несколько параметров или CSV-строку. "
"На list endpoint блок data по умолчанию пустой; передайте этот "
"параметр, чтобы вернуть данные источников."
),
),
_query_parameter(
@@ -238,17 +244,28 @@ ORGANIZATION_DETAIL_RESPONSE = openapi.Response(
class CachedReadOnlyMixin:
"""Cache successful GET list/retrieve responses by full request path."""
cache_timeout = ORGANIZATIONS_API_CACHE_TIMEOUT_SECONDS
cache_key_prefix = "api:v2:organizations"
cache_timeout = DEFAULT_ORGANIZATION_API_CACHE_TIMEOUT_SECONDS
cache_key_prefix = ORGANIZATION_API_CACHE_PREFIX
def _cache_timeout(self) -> int:
return getattr(
settings,
"ORGANIZATIONS_API_CACHE_TIMEOUT_SECONDS",
self.cache_timeout,
)
def _build_cache_key(self, request) -> str:
user_marker = "anonymous"
if request.user and request.user.is_authenticated:
user_marker = "authenticated"
raw_key = f"{request.method}:{request.get_full_path()}:{user_marker}"
cache_version = get_organization_api_cache_version()
raw_key = (
f"v{cache_version}:{request.method}:"
f"{request.get_full_path()}:{user_marker}"
)
digest = hashlib.md5(raw_key.encode(), usedforsecurity=False).hexdigest()
return f"{self.cache_key_prefix}:{digest}"
return f"{self.cache_key_prefix}:v{cache_version}:{digest}"
def _cached_response(self, request, producer) -> Response:
cache_key = self._build_cache_key(request)
@@ -260,7 +277,7 @@ class CachedReadOnlyMixin:
response = producer()
if 200 <= response.status_code < 300:
cache.set(cache_key, response.data, timeout=self.cache_timeout)
cache.set(cache_key, response.data, timeout=self._cache_timeout())
response["X-Cache"] = "MISS"
return response
@@ -312,7 +329,9 @@ class OrganizationViewSet(CachedReadOnlyMixin, ReadOnlyModelViewSet):
"По умолчанию показывает только организации с активным участием "
"в реестрах; передайте has_registry=false, чтобы снять это ограничение. "
"Поддерживает пагинацию, поиск по наименованию и реквизитам, фильтры "
"по реестрам и наличию данных по источникам."
"по реестрам и наличию данных по источникам. Для list endpoint "
"тяжелый блок data по умолчанию пустой; передайте data/data_sources, "
"чтобы вернуть данные конкретных источников."
),
manual_parameters=ORGANIZATION_LIST_PARAMS,
responses={200: ORGANIZATION_LIST_RESPONSE},
@@ -343,7 +362,7 @@ class OrganizationViewSet(CachedReadOnlyMixin, ReadOnlyModelViewSet):
def _list_with_enrichment(self, request, *args: Any, **kwargs: Any) -> Response:
queryset = self.filter_queryset(self.get_queryset())
data_sources = self._parse_data_sources(request)
data_sources = self._parse_data_sources(request, default=set())
page = self.paginate_queryset(queryset)
if page is not None:
@@ -419,7 +438,11 @@ class OrganizationViewSet(CachedReadOnlyMixin, ReadOnlyModelViewSet):
)
@staticmethod
def _parse_data_sources(request) -> set[str] | None:
def _parse_data_sources(
request,
*,
default: set[str] | None = None,
) -> set[str] | None:
included = _query_param_values(request, "data", "data_sources")
excluded = _query_param_values(request, "exclude_data", "exclude_data_sources")
@@ -443,7 +466,7 @@ class OrganizationViewSet(CachedReadOnlyMixin, ReadOnlyModelViewSet):
}
if excluded:
return API_DATA_SOURCE_KEY_SET - excluded
return None
return default
def _query_param_values(request, *names: str) -> set[str]: