fix(parsers): detect stale jobs by run age
This commit is contained in:
@@ -693,9 +693,15 @@ class BackgroundJobService(BaseReadOnlyService):
|
||||
from apps.core.models import JobStatus
|
||||
|
||||
cutoff = timezone.now() - timedelta(minutes=max_age_minutes)
|
||||
queryset = cls.get_queryset().filter(
|
||||
status__in=[JobStatus.PENDING, JobStatus.STARTED, JobStatus.RETRY],
|
||||
updated_at__lt=cutoff,
|
||||
queryset = (
|
||||
cls.get_queryset()
|
||||
.filter(
|
||||
status__in=[JobStatus.PENDING, JobStatus.STARTED, JobStatus.RETRY],
|
||||
)
|
||||
.filter(
|
||||
Q(started_at__isnull=False, started_at__lt=cutoff)
|
||||
| Q(started_at__isnull=True, created_at__lt=cutoff)
|
||||
)
|
||||
)
|
||||
if task_names:
|
||||
queryset = queryset.filter(task_name__in=task_names)
|
||||
|
||||
@@ -323,10 +323,16 @@ def _active_tasks_for_definition(
|
||||
for source_key in definition.source_keys
|
||||
if source_key in PARSER_SOURCES
|
||||
]
|
||||
queryset = BackgroundJobService.get_queryset().filter(
|
||||
task_name__in=task_names,
|
||||
status__in=ACTIVE_JOB_STATUSES,
|
||||
updated_at__gte=_stale_cutoff(),
|
||||
queryset = (
|
||||
BackgroundJobService.get_queryset()
|
||||
.filter(
|
||||
task_name__in=task_names,
|
||||
status__in=ACTIVE_JOB_STATUSES,
|
||||
)
|
||||
.filter(
|
||||
Q(started_at__isnull=False, started_at__gte=_stale_cutoff())
|
||||
| Q(started_at__isnull=True, created_at__gte=_stale_cutoff())
|
||||
)
|
||||
)
|
||||
return [_serialize_active_job(job) for job in queryset.order_by("-created_at")[:10]]
|
||||
|
||||
|
||||
@@ -416,33 +416,24 @@ class ParserLoadLogService(BaseService[ParserLoadLog]):
|
||||
updated = 0
|
||||
active_statuses = [JobStatus.PENDING, JobStatus.STARTED, JobStatus.RETRY]
|
||||
for log in stale_logs:
|
||||
batch_job = (
|
||||
BackgroundJob.objects.filter(
|
||||
status__in=active_statuses,
|
||||
meta__source=log.source,
|
||||
meta__batch_id=log.batch_id,
|
||||
)
|
||||
.order_by("-updated_at")
|
||||
.first()
|
||||
active_jobs = BackgroundJob.objects.filter(
|
||||
status__in=active_statuses,
|
||||
meta__source=log.source,
|
||||
).filter(Q(meta__batch_id=log.batch_id) | Q(meta__batch_id__isnull=True))
|
||||
fresh_jobs = active_jobs.filter(
|
||||
Q(started_at__isnull=False, started_at__gte=cutoff)
|
||||
| Q(started_at__isnull=True, created_at__gte=cutoff)
|
||||
)
|
||||
source_job = None
|
||||
if batch_job is None:
|
||||
source_job = (
|
||||
BackgroundJob.objects.filter(
|
||||
status__in=active_statuses,
|
||||
meta__source=log.source,
|
||||
meta__batch_id__isnull=True,
|
||||
)
|
||||
.order_by("-updated_at")
|
||||
.first()
|
||||
)
|
||||
job = batch_job or source_job
|
||||
if job is not None and job.updated_at >= cutoff:
|
||||
if fresh_jobs.exists():
|
||||
continue
|
||||
|
||||
cls.mark_failed(log, stale_message)
|
||||
updated += 1
|
||||
if job is not None:
|
||||
stale_jobs = active_jobs.filter(
|
||||
Q(started_at__isnull=False, started_at__lt=cutoff)
|
||||
| Q(started_at__isnull=True, created_at__lt=cutoff)
|
||||
)
|
||||
for job in stale_jobs.order_by("created_at"):
|
||||
job.fail(error=stale_message)
|
||||
|
||||
return updated
|
||||
|
||||
@@ -21,7 +21,7 @@ from apps.parsers.models import (
|
||||
ProcurementRecord,
|
||||
)
|
||||
from django.conf import settings
|
||||
from django.db.models import Max
|
||||
from django.db.models import Max, Q
|
||||
from django.http import Http404
|
||||
from django.utils import timezone
|
||||
from rest_framework.exceptions import ValidationError
|
||||
@@ -727,10 +727,16 @@ class SourceCardService:
|
||||
cls, definition: SourceCardDefinition
|
||||
) -> list[dict[str, Any]]:
|
||||
cutoff = cls._stale_cutoff()
|
||||
queryset = BackgroundJobService.get_queryset().filter(
|
||||
task_name__in=definition.task_names,
|
||||
status__in=ACTIVE_JOB_STATUSES,
|
||||
updated_at__gte=cutoff,
|
||||
queryset = (
|
||||
BackgroundJobService.get_queryset()
|
||||
.filter(
|
||||
task_name__in=definition.task_names,
|
||||
status__in=ACTIVE_JOB_STATUSES,
|
||||
)
|
||||
.filter(
|
||||
Q(started_at__isnull=False, started_at__gte=cutoff)
|
||||
| Q(started_at__isnull=True, created_at__gte=cutoff)
|
||||
)
|
||||
)
|
||||
return [
|
||||
cls._serialize_job(job) for job in queryset.order_by("-created_at")[:10]
|
||||
|
||||
Reference in New Issue
Block a user