"""Add ``data_source_counts`` to organization snapshots and backfill it.

The new JSON column stores, per snapshot, a list of
``{"source": <name>, "count": <n>}`` objects derived from the snapshot's
``data`` mapping. PostgreSQL databases are backfilled with a single SQL
``UPDATE``; every other backend falls back to a batched Python loop.
"""

from django.db import migrations, models

# Raw source keys found in ``snapshot.data`` mapped to the names exposed
# through the API. Keys without an entry are exposed unchanged.
API_DATA_SOURCE_ALIASES = {
    "trudvsem": "vacancies",
}

# Number of snapshots fetched per iterator chunk and written per bulk update.
BACKFILL_BATCH_SIZE = 100


def to_api_data_source(source):
    """Return the API-facing name for ``source``.

    ``source`` is coerced to ``str``; names without an alias are returned
    as that string.
    """
    name = str(source)
    return API_DATA_SOURCE_ALIASES.get(name, name)


def data_source_summary(data):
    """Build the per-source record counts for one snapshot's ``data``.

    A list value contributes its length, any other truthy value counts as a
    single record, and falsy values count as zero. Sources with a zero count
    are omitted. Entries are emitted in the sorted order of the raw keys.

    Returns an empty list when ``data`` is ``None``, mirroring the SQL
    backfill, which stores ``[]`` for snapshots whose ``data`` is NULL.

    NOTE(review): the SQL backfill orders entries by the *aliased* source
    name and counts a numeric ``0`` as one record, whereas this function
    sorts by the raw key and counts ``0`` as zero records. Confirm which
    behaviour the API expects before relying on both paths agreeing.
    """
    if data is None:
        # ``sorted(None)`` would raise TypeError and abort the migration.
        return []

    summary = []
    for source in sorted(data):
        value = data[source]
        if isinstance(value, list):
            count = len(value)
        else:
            count = 1 if value else 0
        if count:
            summary.append(
                {"source": to_api_data_source(source), "count": count}
            )
    return summary


def backfill_data_source_counts_python(apps):
    """Backfill ``data_source_counts`` through the ORM in fixed-size batches.

    Used for database backends other than PostgreSQL. Snapshots are streamed
    with ``iterator()`` and written back with ``bulk_update`` so that only
    one batch of pending updates is held in memory at a time.
    """
    snapshot_model = apps.get_model("organizations", "OrganizationDataSnapshot")
    queryset = snapshot_model.objects.only(
        "organization_id",
        "data",
        "data_source_counts",
    )

    pending = []
    for snapshot in queryset.iterator(chunk_size=BACKFILL_BATCH_SIZE):
        snapshot.data_source_counts = data_source_summary(snapshot.data)
        pending.append(snapshot)
        if len(pending) >= BACKFILL_BATCH_SIZE:
            snapshot_model.objects.bulk_update(pending, ["data_source_counts"])
            pending = []

    # Flush the final, partially filled batch.
    if pending:
        snapshot_model.objects.bulk_update(pending, ["data_source_counts"])


def backfill_data_source_counts(apps, schema_editor):
    """Populate ``data_source_counts`` for every existing snapshot.

    On PostgreSQL the counts are computed in the database with one
    ``UPDATE`` over ``organizations_data_snapshot``; on any other backend
    the work is delegated to ``backfill_data_source_counts_python``.
    """
    if schema_editor.connection.vendor != "postgresql":
        backfill_data_source_counts_python(apps)
        return

    with schema_editor.connection.cursor() as cursor:
        # For each top-level key of ``data``: apply the 'trudvsem' ->
        # 'vacancies' alias, count array elements (or 0/1 for other values),
        # drop zero counts and aggregate the rest ordered by source name.
        # COALESCE turns "no rows aggregated" into an empty JSON array.
        cursor.execute(
            """
            UPDATE organizations_data_snapshot snapshot
            SET data_source_counts = COALESCE(
                (
                    SELECT jsonb_agg(
                        jsonb_build_object(
                            'source', source_counts.source,
                            'count', source_counts.record_count
                        )
                        ORDER BY source_counts.source
                    )
                    FROM (
                        SELECT
                            CASE source_items.key
                                WHEN 'trudvsem' THEN 'vacancies'
                                ELSE source_items.key
                            END AS source,
                            CASE
                                WHEN jsonb_typeof(source_items.value) = 'array'
                                    THEN jsonb_array_length(source_items.value)
                                WHEN source_items.value IN (
                                    'null'::jsonb,
                                    'false'::jsonb,
                                    '[]'::jsonb,
                                    '{}'::jsonb,
                                    '""'::jsonb
                                ) THEN 0
                                ELSE 1
                            END AS record_count
                        FROM jsonb_each(snapshot.data) AS source_items
                    ) AS source_counts
                    WHERE source_counts.record_count > 0
                ),
                '[]'::jsonb
            )
            """
        )


def clear_data_source_counts(apps, schema_editor):
    """Reverse step: reset ``data_source_counts`` to ``[]`` on all snapshots."""
    snapshot_model = apps.get_model("organizations", "OrganizationDataSnapshot")
    snapshot_model.objects.update(data_source_counts=[])


class Migration(migrations.Migration):
    """Add the ``data_source_counts`` JSON field, then backfill it."""

    dependencies = [
        ("organizations", "0004_seed_daily_snapshot_refresh_schedule"),
    ]

    operations = [
        migrations.AddField(
            model_name="organizationdatasnapshot",
            name="data_source_counts",
            field=models.JSONField(
                default=list,
                help_text="Готовый JSON data_sources для API v2",
                verbose_name="счетчики источников",
            ),
        ),
        migrations.RunPython(
            backfill_data_source_counts,
            reverse_code=clear_data_source_counts,
        ),
    ]