refactor: change cache schema to slot-based

This commit is contained in:
2026-04-06 23:08:57 +02:00
parent 92860f0d30
commit f175eda57e
9 changed files with 594 additions and 264 deletions
+73 -96
View File
@@ -20,9 +20,12 @@ from .config import (
TTL_NOT_FOUND,
TTL_NETWORK_ERROR,
HIGH_CONFIDENCE,
SLOT_SYNCED,
SLOT_UNSYNCED,
)
from .models import TrackMeta, LyricResult, CacheStatus
from .enrichers import create_enrichers, enrich_track
from .ranking import is_better_result, select_best_positive
# Maps CacheStatus to the default TTL used when storing results
@@ -34,44 +37,11 @@ _STATUS_TTL: dict[CacheStatus, Optional[int]] = {
}
def _is_better(new: LyricResult, old: LyricResult, allow_unsynced: bool) -> bool:
    """Compare two results: higher confidence wins; if equal, synced > unsynced.

    If allow_unsynced is False, treat unsynced as strictly worse than any synced.
    """
    positive = (CacheStatus.SUCCESS_SYNCED, CacheStatus.SUCCESS_UNSYNCED)
    # A negative candidate can never displace anything.
    if new.status not in positive:
        return False
    # Any positive candidate beats a negative incumbent outright.
    if old.status not in positive:
        return True
    if not allow_unsynced:
        # When unsynced is disallowed, a synced/unsynced mismatch decides
        # the comparison before confidence is even consulted.
        new_synced = new.status == CacheStatus.SUCCESS_SYNCED
        old_synced = old.status == CacheStatus.SUCCESS_SYNCED
        if new_synced != old_synced:
            return new_synced
    # Confidence is the primary ranking signal.
    if new.confidence != old.confidence:
        return new.confidence > old.confidence
    # Equal confidence: synced wins the tiebreak over unsynced.
    # (Returns False when unsynced results are not allowed and both match.)
    return (
        new.status == CacheStatus.SUCCESS_SYNCED
        and old.status != CacheStatus.SUCCESS_SYNCED
    )
def _pick_for_return(
result: FetchResult,
allow_unsynced: bool,
) -> Optional[LyricResult]:
"""Pick which lyric result should participate in final selection."""
"""Pick best positive slot for final selection under current strategy."""
candidates: list[LyricResult] = []
if result.synced and result.synced.status == CacheStatus.SUCCESS_SYNCED:
candidates.append(result.synced)
@@ -82,43 +52,41 @@ def _pick_for_return(
):
candidates.append(result.unsynced)
if not candidates:
return None
best = candidates[0]
for c in candidates[1:]:
if _is_better(c, best, allow_unsynced=True):
best = c
return best
return select_best_positive(candidates, allow_unsynced=True)
def _pick_for_cache(result: FetchResult) -> Optional[LyricResult]:
"""Pick a single cacheable result from FetchResult for legacy one-slot cache schema."""
slots = [r for r in (result.synced, result.unsynced) if r is not None]
if not slots:
return None
def _iter_slot_results(result: FetchResult) -> list[tuple[str, LyricResult]]:
    """Return all non-None slot results with their cache slot key."""
    # Fixed slot order: synced first, then unsynced.
    slot_pairs = (
        (SLOT_SYNCED, result.synced),
        (SLOT_UNSYNCED, result.unsynced),
    )
    return [(slot, res) for slot, res in slot_pairs if res is not None]
positives = [
r
for r in slots
if r.status in (CacheStatus.SUCCESS_SYNCED, CacheStatus.SUCCESS_UNSYNCED)
]
if positives:
best = positives[0]
for p in positives[1:]:
if _is_better(p, best, allow_unsynced=True):
best = p
return best
# If there is no positive result, prefer caching NETWORK_ERROR over NOT_FOUND
# to avoid long false-negative TTL when error signals disagree between slots.
for r in slots:
if r.status == CacheStatus.NETWORK_ERROR:
return r
for r in slots:
if r.status == CacheStatus.NOT_FOUND:
return r
return None
def _pick_cached_for_return(
    cached_rows: list[LyricResult],
    allow_unsynced: bool,
) -> Optional[LyricResult]:
    """Convert cached slot rows into FetchResult-like view and select return candidate."""
    synced_row: Optional[LyricResult] = None
    unsynced_row: Optional[LyricResult] = None
    # Route each cached row into its slot; if duplicates exist for a
    # status, the last row wins (matching the previous rebuild-per-row logic).
    for row in cached_rows:
        if row.status == CacheStatus.SUCCESS_SYNCED:
            synced_row = row
        elif row.status == CacheStatus.SUCCESS_UNSYNCED:
            unsynced_row = row
    view = FetchResult(synced=synced_row, unsynced=unsynced_row)
    return _pick_for_return(view, allow_unsynced)
def _has_negative_for_both_slots(cached_rows: list[LyricResult]) -> bool:
    """True when both slot rows are present and both are negative."""
    negative_statuses = (CacheStatus.NOT_FOUND, CacheStatus.NETWORK_ERROR)
    # Requires at least two rows (both slots populated) AND every row negative.
    return len(cached_rows) >= 2 and all(
        row.status in negative_statuses for row in cached_rows
    )
class LrcManager:
@@ -137,33 +105,38 @@ class LrcManager:
bypass_cache: bool,
allow_unsynced: bool,
) -> list[tuple[str, LyricResult]]:
"""Run one group: cache-check first, then parallel-fetch uncached. Returns (source, result) pairs."""
"""Run one group with slot-aware cache check then parallel fetch uncached sources."""
cached_results: list[tuple[str, LyricResult]] = []
need_fetch: list[BaseFetcher] = []
for fetcher in group:
source = fetcher.source_name
if not bypass_cache and not fetcher.self_cached:
cached = self.cache.get(track, source)
if cached:
if cached.status in (
CacheStatus.NOT_FOUND,
CacheStatus.NETWORK_ERROR,
):
cached_rows = self.cache.get_all(track, source)
if cached_rows:
if _has_negative_for_both_slots(cached_rows):
logger.debug(
f"[{source}] cache hit: {cached.status.value}, skipping"
f"[{source}] cache hit: all slots negative, skipping"
)
continue
is_trusted = cached.confidence >= HIGH_CONFIDENCE
logger.info(
f"[{source}] cache hit: {cached.status.value}"
f" (confidence={cached.confidence:.0f})"
cached_for_return = _pick_cached_for_return(
cached_rows, allow_unsynced
)
cached_results.append((source, cached))
# Return immediately on trusted synced cache hit
if cached.status == CacheStatus.SUCCESS_SYNCED and is_trusted:
return cached_results
continue
if cached_for_return is not None:
is_trusted = cached_for_return.confidence >= HIGH_CONFIDENCE
logger.info(
f"[{source}] cache hit: {cached_for_return.status.value}"
f" (confidence={cached_for_return.confidence:.0f})"
)
cached_results.append((source, cached_for_return))
# Return immediately on trusted synced cache hit
if (
cached_for_return.status == CacheStatus.SUCCESS_SYNCED
and is_trusted
):
return cached_results
continue
elif not fetcher.self_cached:
logger.debug(f"[{source}] cache bypassed")
need_fetch.append(fetcher)
@@ -193,18 +166,20 @@ class LrcManager:
logger.debug(f"[{source}] returned None")
continue
cache_result = _pick_for_cache(result)
return_result = _pick_for_return(result, allow_unsynced)
if (
cache_result is not None
and not fetcher.self_cached
and not bypass_cache
):
ttl = cache_result.ttl or _STATUS_TTL.get(
cache_result.status, TTL_NOT_FOUND
)
self.cache.set(track, source, cache_result, ttl_seconds=ttl)
if not fetcher.self_cached and not bypass_cache:
for slot_kind, slot_result in _iter_slot_results(result):
ttl = slot_result.ttl or _STATUS_TTL.get(
slot_result.status, TTL_NOT_FOUND
)
self.cache.set(
track,
source,
slot_result,
ttl_seconds=ttl,
positive_kind=slot_kind,
)
if return_result is not None:
logger.info(
@@ -269,8 +244,10 @@ class LrcManager:
)
return result
if best_result is None or _is_better(
result, best_result, allow_unsynced
if best_result is None or is_better_result(
result,
best_result,
allow_unsynced=allow_unsynced,
):
best_result = result