# NOTE: page-scrape residue kept as comments; it is not part of the module.
#   Spaces: Sleeping / Sleeping
#   File size: 4,909 Bytes
#   6830eb0  (presumably a revision id)
#   (a line-number gutter "1 ... 142" followed here in the captured page)
from __future__ import annotations

import hashlib
import json
import sqlite3
from collections.abc import Iterator
from contextlib import contextmanager
from datetime import datetime
from pathlib import Path
from typing import Any
class CacheManager:
    """SQLite-backed cache for video transcripts and extraction results.

    Two tables live in ``<cache_dir>/extraction_cache.db``:

    * ``transcripts`` -- one row per ``video_id``.
    * ``extractions`` -- one row per ``(input_hash, form_type)``. Because
      ``model_name`` is not part of the primary key, storing a result for a
      second model replaces the first model's row; a lookup for the first
      model then misses.

    Row timestamps are written by SQLite itself as UTC text
    (``YYYY-MM-DD HH:MM:SS``) so that they compare correctly against the
    ``datetime('now', ...)`` cutoff used by :meth:`clear_cache`.
    """

    def __init__(self, cache_dir: str | Path = "cache") -> None:
        """Create the cache directory (if needed) and initialise the database.

        Args:
            cache_dir: Directory that holds the SQLite database file.
        """
        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        # Create SQLite database for structured results
        self.db_path = self.cache_dir / "extraction_cache.db"
        self._init_db()

    @contextmanager
    def _connection(self) -> Iterator[sqlite3.Connection]:
        """Yield a connection that is committed (or rolled back) and closed.

        ``with sqlite3.connect(...)`` on its own only manages the
        transaction; it leaves the connection open. This helper adds the
        missing ``close()`` so each cache call releases its file handle.
        """
        conn = sqlite3.connect(self.db_path)
        try:
            with conn:  # commit on success, roll back on exception
                yield conn
        finally:
            conn.close()

    def _init_db(self) -> None:
        """Initialize the SQLite database with necessary tables."""
        with self._connection() as conn:
            conn.execute("""
                CREATE TABLE IF NOT EXISTS extractions (
                    input_hash TEXT,
                    form_type TEXT,
                    result TEXT,
                    model_name TEXT,
                    timestamp DATETIME,
                    PRIMARY KEY (input_hash, form_type)
                )
            """)
            conn.execute("""
                CREATE TABLE IF NOT EXISTS transcripts (
                    video_id TEXT PRIMARY KEY,
                    transcript TEXT,
                    timestamp DATETIME
                )
            """)

    def _hash_content(self, content: str) -> str:
        """Return the SHA-256 hex digest of ``content`` encoded as UTF-8."""
        return hashlib.sha256(content.encode('utf-8')).hexdigest()

    def get_transcript(self, video_id: str) -> str | None:
        """Return the cached transcript for ``video_id``, or ``None`` on a miss."""
        with self._connection() as conn:
            row = conn.execute(
                "SELECT transcript FROM transcripts WHERE video_id = ?",
                (video_id,),
            ).fetchone()
        return row[0] if row else None

    def store_transcript(self, video_id: str, transcript: str) -> None:
        """Insert or replace the cached transcript for ``video_id``."""
        with self._connection() as conn:
            # datetime('now') is UTC, matching the cutoff in clear_cache().
            conn.execute(
                """
                INSERT OR REPLACE INTO transcripts (video_id, transcript, timestamp)
                VALUES (?, ?, datetime('now'))
                """,
                (video_id, transcript),
            )

    def get_extraction(
        self,
        input_content: str,
        form_type: str,
        model_name: str
    ) -> dict[str, Any] | None:
        """Return the cached extraction result, or ``None`` on a miss.

        A row matches only if the hash of ``input_content``, ``form_type``
        and ``model_name`` all agree with what was stored.

        Raises:
            json.JSONDecodeError: If the stored ``result`` text is not valid JSON.
        """
        input_hash = self._hash_content(input_content)
        with self._connection() as conn:
            row = conn.execute(
                """
                SELECT result FROM extractions
                WHERE input_hash = ? AND form_type = ? AND model_name = ?
                """,
                (input_hash, form_type, model_name),
            ).fetchone()
        if row:
            return json.loads(row[0])
        return None

    def store_extraction(
        self,
        input_content: str,
        form_type: str,
        result: dict[str, Any],
        model_name: str
    ) -> None:
        """Insert or replace the extraction result for an input and form type.

        ``result`` is serialised with ``json.dumps``. Any existing row with
        the same ``(input_hash, form_type)`` is replaced, whatever model
        produced it.
        """
        input_hash = self._hash_content(input_content)
        with self._connection() as conn:
            # datetime('now') is UTC, matching the cutoff in clear_cache().
            conn.execute(
                """
                INSERT OR REPLACE INTO extractions
                (input_hash, form_type, result, model_name, timestamp)
                VALUES (?, ?, ?, ?, datetime('now'))
                """,
                (input_hash, form_type, json.dumps(result), model_name),
            )

    def clear_cache(self, older_than_days: int | None = None) -> None:
        """Clear the cache, optionally only entries older than specified days.

        Args:
            older_than_days: When ``None`` every row in both tables is
                deleted. Otherwise only rows whose timestamp is earlier
                than "now minus this many days" (UTC) are deleted.

        Raises:
            ValueError: If ``older_than_days`` is negative. A negative value
                would build the modifier ``'--N days'``, which SQLite
                evaluates to NULL, so nothing would be deleted.
        """
        if older_than_days is not None and older_than_days < 0:
            raise ValueError("older_than_days must be non-negative")

        with self._connection() as conn:
            if older_than_days is None:
                conn.execute("DELETE FROM extractions")
                conn.execute("DELETE FROM transcripts")
                return

            cutoff_modifier = f'-{older_than_days} days'
            conn.execute(
                """
                DELETE FROM extractions
                WHERE timestamp < datetime('now', ?)
                """,
                (cutoff_modifier,),
            )
            conn.execute(
                """
                DELETE FROM transcripts
                WHERE timestamp < datetime('now', ?)
                """,
                (cutoff_modifier,),
            )

    def cleanup_gradio_cache(self) -> None:
        """Clean up Gradio's example cache directory.

        Removes the ``.gradio`` directory, resolved against the current
        working directory, if it exists as a directory.
        """
        import shutil

        gradio_cache = Path(".gradio")
        # is_dir() rather than exists(): rmtree raises on a plain file.
        if gradio_cache.is_dir():
            shutil.rmtree(gradio_cache)
            print("Cleaned up Gradio cache")