File size: 4,909 Bytes
6830eb0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
from __future__ import annotations
import hashlib
import json
import sqlite3
from pathlib import Path
from typing import Any
from datetime import datetime

class CacheManager:
    """SQLite-backed cache for video transcripts and extraction results.

    The database file ``extraction_cache.db`` lives inside ``cache_dir`` and
    holds two tables:

    * ``transcripts`` -- keyed by ``video_id``.
    * ``extractions`` -- keyed by ``(input_hash, form_type)``, where
      ``input_hash`` is the SHA-256 hex digest of the input text.

    Row timestamps are written by SQLite itself (``datetime('now')``, i.e.
    UTC) so that they are directly comparable with the UTC cutoff computed in
    :meth:`clear_cache`.
    """

    def __init__(self, cache_dir: str | Path = "cache"):
        """Create the cache directory (if needed) and initialise the database.

        Args:
            cache_dir: Directory that holds the SQLite database file.
        """
        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(parents=True, exist_ok=True)

        # Create SQLite database for structured results
        self.db_path = self.cache_dir / "extraction_cache.db"
        self._init_db()

    def _connect(self):
        """Return a context manager yielding a connection that is closed on exit.

        ``sqlite3.Connection`` used directly as a context manager only commits
        or rolls back the transaction; it does not close the connection.
        Wrapping it in ``closing`` releases the file handle deterministically.
        """
        from contextlib import closing

        return closing(sqlite3.connect(self.db_path))

    def _init_db(self):
        """Initialize the SQLite database with necessary tables."""
        # ``with ..., conn:`` = close on exit (outer) + commit/rollback (inner).
        with self._connect() as conn, conn:
            conn.execute("""
                CREATE TABLE IF NOT EXISTS extractions (
                    input_hash TEXT,
                    form_type TEXT,
                    result TEXT,
                    model_name TEXT,
                    timestamp DATETIME,
                    PRIMARY KEY (input_hash, form_type)
                )
            """)

            conn.execute("""
                CREATE TABLE IF NOT EXISTS transcripts (
                    video_id TEXT PRIMARY KEY,
                    transcript TEXT,
                    timestamp DATETIME
                )
            """)

    def _hash_content(self, content: str) -> str:
        """Return the SHA-256 hex digest of ``content`` encoded as UTF-8."""
        return hashlib.sha256(content.encode('utf-8')).hexdigest()

    def get_transcript(self, video_id: str) -> str | None:
        """Retrieve a cached transcript.

        Args:
            video_id: Key the transcript was stored under.

        Returns:
            The stored transcript, or ``None`` when no row exists for
            ``video_id``.
        """
        with self._connect() as conn, conn:
            row = conn.execute(
                "SELECT transcript FROM transcripts WHERE video_id = ?",
                (video_id,)
            ).fetchone()
        return row[0] if row else None

    def store_transcript(self, video_id: str, transcript: str):
        """Store a transcript, replacing any existing row for ``video_id``.

        Args:
            video_id: Key to store the transcript under.
            transcript: Transcript text to cache.
        """
        with self._connect() as conn, conn:
            # The timestamp is generated by SQLite (UTC) rather than passing
            # a Python ``datetime``: that keeps it in the same clock and
            # format as the cutoff used by ``clear_cache`` and avoids the
            # sqlite3 default datetime adapter (deprecated in Python 3.12).
            conn.execute(
                """
                INSERT OR REPLACE INTO transcripts (video_id, transcript, timestamp)
                VALUES (?, ?, datetime('now'))
                """,
                (video_id, transcript)
            )

    def get_extraction(
        self,
        input_content: str,
        form_type: str,
        model_name: str
    ) -> dict | None:
        """Retrieve cached extraction results.

        Args:
            input_content: Raw input text; it is hashed to form the lookup key.
            form_type: Form type the extraction was produced for.
            model_name: Model that produced the extraction; the row must match.

        Returns:
            The JSON-decoded result, or ``None`` when no row matches all three
            of input hash, form type and model name.
        """
        input_hash = self._hash_content(input_content)

        with self._connect() as conn, conn:
            row = conn.execute(
                """
                SELECT result FROM extractions
                WHERE input_hash = ? AND form_type = ? AND model_name = ?
                """,
                (input_hash, form_type, model_name)
            ).fetchone()

        if row is None:
            return None
        return json.loads(row[0])

    def store_extraction(
        self,
        input_content: str,
        form_type: str,
        result: dict,
        model_name: str
    ):
        """Store extraction results in the cache.

        NOTE: the table's primary key is ``(input_hash, form_type)`` and does
        not include ``model_name``, so storing a result for the same input and
        form type with a different model replaces the previous model's row.

        Args:
            input_content: Raw input text; it is hashed to form the key.
            form_type: Form type the extraction was produced for.
            result: JSON-serialisable extraction result.
            model_name: Name of the model that produced ``result``.
        """
        input_hash = self._hash_content(input_content)
        # Serialise before opening the connection so a non-serialisable
        # result fails without touching the database.
        payload = json.dumps(result)

        with self._connect() as conn, conn:
            # Timestamp comes from SQLite (UTC); see ``store_transcript``.
            conn.execute(
                """
                INSERT OR REPLACE INTO extractions
                (input_hash, form_type, result, model_name, timestamp)
                VALUES (?, ?, ?, ?, datetime('now'))
                """,
                (input_hash, form_type, payload, model_name)
            )

    def clear_cache(self, older_than_days: int | None = None):
        """Clear the cache, optionally only entries older than specified days.

        Args:
            older_than_days: When ``None`` (default) every row in both tables
                is deleted. Otherwise only rows whose timestamp is earlier
                than "now minus this many days" (UTC) are deleted.

        Raises:
            ValueError: If ``older_than_days`` is negative. A negative value
                would produce an invalid SQLite date modifier, which makes the
                comparison NULL and silently deletes nothing.
        """
        if older_than_days is not None and older_than_days < 0:
            raise ValueError(
                f"older_than_days must be non-negative, got {older_than_days}"
            )

        # Both deletes run in one transaction on one connection.
        with self._connect() as conn, conn:
            if older_than_days is None:
                conn.execute("DELETE FROM extractions")
                conn.execute("DELETE FROM transcripts")
                return

            cutoff_modifier = f'-{older_than_days} days'
            for table in ("extractions", "transcripts"):
                # ``table`` is one of the two literals above, never user input.
                conn.execute(
                    f"DELETE FROM {table} WHERE timestamp < datetime('now', ?)",
                    (cutoff_modifier,)
                )

    def cleanup_gradio_cache(self):
        """Clean up Gradio's example cache directory.

        Removes the ``.gradio`` path, resolved relative to the current
        working directory, if it exists.
        """
        gradio_cache = Path(".gradio")
        if gradio_cache.exists():
            import shutil
            shutil.rmtree(gradio_cache)
            print("Cleaned up Gradio cache")