Spaces:
Running
Running
File size: 7,504 Bytes
dbd33b2 507c938 dbd33b2 a61b32e dbd33b2 507c938 a61b32e 507c938 dbd33b2 507c938 a61b32e 507c938 a61b32e 507c938 dbd33b2 507c938 a61b32e 507c938 a61b32e 507c938 a61b32e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 |
import contextlib
import os
import sqlite3
class DatabaseHandler:
    """Thin SQLite data-access layer for videos, feedback and index bookkeeping.

    Every public method opens its own short-lived connection to ``db_path``,
    runs inside a transaction, and closes the connection when done.

    Tables managed: ``videos``, ``user_feedback``, ``embedding_models`` and
    ``elasticsearch_indices``.

    NOTE: the schema declares foreign keys, but this class never issues
    ``PRAGMA foreign_keys = ON``; SQLite does not enforce them unless that
    pragma is enabled on the connection.
    """

    def __init__(self, db_path='data/sqlite.db'):
        """Remember ``db_path`` and make sure the schema exists and is current.

        Args:
            db_path: Filesystem path of the SQLite database file. Missing
                parent directories are created.
        """
        self.db_path = db_path
        # Retained for backward compatibility; no method in this class uses it.
        self.conn = None

        # sqlite3.connect() cannot create missing directories, so a fresh
        # checkout without ``data/`` would otherwise fail to open the file.
        parent_dir = os.path.dirname(db_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        self.create_tables()
        self.update_schema()

    @contextlib.contextmanager
    def _connect(self):
        """Yield a connection that is committed (or rolled back) and then closed.

        ``with sqlite3.connect(...)`` alone only manages the transaction and
        leaves the connection open, so the close is done explicitly here.
        """
        conn = sqlite3.connect(self.db_path)
        try:
            with conn:  # commit on success, roll back on exception
                yield conn
        finally:
            conn.close()

    def create_tables(self):
        """Create all tables used by this class if they do not exist yet."""
        ddl_statements = (
            '''
            CREATE TABLE IF NOT EXISTS videos (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                youtube_id TEXT UNIQUE,
                title TEXT,
                channel_name TEXT,
                processed_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                upload_date TEXT,
                view_count INTEGER,
                like_count INTEGER,
                comment_count INTEGER,
                video_duration TEXT,
                transcript_content TEXT
            )
            ''',
            '''
            CREATE TABLE IF NOT EXISTS user_feedback (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                video_id INTEGER,
                query TEXT,
                feedback INTEGER,
                timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                FOREIGN KEY (video_id) REFERENCES videos (id)
            )
            ''',
            '''
            CREATE TABLE IF NOT EXISTS embedding_models (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                model_name TEXT UNIQUE,
                description TEXT
            )
            ''',
            '''
            CREATE TABLE IF NOT EXISTS elasticsearch_indices (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                video_id INTEGER,
                index_name TEXT,
                embedding_model_id INTEGER,
                FOREIGN KEY (video_id) REFERENCES videos (id),
                FOREIGN KEY (embedding_model_id) REFERENCES embedding_models (id)
            )
            ''',
        )
        with self._connect() as conn:
            for statement in ddl_statements:
                conn.execute(statement)

    def update_schema(self):
        """Add columns to ``videos`` that an older database file may lack."""
        new_columns = [
            ("upload_date", "TEXT"),
            ("view_count", "INTEGER"),
            ("like_count", "INTEGER"),
            ("comment_count", "INTEGER"),
            ("video_duration", "TEXT"),
            ("transcript_content", "TEXT"),
        ]
        with self._connect() as conn:
            # PRAGMA table_info yields one row per column; index 1 is its name.
            existing = {row[1] for row in conn.execute("PRAGMA table_info(videos)")}
            for col_name, col_type in new_columns:
                if col_name not in existing:
                    # Identifiers cannot be bound as parameters; both values
                    # come from the hard-coded list above, never from input.
                    conn.execute(
                        f"ALTER TABLE videos ADD COLUMN {col_name} {col_type}"
                    )

    def add_video(self, video_data):
        """Insert a video, or refresh the stored row for a known ``youtube_id``.

        An existing row is updated in place so its ``id`` stays stable.
        (``INSERT OR REPLACE`` would delete the row and create a new one with
        a new id, orphaning ``user_feedback`` / ``elasticsearch_indices`` rows
        that reference the old id.) ``processed_date`` is reset to the current
        timestamp on every call.

        Args:
            video_data: Mapping with the keys ``video_id`` (stored as
                ``youtube_id``), ``title``, ``author`` (stored as
                ``channel_name``), ``upload_date``, ``view_count``,
                ``like_count``, ``comment_count``, ``video_duration`` and
                ``transcript_content``.

        Returns:
            The ``videos.id`` of the inserted or updated row.

        Raises:
            KeyError: If ``video_data`` lacks one of the keys listed above.
        """
        youtube_id = video_data['video_id']
        details = (
            video_data['title'],
            video_data['author'],
            video_data['upload_date'],
            video_data['view_count'],
            video_data['like_count'],
            video_data['comment_count'],
            video_data['video_duration'],
            video_data['transcript_content'],
        )
        with self._connect() as conn:
            updated = conn.execute('''
                UPDATE videos
                SET title = ?,
                    channel_name = ?,
                    processed_date = CURRENT_TIMESTAMP,
                    upload_date = ?,
                    view_count = ?,
                    like_count = ?,
                    comment_count = ?,
                    video_duration = ?,
                    transcript_content = ?
                WHERE youtube_id = ?
            ''', (*details, youtube_id))
            if updated.rowcount == 0:
                inserted = conn.execute('''
                    INSERT INTO videos
                    (youtube_id, title, channel_name, upload_date, view_count, like_count, comment_count, video_duration, transcript_content)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
                ''', (youtube_id, *details))
                return inserted.lastrowid
            row = conn.execute(
                'SELECT id FROM videos WHERE youtube_id = ?', (youtube_id,)
            ).fetchone()
            return row[0]

    def add_user_feedback(self, video_id, query, feedback):
        """Store one feedback entry.

        Args:
            video_id: Value for ``user_feedback.video_id`` (declared as a
                reference to ``videos.id``).
            query: The query text the feedback relates to.
            feedback: Integer feedback value; its meaning is defined by callers.
        """
        with self._connect() as conn:
            conn.execute('''
                INSERT INTO user_feedback (video_id, query, feedback)
                VALUES (?, ?, ?)
            ''', (video_id, query, feedback))

    def add_embedding_model(self, model_name, description):
        """Register an embedding model and return its id.

        If ``model_name`` is already registered, the existing row is left
        untouched and its id is returned. (``lastrowid`` is not meaningful
        when ``INSERT OR IGNORE`` skips the insert.)

        Args:
            model_name: Unique name of the model.
            description: Free-text description, stored only on first insert.

        Returns:
            The ``embedding_models.id`` for ``model_name``.
        """
        with self._connect() as conn:
            inserted = conn.execute('''
                INSERT OR IGNORE INTO embedding_models (model_name, description)
                VALUES (?, ?)
            ''', (model_name, description))
            if inserted.rowcount:
                return inserted.lastrowid
            row = conn.execute(
                'SELECT id FROM embedding_models WHERE model_name = ?',
                (model_name,),
            ).fetchone()
            return row[0]

    def add_elasticsearch_index(self, video_id, index_name, embedding_model_id):
        """Record which index holds a video for a given embedding model.

        Args:
            video_id: Value for ``elasticsearch_indices.video_id`` (declared
                as a reference to ``videos.id``).
            index_name: Name of the index.
            embedding_model_id: Value for the ``embedding_model_id`` column
                (declared as a reference to ``embedding_models.id``).
        """
        with self._connect() as conn:
            conn.execute('''
                INSERT INTO elasticsearch_indices (video_id, index_name, embedding_model_id)
                VALUES (?, ?, ?)
            ''', (video_id, index_name, embedding_model_id))

    def get_video_by_youtube_id(self, youtube_id):
        """Return the full ``videos`` row for ``youtube_id`` as a tuple, or ``None``."""
        with self._connect() as conn:
            return conn.execute(
                'SELECT * FROM videos WHERE youtube_id = ?', (youtube_id,)
            ).fetchone()

    def get_elasticsearch_index(self, video_id, embedding_model):
        """Return the index name for a video and embedding model, or ``None``.

        Args:
            video_id: Despite its name, this is matched against
                ``videos.youtube_id``, not the integer ``videos.id``.
            embedding_model: Matched against ``embedding_models.model_name``.
        """
        with self._connect() as conn:
            row = conn.execute('''
                SELECT ei.index_name
                FROM elasticsearch_indices ei
                JOIN embedding_models em ON ei.embedding_model_id = em.id
                JOIN videos v ON ei.video_id = v.id
                WHERE v.youtube_id = ? AND em.model_name = ?
            ''', (video_id, embedding_model)).fetchone()
        return row[0] if row else None

    def get_all_videos(self):
        """Return ``(youtube_id, title, channel_name, upload_date)`` for every video.

        Rows are ordered by ``upload_date`` descending; the column is TEXT, so
        the ordering is that of the stored strings.
        """
        with self._connect() as conn:
            return conn.execute('''
                SELECT youtube_id, title, channel_name, upload_date
                FROM videos
                ORDER BY upload_date DESC
            ''').fetchall()

    def get_elasticsearch_index_by_youtube_id(self, youtube_id):
        """Return the first index name recorded for ``youtube_id``, or ``None``."""
        with self._connect() as conn:
            row = conn.execute('''
                SELECT ei.index_name
                FROM elasticsearch_indices ei
                JOIN videos v ON ei.video_id = v.id
                WHERE v.youtube_id = ?
            ''', (youtube_id,)).fetchone()
        return row[0] if row else None

    def get_transcript_content(self, youtube_id):
        """Return the stored transcript for ``youtube_id``, or ``None`` if unknown."""
        with self._connect() as conn:
            row = conn.execute('''
                SELECT transcript_content
                FROM videos
                WHERE youtube_id = ?
            ''', (youtube_id,)).fetchone()
        return row[0] if row else None
# This method is no longer needed as transcript is added in add_video
# def add_transcript_content(self, youtube_id, transcript_content):
# with sqlite3.connect(self.db_path) as conn:
# cursor = conn.cursor()
# cursor.execute('''
# UPDATE videos
# SET transcript_content = ?
# WHERE youtube_id = ?
# ''', (transcript_content, youtube_id))
# conn.commit()