Spaces:
Running
Running
import os
import sqlite3
from contextlib import closing
class DatabaseHandler:
    """Thin persistence layer over a SQLite file for processed YouTube videos.

    Four tables are managed: ``videos``, ``user_feedback``,
    ``embedding_models`` and ``elasticsearch_indices``.  Every public method
    opens its own short-lived connection, commits on success, rolls back on
    error, and closes the connection before returning.
    """

    # Columns that may be absent from a ``videos`` table created by an older
    # version of the schema; ``update_schema`` adds whichever are missing.
    _VIDEO_EXTRA_COLUMNS = (
        ("upload_date", "TEXT"),
        ("view_count", "INTEGER"),
        ("like_count", "INTEGER"),
        ("comment_count", "INTEGER"),
        ("video_duration", "TEXT"),
        ("transcript_content", "TEXT"),
    )

    def __init__(self, db_path='data/sqlite.db'):
        """Create (if needed) and migrate the database at *db_path*.

        Args:
            db_path: Filesystem path of the SQLite database file.  Its parent
                directory is created when it does not exist yet.
        """
        self.db_path = db_path
        # Kept for backward compatibility; no method stores a connection here.
        self.conn = None

        # sqlite3.connect() cannot create intermediate directories and fails
        # with "unable to open database file" when the parent is missing.
        parent_dir = os.path.dirname(db_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        self.create_tables()
        self.update_schema()

    def _connect(self):
        """Return a context manager yielding a connection that is closed on exit.

        ``sqlite3.Connection`` used directly in a ``with`` statement only
        commits or rolls back; it does not close.  Callers therefore use
        ``with self._connect() as conn, conn:`` so the transaction is settled
        first and the connection is closed afterwards.
        """
        return closing(sqlite3.connect(self.db_path))

    def create_tables(self):
        """Create all four tables if they do not exist yet."""
        with self._connect() as conn, conn:
            conn.execute('''
                CREATE TABLE IF NOT EXISTS videos (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    youtube_id TEXT UNIQUE,
                    title TEXT,
                    channel_name TEXT,
                    processed_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    upload_date TEXT,
                    view_count INTEGER,
                    like_count INTEGER,
                    comment_count INTEGER,
                    video_duration TEXT,
                    transcript_content TEXT
                )
            ''')
            conn.execute('''
                CREATE TABLE IF NOT EXISTS user_feedback (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    video_id INTEGER,
                    query TEXT,
                    feedback INTEGER,
                    timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    FOREIGN KEY (video_id) REFERENCES videos (id)
                )
            ''')
            conn.execute('''
                CREATE TABLE IF NOT EXISTS embedding_models (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    model_name TEXT UNIQUE,
                    description TEXT
                )
            ''')
            conn.execute('''
                CREATE TABLE IF NOT EXISTS elasticsearch_indices (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    video_id INTEGER,
                    index_name TEXT,
                    embedding_model_id INTEGER,
                    FOREIGN KEY (video_id) REFERENCES videos (id),
                    FOREIGN KEY (embedding_model_id) REFERENCES embedding_models (id)
                )
            ''')

    def update_schema(self):
        """Add any ``videos`` columns that an older database file is missing."""
        with self._connect() as conn, conn:
            # PRAGMA table_info rows are (cid, name, type, ...); index 1 is the name.
            existing = {
                row[1] for row in conn.execute("PRAGMA table_info(videos)")
            }
            for col_name, col_type in self._VIDEO_EXTRA_COLUMNS:
                if col_name not in existing:
                    # Identifiers cannot be bound as parameters; the names come
                    # from the class constant above, never from caller input.
                    conn.execute(
                        f"ALTER TABLE videos ADD COLUMN {col_name} {col_type}"
                    )

    def add_video(self, video_data):
        """Insert a video, or refresh the stored row for a known ``youtube_id``.

        An existing row is updated in place so that its ``id`` stays stable;
        ``user_feedback`` and ``elasticsearch_indices`` rows reference that id.
        ``processed_date`` is reset to the current timestamp on every call.
        The transcript is stored here as well; there is no separate method
        for adding it.

        Args:
            video_data: Mapping with the keys ``video_id``, ``title``,
                ``author``, ``upload_date``, ``view_count``, ``like_count``,
                ``comment_count``, ``video_duration`` and
                ``transcript_content``.

        Returns:
            int: The ``videos.id`` of the inserted or updated row.

        Raises:
            KeyError: If *video_data* lacks one of the keys listed above.
        """
        youtube_id = video_data['video_id']
        details = (
            video_data['title'],
            video_data['author'],
            video_data['upload_date'],
            video_data['view_count'],
            video_data['like_count'],
            video_data['comment_count'],
            video_data['video_duration'],
            video_data['transcript_content'],
        )
        with self._connect() as conn, conn:
            # UPDATE first instead of INSERT OR REPLACE: REPLACE deletes the
            # conflicting row and inserts a new one with a different id.
            updated = conn.execute('''
                UPDATE videos
                SET title = ?, channel_name = ?, upload_date = ?,
                    view_count = ?, like_count = ?, comment_count = ?,
                    video_duration = ?, transcript_content = ?,
                    processed_date = CURRENT_TIMESTAMP
                WHERE youtube_id = ?
            ''', details + (youtube_id,))
            if updated.rowcount == 0:
                inserted = conn.execute('''
                    INSERT INTO videos
                    (youtube_id, title, channel_name, upload_date, view_count,
                     like_count, comment_count, video_duration, transcript_content)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
                ''', (youtube_id,) + details)
                return inserted.lastrowid
            row = conn.execute(
                'SELECT id FROM videos WHERE youtube_id = ?', (youtube_id,)
            ).fetchone()
            return row[0]

    def add_user_feedback(self, video_id, query, feedback):
        """Record one feedback entry.

        Args:
            video_id: Value stored in ``user_feedback.video_id`` (declared as
                a reference to ``videos.id``).
            query: The query text the feedback relates to.
            feedback: Integer feedback value.
        """
        with self._connect() as conn, conn:
            conn.execute('''
                INSERT INTO user_feedback (video_id, query, feedback)
                VALUES (?, ?, ?)
            ''', (video_id, query, feedback))

    def add_embedding_model(self, model_name, description):
        """Register an embedding model and return its id.

        If a model with the same name already exists, it is left untouched
        (its description is not updated) and its existing id is returned.

        Args:
            model_name: Unique model name.
            description: Free-text description, stored only on first insert.

        Returns:
            int: The ``embedding_models.id`` for *model_name*.
        """
        with self._connect() as conn, conn:
            inserted = conn.execute('''
                INSERT OR IGNORE INTO embedding_models (model_name, description)
                VALUES (?, ?)
            ''', (model_name, description))
            if inserted.rowcount:
                return inserted.lastrowid
            # The insert was ignored, so lastrowid does not identify the
            # existing row; look it up explicitly.
            row = conn.execute(
                'SELECT id FROM embedding_models WHERE model_name = ?',
                (model_name,),
            ).fetchone()
            return row[0] if row else None

    def add_elasticsearch_index(self, video_id, index_name, embedding_model_id):
        """Associate an Elasticsearch index name with a video and a model.

        Args:
            video_id: ``videos.id`` of the indexed video.
            index_name: Name of the Elasticsearch index.
            embedding_model_id: ``embedding_models.id`` of the model used.
        """
        with self._connect() as conn, conn:
            conn.execute('''
                INSERT INTO elasticsearch_indices (video_id, index_name, embedding_model_id)
                VALUES (?, ?, ?)
            ''', (video_id, index_name, embedding_model_id))

    def get_video_by_youtube_id(self, youtube_id):
        """Return the full ``videos`` row for *youtube_id*, or ``None``.

        The row is a tuple in table column order (``SELECT *``).
        """
        with self._connect() as conn, conn:
            return conn.execute(
                'SELECT * FROM videos WHERE youtube_id = ?', (youtube_id,)
            ).fetchone()

    def get_elasticsearch_index(self, video_id, embedding_model):
        """Return the index name for a video/model pair, or ``None``.

        Args:
            video_id: Despite its name, this is matched against
                ``videos.youtube_id``, not the integer ``videos.id``.
            embedding_model: Value matched against
                ``embedding_models.model_name``.

        Returns:
            The first matching ``index_name``, or ``None`` when there is none.
        """
        with self._connect() as conn, conn:
            row = conn.execute('''
                SELECT ei.index_name
                FROM elasticsearch_indices ei
                JOIN embedding_models em ON ei.embedding_model_id = em.id
                JOIN videos v ON ei.video_id = v.id
                WHERE v.youtube_id = ? AND em.model_name = ?
            ''', (video_id, embedding_model)).fetchone()
        return row[0] if row else None

    def get_all_videos(self):
        """Return ``(youtube_id, title, channel_name, upload_date)`` tuples.

        Rows are ordered by ``upload_date`` descending.
        """
        with self._connect() as conn, conn:
            return conn.execute('''
                SELECT youtube_id, title, channel_name, upload_date
                FROM videos
                ORDER BY upload_date DESC
            ''').fetchall()

    def get_elasticsearch_index_by_youtube_id(self, youtube_id):
        """Return the first index name recorded for *youtube_id*, or ``None``."""
        with self._connect() as conn, conn:
            row = conn.execute('''
                SELECT ei.index_name
                FROM elasticsearch_indices ei
                JOIN videos v ON ei.video_id = v.id
                WHERE v.youtube_id = ?
            ''', (youtube_id,)).fetchone()
        return row[0] if row else None

    def get_transcript_content(self, youtube_id):
        """Return the stored transcript for *youtube_id*.

        Returns ``None`` when the video is unknown or its transcript is NULL.
        """
        with self._connect() as conn, conn:
            row = conn.execute('''
                SELECT transcript_content
                FROM videos
                WHERE youtube_id = ?
            ''', (youtube_id,)).fetchone()
        return row[0] if row else None