Spaces:
Sleeping
Sleeping
File size: 4,346 Bytes
c0032bb 58bda3d c0032bb 58bda3d c0032bb 58bda3d 5feda0d 58bda3d c0032bb 5feda0d 58bda3d c0032bb 5feda0d 58bda3d c0032bb 3baa867 5feda0d 3baa867 58bda3d c0032bb 5feda0d c0032bb 58bda3d c0032bb 5feda0d 58bda3d 0f9515d 67df04a c0032bb 58bda3d 0f9515d 58bda3d 5feda0d 58bda3d 5feda0d 4215f3c 58bda3d 5feda0d 58bda3d e89f25d 5feda0d e89f25d 5feda0d 0f9515d 5feda0d 0f9515d c0032bb 5feda0d c0032bb 5feda0d 0f9515d c0032bb 5feda0d c0032bb 58bda3d 0f9515d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 |
import gradio as gr
import torch
import pandas as pd
import numpy as np
from torch_geometric.data import Data
from torch_geometric.nn import GATConv
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
# Define the GATConv model architecture
class ModeratelySimplifiedGATConvModel(torch.nn.Module):
    """Two-layer graph attention network built from ``GATConv`` layers.

    Layer one uses two attention heads and layer two uses one; a ReLU and a
    dropout (p=0.45) sit between them.
    """

    def __init__(self, in_channels, hidden_channels, out_channels):
        """Create the layers.

        Args:
            in_channels: Width of the input node features.
            hidden_channels: Per-head output width of the first layer.
            out_channels: Output width of the second layer.
        """
        super().__init__()
        self.conv1 = GATConv(in_channels, hidden_channels, heads=2)
        self.dropout1 = torch.nn.Dropout(0.45)
        # The second layer consumes the output of both heads of conv1
        # (GATConv concatenates heads by default), hence hidden_channels * 2.
        self.conv2 = GATConv(hidden_channels * 2, out_channels, heads=1)

    def forward(self, x, edge_index, edge_attr=None):
        """Run conv1 -> ReLU -> dropout -> conv2 and return the result.

        Args:
            x: Node feature tensor.
            edge_index: Graph connectivity passed straight to both layers.
            edge_attr: Optional edge features passed straight to both layers.

        Returns:
            The output of the second attention layer.
        """
        # Each step must be *assigned* back to x; a bare comparison such as
        # `x is torch.relu(x)` would discard the result.
        x = self.conv1(x, edge_index, edge_attr)
        x = torch.relu(x)
        x = self.dropout1(x)
        x = self.conv2(x, edge_index, edge_attr)
        return x
# Load the graph dataset onto the CPU.
# NOTE(review): torch.load unpickles the file, so "graph_data.pt" must come
# from a trusted source.
data = torch.load("graph_data.pt", map_location=torch.device("cpu"))
# Load the BERT-based sentence transformer model used to embed the query text
model_bert = SentenceTransformer("all-mpnet-base-v2")
# Load the video metadata table (gzip-compressed, one JSON record per line)
try:
    df = pd.read_json("combined_data.json.gz", orient='records', lines=True, compression='gzip')
except Exception as e:
    print(f"Error reading JSON file: {e}")
    # Re-raise: carrying on would leave `df` undefined and every request
    # would then fail later with a far less helpful NameError.
    raise
# Generate GNN-based embeddings
# NOTE(review): `gatconv_model` is not created anywhere in the visible file;
# it must be instantiated (and its trained weights loaded) before this point.
# Switch to eval mode so the model's dropout layer is disabled; torch.no_grad()
# alone does not turn dropout off, and the embeddings would be random.
gatconv_model.eval()
with torch.no_grad():
    all_video_embeddings = gatconv_model(data.x, data.edge_index, data.edge_attr).cpu()
# Rank every video against one given video by the dot product of GNN embeddings
def _recommend_next_10_videos(given_video_index, all_video_embeddings):
    """Return the rows of ``df`` for the 10 best dot-product matches.

    Args:
        given_video_index: Row index of the reference video.
        all_video_embeddings: 2-D tensor holding one embedding per row.

    Returns:
        A list of up to 10 dicts, one per recommended ``df`` row, best first.
        The reference video itself is never included.
    """
    # One matrix-vector product replaces a Python loop of per-row torch.dot calls.
    scores = all_video_embeddings @ all_video_embeddings[given_video_index]
    # Push the reference video to the bottom so it cannot recommend itself.
    scores[given_video_index] = -float("inf")
    top_10_indices = torch.argsort(scores, descending=True)[:10].tolist()
    # NOTE(review): assumes row i of the embeddings describes df row i -- confirm.
    return [df.iloc[idx].to_dict() for idx in top_10_indices]


# Function to find the most similar video and recommend the top 10 based on GNN embeddings
def get_similar_and_recommend(input_text):
    """Find the video closest to ``input_text`` and recommend 10 related ones.

    The closest video is picked by cosine similarity between the sentence
    embedding of ``input_text`` and the ``"embeddings"`` column of ``df``.
    The recommendations are the videos whose GNN embeddings have the largest
    dot product with that video's GNN embedding.

    Args:
        input_text: Free-text query typed by the user.

    Returns:
        A dict with three keys: ``"search_context"`` (the query and the
        applied weight), ``"most_similar_video"`` (all columns of the best
        match) and ``"top_10_recommended_videos"`` (a list of row dicts with
        the ``"text_for_embedding"`` and ``"embeddings"`` columns removed).
    """
    # Find the most similar video based on input text
    embeddings_matrix = np.array(df["embeddings"].tolist())
    input_embedding = model_bert.encode([input_text])[0]
    similarities = cosine_similarity([input_embedding], embeddings_matrix)[0]

    # Lower-case the titles once, not once per keyword; missing titles
    # are dropped so they cannot break the substring test.
    lowered_titles = df["title"].dropna().astype(str).str.lower()

    # Add 0.1 to the weight for every query word that occurs in any title
    weight = 1.0  # Initial weight factor
    for keyword in input_text.split():
        if lowered_titles.str.contains(keyword.lower(), regex=False).any():
            weight += 0.1

    # NOTE: the weight is one positive scalar applied to every score, so it
    # is reported in the output but does not change which video ranks first.
    weighted_similarities = similarities * weight
    most_similar_index = int(np.argmax(weighted_similarities))

    # Get all features of the most similar video
    most_similar_video_features = df.iloc[most_similar_index].to_dict()

    # Recommend the top 10 videos based on GNN embeddings and dot product
    top_10_recommended_videos_features = _recommend_next_10_videos(
        most_similar_index, all_video_embeddings
    )

    # Exclude unwanted features for recommended videos
    for recommended_video in top_10_recommended_videos_features:
        recommended_video.pop("text_for_embedding", None)
        recommended_video.pop("embeddings", None)

    # Create the output JSON with all features and the search context
    return {
        "search_context": {
            "input_text": input_text,
            "weight": weight,  # The applied weight based on user input
        },
        "most_similar_video": most_similar_video_features,
        "top_10_recommended_videos": top_10_recommended_videos_features,
    }
# Gradio front end: a single text box feeds get_similar_and_recommend and the
# returned dict is rendered as JSON.
_query_box = gr.Textbox(label="Enter Text to Find Most Similar Video")
_json_view = gr.JSON()
interface = gr.Interface(
    fn=get_similar_and_recommend,
    inputs=_query_box,
    outputs=_json_view,
    title="Video Recommendation System with GNN-based Recommendations",
    description="Enter text to find the most similar video and get top 10 recommended videos with search context and user-influenced weight factor.",
)
# Start the web server for the app.
interface.launch()
|