# Source: Hugging Face Space "roadmapacademy/GATConvTest" (status: Running).
# File size: 4,252 bytes.
# Commits shown in the page's blame column: c0032bb, 58bda3d.

import gradio as gr
import torch
import pandas as pd
import numpy as np
from torch_geometric.data import Data
from torch_geometric.nn import GATConv
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# Define the GATConv model architecture
class ModeratelySimplifiedGATConvModel(torch.nn.Module):
    """Two-layer graph attention network assembled from ``GATConv`` layers.

    The first layer is built with two attention heads, and the second layer is
    constructed with ``hidden_channels * 2`` input channels to match. Dropout
    with probability 0.45 sits between the two layers.

    Args:
        in_channels: Input size handed to the first ``GATConv``.
        hidden_channels: Output size (per head) of the first ``GATConv``.
        out_channels: Output size of the second, single-head ``GATConv``.
    """

    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        first_layer_heads = 2
        # The attribute names below become state-dict key prefixes, so they
        # must stay as they are for saved checkpoints to load.
        self.conv1 = GATConv(in_channels, hidden_channels, heads=first_layer_heads)
        self.dropout1 = torch.nn.Dropout(p=0.45)
        self.conv2 = GATConv(hidden_channels * first_layer_heads, out_channels, heads=1)

    def forward(self, x, edge_index, edge_attr=None):
        """Apply conv1 -> ReLU -> dropout -> conv2 and return the result."""
        hidden = self.conv1(x, edge_index, edge_attr)
        hidden = self.dropout1(torch.relu(hidden))
        return self.conv2(hidden, edge_index, edge_attr)

# Load the pre-built graph object and the trained weights onto the CPU.
# NOTE(review): torch.load unpickles its input; only load files from a trusted source.
data = torch.load("graph_data.pt", map_location=torch.device("cpu"))
original_state_dict = torch.load("graph_model.pth", map_location=torch.device("cpu"))

# Rename checkpoint keys: every "...lin.weight" tensor is stored under both
# "...lin_src.weight" and "...lin_dst.weight"; all other entries are copied as-is.
# NOTE(review): presumably the checkpoint was saved with a GATConv version that
# used a single shared projection -- confirm against the installed torch_geometric.
corrected_state_dict = {}
for key, value in original_state_dict.items():
    if "lin.weight" in key:
        corrected_state_dict[key.replace("lin.weight", "lin_src.weight")] = value
        corrected_state_dict[key.replace("lin.weight", "lin_dst.weight")] = value
    else:
        corrected_state_dict[key] = value

# Build the model with the input width taken from the graph's node features
# and load the corrected weights into it.
gatconv_model = ModeratelySimplifiedGATConvModel(
    in_channels=data.x.shape[1], hidden_channels=32, out_channels=768
)
gatconv_model.load_state_dict(corrected_state_dict)
# Switch to inference mode. torch.no_grad() only disables gradient tracking;
# without eval() the Dropout(0.45) layer would still randomly zero activations,
# making the embeddings below noisy and different on every start-up.
gatconv_model.eval()

# Load the sentence-transformer used to embed the user's query text.
model_bert = SentenceTransformer("all-mpnet-base-v2")

# Load the table that provides the "embeddings", "title" and "description"
# columns read by the recommendation function.
df = pd.read_feather("EmbeddedCombined.feather")

# Compute the GNN embeddings for every node once, up front.
with torch.no_grad():
    all_video_embeddings = gatconv_model(data.x, data.edge_index, data.edge_attr).cpu()

def _top_k_by_dot_product(video_index, embeddings, k=10):
    """Return the indices of the ``k`` rows scoring highest against one row.

    Args:
        video_index: Row of ``embeddings`` to compare every other row with.
        embeddings: 2-D tensor with one embedding per row.
        k: Maximum number of indices to return.

    Returns:
        A NumPy array of at most ``k`` row indices, ordered from the largest
        dot product to the smallest. ``video_index`` itself is scored as
        ``-inf`` so it is ranked last.
    """
    # One matrix-vector product replaces a Python loop of per-row torch.dot calls.
    scores = (embeddings @ embeddings[video_index]).detach().cpu().numpy()
    scores = scores.astype(np.float64)  # float copy so -inf can be assigned
    scores[video_index] = -np.inf
    return np.argsort(scores)[::-1][:k]


def get_similar_and_recommend(input_text):
    """Find the video closest to ``input_text`` and recommend ten more.

    The query is encoded with ``model_bert`` and compared by cosine similarity
    with the vectors in ``df["embeddings"]``. The best-matching row is then
    used to look up its row in ``all_video_embeddings``, and the ten other rows
    with the largest dot product against it are returned as recommendations.

    NOTE(review): this assumes rows of ``df`` and rows of
    ``all_video_embeddings`` refer to the same videos in the same order --
    confirm against how the graph data was built.

    Args:
        input_text: Free-text query.

    Returns:
        A 4-tuple ``(title, description, similarity_score, recommendations)``
        where ``recommendations`` is a list of up to ten titles.
    """
    # Find the most similar video based on the input text.
    embeddings_matrix = np.array(df["embeddings"].tolist())
    input_embedding = model_bert.encode([input_text])[0]
    similarities = cosine_similarity([input_embedding], embeddings_matrix)[0]
    most_similar_index = int(np.argmax(similarities))

    # Recommend the top 10 videos based on GNN embeddings and dot product.
    top_10_indices = _top_k_by_dot_product(most_similar_index, all_video_embeddings)
    top_10_recommendations = df["title"].iloc[top_10_indices].tolist()

    return (
        df["title"].iloc[most_similar_index],
        df["description"].iloc[most_similar_index],
        similarities[most_similar_index],
        top_10_recommendations,
    )

# Build the Gradio UI: one text input and four text outputs, matching the
# 4-tuple returned by get_similar_and_recommend.
_query_box = gr.components.Textbox(label="Enter Text to Find Most Similar Video")
_result_boxes = [
    gr.components.Textbox(label="Video Title"),
    gr.components.Textbox(label="Video Description"),
    gr.components.Textbox(label="Similarity Score"),
    # The last output receives a list of titles, hence the taller box.
    gr.components.Textbox(label="Top 10 Recommended Videos", lines=10),
]

interface = gr.Interface(
    fn=get_similar_and_recommend,
    inputs=_query_box,
    outputs=_result_boxes,
    title="Video Recommendation System with GNN-based Recommendations",
    description="Enter text to find the most similar video and get the top 10 recommended videos based on dot product and GNN embeddings.",
)

# Start serving the interface.
interface.launch()