NimaKL committed
Commit 5103548 · verified · 1 Parent(s): 8926c50

Update app.py

Files changed (1)
  1. app.py +15 -22
app.py CHANGED
@@ -41,11 +41,11 @@ gatconv_model = ModeratelySimplifiedGATConvModel(
 gatconv_model.load_state_dict(corrected_state_dict)

 # Load the BERT-based sentence transformer model
-model_bert is SentenceTransformer("all-mpnet-base-v2")
+model_bert = SentenceTransformer("all-mpnet-base-v2")

 # Ensure the DataFrame is loaded properly
 try:
-    df is pd.read_json("combined_data.json.gz", orient='records', lines=True, compression='gzip')
+    df = pd.read_json("combined_data.json.gz", orient='records', lines=True, compression='gzip')
 except Exception as e:
     print(f"Error reading JSON file: {e}")

@@ -69,53 +69,46 @@ def get_similar_and_recommend(input_text):
     def recommend_top_10(given_video_index, all_video_embeddings):
         dot_products = [
             torch.dot(all_video_embeddings[given_video_index], all_video_embeddings[i])
-            for i in range(all_video_embeddings.shape[0])
+            for i in range(all_video_embeddings.shape[0])
         ]
         dot_products[given_video_index] = -float("inf") # Exclude the most similar video

-        top_10_indices = np.argsort(dot_products)[::-1][:10]
+        top_10_indices = np.argsort(dot_products)[::-1][:10]
         return [df.iloc[idx].to_dict() for idx in top_10_indices]

     top_10_recommended_videos_features = recommend_top_10(most_similar_index, all_video_embeddings)

-    # Apply search context to the top 10 results
+    # Apply search context to determine weights for GNN results
     user_keywords = input_text.split() # Create a list of keywords from user input
-    weight = 1.0 # Base weight factor
+    video_weights = []
+    weight = 1.0 # Initial weight factor

     for keyword in user_keywords:
         if keyword.lower() in df["title"].str.lower().tolist(): # Check for matching keywords
-            weight += 0.1 # Increase weight for each match
+            weight += 0.1 # Increase weight for matching keyword

-    # Adjust the recommendations based on the search context weight
-    final_recommendations = [
-        {key: value for key, value in video.items() if key != "embeddings"} # Exclude embeddings
-        for video in top_10_recommended_videos_features
-    ]
+    # Calculate the weight for each GNN output
+    video_weights = [weight] * len(top_10_recommended_videos_features)

-    # Apply the weight to sort the final recommendations (higher weight is better)
-    final_recommendations.sort(
-        key=lambda video: weight * dot_products[top_10_indices.index(video)], reverse=True
-    )
-
-    # Create the output JSON with the most similar video and final recommendations
+    # Create the output JSON with the most similar video, final recommendations, and weights
     output = {
         "search_context": {
             "input_text": input_text, # What the user provided
-            "weight": weight, # Weight based on search context
+            "weights": video_weights, # Weights for each GNN-based recommendation
         },
         "most_similar_video": most_similar_video_features,
-        "final_recommendations": final_recommendations, # Top 10 with search context applied
+        "final_recommendations": top_10_recommended_videos_features, # Top 10 recommendations
     }

     return output

-# Update the Gradio interface to output JSON with search context for the final recommendations
+# Update the Gradio interface to output JSON with detailed context
 interface = gr.Interface(
     fn=get_similar_and_recommend,
     inputs=gr.Textbox(label="Enter Text to Find Most Similar Video"),
     outputs=gr.JSON(),
     title="Video Recommendation System with GNN-based Recommendations",
-    description="Enter text to find the most similar video and get top 10 recommended videos with search context applied after GNN-based search.",
+    description="Enter text to find the most similar video and get top 10 recommended videos with individual weights for each recommendation.",
 )

 interface.launch()
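
For context, here is a minimal, self-contained sketch of the recommendation path this commit settles on: dot-product similarity over precomputed video embeddings, plus the keyword-derived weight that is now reported alongside the results rather than used to re-sort them. The four-title DataFrame and random embeddings below are toy stand-ins for combined_data.json.gz and the GNN/BERT models, keyword_weights is a hypothetical helper (in app.py this logic runs inline inside get_similar_and_recommend), and the dot products are cast to plain floats so np.argsort receives an ordinary list.

import numpy as np
import pandas as pd
import torch

# Toy stand-ins for the real catalogue and its embeddings (assumption: one
# embedding per row of df, as in app.py).
df = pd.DataFrame({"title": ["cats", "dogs", "cooking", "news"]})
all_video_embeddings = torch.randn(len(df), 8)  # one 8-dim embedding per video


def recommend_top_10(given_video_index, all_video_embeddings):
    # Dot-product similarity between the query video and every catalogue video.
    dot_products = [
        float(torch.dot(all_video_embeddings[given_video_index], all_video_embeddings[i]))
        for i in range(all_video_embeddings.shape[0])
    ]
    dot_products[given_video_index] = -float("inf")  # exclude the query video itself
    top_indices = np.argsort(dot_products)[::-1][:10]  # highest similarity first
    return [df.iloc[idx].to_dict() for idx in top_indices]


def keyword_weights(input_text, recommendations):
    # Hypothetical helper: in app.py this runs inline. A single weight is bumped
    # for every query keyword that equals a catalogue title (whole-title match,
    # as in the diff) and is then broadcast over all recommendations.
    weight = 1.0
    titles = df["title"].str.lower().tolist()
    for keyword in input_text.split():
        if keyword.lower() in titles:
            weight += 0.1
    return [weight] * len(recommendations)


recs = recommend_top_10(0, all_video_embeddings)
print(recs)                                    # top videos by dot-product similarity
print(keyword_weights("cooking news", recs))   # three identical weights (≈1.2) for this toy query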
 
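For reference, a hypothetical example of the payload the updated get_similar_and_recommend hands to the gr.JSON output: the keys mirror the diff above, while the concrete values (titles, weights) are invented for illustration. Because the new code computes one weight from the whole query and broadcasts it via [weight] * len(top_10_recommended_videos_features), every entry in weights is identical for a given search.

# Hypothetical example payload (keys from the diff above, values invented).
example_output = {
    "search_context": {
        "input_text": "space documentaries",   # what the user typed
        "weights": [1.1] * 10,                  # one (identical) weight per recommendation
    },
    "most_similar_video": {"title": "Inside the ISS"},   # invented record
    "final_recommendations": [
        {"title": "Apollo 11 Restored"},                  # invented record
        # ... nine more video records from df ...
    ],
}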