Update app.py
Browse files
app.py
CHANGED
@@ -1,13 +1,15 @@
|
|
1 |
-
# ===
|
|
|
|
|
|
|
2 |
import pandas as pd
|
3 |
import numpy as np
|
4 |
from sentence_transformers import SentenceTransformer
|
5 |
import faiss
|
6 |
import streamlit as st
|
7 |
-
import gradio as gr
|
8 |
import os
|
9 |
|
10 |
-
# === Dataset Loading Function ===
|
11 |
def load_dataset():
|
12 |
"""
|
13 |
Provides multiple options to load the dataset: manual upload, Kaggle download, or specifying a local path.
|
@@ -62,7 +64,7 @@ def load_dataset():
|
|
62 |
|
63 |
return None
|
64 |
|
65 |
-
# === Preprocess Data ===
|
66 |
def preprocess_data(df):
|
67 |
"""
|
68 |
Normalizes column names and prepares text for embeddings. Adds placeholders for missing columns if needed.
|
@@ -80,7 +82,7 @@ def preprocess_data(df):
|
|
80 |
|
81 |
return df
|
82 |
|
83 |
-
# === Create Embeddings and FAISS Index ===
|
84 |
def create_faiss_index(df, model):
|
85 |
"""
|
86 |
Generates embeddings using a sentence-transformer model and creates a FAISS index.
|
@@ -91,7 +93,7 @@ def create_faiss_index(df, model):
|
|
91 |
index.add(embeddings)
|
92 |
return index
|
93 |
|
94 |
-
# === Define Retrieval Function ===
|
95 |
def retrieve(query, model, index, df, top_k=5):
|
96 |
"""
|
97 |
Retrieves top-k results for a given query using FAISS index.
|
@@ -101,26 +103,10 @@ def retrieve(query, model, index, df, top_k=5):
|
|
101 |
results = df.iloc[indices[0]].to_dict(orient="records")
|
102 |
return results
|
103 |
|
104 |
-
# ===
|
105 |
-
def movie_query_app(query, model, index, df):
|
106 |
-
"""
|
107 |
-
Gradio interface function to retrieve and display movie recommendations based on a query.
|
108 |
-
"""
|
109 |
-
results = retrieve(query, model, index, df)
|
110 |
-
response = ""
|
111 |
-
for i, res in enumerate(results):
|
112 |
-
response += f"**{i+1}. {res['title']} ({res['year']})**\n"
|
113 |
-
response += f"- **Genres**: {res['genres']}\n"
|
114 |
-
response += f"- **Summary**: {res['short summary']}\n"
|
115 |
-
response += f"- **Director**: {res['director']}\n"
|
116 |
-
response += f"- **Cast**: {res['cast']}\n"
|
117 |
-
response += f"- **Rating**: {res['rating']}\n\n"
|
118 |
-
return response
|
119 |
-
|
120 |
-
# === Main Function ===
|
121 |
if __name__ == "__main__":
|
122 |
# Streamlit Setup
|
123 |
-
st.title("
|
124 |
|
125 |
# Step 1: Load dataset
|
126 |
df = load_dataset()
|
@@ -137,16 +123,21 @@ if __name__ == "__main__":
|
|
137 |
model = SentenceTransformer('all-MiniLM-L6-v2')
|
138 |
index = create_faiss_index(df, model)
|
139 |
|
140 |
-
# Step 4:
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
|
|
|
|
|
|
|
|
|
|
151 |
else:
|
152 |
-
st.write("### Please load the dataset to proceed.")
|
|
|
1 |
+
# === Install Required Libraries === #
|
2 |
+
# !pip install pandas numpy sentence-transformers faiss-cpu streamlit
|
3 |
+
|
4 |
+
# === Import Required Libraries === #
|
5 |
import pandas as pd
|
6 |
import numpy as np
|
7 |
from sentence_transformers import SentenceTransformer
|
8 |
import faiss
|
9 |
import streamlit as st
|
|
|
10 |
import os
|
11 |
|
12 |
+
# === Dataset Loading Function === #
|
13 |
def load_dataset():
|
14 |
"""
|
15 |
Provides multiple options to load the dataset: manual upload, Kaggle download, or specifying a local path.
|
|
|
64 |
|
65 |
return None
|
66 |
|
67 |
+
# === Preprocess Data === #
|
68 |
def preprocess_data(df):
|
69 |
"""
|
70 |
Normalizes column names and prepares text for embeddings. Adds placeholders for missing columns if needed.
|
|
|
82 |
|
83 |
return df
|
84 |
|
85 |
+
# === Create Embeddings and FAISS Index === #
|
86 |
def create_faiss_index(df, model):
|
87 |
"""
|
88 |
Generates embeddings using a sentence-transformer model and creates a FAISS index.
|
|
|
93 |
index.add(embeddings)
|
94 |
return index
|
95 |
|
96 |
+
# === Define Retrieval Function === #
|
97 |
def retrieve(query, model, index, df, top_k=5):
|
98 |
"""
|
99 |
Retrieves top-k results for a given query using FAISS index.
|
|
|
103 |
results = df.iloc[indices[0]].to_dict(orient="records")
|
104 |
return results
|
105 |
|
106 |
+
# === Main Function === #
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
107 |
if __name__ == "__main__":
|
108 |
# Streamlit Setup
|
109 |
+
st.title("Movie Recommendation Application with FAISS and Sentence-Transformers")
|
110 |
|
111 |
# Step 1: Load dataset
|
112 |
df = load_dataset()
|
|
|
123 |
model = SentenceTransformer('all-MiniLM-L6-v2')
|
124 |
index = create_faiss_index(df, model)
|
125 |
|
126 |
+
# Step 4: Query Input and Result Display
|
127 |
+
query = st.text_input("Enter a movie name or keyword for recommendations:")
|
128 |
+
|
129 |
+
if query:
|
130 |
+
st.write("### Query Results")
|
131 |
+
results = retrieve(query, model, index, df)
|
132 |
+
response = ""
|
133 |
+
for i, res in enumerate(results):
|
134 |
+
response += f"**{i+1}. {res['title']} ({res['year']})**\n"
|
135 |
+
response += f"- **Genres**: {res['genres']}\n"
|
136 |
+
response += f"- **Summary**: {res['short summary']}\n"
|
137 |
+
response += f"- **Director**: {res['director']}\n"
|
138 |
+
response += f"- **Cast**: {res['cast']}\n"
|
139 |
+
response += f"- **Rating**: {res['rating']}\n\n"
|
140 |
+
st.write(response)
|
141 |
+
|
142 |
else:
|
143 |
+
st.write("### Please load the dataset to proceed.")
|