Spaces:

MohammadYaseen
/

RAG-Application-with-Integrated-Dataset-Loading

Sleeping

App Files Community

MohammadYaseen committed on Nov 29, 2024

Commit

01ce151

verified ·

1 Parent(s): afd65e8

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -36

app.py CHANGED Viewed

@@ -1,13 +1,15 @@
-# === Import Required Libraries ===
 import pandas as pd
 import numpy as np
 from sentence_transformers import SentenceTransformer
 import faiss
 import streamlit as st
-import gradio as gr
 import os
-# === Dataset Loading Function ===
 def load_dataset():
     """
     Provides multiple options to load the dataset: manual upload, Kaggle download, or specifying a local path.
@@ -62,7 +64,7 @@ def load_dataset():
     return None
-# === Preprocess Data ===
 def preprocess_data(df):
     """
     Normalizes column names and prepares text for embeddings. Adds placeholders for missing columns if needed.
@@ -80,7 +82,7 @@ def preprocess_data(df):
     return df
-# === Create Embeddings and FAISS Index ===
 def create_faiss_index(df, model):
     """
     Generates embeddings using a sentence-transformer model and creates a FAISS index.
@@ -91,7 +93,7 @@ def create_faiss_index(df, model):
     index.add(embeddings)
     return index
-# === Define Retrieval Function ===
 def retrieve(query, model, index, df, top_k=5):
     """
     Retrieves top-k results for a given query using FAISS index.
@@ -101,26 +103,10 @@ def retrieve(query, model, index, df, top_k=5):
     results = df.iloc[indices[0]].to_dict(orient="records")
     return results
-# === Define Gradio Interface ===
-def movie_query_app(query, model, index, df):
-    """
-    Gradio interface function to retrieve and display movie recommendations based on a query.
-    """
-    results = retrieve(query, model, index, df)
-    response = ""
-    for i, res in enumerate(results):
-        response += f"**{i+1}. {res['title']} ({res['year']})**\n"
-        response += f"- **Genres**: {res['genres']}\n"
-        response += f"- **Summary**: {res['short summary']}\n"
-        response += f"- **Director**: {res['director']}\n"
-        response += f"- **Cast**: {res['cast']}\n"
-        response += f"- **Rating**: {res['rating']}\n\n"
-    return response
-# === Main Function ===
 if __name__ == "__main__":
     # Streamlit Setup
-    st.title("RAG Application with Integrated Dataset Loading")
     # Step 1: Load dataset
     df = load_dataset()
@@ -137,16 +123,21 @@ if __name__ == "__main__":
         model = SentenceTransformer('all-MiniLM-L6-v2')
         index = create_faiss_index(df, model)
-        # Step 4: Set up Gradio interface
-        iface = gr.Interface(
-            fn=lambda query: movie_query_app(query, model, index, df),
-            inputs="text",
-            outputs="text",
-            title="Movie Recommendation App",
-        )
-        # Step 5: Launch the app
-        st.write("### Launching Gradio App...")
-        iface.launch()
     else:
-        st.write("### Please load the dataset to proceed.")

+# === Install Required Libraries === #
+# !pip install pandas numpy sentence-transformers faiss-cpu streamlit
+# === Import Required Libraries === #
 import pandas as pd
 import numpy as np
 from sentence_transformers import SentenceTransformer
 import faiss
 import streamlit as st
 import os
+# === Dataset Loading Function === #
 def load_dataset():
     """
     Provides multiple options to load the dataset: manual upload, Kaggle download, or specifying a local path.
     return None
+# === Preprocess Data === #
 def preprocess_data(df):
     """
     Normalizes column names and prepares text for embeddings. Adds placeholders for missing columns if needed.
     return df
+# === Create Embeddings and FAISS Index === #
 def create_faiss_index(df, model):
     """
     Generates embeddings using a sentence-transformer model and creates a FAISS index.
     index.add(embeddings)
     return index
+# === Define Retrieval Function === #
 def retrieve(query, model, index, df, top_k=5):
     """
     Retrieves top-k results for a given query using FAISS index.
     results = df.iloc[indices[0]].to_dict(orient="records")
     return results
+# === Main Function === #
 if __name__ == "__main__":
     # Streamlit Setup
+    st.title("Movie Recommendation Application with FAISS and Sentence-Transformers")
     # Step 1: Load dataset
     df = load_dataset()
         model = SentenceTransformer('all-MiniLM-L6-v2')
         index = create_faiss_index(df, model)
+        # Step 4: Query Input and Result Display
+        query = st.text_input("Enter a movie name or keyword for recommendations:")
+        if query:
+            st.write("### Query Results")
+            results = retrieve(query, model, index, df)
+            response = ""
+            for i, res in enumerate(results):
+                response += f"**{i+1}. {res['title']} ({res['year']})**\n"
+                response += f"- **Genres**: {res['genres']}\n"
+                response += f"- **Summary**: {res['short summary']}\n"
+                response += f"- **Director**: {res['director']}\n"
+                response += f"- **Cast**: {res['cast']}\n"
+                response += f"- **Rating**: {res['rating']}\n\n"
+            st.write(response)
     else:
+        st.write("### Please load the dataset to proceed.")