import streamlit as st
import pandas as pd
from transformers import pipeline
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
import numpy as np
# Set modern page configuration
st.set_page_config(page_title="News Analyzer", layout="wide")
# Inject custom CSS for sleek dark blue theme with black fonts
st.markdown("""
""", unsafe_allow_html=True)
# Modern Header
st.markdown("
", unsafe_allow_html=True)
# Load the Hugging Face model
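# This is an extractive question-answering pipeline: given a question and a block of
# context text, it returns the span of the context most likely to contain the answer.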
pipe = pipeline("question-answering", model="distilbert/distilbert-base-cased-distilled-squad")
# Initialize sentence transformer model
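# all-MiniLM-L6-v2 is a small, fast model that maps each sentence to a 384-dimensional
# embedding; it is used here to find the rows most similar to the user's question.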
sentence_model = SentenceTransformer('all-MiniLM-L6-v2') # Pre-trained sentence model
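# NOTE: Streamlit reruns this script on every interaction. Wrapping the two model
# loaders above in functions decorated with @st.cache_resource would keep the models
# in memory across reruns instead of reloading them each time.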
# Responsive Layout - Uses full width
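# st.columns takes relative width weights, so the left column is slightly wider than the right.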
col1, col2 = st.columns([1.1, 1])
# Left Section - File Upload & CSV/Excel Display
with col1:
st.markdown("", unsafe_allow_html=True)
st.subheader("📂 Upload News Data")
uploaded_file = st.file_uploader("Upload a CSV or Excel file", type=["csv", "xlsx"])
if uploaded_file is not None:
# Determine the file extension
file_extension = uploaded_file.name.split('.')[-1]
if file_extension == 'csv':
df = pd.read_csv(uploaded_file)
elif file_extension == 'xlsx':
df = pd.read_excel(uploaded_file)
        # Download button
        st.download_button(
            label="⬇️ Download Processed Data",
            data=df.to_csv(index=False).encode('utf-8'),
            file_name="output.csv",
            mime="text/csv"
        )
        # CSV/Excel Preview Box
        st.markdown("### 📜 CSV/Excel Preview")
        st.dataframe(df, use_container_width=True)
# Right Section - Q&A Interface
with col2:
st.markdown("", unsafe_allow_html=True)
st.subheader("🤖 AI Assistant")
# Answer Display Box (Initially Empty)
answer_placeholder = st.empty()
answer_placeholder.markdown("
", unsafe_allow_html=True)
# Question Input
st.markdown("### 🔍 Ask Your Question:")
user_question = st.text_input("Enter your question here", label_visibility="hidden") # Hides the label
# Button & Answer Display
if st.button("🔮 Get Answer"):
if user_question.strip() and uploaded_file is not None:
with st.spinner("⏳ Wait, our agent will look into that..."):
# Extract the 1st column as context (0-indexed)
context = df.iloc[:, 0].dropna().tolist()
# Generate embeddings for the context rows and the question
context_embeddings = sentence_model.encode(context)
question_embedding = sentence_model.encode([user_question])
# Calculate cosine similarity
similarities = cosine_similarity(question_embedding, context_embeddings)
top_indices = similarities[0].argsort()[-5:][::-1] # Get top 5 similar rows
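                # Only the most relevant rows are used because the QA model can read a
                # limited amount of text at once (512 tokens for DistilBERT-based models).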
                # Concatenate the top 5 similar context rows into a single context string
                top_context = "\n".join([context[i] for i in top_indices])
                # Get answer from Hugging Face model using the retrieved context
                result = pipe(question=user_question, context=top_context)
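                # The QA pipeline returns a dict with 'answer', 'score', 'start' and 'end';
                # only the extracted answer span is shown to the user.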
                answer = result['answer']
        else:
            answer = "⚠️ Please upload a file and enter a question first!"
answer_placeholder.markdown(f"
{answer}
", unsafe_allow_html=True)
st.markdown("
", unsafe_allow_html=True)