Spaces:

nishjay
/

Country-specific-Updated-Sentiment-Analysis-of-India

Sleeping

App Files Files Community

nishjay committed on Mar 4

Commit

3d65ea8

verified ·

1 Parent(s): 0694408

Create app.py

Browse files

Files changed (1) hide show

app.py +125 -0

app.py ADDED Viewed

	@@ -0,0 +1,125 @@

+import streamlit as st
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+from sentence_transformers import SentenceTransformer
+import faiss
+# ------------------------------
+# Load Data
+# ------------------------------
+@st.cache_data
+def load_data():
+    df = pd.read_csv("mea.csv", parse_dates=["date"])
+    # Ensure 'year' column exists
+    if "year" not in df.columns:
+        df["year"] = df["date"].dt.year
+    return df
+df = load_data()
+# ------------------------------
+# Extract Unique Country Names
+# ------------------------------
+def get_unique_countries(df):
+    country_set = set()
+    for entry in df["countries"].dropna():
+        for country in entry.split(","):
+            country = country.strip()
+            if country:
+                country_set.add(country)
+    return sorted(list(country_set))
+# Computed once at module level; reused for the embeddings and for country_map.
+unique_countries = get_unique_countries(df)
+# ------------------------------
+# Load SentenceTransformer Model
+# ------------------------------
+@st.cache_resource
+def load_model():
+    return SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
+model = load_model()
+# ------------------------------
+# Compute Embeddings for Unique Countries
+# ------------------------------
+@st.cache_resource
+def compute_embeddings(countries):
+    return model.encode(countries, convert_to_tensor=False)
+country_embeddings = compute_embeddings(unique_countries)
+# ------------------------------
+# Build FAISS Index
+# ------------------------------
+@st.cache_resource
+def build_faiss_index(embeddings):
+    dimension = embeddings.shape[1]
+    index = faiss.IndexFlatL2(dimension)
+    index.add(np.array(embeddings))
+    return index
+index = build_faiss_index(country_embeddings)
+# Create a mapping from index to country name
+country_map = {i: country for i, country in enumerate(unique_countries)}
+# ------------------------------
+# Sentiment Analysis Function
+# ------------------------------
+def sentiment_analysis(df, query, model, index, country_map, k, start_year):
+    # Encode the query term and search for similar country names
+    query_embedding = model.encode([query], convert_to_tensor=False)
+    distances, indices = index.search(np.array(query_embedding), k)
+    similar_countries = [country_map[idx] for idx in indices[0]]
+    st.write("**Similar country names to _{}_:**".format(query))
+    st.write(similar_countries)
+    # Filter the DataFrame using the similar country names (regex OR join)
+    country_variations = '|'.join(similar_countries)
+    filtered_df = df[df['countries'].str.contains(country_variations, case=False, na=False)]
+    filtered_df = filtered_df[filtered_df['year'] >= start_year]
+    if filtered_df.empty:
+        st.warning("No records found for the given query and start year.")
+        return
+    # Plot 1: Mean Sentiment per Year
+    mean_sentiment_per_year = filtered_df.groupby('year')['sentiment'].mean()
+    fig1, ax1 = plt.subplots()
+    ax1.plot(mean_sentiment_per_year.index, mean_sentiment_per_year, marker='o', color='r')
+    ax1.set_title(f'Mean Sentiment Score Over Years for "{query}"')
+    ax1.set_xlabel('Year')
+    ax1.set_ylabel('Mean Sentiment Score')
+    ax1.grid(True)
+    st.pyplot(fig1)
+    # Plot 2: Sentiment Scores Over Time (Scatter Plot)
+    fig2, ax2 = plt.subplots(figsize=(10, 6))
+    colors = filtered_df['sentiment'].apply(lambda x: 'red' if x < 0 else 'orange' if x > 0 else 'blue')
+    ax2.scatter(filtered_df['date'], filtered_df['sentiment'], marker='o', color=colors)
+    ax2.set_title(f'Sentiment Scores Over Time for "{query}"')
+    ax2.set_xlabel('Date')
+    ax2.set_ylabel('Sentiment Score')
+    ax2.grid(True)
+    st.pyplot(fig2)
+    # Display the average sentiment
+    average_sentiment = filtered_df['sentiment'].mean()
+    st.write(f'**Average sentiment of India towards "{query}" from {start_year} onwards = {average_sentiment:.2f}**')
+# ------------------------------
+# Streamlit User Interface
+# ------------------------------
+st.title("Sentiment Analysis: India & Country Relationship")
+st.write("This app visualizes sentiment trends in India's press releases toward a selected country.")
+# User inputs
+query = st.text_input("Enter a country name (or variation) to search for:", "United States")
+start_year = st.number_input("Enter start year (e.g., 2010):", min_value=1900, max_value=2100, value=2010)
+k = st.number_input("Enter number of similar country variations to consider:", min_value=1, max_value=100, value=48)
+if st.button("Analyze"):
+    sentiment_analysis(df, query, model, index, country_map, k, start_year)