marianeft committed on
Commit defe71f · 1 Parent(s): 11db685

Initial commit

Files changed (3)
  1. README.md +1 -1
  2. app.py +125 -0
  3. requirements.txt +8 -0
README.md CHANGED
@@ -8,7 +8,7 @@ sdk_version: 1.42.2
 app_file: app.py
 pinned: false
 license: apache-2.0
-short_description: Analyze sentiment in Medium
+short_description: Analyze sentiment in Medium articles
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,125 @@
+import streamlit as st
+import torch
+from transformers import AutoModelForSequenceClassification, AutoTokenizer
+import requests
+import pandas as pd
+import altair as alt
+from collections import OrderedDict
+from nltk.tokenize import sent_tokenize
+import trafilatura
+import validators
+
+# Load the punkt tokenizer from nltk
+import nltk
+nltk.download('punkt')
+
+# Load model and tokenizer
+model_name = 'dejanseo/sentiment'
+model = AutoModelForSequenceClassification.from_pretrained(model_name)
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+# Sentiment labels as textual descriptions
+sentiment_labels = {
+    0: "very positive",
+    1: "positive",
+    2: "somewhat positive",
+    3: "neutral",
+    4: "somewhat negative",
+    5: "negative",
+    6: "very negative"
+}
+
+# Background colors for sentiments
+background_colors = {
+    "very positive": "rgba(0, 255, 0, 0.5)",
+    "positive": "rgba(0, 255, 0, 0.3)",
+    "somewhat positive": "rgba(0, 255, 0, 0.1)",
+    "neutral": "rgba(128, 128, 128, 0.1)",
+    "somewhat negative": "rgba(255, 0, 0, 0.1)",
+    "negative": "rgba(255, 0, 0, 0.3)",
+    "very negative": "rgba(255, 0, 0, 0.5)"
+}
+
+# Function to get text content from a URL, restricted to Medium stories/articles
+def get_text_from_url(url):
+    if not validators.url(url):
+        return None, "Invalid URL"
+
+    if "medium.com/" not in url:  # Check if it's a Medium URL
+        return None, "URL is not a Medium story/article."
+
+    try:
+        downloaded = trafilatura.fetch_url(url)
+        if downloaded:
+            return trafilatura.extract(downloaded), None
+        else:
+            return None, "Could not download content from URL."
+    except Exception as e:
+        return None, f"Error extracting text: {e}"
+
+# ... (rest of the functions: classify_text, classify_long_text, classify_sentences remain the same)
+
+# Streamlit UI
+st.title("Sentiment Classification Model by DEJAN (Medium Only)")
+
+url = st.text_input("Enter Medium URL:")
+
+if url:
+    text, error_message = get_text_from_url(url)
+
+    if error_message:
+        st.error(error_message)  # Display error message
+    elif text:
+        # ... (rest of the analysis and display code remains the same)
+        scores, chunk_scores_list, chunks = classify_long_text(text)
+        scores_dict = {sentiment_labels[i]: scores[i] for i in range(len(sentiment_labels))}
+
+        # Ensure the exact order of labels in the graph
+        sentiment_order = [
+            "very positive", "positive", "somewhat positive",
+            "neutral",
+            "somewhat negative", "negative", "very negative"
+        ]
+        ordered_scores_dict = OrderedDict((label, scores_dict[label]) for label in sentiment_order)
+
+        # Prepare the DataFrame and reindex
+        df = pd.DataFrame.from_dict(ordered_scores_dict, orient='index', columns=['Likelihood']).reindex(sentiment_order)
+
+        # Use Altair to plot the bar chart
+        chart = alt.Chart(df.reset_index()).mark_bar().encode(
+            x=alt.X('index', sort=sentiment_order, title='Sentiment'),
+            y='Likelihood'
+        ).properties(
+            width=600,
+            height=400
+        )
+
+        st.altair_chart(chart, use_container_width=True)
+
+        # Display each chunk and its own chart
+        for i, (chunk_scores, chunk) in enumerate(zip(chunk_scores_list, chunks)):
+            chunk_scores_dict = {sentiment_labels[j]: chunk_scores[j] for j in range(len(sentiment_labels))}
+            ordered_chunk_scores_dict = OrderedDict((label, chunk_scores_dict[label]) for label in sentiment_order)
+            df_chunk = pd.DataFrame.from_dict(ordered_chunk_scores_dict, orient='index', columns=['Likelihood']).reindex(sentiment_order)
+
+            chunk_chart = alt.Chart(df_chunk.reset_index()).mark_bar().encode(
+                x=alt.X('index', sort=sentiment_order, title='Sentiment'),
+                y='Likelihood'
+            ).properties(
+                width=600,
+                height=400
+            )
+
+            st.write(f"Chunk {i + 1}:")
+            st.write(chunk)
+            st.altair_chart(chunk_chart, use_container_width=True)
+
+        # Sentence-level classification with background colors
+        st.write("Extracted Text with Sentiment Highlights:")
+        sentence_scores = classify_sentences(text)
+        for sentence, sentiment in sentence_scores:
+            bg_color = background_colors[sentiment]
+            st.markdown(f'<span style="background-color: {bg_color}">{sentence}</span>', unsafe_allow_html=True)
+
+
+# No 'else' needed here, as the error message is already handled above.
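The listing above omits the three classification helpers that the UI calls: classify_text, classify_long_text, and classify_sentences. A minimal sketch of what they could look like follows, reusing the model, tokenizer, sentiment_labels, and sent_tokenize already defined in app.py; the 512-token chunking, the averaging of per-chunk scores, and the argmax sentence labels are assumptions for illustration, not code from this commit.

# Hypothetical reconstruction of the elided helpers (not part of the commit).
def classify_text(text):
    # One forward pass; return the seven class probabilities as a plain list.
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        logits = model(**inputs).logits
    return logits.softmax(dim=-1).squeeze(0).tolist()

def classify_long_text(text, chunk_size=512):
    # Split the article into token-sized chunks, score each chunk,
    # and average the per-chunk scores into an overall distribution.
    token_ids = tokenizer.encode(text, add_special_tokens=False)
    chunks = [tokenizer.decode(token_ids[i:i + chunk_size])
              for i in range(0, len(token_ids), chunk_size)]
    chunk_scores_list = [classify_text(chunk) for chunk in chunks]
    scores = [sum(col) / len(chunk_scores_list) for col in zip(*chunk_scores_list)]
    return scores, chunk_scores_list, chunks

def classify_sentences(text):
    # Label each sentence with its most likely sentiment for the highlight view.
    results = []
    for sentence in sent_tokenize(text):
        probs = classify_text(sentence)
        results.append((sentence, sentiment_labels[probs.index(max(probs))]))
    return results

With helpers along these lines the script runs end to end; note that a local run also needs the validators package, which app.py imports but the requirements.txt below does not list.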
requirements.txt ADDED
@@ -0,0 +1,8 @@
+streamlit
+torch
+transformers
+requests
+trafilatura
+pandas
+altair
+nltk