Spaces:

marianeft
/

sentiment-medium

Sleeping

App Files Files Community

marianeft committed on Mar 4

Commit

f90a027

verified ·

1 Parent(s): d9b5c9b

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -5

app.py CHANGED Viewed

@@ -14,7 +14,7 @@ import nltk
 nltk.download('punkt')
 # Load model and tokenizer
-model_name = 'dejanseo/sentiment'
 model = AutoModelForSequenceClassification.from_pretrained(model_name)
 tokenizer = AutoTokenizer.from_pretrained(model_name)
@@ -40,6 +40,15 @@ background_colors = {
     "very negative": "rgba(255, 0, 0, 0.5)"
 }
 # Function to get text content from a URL, restricted to Medium stories/articles
 def get_text_from_url(url):
     if not validators.url(url):
@@ -57,7 +66,31 @@ def get_text_from_url(url):
     except Exception as e:
         return None, f"Error extracting text: {e}"
-# ... (rest of the functions: classify_text, classify_long_text, classify_sentences remain the same)
 # Streamlit UI
 st.title("Sentiment Classification Model (Medium Only)")
@@ -111,9 +144,7 @@ if url:
             )
             st.write(f"Chunk {i + 1}:")
-            st.write(chunk)
-            st.altair_chart(chunk_chart, use_container_width=True)
         # Sentence-level classification with background colors
         st.write("Extracted Text with Sentiment Highlights:")
         sentence_scores = classify_sentences(text)

 nltk.download('punkt')
 # Load model and tokenizer
+model_name = 'dejanseo/sentiment' # Load model adapted from ... (NOTE(review): attribution left unfinished in the original comment; complete it)
 model = AutoModelForSequenceClassification.from_pretrained(model_name)
 tokenizer = AutoTokenizer.from_pretrained(model_name)
     "very negative": "rgba(255, 0, 0, 0.5)"
 }
+# Score one piece of text with the model and return its class probabilities
+def classify_text(text, max_length):
+    """Return the softmax scores the model assigns to `text`.
+
+    `text` is tokenized with truncation and padding enabled and
+    `max_length` passed through as the tokenizer's length limit. The
+    module-level `model` is then run without gradient tracking, and the
+    softmax of its logits is squeezed and converted with `.tolist()`.
+    """
+    encoded = tokenizer(
+        text,
+        return_tensors="pt",
+        truncation=True,
+        padding=True,
+        max_length=max_length,
+    )
+    # Inference only, so gradient bookkeeping is switched off.
+    with torch.no_grad():
+        class_logits = model(**encoded).logits
+        return torch.softmax(class_logits, dim=-1).squeeze().tolist()
 # Function to get text content from a URL, restricted to Medium stories/articles
 def get_text_from_url(url):
     if not validators.url(url):
     except Exception as e:
         return None, f"Error extracting text: {e}"
+# Function to handle long texts
+def classify_long_text(text):
+    """Classify `text` chunk by chunk and average the per-chunk scores.
+
+    The text is cut into consecutive slices of `tokenizer.model_max_length`
+    characters. Each slice is scored with `classify_text`, and the scores
+    are summed position by position and divided by the number of chunks.
+
+    Returns a tuple `(aggregate_scores, chunk_scores_list, chunks)`.
+    For empty `text` there are no chunks, so the aggregate is a list of
+    zeros (one per entry in `sentiment_labels`) and both lists are empty,
+    rather than failing with a division by zero.
+    """
+    max_length = tokenizer.model_max_length
+    # NOTE: the split below is by characters, while classify_text passes the
+    # same number to the tokenizer as a token limit for each chunk.
+    chunks = [text[i:i + max_length] for i in range(0, len(text), max_length)]
+    aggregate_scores = [0] * len(sentiment_labels)
+    chunk_scores_list = []
+    # Nothing to score: return early so the averaging step never divides by 0.
+    if not chunks:
+        return aggregate_scores, chunk_scores_list, chunks
+    for chunk in chunks:
+        chunk_scores = classify_text(chunk, max_length)
+        chunk_scores_list.append(chunk_scores)
+        # Running element-wise sum of the scores seen so far.
+        aggregate_scores = [x + y for x, y in zip(aggregate_scores, chunk_scores)]
+    # Average the scores
+    aggregate_scores = [x / len(chunks) for x in aggregate_scores]
+    return aggregate_scores, chunk_scores_list, chunks
+# Tag every sentence of the text with its highest-scoring sentiment label
+def classify_sentences(text):
+    """Return a list of `(sentence, sentiment)` pairs for `text`.
+
+    Sentences are produced by `sent_tokenize`. Each one is scored with
+    `classify_text`, and the entry of `sentiment_labels` at the position
+    of the highest score (the first such position on ties) is its label.
+    """
+    labelled = []
+    for sentence in sent_tokenize(text):
+        scores = classify_text(sentence, tokenizer.model_max_length)
+        top_position = scores.index(max(scores))
+        labelled.append((sentence, sentiment_labels[top_position]))
+    return labelled
 # Streamlit UI
 st.title("Sentiment Classification Model (Medium Only)")
             )
             st.write(f"Chunk {i + 1}:")
         # Sentence-level classification with background colors
         st.write("Extracted Text with Sentiment Highlights:")
         sentence_scores = classify_sentences(text)