Spaces:

pentarosarium
/

processor

Sleeping

App Files Files Community

pentarosarium commited on Oct 16, 2024

Commit

7e14e6f

1 Parent(s): c39eb14

another try at sync with hf

Browse files

Files changed (2) hide show

.github/workflows/sync-to-huggingface.yml +18 -0
app.py +90 -0

.github/workflows/sync-to-huggingface.yml ADDED Viewed

	@@ -0,0 +1,18 @@

+name: Sync to Hugging Face
+   on:
+     push:
+       branches: [main]
+   jobs:
+     sync-to-hub:
+       runs-on: ubuntu-latest
+       steps:
+         - uses: actions/checkout@v2
+           with:
+             fetch-depth: 0
+         - name: Push to hub
+           env:
+             HF_TOKEN: ${{ secrets.HF_TOKEN }}
+           run: |
+             git push https://pentarosarium:[email protected]/spaces/pentarosarium/processor main

app.py ADDED Viewed

	@@ -0,0 +1,90 @@

+import streamlit as st
+import pandas as pd
+import time
+from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
+from transformers import pipeline, MarianMTModel, MarianTokenizer
+import matplotlib.pyplot as plt
+from pymystem3 import Mystem
+import io
+from rapidfuzz import fuzz
+# Initialize components (VADER, FinBERT, RoBERTa, FinBERT-Tone, Mystem, translation model)
+# (Copy the initialization code from your original script)
+# Define helper functions (lemmatize_text, translate, get_vader_sentiment...)
+# (Copy these functions from your original script)
+def process_file(uploaded_file):
+    df = pd.read_excel(uploaded_file, sheet_name='Публикации')
+    # Apply fuzzy deduplication
+    df = df.groupby('Объект').apply(lambda x: fuzzy_deduplicate(x, 'Выдержки из текста', 65)).reset_index(drop=True)
+    # Translate texts
+    translated_texts = []
+    progress_bar = st.progress(0)
+    for i, text in enumerate(df['Выдержки из текста']):
+        translated_text = translate(str(text))
+        translated_texts.append(translated_text)
+        progress_bar.progress((i + 1) / len(df))
+    # Perform sentiment analysis
+    vader_results = [get_vader_sentiment(text) for text in translated_texts]
+    finbert_results = [get_finbert_sentiment(text) for text in translated_texts]
+    roberta_results = [get_roberta_sentiment(text) for text in translated_texts]
+    finbert_tone_results = [get_finbert_tone_sentiment(text) for text in translated_texts]
+    # Add results to DataFrame
+    df['VADER'] = vader_results
+    df['FinBERT'] = finbert_results
+    df['RoBERTa'] = roberta_results
+    df['FinBERT-Tone'] = finbert_tone_results
+    # Reorder columns
+    columns_order = ['Объект', 'VADER', 'FinBERT', 'RoBERTa', 'FinBERT-Tone', 'Выдержки из текста']
+    df = df[columns_order]
+    return df
+def main():
+    st.title("Sentiment Analysis App")
+    uploaded_file = st.file_uploader("Choose an Excel file", type="xlsx")
+    if uploaded_file is not None:
+        df = process_file(uploaded_file)
+        st.subheader("Data Preview")
+        st.write(df.head())
+        st.subheader("Sentiment Distribution")
+        fig, axs = plt.subplots(2, 2, figsize=(12, 8))
+        fig.suptitle("Sentiment Distribution for Each Model")
+        models = ['VADER', 'FinBERT', 'RoBERTa', 'FinBERT-Tone']
+        for i, model in enumerate(models):
+            ax = axs[i // 2, i % 2]
+            sentiment_counts = df[model].value_counts()
+            sentiment_counts.plot(kind='bar', ax=ax)
+            ax.set_title(f"{model} Sentiment")
+            ax.set_xlabel("Sentiment")
+            ax.set_ylabel("Count")
+        plt.tight_layout()
+        st.pyplot(fig)
+        # Offer download of results
+        output = io.BytesIO()
+        with pd.ExcelWriter(output, engine='openpyxl') as writer:
+            df.to_excel(writer, index=False)
+        output.seek(0)
+        st.download_button(
+            label="Download results as Excel",
+            data=output,
+            file_name="sentiment_analysis_results.xlsx",
+            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
+        )
+if __name__ == "__main__":
+    main()