Spaces:

ivanpalenciab
/

sentiment_analysis_app

Running

App Files Files Community

ivanpalenciab committed on 18 days ago

Commit

97f77d9

verified ·

1 Parent(s): deb0851

Upload 4 files

Browse files

Files changed (4) hide show

Models/BERTo_model_parameters.pth +3 -0
app.py +46 -0
model.py +89 -0
requirements.txt +163 -0

Models/BERTo_model_parameters.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:336d381a3c17b26f8c0b1648fe1a33a35f0c99e213c4b09e4ac7cb2356a88111
+size 439497507

app.py ADDED Viewed

	@@ -0,0 +1,46 @@

+import streamlit as st
+import pandas as pd
+import matplotlib.pyplot as plt
+import seaborn as sns
+from model import classifySentiment,groupClassifier
+st.title("🔍 Análisis de Sentimiento para Empresas")
+option = st.sidebar.selectbox("Elige una opción", ["Analizar un comentario", "Subir archivo CSV"])
+if option == "Analizar un comentario":
+    user_input = st.text_area("Escribe un comentario:")
+    if st.button("Analizar"):
+        original_text ,sentiment = classifySentiment(user_input)
+        st.write(f"📊 Sentimiento: {sentiment}")
+elif option == "Subir archivo CSV":
+    file = st.file_uploader("Sube un archivo con comentarios en formato csv", type=["csv"])
+    if file:
+        try:
+            df = pd.read_csv(file,sep=None, engine="python")
+            clasification = groupClassifier(df)
+            clasified_data = pd.DataFrame(clasification)
+            plt.figure(figsize=(6,4))
+            sns.countplot(x=clasified_data["label"], hue=clasified_data["label"], palette="pastel", legend=False)
+            # Etiquetas y título
+            plt.xlabel("Sentiment category")
+            plt.ylabel("number of texts")
+            plt.title("Data distribution")
+            st.pyplot(plt)
+        except ValueError as e:
+            print(f"Error: {e}")
+        # Mostrar estadísticas
+        #sentiment_counts = df["sentiment"].value_counts()
+        #st.bar_chart("Aqui va la visualizacion de archivo")
+        # WordCloud
+       # words = " ".join(df["texto"])
+        #wordcloud = WordCloud(width=800, height=400).generate(words)
+        #st.image(wordcloud.to_array())"""

model.py ADDED Viewed

	@@ -0,0 +1,89 @@

+import torch
+from torch.utils.data import Dataset, DataLoader
+from torch import nn
+from transformers import BertModel, BertTokenizer
+RANDOM_SEED = 30
+MAX_LEN = 200
+BATCH_SIZE = 16
+NCLASSES = 3
+#device selection
+device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+#tokenitation
+PRE_TRAINED_MODEL_NAME = 'dccuchile/bert-base-spanish-wwm-cased'
+tokenizer = BertTokenizer.from_pretrained(PRE_TRAINED_MODEL_NAME)
+#Model Class
+class BERTSentimentClassifier(nn.Module):
+  def __init__(self, n_classes):
+    super(BERTSentimentClassifier, self).__init__()
+    self.bert = BertModel.from_pretrained(PRE_TRAINED_MODEL_NAME)
+    self.drop = nn.Dropout(p=0.3)
+    self.linear = nn.Linear(self.bert.config.hidden_size, n_classes)
+  def forward(self, input_ids, attention_mask):
+    outputs = self.bert(input_ids = input_ids,attention_mask = attention_mask)
+    cls_output = outputs.pooler_output
+    drop_output = self.drop(cls_output)
+    output = self.linear(drop_output)
+    return output
+model = BERTSentimentClassifier(NCLASSES)
+model.load_state_dict(torch.load("Models/BERTo_model_parameters.pth", map_location=torch.device('cpu')))
+model.to(device)
+model.eval()
+def classifySentiment(review_text):
+  encoding_review = tokenizer.encode_plus(
+      review_text,
+      max_length = MAX_LEN,
+      truncation = True,
+      add_special_tokens = True,
+      return_token_type_ids = False,
+       padding="max_length",
+      return_attention_mask = True,
+      return_tensors = 'pt'
+      )
+  input_ids = encoding_review['input_ids'].to(device)
+  attention_mask = encoding_review['attention_mask'].to(device)
+  with torch.no_grad():
+    output = model(input_ids, attention_mask)
+  prediction = torch.argmax(output, dim=1)   # Getting class with more probability
+  #Mapping the class with 3 cattegory
+  sentiment_labels = {0: "Positivo", 1: "Neutral", 2: "Negativo"}
+  return review_text, sentiment_labels[prediction.item()]
+def groupClassifier(df):
+    """This function allow to clasify a group of sentiment that should come in a dataframe of pandas"""
+    class SentimentDataset(Dataset):
+        def __init__(self, texts):
+            self.texts = texts
+        def __len__(self):
+            return len(self.texts)
+        def __getitem__(self, idx):
+            return self.texts[idx]
+    dataset = SentimentDataset(df["text"].tolist())
+    dataloader = DataLoader(dataset, batch_size=32, num_workers=0)
+    clasification = {
+        "text": [],
+        "label": []
+    }
+    for batch in dataloader:
+        for text in batch:
+                original_text,label = classifySentiment(text)
+                clasification["text"].append(original_text)
+                clasification["label"].append(label)
+    return clasification

requirements.txt ADDED Viewed

	@@ -0,0 +1,163 @@

+altair==5.5.0
+anyio==4.8.0
+argon2-cffi==23.1.0
+argon2-cffi-bindings==21.2.0
+arrow==1.3.0
+asttokens==3.0.0
+async-lru==2.0.4
+attrs==25.1.0
+Automat==24.8.1
+babel==2.17.0
+beautifulsoup4==4.13.3
+bleach==6.2.0
+blinker==1.9.0
+cachetools==5.5.2
+certifi==2025.1.31
+cffi==1.17.1
+charset-normalizer==3.4.1
+click==8.1.8
+colorama==0.4.6
+comm==0.2.2
+constantly==23.10.4
+contourpy==1.3.1
+cryptography==44.0.2
+cssselect==1.3.0
+cycler==0.12.1
+debugpy==1.8.12
+decorator==5.2.1
+deepl==1.21.1
+defusedxml==0.7.1
+exceptiongroup==1.2.2
+executing==2.2.0
+fastjsonschema==2.21.1
+filelock==3.18.0
+fonttools==4.56.0
+fqdn==1.5.1
+fsspec==2025.3.0
+gitdb==4.0.12
+GitPython==3.1.44
+h11==0.14.0
+httpcore==1.0.7
+httpx==0.28.1
+huggingface-hub==0.29.3
+hyperlink==21.0.0
+idna==3.10
+incremental==24.7.2
+ipykernel==6.29.5
+ipython==8.32.0
+ipywidgets==8.1.5
+isoduration==20.11.0
+itemadapter==0.11.0
+itemloaders==1.3.2
+jedi==0.19.2
+Jinja2==3.1.5
+jmespath==1.0.1
+json5==0.10.0
+jsonpointer==3.0.0
+jsonschema==4.23.0
+jsonschema-specifications==2024.10.1
+jupyter==1.1.1
+jupyter-console==6.6.3
+jupyter-events==0.12.0
+jupyter-lsp==2.2.5
+jupyter_client==8.6.3
+jupyter_core==5.7.2
+jupyter_server==2.15.0
+jupyter_server_terminals==0.5.3
+jupyterlab==4.3.5
+jupyterlab_pygments==0.3.0
+jupyterlab_server==2.27.3
+jupyterlab_widgets==3.0.13
+kiwisolver==1.4.8
+lxml==5.3.1
+MarkupSafe==3.0.2
+matplotlib==3.10.1
+matplotlib-inline==0.1.7
+mistune==3.1.2
+mpmath==1.3.0
+narwhals==1.31.0
+nbclient==0.10.2
+nbconvert==7.16.6
+nbformat==5.10.4
+nest-asyncio==1.6.0
+networkx==3.4.2
+notebook==7.3.2
+notebook_shim==0.2.4
+numpy==2.2.3
+overrides==7.7.0
+packaging==24.2
+pandas==2.2.3
+pandocfilters==1.5.1
+parsel==1.10.0
+parso==0.8.4
+pillow==11.1.0
+platformdirs==4.3.6
+prometheus_client==0.21.1
+prompt_toolkit==3.0.50
+Protego==0.4.0
+protobuf==5.29.3
+psutil==7.0.0
+pure_eval==0.2.3
+pyarrow==19.0.1
+pyasn1==0.6.1
+pyasn1_modules==0.4.1
+pycparser==2.22
+pydeck==0.9.1
+PyDispatcher==2.0.7
+Pygments==2.19.1
+pyOpenSSL==25.0.0
+pyparsing==3.2.1
+python-dateutil==2.9.0.post0
+python-dotenv==1.0.1
+python-json-logger==3.2.1
+pytz==2025.1
+pywin32==308; sys_platform == "win32"
+pywinpty==2.0.15; sys_platform == "win32"
+PyYAML==6.0.2
+pyzmq==26.2.1
+queuelib==1.7.0
+referencing==0.36.2
+regex==2024.11.6
+requests==2.32.3
+requests-file==2.1.0
+rfc3339-validator==0.1.4
+rfc3986-validator==0.1.1
+rpds-py==0.23.1
+safetensors==0.5.3
+Scrapy==2.12.0
+seaborn==0.13.2
+Send2Trash==1.8.3
+service-identity==24.2.0
+six==1.17.0
+smmap==5.0.2
+sniffio==1.3.1
+soupsieve==2.6
+stack-data==0.6.3
+streamlit==1.43.2
+sympy==1.13.1
+tenacity==9.0.0
+terminado==0.18.1
+tinycss2==1.4.0
+tldextract==5.1.3
+tokenizers==0.21.1
+toml==0.10.2
+tomli==2.2.1
+torch==2.6.0
+tornado==6.4.2
+tqdm==4.67.1
+traitlets==5.14.3
+transformers==4.49.0
+Twisted==24.11.0
+types-python-dateutil==2.9.0.20241206
+typing_extensions==4.12.2
+tzdata==2025.1
+uri-template==1.3.0
+urllib3==2.3.0
+w3lib==2.3.1
+watchdog==6.0.0
+wcwidth==0.2.13
+webcolors==24.11.1
+webencodings==0.5.1
+websocket-client==1.8.0
+widgetsnbextension==4.0.13
+zope.interface==7.2