Upload 4 files
Browse files- Models/BERTo_model_parameters.pth +3 -0
- app.py +46 -0
- model.py +89 -0
- requirements.txt +163 -0
Models/BERTo_model_parameters.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:336d381a3c17b26f8c0b1648fe1a33a35f0c99e213c4b09e4ac7cb2356a88111
|
3 |
+
size 439497507
|
app.py
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import matplotlib.pyplot as plt
|
4 |
+
import seaborn as sns
|
5 |
+
|
6 |
+
from model import classifySentiment,groupClassifier
|
7 |
+
|
8 |
+
|
9 |
+
# Streamlit front end: classify a single comment, or classify every row of an
# uploaded CSV and plot how many comments fall into each sentiment label.
st.title("🔍 Análisis de Sentimiento para Empresas")

option = st.sidebar.selectbox("Elige una opción", ["Analizar un comentario", "Subir archivo CSV"])

if option == "Analizar un comentario":
    user_input = st.text_area("Escribe un comentario:")
    if st.button("Analizar"):
        if user_input.strip():
            # classifySentiment returns (text, label); only the label is shown.
            _, sentiment = classifySentiment(user_input)
            st.write(f"📊 Sentimiento: {sentiment}")
        else:
            # Do not run the model on blank input.
            st.warning("Escribe un comentario antes de analizar.")

elif option == "Subir archivo CSV":
    file = st.file_uploader("Sube un archivo con comentarios en formato csv", type=["csv"])
    if file:
        try:
            # sep=None with the python engine lets pandas sniff the delimiter.
            df = pd.read_csv(file, sep=None, engine="python")

            # groupClassifier reads df["text"]; fail with a readable message
            # instead of an uncaught KeyError when the column is absent.
            if "text" not in df.columns:
                raise ValueError('el archivo CSV debe incluir una columna llamada "text"')

            clasified_data = pd.DataFrame(groupClassifier(df))

            # Draw on an explicit figure rather than the global pyplot state,
            # and always close it so figures do not pile up across reruns.
            fig, ax = plt.subplots(figsize=(6, 4))
            try:
                sns.countplot(
                    x=clasified_data["label"],
                    hue=clasified_data["label"],
                    palette="pastel",
                    legend=False,
                    ax=ax,
                )

                # Axis labels and title
                ax.set_xlabel("Sentiment category")
                ax.set_ylabel("number of texts")
                ax.set_title("Data distribution")

                st.pyplot(fig)
            finally:
                plt.close(fig)
        except ValueError as e:
            # Show the problem in the page; print() only reaches the server log.
            st.error(f"Error: {e}")

# TODO: show per-file statistics and a word cloud of the uploaded comments.
|
model.py
ADDED
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
from torch.utils.data import Dataset, DataLoader
|
3 |
+
from torch import nn
|
4 |
+
from transformers import BertModel, BertTokenizer
|
5 |
+
|
6 |
+
|
7 |
+
# Module-level configuration.
# RANDOM_SEED and BATCH_SIZE are not referenced by the code visible in this
# module; they are kept because other code may import them.
RANDOM_SEED = 30
MAX_LEN = 200  # max_length handed to the tokenizer in classifySentiment
BATCH_SIZE = 16
NCLASSES = 3  # size of the classifier's output layer

# Device selection: first CUDA device when available, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Tokenization: the tokenizer is loaded from the same pretrained checkpoint
# name that the classifier uses for its BERT encoder.
PRE_TRAINED_MODEL_NAME = 'dccuchile/bert-base-spanish-wwm-cased'
tokenizer = BertTokenizer.from_pretrained(PRE_TRAINED_MODEL_NAME)
|
18 |
+
|
19 |
+
#Model Class
|
20 |
+
class BERTSentimentClassifier(nn.Module):
    """BERT encoder followed by dropout and a linear classification layer.

    The encoder is loaded from ``PRE_TRAINED_MODEL_NAME``. The attribute names
    ``bert``, ``drop`` and ``linear`` are part of the saved state dict keys and
    must not be renamed.
    """

    def __init__(self, n_classes):
        """Build the encoder and a linear head with ``n_classes`` outputs."""
        super().__init__()
        encoder = BertModel.from_pretrained(PRE_TRAINED_MODEL_NAME)
        self.bert = encoder
        self.drop = nn.Dropout(p=0.3)
        self.linear = nn.Linear(encoder.config.hidden_size, n_classes)

    def forward(self, input_ids, attention_mask):
        """Return the linear layer's output for the given token batch.

        The encoder's ``pooler_output`` is passed through dropout and then the
        linear layer; no softmax is applied here.
        """
        pooled = self.bert(
            input_ids=input_ids,
            attention_mask=attention_mask,
        ).pooler_output
        return self.linear(self.drop(pooled))
|
34 |
+
|
35 |
+
|
36 |
+
# Build the classifier and restore its saved parameters. The checkpoint is
# deserialized onto the CPU first; the module is then moved to the selected
# device and switched to evaluation mode (both calls return the module itself).
model = BERTSentimentClassifier(NCLASSES)
model.load_state_dict(
    torch.load(
        "Models/BERTo_model_parameters.pth",
        map_location=torch.device('cpu'),
    )
)
model.to(device).eval()
|
40 |
+
|
41 |
+
def classifySentiment(review_text):
    """Classify one text and return it together with its sentiment label.

    Args:
        review_text: the text to classify.

    Returns:
        A ``(review_text, label)`` tuple, where ``label`` is one of
        "Positivo", "Neutral" or "Negativo".
    """
    # Class index -> label returned to the caller.
    # NOTE(review): presumably matches the label encoding used at training
    # time; confirm against the training code.
    label_by_class = {0: "Positivo", 1: "Neutral", 2: "Negativo"}

    # Truncate/pad to exactly MAX_LEN tokens and return PyTorch tensors.
    tokenizer_options = dict(
        add_special_tokens=True,
        max_length=MAX_LEN,
        truncation=True,
        padding="max_length",
        return_attention_mask=True,
        return_token_type_ids=False,
        return_tensors='pt',
    )
    encoded = tokenizer.encode_plus(review_text, **tokenizer_options)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = model(
            encoded['input_ids'].to(device),
            encoded['attention_mask'].to(device),
        )

    # Index of the highest-scoring class for the single input row.
    predicted_class = torch.argmax(logits, dim=1).item()
    return review_text, label_by_class[predicted_class]
|
63 |
+
|
64 |
+
def groupClassifier(df):
    """Classify every comment in the ``text`` column of a pandas DataFrame.

    Each text is passed to ``classifySentiment`` one at a time, in row order.
    Rows whose ``text`` cell is missing (NaN/None) are skipped, and remaining
    values are converted to ``str`` so non-string cells do not break
    tokenization.

    Args:
        df: pandas DataFrame with a ``text`` column holding the comments.

    Returns:
        A dict with two parallel lists: ``"text"`` (the classified texts) and
        ``"label"`` (the sentiment label assigned to each one).

    Raises:
        KeyError: if ``df`` has no ``text`` column.
    """
    # The previous Dataset/DataLoader wrapper only re-grouped the strings and
    # then iterated them one by one, and its default collate step fails on
    # NaN or numeric cells; a plain list gives the same order without that.
    texts = df["text"].dropna().astype(str).tolist()

    clasification = {"text": [], "label": []}
    for text in texts:
        original_text, label = classifySentiment(text)
        clasification["text"].append(original_text)
        clasification["label"].append(label)
    return clasification
|
requirements.txt
ADDED
@@ -0,0 +1,163 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
altair==5.5.0
|
2 |
+
anyio==4.8.0
|
3 |
+
argon2-cffi==23.1.0
|
4 |
+
argon2-cffi-bindings==21.2.0
|
5 |
+
arrow==1.3.0
|
6 |
+
asttokens==3.0.0
|
7 |
+
async-lru==2.0.4
|
8 |
+
attrs==25.1.0
|
9 |
+
Automat==24.8.1
|
10 |
+
babel==2.17.0
|
11 |
+
beautifulsoup4==4.13.3
|
12 |
+
bleach==6.2.0
|
13 |
+
blinker==1.9.0
|
14 |
+
cachetools==5.5.2
|
15 |
+
certifi==2025.1.31
|
16 |
+
cffi==1.17.1
|
17 |
+
charset-normalizer==3.4.1
|
18 |
+
click==8.1.8
|
19 |
+
colorama==0.4.6
|
20 |
+
comm==0.2.2
|
21 |
+
constantly==23.10.4
|
22 |
+
contourpy==1.3.1
|
23 |
+
cryptography==44.0.2
|
24 |
+
cssselect==1.3.0
|
25 |
+
cycler==0.12.1
|
26 |
+
debugpy==1.8.12
|
27 |
+
decorator==5.2.1
|
28 |
+
deepl==1.21.1
|
29 |
+
defusedxml==0.7.1
|
30 |
+
exceptiongroup==1.2.2
|
31 |
+
executing==2.2.0
|
32 |
+
fastjsonschema==2.21.1
|
33 |
+
filelock==3.18.0
|
34 |
+
fonttools==4.56.0
|
35 |
+
fqdn==1.5.1
|
36 |
+
fsspec==2025.3.0
|
37 |
+
gitdb==4.0.12
|
38 |
+
GitPython==3.1.44
|
39 |
+
h11==0.14.0
|
40 |
+
httpcore==1.0.7
|
41 |
+
httpx==0.28.1
|
42 |
+
huggingface-hub==0.29.3
|
43 |
+
hyperlink==21.0.0
|
44 |
+
idna==3.10
|
45 |
+
incremental==24.7.2
|
46 |
+
ipykernel==6.29.5
|
47 |
+
ipython==8.32.0
|
48 |
+
ipywidgets==8.1.5
|
49 |
+
isoduration==20.11.0
|
50 |
+
itemadapter==0.11.0
|
51 |
+
itemloaders==1.3.2
|
52 |
+
jedi==0.19.2
|
53 |
+
Jinja2==3.1.5
|
54 |
+
jmespath==1.0.1
|
55 |
+
json5==0.10.0
|
56 |
+
jsonpointer==3.0.0
|
57 |
+
jsonschema==4.23.0
|
58 |
+
jsonschema-specifications==2024.10.1
|
59 |
+
jupyter==1.1.1
|
60 |
+
jupyter-console==6.6.3
|
61 |
+
jupyter-events==0.12.0
|
62 |
+
jupyter-lsp==2.2.5
|
63 |
+
jupyter_client==8.6.3
|
64 |
+
jupyter_core==5.7.2
|
65 |
+
jupyter_server==2.15.0
|
66 |
+
jupyter_server_terminals==0.5.3
|
67 |
+
jupyterlab==4.3.5
|
68 |
+
jupyterlab_pygments==0.3.0
|
69 |
+
jupyterlab_server==2.27.3
|
70 |
+
jupyterlab_widgets==3.0.13
|
71 |
+
kiwisolver==1.4.8
|
72 |
+
lxml==5.3.1
|
73 |
+
MarkupSafe==3.0.2
|
74 |
+
matplotlib==3.10.1
|
75 |
+
matplotlib-inline==0.1.7
|
76 |
+
mistune==3.1.2
|
77 |
+
mpmath==1.3.0
|
78 |
+
narwhals==1.31.0
|
79 |
+
nbclient==0.10.2
|
80 |
+
nbconvert==7.16.6
|
81 |
+
nbformat==5.10.4
|
82 |
+
nest-asyncio==1.6.0
|
83 |
+
networkx==3.4.2
|
84 |
+
notebook==7.3.2
|
85 |
+
notebook_shim==0.2.4
|
86 |
+
numpy==2.2.3
|
87 |
+
overrides==7.7.0
|
88 |
+
packaging==24.2
|
89 |
+
pandas==2.2.3
|
90 |
+
pandocfilters==1.5.1
|
91 |
+
parsel==1.10.0
|
92 |
+
parso==0.8.4
|
93 |
+
pillow==11.1.0
|
94 |
+
platformdirs==4.3.6
|
95 |
+
prometheus_client==0.21.1
|
96 |
+
prompt_toolkit==3.0.50
|
97 |
+
Protego==0.4.0
|
98 |
+
protobuf==5.29.3
|
99 |
+
psutil==7.0.0
|
100 |
+
pure_eval==0.2.3
|
101 |
+
pyarrow==19.0.1
|
102 |
+
pyasn1==0.6.1
|
103 |
+
pyasn1_modules==0.4.1
|
104 |
+
pycparser==2.22
|
105 |
+
pydeck==0.9.1
|
106 |
+
PyDispatcher==2.0.7
|
107 |
+
Pygments==2.19.1
|
108 |
+
pyOpenSSL==25.0.0
|
109 |
+
pyparsing==3.2.1
|
110 |
+
python-dateutil==2.9.0.post0
|
111 |
+
python-dotenv==1.0.1
|
112 |
+
python-json-logger==3.2.1
|
113 |
+
pytz==2025.1
|
114 |
+
pywin32==308
|
115 |
+
pywinpty==2.0.15
|
116 |
+
PyYAML==6.0.2
|
117 |
+
pyzmq==26.2.1
|
118 |
+
queuelib==1.7.0
|
119 |
+
referencing==0.36.2
|
120 |
+
regex==2024.11.6
|
121 |
+
requests==2.32.3
|
122 |
+
requests-file==2.1.0
|
123 |
+
rfc3339-validator==0.1.4
|
124 |
+
rfc3986-validator==0.1.1
|
125 |
+
rpds-py==0.23.1
|
126 |
+
safetensors==0.5.3
|
127 |
+
Scrapy==2.12.0
|
128 |
+
seaborn==0.13.2
|
129 |
+
Send2Trash==1.8.3
|
130 |
+
service-identity==24.2.0
|
131 |
+
six==1.17.0
|
132 |
+
smmap==5.0.2
|
133 |
+
sniffio==1.3.1
|
134 |
+
soupsieve==2.6
|
135 |
+
stack-data==0.6.3
|
136 |
+
streamlit==1.43.2
|
137 |
+
sympy==1.13.1
|
138 |
+
tenacity==9.0.0
|
139 |
+
terminado==0.18.1
|
140 |
+
tinycss2==1.4.0
|
141 |
+
tldextract==5.1.3
|
142 |
+
tokenizers==0.21.1
|
143 |
+
toml==0.10.2
|
144 |
+
tomli==2.2.1
|
145 |
+
torch==2.6.0
|
146 |
+
tornado==6.4.2
|
147 |
+
tqdm==4.67.1
|
148 |
+
traitlets==5.14.3
|
149 |
+
transformers==4.49.0
|
150 |
+
Twisted==24.11.0
|
151 |
+
types-python-dateutil==2.9.0.20241206
|
152 |
+
typing_extensions==4.12.2
|
153 |
+
tzdata==2025.1
|
154 |
+
uri-template==1.3.0
|
155 |
+
urllib3==2.3.0
|
156 |
+
w3lib==2.3.1
|
157 |
+
watchdog==6.0.0
|
158 |
+
wcwidth==0.2.13
|
159 |
+
webcolors==24.11.1
|
160 |
+
webencodings==0.5.1
|
161 |
+
websocket-client==1.8.0
|
162 |
+
widgetsnbextension==4.0.13
|
163 |
+
zope.interface==7.2
|