ivanpalenciab commited on
Commit
97f77d9
·
verified ·
1 Parent(s): deb0851

Upload 4 files

Browse files
Files changed (4) hide show
  1. Models/BERTo_model_parameters.pth +3 -0
  2. app.py +46 -0
  3. model.py +89 -0
  4. requirements.txt +163 -0
Models/BERTo_model_parameters.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:336d381a3c17b26f8c0b1648fe1a33a35f0c99e213c4b09e4ac7cb2356a88111
3
+ size 439497507
app.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import matplotlib.pyplot as plt
4
+ import seaborn as sns
5
+
6
+ from model import classifySentiment,groupClassifier
7
+
8
+
9
# Streamlit front end: classify a single comment, or every comment in an
# uploaded CSV file, and show the result to the user.
st.title("🔍 Análisis de Sentimiento para Empresas")

option = st.sidebar.selectbox(
    "Elige una opción",
    ["Analizar un comentario", "Subir archivo CSV"],
)

if option == "Analizar un comentario":
    user_input = st.text_area("Escribe un comentario:")
    if st.button("Analizar"):
        if not user_input.strip():
            # Nothing to classify: ask for input instead of scoring an
            # empty string.
            st.warning("Escribe un comentario antes de analizar.")
        else:
            # classifySentiment returns (original_text, label); only the
            # label is displayed.
            _, sentiment = classifySentiment(user_input)
            st.write(f"📊 Sentimiento: {sentiment}")

elif option == "Subir archivo CSV":
    file = st.file_uploader(
        "Sube un archivo con comentarios en formato csv", type=["csv"]
    )
    if file:
        try:
            # sep=None with the python engine lets pandas detect the
            # delimiter of the uploaded file.
            df = pd.read_csv(file, sep=None, engine="python")
            if "text" not in df.columns:
                # groupClassifier reads df["text"]; fail with a readable
                # message instead of an uncaught KeyError.
                raise ValueError(
                    'el archivo CSV debe contener una columna llamada "text"'
                )

            clasification = groupClassifier(df)
            clasified_data = pd.DataFrame(clasification)

            # Draw on an explicit figure rather than the global pyplot
            # state, and always close it so figures do not pile up across
            # Streamlit reruns.
            fig, ax = plt.subplots(figsize=(6, 4))
            try:
                sns.countplot(
                    x=clasified_data["label"],
                    hue=clasified_data["label"],
                    palette="pastel",
                    legend=False,
                    ax=ax,
                )

                # Axis labels and title
                ax.set_xlabel("Sentiment category")
                ax.set_ylabel("number of texts")
                ax.set_title("Data distribution")

                st.pyplot(fig)
            finally:
                plt.close(fig)
        except ValueError as e:
            # Show the problem in the page; print() would only reach the
            # server console.
            st.error(f"Error: {e}")

        # TODO: show summary statistics and a word cloud of the uploaded
        # comments.
model.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from torch.utils.data import Dataset, DataLoader
3
+ from torch import nn
4
+ from transformers import BertModel, BertTokenizer
5
+
6
+
7
# Constants for the sentiment model.
RANDOM_SEED = 30
MAX_LEN = 200      # sequence length passed to the tokenizer as max_length
BATCH_SIZE = 16
NCLASSES = 3       # number of output classes of the classifier

# Device selection: first CUDA device when available, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Tokenization: tokenizer of the pre-trained Spanish BERT checkpoint.
PRE_TRAINED_MODEL_NAME = 'dccuchile/bert-base-spanish-wwm-cased'
tokenizer = BertTokenizer.from_pretrained(PRE_TRAINED_MODEL_NAME)
18
+
19
+ #Model Class
20
class BERTSentimentClassifier(nn.Module):
    """BERT encoder followed by dropout and a linear classification layer."""

    def __init__(self, n_classes):
        """Build the network.

        Args:
            n_classes: number of output units of the final linear layer.
        """
        super().__init__()
        # Attribute names are part of the saved state dict; keep them as is.
        self.bert = BertModel.from_pretrained(PRE_TRAINED_MODEL_NAME)
        self.drop = nn.Dropout(p=0.3)
        self.linear = nn.Linear(self.bert.config.hidden_size, n_classes)

    def forward(self, input_ids, attention_mask):
        """Return the linear layer's output for the given encoded input."""
        encoded = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        # The pooled output of BERT goes through dropout, then the classifier.
        pooled = self.drop(encoded.pooler_output)
        return self.linear(pooled)
34
+
35
+
36
# Build the classifier and restore its saved parameters. The checkpoint is
# deserialized onto the CPU first, then the model is moved to the selected
# device and switched to evaluation mode.
model = BERTSentimentClassifier(NCLASSES)
model.load_state_dict(
    torch.load(
        "Models/BERTo_model_parameters.pth",
        map_location=torch.device('cpu'),
    )
)
model = model.to(device)
model.eval()
40
+
41
def classifySentiment(review_text):
    """Predict the sentiment label of a single text.

    Args:
        review_text: the text to classify.

    Returns:
        A tuple ``(review_text, label)`` where ``label`` is "Positivo",
        "Neutral" or "Negativo".
    """
    # Maps the predicted class index to its sentiment category.
    labels_by_class = {0: "Positivo", 1: "Neutral", 2: "Negativo"}

    # Tokenize to a fixed length of MAX_LEN (truncating or padding).
    encoded = tokenizer.encode_plus(
        review_text,
        add_special_tokens=True,
        max_length=MAX_LEN,
        truncation=True,
        padding="max_length",
        return_attention_mask=True,
        return_token_type_ids=False,
        return_tensors='pt',
    )
    ids = encoded['input_ids'].to(device)
    mask = encoded['attention_mask'].to(device)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        scores = model(ids, mask)
        # Index of the highest-scoring class.
        predicted_class = torch.argmax(scores, dim=1).item()

    return review_text, labels_by_class[predicted_class]
63
+
64
def groupClassifier(df):
    """Classify every comment in the ``text`` column of a DataFrame.

    Each text is classified individually with ``classifySentiment``.
    Missing cells (NaN) are skipped and remaining non-string cells are
    converted to ``str`` before classification.

    Args:
        df: pandas DataFrame holding the comments in a column named
            ``text``.

    Returns:
        A dict with two parallel lists: ``"text"`` (the classified texts)
        and ``"label"`` (the label returned for each text).

    Raises:
        ValueError: if ``df`` has no ``text`` column.
    """
    if "text" not in df.columns:
        raise ValueError('The DataFrame must contain a "text" column.')

    # Texts are classified one by one, so a Dataset/DataLoader wrapper adds
    # nothing; iterate over the cleaned column directly.
    texts = df["text"].dropna().astype(str).tolist()

    clasification = {"text": [], "label": []}
    for text in texts:
        original_text, label = classifySentiment(text)
        clasification["text"].append(original_text)
        clasification["label"].append(label)
    return clasification
requirements.txt ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ altair==5.5.0
2
+ anyio==4.8.0
3
+ argon2-cffi==23.1.0
4
+ argon2-cffi-bindings==21.2.0
5
+ arrow==1.3.0
6
+ asttokens==3.0.0
7
+ async-lru==2.0.4
8
+ attrs==25.1.0
9
+ Automat==24.8.1
10
+ babel==2.17.0
11
+ beautifulsoup4==4.13.3
12
+ bleach==6.2.0
13
+ blinker==1.9.0
14
+ cachetools==5.5.2
15
+ certifi==2025.1.31
16
+ cffi==1.17.1
17
+ charset-normalizer==3.4.1
18
+ click==8.1.8
19
+ colorama==0.4.6
20
+ comm==0.2.2
21
+ constantly==23.10.4
22
+ contourpy==1.3.1
23
+ cryptography==44.0.2
24
+ cssselect==1.3.0
25
+ cycler==0.12.1
26
+ debugpy==1.8.12
27
+ decorator==5.2.1
28
+ deepl==1.21.1
29
+ defusedxml==0.7.1
30
+ exceptiongroup==1.2.2
31
+ executing==2.2.0
32
+ fastjsonschema==2.21.1
33
+ filelock==3.18.0
34
+ fonttools==4.56.0
35
+ fqdn==1.5.1
36
+ fsspec==2025.3.0
37
+ gitdb==4.0.12
38
+ GitPython==3.1.44
39
+ h11==0.14.0
40
+ httpcore==1.0.7
41
+ httpx==0.28.1
42
+ huggingface-hub==0.29.3
43
+ hyperlink==21.0.0
44
+ idna==3.10
45
+ incremental==24.7.2
46
+ ipykernel==6.29.5
47
+ ipython==8.32.0
48
+ ipywidgets==8.1.5
49
+ isoduration==20.11.0
50
+ itemadapter==0.11.0
51
+ itemloaders==1.3.2
52
+ jedi==0.19.2
53
+ Jinja2==3.1.5
54
+ jmespath==1.0.1
55
+ json5==0.10.0
56
+ jsonpointer==3.0.0
57
+ jsonschema==4.23.0
58
+ jsonschema-specifications==2024.10.1
59
+ jupyter==1.1.1
60
+ jupyter-console==6.6.3
61
+ jupyter-events==0.12.0
62
+ jupyter-lsp==2.2.5
63
+ jupyter_client==8.6.3
64
+ jupyter_core==5.7.2
65
+ jupyter_server==2.15.0
66
+ jupyter_server_terminals==0.5.3
67
+ jupyterlab==4.3.5
68
+ jupyterlab_pygments==0.3.0
69
+ jupyterlab_server==2.27.3
70
+ jupyterlab_widgets==3.0.13
71
+ kiwisolver==1.4.8
72
+ lxml==5.3.1
73
+ MarkupSafe==3.0.2
74
+ matplotlib==3.10.1
75
+ matplotlib-inline==0.1.7
76
+ mistune==3.1.2
77
+ mpmath==1.3.0
78
+ narwhals==1.31.0
79
+ nbclient==0.10.2
80
+ nbconvert==7.16.6
81
+ nbformat==5.10.4
82
+ nest-asyncio==1.6.0
83
+ networkx==3.4.2
84
+ notebook==7.3.2
85
+ notebook_shim==0.2.4
86
+ numpy==2.2.3
87
+ overrides==7.7.0
88
+ packaging==24.2
89
+ pandas==2.2.3
90
+ pandocfilters==1.5.1
91
+ parsel==1.10.0
92
+ parso==0.8.4
93
+ pillow==11.1.0
94
+ platformdirs==4.3.6
95
+ prometheus_client==0.21.1
96
+ prompt_toolkit==3.0.50
97
+ Protego==0.4.0
98
+ protobuf==5.29.3
99
+ psutil==7.0.0
100
+ pure_eval==0.2.3
101
+ pyarrow==19.0.1
102
+ pyasn1==0.6.1
103
+ pyasn1_modules==0.4.1
104
+ pycparser==2.22
105
+ pydeck==0.9.1
106
+ PyDispatcher==2.0.7
107
+ Pygments==2.19.1
108
+ pyOpenSSL==25.0.0
109
+ pyparsing==3.2.1
110
+ python-dateutil==2.9.0.post0
111
+ python-dotenv==1.0.1
112
+ python-json-logger==3.2.1
113
+ pytz==2025.1
114
+ pywin32==308; sys_platform == "win32"
115
+ pywinpty==2.0.15; sys_platform == "win32"
116
+ PyYAML==6.0.2
117
+ pyzmq==26.2.1
118
+ queuelib==1.7.0
119
+ referencing==0.36.2
120
+ regex==2024.11.6
121
+ requests==2.32.3
122
+ requests-file==2.1.0
123
+ rfc3339-validator==0.1.4
124
+ rfc3986-validator==0.1.1
125
+ rpds-py==0.23.1
126
+ safetensors==0.5.3
127
+ Scrapy==2.12.0
128
+ seaborn==0.13.2
129
+ Send2Trash==1.8.3
130
+ service-identity==24.2.0
131
+ six==1.17.0
132
+ smmap==5.0.2
133
+ sniffio==1.3.1
134
+ soupsieve==2.6
135
+ stack-data==0.6.3
136
+ streamlit==1.43.2
137
+ sympy==1.13.1
138
+ tenacity==9.0.0
139
+ terminado==0.18.1
140
+ tinycss2==1.4.0
141
+ tldextract==5.1.3
142
+ tokenizers==0.21.1
143
+ toml==0.10.2
144
+ tomli==2.2.1
145
+ torch==2.6.0
146
+ tornado==6.4.2
147
+ tqdm==4.67.1
148
+ traitlets==5.14.3
149
+ transformers==4.49.0
150
+ Twisted==24.11.0
151
+ types-python-dateutil==2.9.0.20241206
152
+ typing_extensions==4.12.2
153
+ tzdata==2025.1
154
+ uri-template==1.3.0
155
+ urllib3==2.3.0
156
+ w3lib==2.3.1
157
+ watchdog==6.0.0
158
+ wcwidth==0.2.13
159
+ webcolors==24.11.1
160
+ webencodings==0.5.1
161
+ websocket-client==1.8.0
162
+ widgetsnbextension==4.0.13
163
+ zope.interface==7.2