Joshua1808 committed
Commit cadaacb · Parent: 453fff7

Update app.py

Files changed (1): app.py (+12 -42)
app.py CHANGED
@@ -98,53 +98,23 @@ with colT2:
 
 def analizar_tweets(search_words, number_of_tweets ):
     tweets = api.user_timeline(screen_name = search_words, count= number_of_tweets)
+    #for tweet in tweets:
+    #    tweet_list = tweet.text
+
     tweet_list = [i.text for i in tweets]
     text= pd.DataFrame(tweet_list)
     text[0] = text[0].apply(preprocess_tweet)
     text1=text[0].values
-    indices1=tokenizer.batch_encode_plus(text1.tolist(), max_length=128,add_special_tokens=True, return_attention_mask=True,pad_to_max_length=True,truncation=True)
-    input_ids1=indices1["input_ids"]
-    attention_masks1=indices1["attention_mask"]
-    prediction_inputs1= torch.tensor(input_ids1)
-    prediction_masks1 = torch.tensor(attention_masks1)
-    batch_size = 25
-    # Create the DataLoader.
-    prediction_data1 = TensorDataset(prediction_inputs1, prediction_masks1)
-    prediction_sampler1 = SequentialSampler(prediction_data1)
-    prediction_dataloader1 = DataLoader(prediction_data1, sampler=prediction_sampler1, batch_size=batch_size)
-    #print('Predicting labels for {:,} test sentences...'.format(len(prediction_inputs1)))
-    # Put model in evaluation mode
-    model.eval()
-    # Tracking variables
-    predictions = []
-    for batch in prediction_dataloader1:
-        batch = tuple(t.to(device) for t in batch)
-        # Unpack the inputs from our dataloader
-        b_input_ids1, b_input_mask1 = batch
-
-        # Telling the model not to compute or store gradients, saving memory and speeding up prediction
-        with torch.no_grad():
-            # Forward pass, calculate logit predictions
-            outputs1 = model(b_input_ids1, token_type_ids=None,attention_mask=b_input_mask1)
-        logits1 = outputs1[0]
-        # Move logits and labels to CPU
-        logits1 = logits1.detach().cpu().numpy()
-        # Store predictions and true labels
-        predictions.append(logits1)
-
-    #flat_predictions = [item for sublist in predictions for item in sublist]
-    flat_predictions = [item for sublist in predictions for item in sublist]
-
-    flat_predictions = np.argmax(flat_predictions, axis=1).flatten()
-
-    probability = np.amax(logits1,axis=1).flatten()
+    predictions = pipeline_nlp(text1)
+    data = [{'Texto': text1, 'Prediccion': prediction['label'], 'Probabilidad': prediction['score']} for prediction in predictions]
+    # create a DataFrame from the list of dictionaries
+    df = pd.DataFrame(data)
+
     Tweets =['Últimos '+ str(number_of_tweets)+' Tweets'+' de '+search_words]
     df = pd.DataFrame(list(zip(text1, flat_predictions,probability)), columns = ['Tweets' , 'Prediccion','Probabilidad'])
 
-    df['Prediccion']= np.where(df['Prediccion']== 0, 'No Sexista', 'Sexista')
-    df['Tweets'] = df['Tweets'].str.replace('RT|@', '')
-    #df['Tweets'] = df['Tweets'].apply(lambda x: re.sub(r'[:;][-o^]?[)\]DpP3]|[(/\\]|[\U0001f600-\U0001f64f]|[\U0001f300-\U0001f5ff]|[\U0001f680-\U0001f6ff]|[\U0001f1e0-\U0001f1ff]','', x))
-
+    df['Prediccion'] = np.where( df['Prediccion'] == 'LABEL_1', 'Sexista', 'No Sexista')
+
     tabla = st.table(df.reset_index(drop=True).head(30).style.applymap(color_survived, subset=['Prediccion']))
 
     return tabla
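The lines removed above follow the usual manual batched-inference pattern: tokenize, wrap the tensors in a DataLoader, run the model under torch.no_grad(), then take an argmax over the logits. Below is a minimal, self-contained sketch of that pattern, assuming a placeholder checkpoint and placeholder inputs rather than the tokenizer, model, and tweets app.py actually uses; unlike the removed code, which took np.amax over raw logits, the sketch concatenates the logits of every batch and applies a softmax so the confidence is a probability.

import numpy as np
import torch
from torch.utils.data import DataLoader, SequentialSampler, TensorDataset
from transformers import AutoModelForSequenceClassification, AutoTokenizer

checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"  # placeholder, not the app's model
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()  # evaluation mode: disables dropout

texts = ["first example sentence", "second example sentence"]  # placeholder inputs

# Tokenize the whole batch; padding="max_length" is the current spelling of the
# deprecated pad_to_max_length=True used in the removed code.
encoded = tokenizer(texts, max_length=128, padding="max_length", truncation=True, return_tensors="pt")

dataset = TensorDataset(encoded["input_ids"], encoded["attention_mask"])
loader = DataLoader(dataset, sampler=SequentialSampler(dataset), batch_size=25)

all_logits = []
for input_ids, attention_mask in loader:
    with torch.no_grad():  # no gradients needed for prediction
        outputs = model(input_ids.to(device), attention_mask=attention_mask.to(device))
    all_logits.append(outputs.logits.detach().cpu().numpy())

logits = np.concatenate(all_logits, axis=0)
labels = np.argmax(logits, axis=1)  # predicted class index per text
probs = torch.softmax(torch.from_numpy(logits), dim=1).numpy()
confidence = probs.max(axis=1)  # probability of the predicted class
print(list(zip(texts, labels.tolist(), confidence.tolist())))

The commit replaces this whole block with a single call to pipeline_nlp, which handles tokenization and score normalization internally.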
@@ -155,14 +125,14 @@ def analizar_frase(frase):
     predictions = pipeline_nlp(frase)
 
     # convert the predictions into a list of dictionaries
-    data = [{'text': frase, 'label': prediction['label'], 'score': prediction['score']} for prediction in predictions]
+    data = [{'Texto': frase, 'Prediccion': prediction['label'], 'Probabilidad': prediction['score']} for prediction in predictions]
 
     # create a DataFrame from the list of dictionaries
     df = pd.DataFrame(data)
-    df['label'] = np.where( df['label'] == 'LABEL_1', 'Sexista', 'No Sexista')
+    df['Prediccion'] = np.where( df['Prediccion'] == 'LABEL_1', 'Sexista', 'No Sexista')
     # display the DataFrame
     #st.table(df.reset_index(drop=True).head(30).style.applymap(color_survived, subset=['Prediccion']))
-    tabla = st.table(df)
+    tabla = st.table(df.reset_index(drop=True).head(30).style.applymap(color_survived, subset=['Prediccion']))
 
     return tabla
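Both functions now rely on pipeline_nlp, which app.py is assumed to build elsewhere with transformers.pipeline for text classification. The sketch below, using a placeholder checkpoint and example sentences, shows the shape of the pipeline's output (one {'label': ..., 'score': ...} dict per input) and how it maps onto the DataFrame columns and the 'LABEL_1' → 'Sexista' renaming used above.

import numpy as np
import pandas as pd
from transformers import pipeline

# Placeholder checkpoint; app.py constructs its own pipeline_nlp elsewhere.
pipeline_nlp = pipeline("text-classification", model="distilbert-base-uncased-finetuned-sst-2-english")

frases = ["first example sentence", "second example sentence"]  # placeholder inputs

# One {'label': ..., 'score': ...} dict is returned per input string.
predictions = pipeline_nlp(frases)

# Pair each text with its own prediction when classifying several texts at once.
data = [{"Texto": texto, "Prediccion": pred["label"], "Probabilidad": pred["score"]}
        for texto, pred in zip(frases, predictions)]
df = pd.DataFrame(data)

# app.py renames the raw model label: 'LABEL_1' becomes 'Sexista', anything else 'No Sexista'.
df["Prediccion"] = np.where(df["Prediccion"] == "LABEL_1", "Sexista", "No Sexista")
print(df)

A text-classification pipeline accepts a string or a plain Python list of strings, so a NumPy array such as text1 is usually converted with tolist() first, as the removed batch_encode_plus call did.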
 
 
138