hylee committed on
Commit
8f23661
·
1 Parent(s): 67d1b71

remove stopwords for word cloud

Browse files
Files changed (2) hide show
  1. handler.py +4 -1
  2. requirements.txt +2 -1
handler.py CHANGED
@@ -2,6 +2,7 @@ from typing import Dict, List, Any
2
  from scipy.special import softmax
3
  import numpy as np
4
  import weakref
 
5
 
6
  from utils import clean_str, clean_str_nopunct
7
  import torch
@@ -135,10 +136,12 @@ class Transcript:
135
  teacher_dict = {}
136
  student_dict = {}
137
  uptake_teacher_dict = {}
 
 
138
  for utt in self.utterances:
139
-
140
  words = (utt.get_clean_text(remove_punct=True)).split(' ')
141
  for word in words:
 
142
  if utt.role == 'teacher':
143
  if word not in teacher_dict:
144
  teacher_dict[word] = 0
 
2
  from scipy.special import softmax
3
  import numpy as np
4
  import weakref
5
+ import nltk
6
 
7
  from utils import clean_str, clean_str_nopunct
8
  import torch
 
136
  teacher_dict = {}
137
  student_dict = {}
138
  uptake_teacher_dict = {}
139
+ stopwords = nltk.corpus.stopwords.word('english')
140
+ print("stopwords: ", stopwords)
141
  for utt in self.utterances:
 
142
  words = (utt.get_clean_text(remove_punct=True)).split(' ')
143
  for word in words:
144
+ if word in stopwords: continue
145
  if utt.role == 'teacher':
146
  if word not in teacher_dict:
147
  teacher_dict[word] = 0
requirements.txt CHANGED
@@ -3,4 +3,5 @@ num2words==0.5.10
3
  numpy==1.22.4
4
  scipy==1.7.3
5
  torch==1.10.2
6
- transformers==4.29.1
 
 
3
  numpy==1.22.4
4
  scipy==1.7.3
5
  torch==1.10.2
6
+ transformers==4.29.1
7
+ nltk==3.8.1