hylee
committed on
Commit
·
8f23661
1
Parent(s):
67d1b71
remove stopwords for word cloud
Browse files
- handler.py +4 -1
- requirements.txt +2 -1
handler.py
CHANGED
@@ -2,6 +2,7 @@ from typing import Dict, List, Any
|
|
2 |
from scipy.special import softmax
|
3 |
import numpy as np
|
4 |
import weakref
|
|
|
5 |
|
6 |
from utils import clean_str, clean_str_nopunct
|
7 |
import torch
|
@@ -135,10 +136,12 @@ class Transcript:
|
|
135 |
teacher_dict = {}
|
136 |
student_dict = {}
|
137 |
uptake_teacher_dict = {}
|
|
|
|
|
138 |
for utt in self.utterances:
|
139 |
-
|
140 |
words = (utt.get_clean_text(remove_punct=True)).split(' ')
|
141 |
for word in words:
|
|
|
142 |
if utt.role == 'teacher':
|
143 |
if word not in teacher_dict:
|
144 |
teacher_dict[word] = 0
|
|
|
2 |
from scipy.special import softmax
|
3 |
import numpy as np
|
4 |
import weakref
|
5 |
+
import nltk
|
6 |
|
7 |
from utils import clean_str, clean_str_nopunct
|
8 |
import torch
|
|
|
136 |
teacher_dict = {}
|
137 |
student_dict = {}
|
138 |
uptake_teacher_dict = {}
|
139 |
+
stopwords = nltk.corpus.stopwords.word('english')
|
140 |
+
print("stopwords: ", stopwords)
|
141 |
for utt in self.utterances:
|
|
|
142 |
words = (utt.get_clean_text(remove_punct=True)).split(' ')
|
143 |
for word in words:
|
144 |
+
if word in stopwords: continue
|
145 |
if utt.role == 'teacher':
|
146 |
if word not in teacher_dict:
|
147 |
teacher_dict[word] = 0
|
requirements.txt
CHANGED
@@ -3,4 +3,5 @@ num2words==0.5.10
|
|
3 |
numpy==1.22.4
|
4 |
scipy==1.7.3
|
5 |
torch==1.10.2
|
6 |
-
transformers==4.29.1
|
|
|
|
3 |
numpy==1.22.4
|
4 |
scipy==1.7.3
|
5 |
torch==1.10.2
|
6 |
+
transformers==4.29.1
|
7 |
+
nltk==3.8.1
|