Update app.py
Browse files
app.py
CHANGED
@@ -1,12 +1,11 @@
|
|
1 |
import gradio as gr
|
2 |
import nltk
|
3 |
from nltk.tokenize import sent_tokenize, word_tokenize
|
4 |
-
from nltk.corpus import stopwords, wordnet, brown
|
5 |
from nltk.stem import PorterStemmer, WordNetLemmatizer
|
6 |
from nltk import pos_tag, ne_chunk, ngrams
|
7 |
from nltk.collocations import BigramCollocationFinder
|
8 |
from nltk.classify import NaiveBayesClassifier
|
9 |
-
from nltk.corpus import movie_reviews
|
10 |
import random
|
11 |
|
12 |
# Tải các tài nguyên cần thiết
|
@@ -25,10 +24,15 @@ stemmer = PorterStemmer()
|
|
25 |
lemmatizer = WordNetLemmatizer()
|
26 |
stop_words = set(stopwords.words('english'))
|
27 |
|
28 |
-
# Hàm huấn luyện classifier
|
29 |
def train_classifier():
|
30 |
-
|
31 |
-
|
|
|
|
|
|
|
|
|
|
|
32 |
train_set = pos_reviews + neg_reviews
|
33 |
random.shuffle(train_set)
|
34 |
return NaiveBayesClassifier.train(train_set)
|
|
|
1 |
import gradio as gr
|
2 |
import nltk
|
3 |
from nltk.tokenize import sent_tokenize, word_tokenize
|
4 |
+
from nltk.corpus import stopwords, wordnet, brown, movie_reviews
|
5 |
from nltk.stem import PorterStemmer, WordNetLemmatizer
|
6 |
from nltk import pos_tag, ne_chunk, ngrams
|
7 |
from nltk.collocations import BigramCollocationFinder
|
8 |
from nltk.classify import NaiveBayesClassifier
|
|
|
9 |
import random
|
10 |
|
11 |
# Tải các tài nguyên cần thiết
|
|
|
24 |
lemmatizer = WordNetLemmatizer()
|
25 |
stop_words = set(stopwords.words('english'))
|
26 |
|
27 |
+
# Hàm huấn luyện classifier sửa lại
|
28 |
def train_classifier(max_files_per_class=50):
    """Train a Naive Bayes sentiment classifier on the movie_reviews corpus.

    Args:
        max_files_per_class: Upper bound on the number of review files taken
            from each of the 'pos' and 'neg' categories. Defaults to 50, a
            limit chosen to keep training fast; pass None to use every file.

    Returns:
        The classifier produced by NaiveBayesClassifier.train, trained on
        examples labelled 'positive' or 'negative'.
    """
    # Corpus category -> label attached to the training examples. Positive
    # comes first so the list order before shuffling is pos, then neg.
    category_labels = (('pos', 'positive'), ('neg', 'negative'))

    train_set = []
    for category, label in category_labels:
        # Slicing with None keeps the whole list, so None means "no limit".
        for fileid in movie_reviews.fileids(category)[:max_files_per_class]:
            # Presence-only features: every distinct token maps to True.
            features = dict.fromkeys(movie_reviews.words(fileid), True)
            train_set.append((features, label))

    random.shuffle(train_set)
    return NaiveBayesClassifier.train(train_set)
|