rapacious committed on
Commit
3b6e119
·
verified ·
1 Parent(s): 927ef75

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -5
app.py CHANGED
@@ -1,12 +1,11 @@
1
  import gradio as gr
2
  import nltk
3
  from nltk.tokenize import sent_tokenize, word_tokenize
4
- from nltk.corpus import stopwords, wordnet, brown
5
  from nltk.stem import PorterStemmer, WordNetLemmatizer
6
  from nltk import pos_tag, ne_chunk, ngrams
7
  from nltk.collocations import BigramCollocationFinder
8
  from nltk.classify import NaiveBayesClassifier
9
- from nltk.corpus import movie_reviews
10
  import random
11
 
12
  # Tải các tài nguyên cần thiết
@@ -25,10 +24,15 @@ stemmer = PorterStemmer()
25
  lemmatizer = WordNetLemmatizer()
26
  stop_words = set(stopwords.words('english'))
27
 
28
- # Hàm huấn luyện classifier đơn giản
29
  def train_classifier():
30
- pos_reviews = [({"word": word}, 'positive') for word in movie_reviews.words('pos')[:100]]
31
- neg_reviews = [({"word": word}, 'negative') for word in movie_reviews.words('neg')[:100]]
 
 
 
 
 
32
  train_set = pos_reviews + neg_reviews
33
  random.shuffle(train_set)
34
  return NaiveBayesClassifier.train(train_set)
 
1
  import gradio as gr
2
  import nltk
3
  from nltk.tokenize import sent_tokenize, word_tokenize
4
+ from nltk.corpus import stopwords, wordnet, brown, movie_reviews
5
  from nltk.stem import PorterStemmer, WordNetLemmatizer
6
  from nltk import pos_tag, ne_chunk, ngrams
7
  from nltk.collocations import BigramCollocationFinder
8
  from nltk.classify import NaiveBayesClassifier
 
9
  import random
10
 
11
  # Tải các tài nguyên cần thiết
 
24
  lemmatizer = WordNetLemmatizer()
25
  stop_words = set(stopwords.words('english'))
26
 
27
+ # Hàm huấn luyện classifier sửa lại
28
  def train_classifier():
29
+ # Lấy danh sách file từ thư mục pos và neg
30
+ pos_files = movie_reviews.fileids('pos')[:50] # Giới hạn 50 file để nhanh hơn
31
+ neg_files = movie_reviews.fileids('neg')[:50]
32
+
33
+ # Tạo tập huấn luyện
34
+ pos_reviews = [({word: True for word in movie_reviews.words(fileid)}, 'positive') for fileid in pos_files]
35
+ neg_reviews = [({word: True for word in movie_reviews.words(fileid)}, 'negative') for fileid in neg_files]
36
  train_set = pos_reviews + neg_reviews
37
  random.shuffle(train_set)
38
  return NaiveBayesClassifier.train(train_set)