kkosmi committed on
Commit
1ed0340
·
verified ·
1 Parent(s): 73a0a7b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -99
app.py CHANGED
@@ -5,71 +5,18 @@ from gensim.models import Word2Vec
5
  import gradio as gr
6
  from sklearn.decomposition import PCA
7
  import plotly.graph_objects as go
8
- import nltk
9
- from nltk.tokenize import word_tokenize
10
- from nltk.corpus import stopwords
11
- from nltk.stem import PorterStemmer, WordNetLemmatizer
12
- from nltk.tag import pos_tag
13
-
14
- from docs import NOVEL_TEXT
15
-
16
- def download_nltk_library():
17
- try:
18
- nltk.download('punkt')
19
- nltk.download('stopwords')
20
- nltk.download('wordnet')
21
- nltk.download('averaged_perceptron_tagger')
22
- nltk.download('punkt_tab')
23
- return True
24
- except:
25
- return False
26
-
27
- # Function to process each sentence
28
- def process_text(text):
29
-
30
- lemmatizer = WordNetLemmatizer()
31
- stop_words = set(stopwords.words('english'))
32
-
33
- # Tokenization
34
- tokens = word_tokenize(text.lower())
35
-
36
- # Remove stop words and apply lemmatization
37
- processed_tokens = [
38
- lemmatizer.lemmatize(token)
39
- for token in tokens if token.isalnum() and token not in stop_words
40
- ]
41
 
42
- return processed_tokens
43
 
44
  # Word2Vec 모델 학습 함수
45
  def train_word2vec(sentences):
46
- model = Word2Vec(sentences, vector_size=100, window=3, min_count=2, workers=4, sg=0, epochs=100)
 
47
  return model
48
 
49
- # def preprocess_text(file_path):
50
- # with open(file_path, 'r', encoding='utf-8') as file:
51
- # text = file.read()
52
-
53
- # # ํ† ํฐํ™” ๋ฐ ํ’ˆ์‚ฌ ํƒœ๊น…
54
- # tokens = word_tokenize(text)
55
- # tagged = pos_tag(tokens)
56
-
57
- # # ๋ช…์‚ฌ๋งŒ ์ถ”์ถœ (NN, NNS, NNP, NNPS)
58
- # nouns = [word.lower() for word, pos in tagged if pos.startswith('NN')]
59
-
60
- # # ์ค‘๋ณต ์ œ๊ฑฐ ๋ฐ ์ •๋ ฌ
61
- # unique_nouns = sorted(set(nouns))
62
-
63
- # # ๊ฐ„๋‹จํ•œ ๋ฌธ์žฅ ์ƒ์„ฑ (๊ฐ ๋ช…์‚ฌ๋ฅผ ๊ฐœ๋ณ„ ๋ฌธ์žฅ์œผ๋กœ ์ทจ๊ธ‰)
64
- # sentences = [[noun] for noun in unique_nouns]
65
-
66
- # return sentences, unique_nouns
67
-
68
  def apply_pca(word_vectors):
69
  pca = PCA(n_components=3)
70
  return pca.fit_transform(word_vectors)
71
 
72
- # def process_text(file_path, target_word):
73
 
74
  def get_unique(model):
75
  vocablist1=list(model.wv.index_to_key)
@@ -89,7 +36,8 @@ def train_model(sentence):
89
  return model, unique_words
90
 
91
  def process_model(target_word):
92
-
 
93
  # Word2Vec 모델 로드
94
  model = Word2Vec.load("word2vec.model")
95
  unique_words = get_unique(model)
@@ -122,6 +70,7 @@ def process_model(target_word):
122
  for idx in dissimilar_word_indices:
123
  colors[idx] = 'rgba(138, 43, 226, 0.8)' # 가장 먼 단어들을 보라색으로 표시
124
 
 
125
  # Plotly๋ฅผ ์‚ฌ์šฉํ•œ 3D ์‚ฐ์ ๋„ ์ƒ์„ฑ
126
  fig = go.Figure(data=[go.Scatter3d(
127
  x=word_vectors_3d[:, 0],
@@ -143,76 +92,60 @@ def process_model(target_word):
143
  yaxis_title="Y",
144
  zaxis_title="Z"
145
  ),
146
- width=800,
147
- height=800
148
  )
149
 
150
  # 가장 가까운 단어 10개 목록 생성
151
  similar_words_text = ""
152
  if target_word in model.wv:
153
- similar_words_text = "๊ฐ€์žฅ ๊ฐ€๊นŒ์šด ๋‹จ์–ด 10๊ฐœ:\n" + "\n".join([f"{word}: {score:.4f}" for word, score in similar_words])
154
 
155
- dissimilar_words_text = ""
156
  if target_word in model.wv:
157
- dissimilar_words_text = "๊ฐ€์žฅ ๋จผ ๋‹จ์–ด 10๊ฐœ:\n" + "\n".join([f"{word}: {score:.4f}" for word, score in dissimilar_words])
158
 
159
  return fig, similar_words_text, dissimilar_words_text
160
 
161
- def change_button_state_true():
162
- # If the first button is clicked, enable or disable the second button based on its state
163
- return gr.update(interactive=True)
164
-
165
- def change_button_state_false():
166
- # If the first button is clicked, enable or disable the second button based on its state
167
- return gr.update(interactive=False)
168
 
169
 
170
  # Gradio 인터페이스 수정
171
  with gr.Blocks(css=".plot-box {width: 70%; height: 500px;}") as iface:
172
  gr.Markdown("# Word Embedding 3D ์‹œ๊ฐํ™”")
173
- gr.Markdown("๋‹จ์–ด๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”. Word2Vec๊ณผ PCA๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ ๋‹จ์–ด ์ž„๋ฒ ๋”ฉ์„ 3D๋กœ ์‹œ๊ฐํ™”ํ•ฉ๋‹ˆ๋‹ค. ์ž…๋ ฅํ•œ ๋‹จ์–ด๋Š” ๋นจ๊ฐ„์ƒ‰์œผ๋กœ, ๊ฐ€์žฅ ์œ ์‚ฌํ•œ 10๊ฐœ ๋‹จ์–ด๋Š” ์ดˆ๋ก์ƒ‰, ๊ฐ€์žฅ ๋จผ ๋‹จ์–ด๋Š” ๋ณด๋ผ์ƒ‰์œผ๋กœ ๊ฐ•์กฐ๋ฉ๋‹ˆ๋‹ค. ์œ ์‚ฌํ•œ ๋‹จ์–ด ๋ชฉ๋ก์€ ๊ทธ๋ž˜ํ”„ ์•„๋ž˜์— ํ‘œ์‹œ๋ฉ๋‹ˆ๋‹ค.")
174
-
175
- download_nltk_library()
176
 
177
  with gr.Row():
178
- word_input = gr.Textbox(label="**๊ฐ•์กฐํ•  ๋‹จ์–ด ์ž…๋ ฅ**", elem_id="input-box", placeholder="๋‹จ์–ด๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”", lines=1, interactive=False)
179
- with gr.Column(scale=1):
180
  # 사용자 입력 박스를 강조하기 위해 스타일을 변경
181
- # word_input = gr.Textbox(label="**๊ฐ•์กฐํ•  ๋‹จ์–ด ์ž…๋ ฅ**", elem_id="input-box", placeholder="๋‹จ์–ด๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”", lines=1)
182
- load_btn = gr.Button("๋ชจ๋ธ ๋กœ๋”ฉ", elem_id="submit-btn")
183
- submit_btn = gr.Button("๋‹จ์–ด ์ž…๋ ฅ", elem_id="submit-btn", interactive=False)
 
184
 
185
  with gr.Row():
186
  # ์‹œ๊ฐํ™” ํ™”๋ฉด์˜ ํฌ๊ธฐ๋ฅผ CSS๋กœ ์ฆ๊ฐ€
187
  plot_output = gr.Plot(label="Word Embedding 3D ์‹œ๊ฐํ™”", elem_id="plot-box")
188
 
189
  with gr.Column(scale=0.3): # ์ปฌ๋Ÿผ์˜ ๋„ˆ๋น„๋ฅผ ์ค„์ด๊ธฐ ์œ„ํ•ด scale ๊ฐ’์„ ๋‚ฎ์ถค
190
- similar_words_output = gr.Textbox(label="์œ ์‚ฌํ•œ ๋‹จ์–ด", interactive=False, lines=5)
191
- dissimilar_words_output = gr.Textbox(label="์œ ์‚ฌํ•˜์ง€ ์•Š์€ ๋‹จ์–ด", interactive=False, lines=5)
 
192
 
193
- load_btn.click(
194
- fn=process_model,
195
- inputs=[word_input],
196
- outputs=[plot_output, similar_words_output, dissimilar_words_output]
197
- )
198
- load_btn.click(
199
- fn=change_button_state_true,
200
- outputs=submit_btn
201
- )
202
- load_btn.click(
203
- fn=change_button_state_true,
204
- outputs=word_input
205
- )
206
  submit_btn.click(
207
- fn=process_model,
 
208
  inputs=[word_input],
209
- outputs=[plot_output, similar_words_output, dissimilar_words_output]
210
- )
211
- submit_btn.click(
212
- fn=change_button_state_false,
213
- outputs=load_btn
214
- )
215
 
 
 
 
 
 
 
 
 
216
 
217
  if __name__ == "__main__":
218
- iface.launch()
 
5
  import gradio as gr
6
  from sklearn.decomposition import PCA
7
  import plotly.graph_objects as go
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
 
9
 
10
  # Word2Vec 모델 학습 함수
11
  def train_word2vec(sentences):
12
+ # model = Word2Vec(sentences, vector_size=100, window=4, min_count=6, workers=4, sg=0, epochs=100)
13
+ model = Word2Vec(sentences, vector_size=50, window=4, min_count=1, sg=0, epochs=100)
14
  return model
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  def apply_pca(word_vectors):
17
  pca = PCA(n_components=3)
18
  return pca.fit_transform(word_vectors)
19
 
 
20
 
21
  def get_unique(model):
22
  vocablist1=list(model.wv.index_to_key)
 
36
  return model, unique_words
37
 
38
  def process_model(target_word):
39
+ target_word =target_word.lower() #################
40
+
41
  # Word2Vec 모델 로드
42
  model = Word2Vec.load("word2vec.model")
43
  unique_words = get_unique(model)
 
70
  for idx in dissimilar_word_indices:
71
  colors[idx] = 'rgba(138, 43, 226, 0.8)' # 가장 먼 단어들을 보라색으로 표시
72
 
73
+
74
  # Plotly๋ฅผ ์‚ฌ์šฉํ•œ 3D ์‚ฐ์ ๋„ ์ƒ์„ฑ
75
  fig = go.Figure(data=[go.Scatter3d(
76
  x=word_vectors_3d[:, 0],
 
92
  yaxis_title="Y",
93
  zaxis_title="Z"
94
  ),
95
+ width=1100,
96
+ height=900
97
  )
98
 
99
  # 가장 가까운 단어 10개 목록 생성
100
  similar_words_text = ""
101
  if target_word in model.wv:
102
+ similar_words_text = "\n".join([f"{word}: {score:.4f}" for word, score in similar_words])
103
 
104
+ dissimlar_words_Text=""
105
  if target_word in model.wv:
106
+ dissimilar_words_text = "\n".join([f"{word}: {score:.4f}" for word, score in dissimilar_words])
107
 
108
  return fig, similar_words_text, dissimilar_words_text
109
 
 
 
 
 
 
 
 
110
 
111
 
112
  # Gradio 인터페이스 수정
113
  with gr.Blocks(css=".plot-box {width: 70%; height: 500px;}") as iface:
114
  gr.Markdown("# Word Embedding 3D ์‹œ๊ฐํ™”")
115
+ gr.Markdown("<Inside Out 2> ๋‹จ์–ด ์˜๋ฏธ ์ง€๋„(์ž„๋ฒ ๋”ฉ ๋ฒกํ„ฐ) 3D ์‹œ๊ฐํ™” ๋„๊ตฌ")
 
 
116
 
117
  with gr.Row():
 
 
118
  # 사용자 입력 박스를 강조하기 위해 스타일을 변경
119
+ with gr.Column():
120
+ word_input = gr.Textbox(label="**๋‹จ์–ด ์ž…๋ ฅ**", elem_id="input-box", placeholder="ex. emotion, puberty, hockey, friend, anxiety, memory, ...", lines=1)
121
+ submit_btn = gr.Button("์ œ์ถœ", elem_id="submit-btn")
122
+ bulletin = gr.Textbox(label="์‚ฌ์šฉ๋ฒ• ์•ˆ๋‚ด", interactive=False, lines=4, value="1. ์†Œ์„ค์— ๋‚˜์˜จ ๋‹จ์–ด๋ฅผ ์ž…๋ ฅํ•˜๊ณ  [์ œ์ถœ] ๋ฒ„ํŠผ์ด๋‚˜ [Enter]๋ฅผ ๋ˆ„๋ฅด์„ธ์š” \n2. ์ž…๋ ฅ ๋‹จ์–ด๋Š” ๋นจ๊ฐ„์ƒ‰, ๊ฐ€๊นŒ์šด ๋‹จ์–ด๋“ค์€ ์ฃผํ™ฉ์ƒ‰, ๋จผ ๋‹จ์–ด๋“ค์€ ๋ณด๋ผ์ƒ‰์œผ๋กœ ๊ฐ•์กฐ๋ฉ๋‹ˆ๋‹ค. \n3. <Error>๊ฐ€ ๋‚˜ํƒ€๋‚˜๋Š” ๊ฒฝ์šฐ, ๋‹ค๋ฅธ ๋‹จ์–ด๋ฅผ ์ž…๋ ฅํ•ด๋ณด์„ธ์š”.\n4. ๋งˆ์šฐ์Šค ๋“œ๋ž˜๊ทธ ๋ฐ ์Šคํฌ๋กค์„ ํ™œ์šฉํ•˜์—ฌ 3D ํ™”๋ฉด์„ ์‚ดํŽด๋ณด์„ธ์š”. \n5. ๋‹จ์–ด ์ž…๋ ฅ์ฐฝ์— ๋‹ค๋ฅธ ๋‹จ์–ด๋“ค๋„ ์ž…๋ ฅํ•ด๋ณด์„ธ์š”.")
123
 
124
  with gr.Row():
125
  # ์‹œ๊ฐํ™” ํ™”๋ฉด์˜ ํฌ๊ธฐ๋ฅผ CSS๋กœ ์ฆ๊ฐ€
126
  plot_output = gr.Plot(label="Word Embedding 3D ์‹œ๊ฐํ™”", elem_id="plot-box")
127
 
128
  with gr.Column(scale=0.3): # ์ปฌ๋Ÿผ์˜ ๋„ˆ๋น„๋ฅผ ์ค„์ด๊ธฐ ์œ„ํ•ด scale ๊ฐ’์„ ๋‚ฎ์ถค
129
+ similar_words_output = gr.Textbox(label="๊ฐ€์žฅ ๊ฐ€๊นŒ์šด ๋‹จ์–ด 10๊ฐœ", interactive=False, lines=5)
130
+ dissimilar_words_output = gr.Textbox(label="๊ฐ€์žฅ ๋จผ ๋‹จ์–ด 10๊ฐœ", interactive=False, lines=5)
131
+ gr.Image(value="https://compote.slate.com/images/8324cd2e-21f5-4b20-84d5-f08ece97ac38.jpeg?crop=1560%2C1040%2Cx0%2Cy0&width=1280", label="URL ์ด๋ฏธ์ง€", interactive=False)
132
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  submit_btn.click(
134
+ fn=process_text,
135
+ # word_input = word_input.lower(),
136
  inputs=[word_input],
137
+ outputs=[plot_output, similar_words_output, dissimilar_words_output],
138
+ # preprocess=lambda word: word.lower() if word else "" # None ์ฒดํฌ ํ›„ ์†Œ๋ฌธ์ž ๋ณ€ํ™˜
139
+ )
 
 
 
140
 
141
+ # "Enter" ํ‚ค ์ž…๋ ฅ ์‹œ ๋™์ž‘ ์„ค์ •
142
+ word_input.submit(
143
+ fn=process_text,
144
+ # word_input = word_input.lower(),
145
+ inputs=[word_input],
146
+ outputs=[plot_output, similar_words_output, dissimilar_words_output],
147
+ preprocess=lambda word: word.lower() if word else "" # None ์ฒดํฌ ํ›„ ์†Œ๋ฌธ์ž ๋ณ€ํ™˜
148
+ )
149
 
150
  if __name__ == "__main__":
151
+ iface.launch()