File size: 1,646 Bytes
89ea00c
ec5aa0b
89ea00c
 
 
 
 
ec5aa0b
89ea00c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3fb0324
89ea00c
 
 
 
ec5aa0b
3fb0324
 
89ea00c
4f8337d
dbe02a3
89ea00c
 
dbe02a3
ec5aa0b
3fb0324
89ea00c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import os
import gradio as gr
from google.cloud import translate_v2 as translate
from nltk import download
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk import pos_tag

# Download the NLTK data used below for tokenizing, POS tagging and
# stop-word filtering.
# Newer NLTK releases (3.9+) look up "punkt_tab" and
# "averaged_perceptron_tagger_eng" instead of the legacy pickled packages,
# so both generations are requested to keep the app working on either.
download('punkt')
download('punkt_tab')
download('averaged_perceptron_tagger')
download('averaged_perceptron_tagger_eng')
download('stopwords')

# Google Cloud Translation client.
# NOTE(review): constructed with no arguments, so credentials presumably come
# from the runtime environment -- confirm against the deployment config.
translate_client = translate.Client()

def extract_keywords(text):
    """Return the noun and verb tokens of an English text as one string.

    The text is tokenized, reduced to alphanumeric tokens that are not
    English stop words, POS-tagged, and then filtered to noun/verb tags.

    Args:
        text: English text to analyse.

    Returns:
        The retained tokens, in their original order, joined by single
        spaces. An empty string when ``text`` is empty/None or when no
        token survives the filters.
    """
    if not text:
        # Nothing to tokenize; skip loading the stop-word corpus entirely.
        return ''

    # Build the stop-word lookup once per call. Evaluating
    # stopwords.words('english') inside the comprehension would re-read the
    # list and scan it linearly for every single token.
    english_stopwords = set(stopwords.words('english'))

    # Penn Treebank tags kept as keywords: nouns, proper nouns and verbs.
    # NOTE(review): 'NNPS' (plural proper noun) is not in this set -- confirm
    # whether that omission is intended.
    keyword_tags = {'NN', 'NNP', 'NNS', 'VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ'}

    # Drop punctuation/symbol tokens and stop words before tagging.
    tokens = [
        word
        for word in word_tokenize(text)
        if word.isalnum() and word.lower() not in english_stopwords
    ]

    keywords = [word for word, tag in pos_tag(tokens) if tag in keyword_tags]
    return ' '.join(keywords)

def translate_and_extract_keywords(text):
    """Translate a sentence to English and return its extracted keywords.

    Args:
        text: Sentence to translate (the UI prompts for Korean). No source
            language is passed to the translation call.

    Returns:
        The space-separated keywords produced by ``extract_keywords`` for
        the English translation, or an empty string when ``text`` is
        empty, None or whitespace-only.
    """
    if not text or not text.strip():
        # Avoid a translation API round-trip for blank input.
        return ''

    # Request plain-text output: the v2 API treats input as HTML by default
    # and returns entities such as "&#39;" in the translation, which would
    # otherwise leak into tokenization.
    result = translate_client.translate(
        text,
        target_language='en',
        format_='text',
    )
    translated_text = result['translatedText']

    return extract_keywords(translated_text)

# Gradio interface definition: one two-line text box in, plain text out.
# The user-facing strings were mis-encoded (mojibake); they are restored to
# the Korean text they decode to:
#   placeholder: "Enter a Korean sentence..."
#   title:       "Translate a Korean sentence into English keywords"
#   description: "Enter a Korean sentence and the English keywords that
#                 carry its meaning are extracted and displayed."
interface = gr.Interface(
    fn=translate_and_extract_keywords,
    inputs=gr.Textbox(lines=2, placeholder="한글 문장을 입력하세요..."),
    outputs="text",
    title="한글 문장을 영어 키워드로 번역 및 추출",
    description="한글 문장을 입력하면, 그 의미가 포함된 영어 키워드를 추출하여 출력합니다."
)

# Run the application; share=True requests a public Gradio share link.
interface.launch(share=True)