aiqtest3 / app.py
seawolf2357's picture
Update app.py
89ea00c verified
raw
history blame
1.65 kB
import os
import gradio as gr
from google.cloud import translate_v2 as translate
from nltk import download
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk import pos_tag
# Download the NLTK data used below: the tokenizer model, the POS tagger
# model and the stop-word corpus.
for _resource in ('punkt', 'averaged_perceptron_tagger', 'stopwords'):
    download(_resource)

# Set up the Google Cloud Translation client (shared by every request).
translate_client = translate.Client()
def extract_keywords(text):
    """Return the noun and verb keywords of an English text.

    The text is tokenized, non-alphanumeric tokens and English stop words
    are dropped, the remaining tokens are POS-tagged, and only tokens tagged
    as nouns, proper nouns or verbs are kept.

    Args:
        text: English text to analyse.

    Returns:
        The kept tokens, in their original order, joined by single spaces.
        An empty string when ``text`` is empty or only whitespace.
    """
    # Nothing to tokenize: skip the NLTK pipeline entirely.
    if not text or not text.strip():
        return ''

    # Build the stop-word set once per call instead of re-reading the word
    # list for every token; set membership is also O(1).
    english_stopwords = set(stopwords.words('english'))

    # Penn Treebank tags for nouns, proper nouns (singular and plural) and
    # all verb forms.
    keyword_tags = {
        'NN', 'NNS', 'NNP', 'NNPS',
        'VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ',
    }

    # Tokenize, then drop punctuation-like tokens and stop words.
    tokens = [
        word
        for word in word_tokenize(text)
        if word.isalnum() and word.lower() not in english_stopwords
    ]

    # Tag parts of speech and keep only the noun/verb tokens.
    tagged = pos_tag(tokens)
    return ' '.join(word for word, tag in tagged if tag in keyword_tags)
def translate_and_extract_keywords(text):
    """Translate a sentence into English and return its keywords.

    Args:
        text: Sentence to translate (the UI asks for Korean input; the
            source language is left for the translation service to detect).

    Returns:
        The space-separated keywords that ``extract_keywords`` finds in the
        English translation, or an empty string when ``text`` is empty or
        only whitespace.
    """
    # Blank input has nothing to translate; avoid a needless remote call.
    if not text or not text.strip():
        return ''

    # Request plain text: the default HTML format returns entity-escaped
    # output (e.g. "don&#39;t"), which the tokenizer would split into junk.
    result = translate_client.translate(
        text,
        target_language='en',
        format_='text',
    )
    translated_text = result['translatedText']

    # Extract keywords from the English translation.
    return extract_keywords(translated_text)
# Define the Gradio interface: one text box in, plain text out.
# The user-facing strings are Korean; they were mis-encoded (UTF-8 bytes
# decoded as a single-byte code page) and are restored here.
interface = gr.Interface(
    fn=translate_and_extract_keywords,
    inputs=gr.Textbox(lines=2, placeholder="한글 문장을 입력하세요..."),
    outputs="text",
    title="한글 문장을 영어 키워드로 번역 및 추출",
    description="한글 문장을 입력하면, 그 의미가 포함된 영어 키워드를 추출하여 출력합니다.",
)

# Run the application.
interface.launch(share=True)