# Spaces: Sleeping
import os | |
import gradio as gr | |
from google.cloud import translate_v2 as translate | |
from nltk import download | |
from nltk.tokenize import word_tokenize | |
from nltk.corpus import stopwords | |
from nltk import pos_tag | |
# Fetch the NLTK data needed for tokenizing, POS tagging and stopword
# filtering.  Both the legacy resource names and the names that newer NLTK
# releases look up ('punkt_tab', 'averaged_perceptron_tagger_eng') are
# requested, so word_tokenize/pos_tag can find their models whichever NLTK
# version is installed.
for _resource in (
    'punkt',
    'punkt_tab',
    'averaged_perceptron_tagger',
    'averaged_perceptron_tagger_eng',
    'stopwords',
):
    download(_resource)

# Google Cloud Translation client, constructed with default settings.
translate_client = translate.Client()
def extract_keywords(text):
    """Return the noun and verb tokens of English ``text`` as one string.

    The text is tokenized, tokens that are not purely alphanumeric or that
    are English stopwords (compared case-insensitively) are discarded, the
    remaining tokens are POS-tagged, and only those tagged as a noun
    (NN, NNS, NNP, NNPS) or a verb (VB, VBD, VBG, VBN, VBP, VBZ) are kept.

    Args:
        text: English text to analyse.

    Returns:
        The kept tokens in their original order, joined by single spaces;
        an empty string when no token qualifies.
    """
    # Build the stopword set once up front so each token costs a single
    # O(1) lookup instead of re-fetching the word list inside the loop.
    english_stopwords = set(stopwords.words('english'))

    # NNPS (plural proper noun) is included alongside NNP so proper nouns
    # are treated the same way as common nouns (NN/NNS).
    keyword_tags = {
        'NN', 'NNS', 'NNP', 'NNPS',
        'VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ',
    }

    tokens = [
        word
        for word in word_tokenize(text)
        if word.isalnum() and word.lower() not in english_stopwords
    ]

    # Tag after filtering, then keep only the noun/verb tokens.
    tagged = pos_tag(tokens)
    return ' '.join(word for word, tag in tagged if tag in keyword_tags)
def translate_and_extract_keywords(text):
    """Translate ``text`` into English and return its extracted keywords.

    Args:
        text: Sentence to translate.  May be empty, whitespace-only or None,
            in which case no translation request is made.

    Returns:
        The space-joined keywords produced by ``extract_keywords`` for the
        English translation, or an empty string for blank input.
    """
    # Nothing to translate: skip the remote call entirely.
    if not text or not text.strip():
        return ''

    # Ask for plain-text output.  With the default (HTML) format the API
    # returns HTML-escaped text such as "&#39;", whose fragments would be
    # tokenized as if they were words.
    result = translate_client.translate(
        text,
        target_language='en',
        format_='text',
    )
    return extract_keywords(result['translatedText'])
# Gradio UI: a two-line text box as input, plain text as output, wired to
# translate_and_extract_keywords.
_input_box = gr.Textbox(lines=2, placeholder="ํ๊ธ ๋ฌธ์ฅ์ ์ ๋ ฅํ์ธ์...")

interface = gr.Interface(
    fn=translate_and_extract_keywords,
    inputs=_input_box,
    outputs="text",
    title="ํ๊ธ ๋ฌธ์ฅ์ ์์ด ํค์๋๋ก ๋ฒ์ญ ๋ฐ ์ถ์ถ",
    description="ํ๊ธ ๋ฌธ์ฅ์ ์ ๋ ฅํ๋ฉด, ๊ทธ ์๋ฏธ๊ฐ ํฌํจ๋ ์์ด ํค์๋๋ฅผ ์ถ์ถํ์ฌ ์ถ๋ ฅํฉ๋๋ค.",
)

# Start the application with share=True.
interface.launch(share=True)