Spaces:
Sleeping
Sleeping
File size: 1,646 Bytes
89ea00c ec5aa0b 89ea00c ec5aa0b 89ea00c 3fb0324 89ea00c ec5aa0b 3fb0324 89ea00c 4f8337d dbe02a3 89ea00c dbe02a3 ec5aa0b 3fb0324 89ea00c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
import os
import gradio as gr
from google.cloud import translate_v2 as translate
from nltk import download
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk import pos_tag
# Download the NLTK data packages this script requests at import time.
# NOTE(review): newer NLTK releases may look up differently named resources
# (e.g. 'punkt_tab') -- TODO confirm against the installed NLTK version.
for _nltk_resource in ('punkt', 'averaged_perceptron_tagger', 'stopwords'):
    download(_nltk_resource)

# Google Cloud Translation client, created once when the module is loaded.
translate_client = translate.Client()
def extract_keywords(text: str) -> str:
    """Return the noun and verb tokens of an English text, space-joined.

    The text is tokenized with ``word_tokenize``. Tokens that are not purely
    alphanumeric, or that are English stopwords (compared in lower case), are
    dropped. The remaining tokens are POS-tagged with ``pos_tag`` and only
    those carrying a noun or verb tag are kept, in their original order.

    Args:
        text: English text to extract keywords from.

    Returns:
        The kept tokens joined by single spaces; an empty string when no
        token qualifies.
    """
    # Load the stopword list once and keep it in a set. Evaluating
    # stopwords.words('english') inside the comprehension would rebuild the
    # list and scan it linearly for every single token.
    english_stopwords = set(stopwords.words('english'))

    # Penn Treebank tags treated as keywords: nouns, proper nouns and verbs.
    # NOTE(review): 'NNPS' (plural proper noun) is not in this set -- confirm
    # whether that omission is intentional.
    keyword_tags = {'NN', 'NNP', 'NNS', 'VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ'}

    tokens = [
        word
        for word in word_tokenize(text)
        if word.isalnum() and word.lower() not in english_stopwords
    ]

    # Tagging runs on the already-filtered sequence, not on the full sentence.
    tagged = pos_tag(tokens)
    return ' '.join(word for word, tag in tagged if tag in keyword_tags)
def translate_and_extract_keywords(text):
    """Translate the input sentence to English and return its keywords.

    Args:
        text: Sentence to translate (the UI asks for a Korean sentence).

    Returns:
        The space-joined keywords that ``extract_keywords`` produces for the
        English translation, or an empty string for empty or blank input.
    """
    # Nothing to translate: skip the remote API call entirely.
    if not text or not text.strip():
        return ''

    # format_='text' makes the API treat the input as plain text. With the
    # default HTML handling the result can contain entities such as "&#39;",
    # whose digits would pass the isalnum() filter in extract_keywords and
    # surface as bogus keywords.
    result = translate_client.translate(
        text,
        target_language='en',
        format_='text',
    )
    translated_text = result['translatedText']

    return extract_keywords(translated_text)
# Gradio UI: a two-line text box as input, plain text as output, wired to
# translate_and_extract_keywords.
#
# The user-facing strings are restored to their Korean UTF-8 text. They had
# been mis-decoded into unreadable characters and split across lines, which
# left the string literals unterminated. English meaning of each string:
#   placeholder: "Enter a Korean sentence..."
#   title:       "Translate a Korean sentence into English keywords and extract them"
#   description: "Enter a Korean sentence and the English keywords that carry
#                 its meaning are extracted and displayed."
interface = gr.Interface(
    fn=translate_and_extract_keywords,
    inputs=gr.Textbox(lines=2, placeholder="한글 문장을 입력하세요..."),
    outputs="text",
    title="한글 문장을 영어 키워드로 번역 및 추출",
    description="한글 문장을 입력하면, 그 의미가 포함된 영어 키워드를 추출하여 출력합니다.",
)

# Start the application; share=True requests a public share link.
interface.launch(share=True)
|