Spaces:
Runtime error
Runtime error
import gradio as gr | |
from newspaper import Article | |
from newspaper import Config | |
from transformers import pipeline | |
import requests | |
from bs4 import BeautifulSoup | |
import re | |
from bs4 import BeautifulSoup as bs | |
import requests | |
from transformers import PreTrainedTokenizerFast, BartForConditionalGeneration | |
# Tokenizer/model pairs keyed by model name. Loading the weights is by far
# the slowest step, so it is done once per process instead of once per call.
_SUMMARIZER_CACHE = {}


def _load_summarizer(model_name="ainize/kobart-news"):
    """Return the cached ``(tokenizer, model)`` pair for *model_name*."""
    if model_name not in _SUMMARIZER_CACHE:
        _SUMMARIZER_CACHE[model_name] = (
            PreTrainedTokenizerFast.from_pretrained(model_name),
            BartForConditionalGeneration.from_pretrained(model_name),
        )
    return _SUMMARIZER_CACHE[model_name]


def get_summary(input_text):
    """Generate a summary of *input_text* with the ``ainize/kobart-news`` model.

    Args:
        input_text: Text to summarize.

    Returns:
        The decoded summary string with special tokens stripped.
    """
    tokenizer, summary_model = _load_summarizer()
    input_ids = tokenizer.encode(input_text, return_tensors="pt")
    # Beam search (4 beams) bounded to 56-142 generated tokens.
    summary_text_ids = summary_model.generate(
        input_ids=input_ids,
        bos_token_id=summary_model.config.bos_token_id,
        eos_token_id=summary_model.config.eos_token_id,
        length_penalty=2.0,
        max_length=142,
        min_length=56,
        num_beams=4,
    )
    return tokenizer.decode(summary_text_ids[0], skip_special_tokens=True)
# Browser-like User-Agent string stored on the newspaper configuration below.
USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:78.0) Gecko/20100101 Firefox/78.0'

# Shared newspaper configuration: 10-second request timeout and the
# User-Agent defined above.
config = Config()
config.request_timeout = 10
config.browser_user_agent = USER_AGENT
class news_collector:
    """Scrape Daum economy news and keep a summary of each article."""

    def __init__(self):
        # Summaries appended by update_news_examples(), in scrape order.
        self.examples = []

    def get_new_parser(self, url):
        """Download and parse the article at *url*.

        Args:
            url: Address of the news article.

        Returns:
            The downloaded and parsed ``Article`` object.
        """
        # Pass the module-level ``config`` so its User-Agent and request
        # timeout are actually applied to the download; without it the
        # configuration object is built but never used.
        article = Article(url, language='ko', config=config)
        article.download()
        article.parse()
        return article

    def get_news_links(self, page=''):
        """Return the ``https`` links found on the Daum economy news page.

        Args:
            page: Unused; kept so existing callers keep working.

        Returns:
            A list of ``href`` values that start with ``https``, taken from
            the ``a.link_txt`` anchors of the listing page.
        """
        url = "https://news.daum.net/breakingnews/economic"
        # Same User-Agent and timeout as the article downloads, so a stalled
        # connection cannot hang the caller forever.
        response = requests.get(
            url,
            headers={"User-Agent": USER_AGENT},
            timeout=config.request_timeout,
        )
        soup = bs(response.text, 'html.parser')
        # ``.get`` tolerates anchors that carry no ``href`` attribute.
        hrefs = (anchor.get('href') for anchor in soup.select("a.link_txt"))
        return [href for href in hrefs if href and href.startswith('https')]

    def update_news_examples(self):
        """Summarize every linked article and append it to ``self.examples``."""
        for news_url in self.get_news_links():
            article = self.get_new_parser(news_url)
            # Only the first 1000 characters of the body are summarized.
            self.examples.append(get_summary(article.text[:1000]))
def collect_news():
    """Build a collector, fill it with article summaries, and return them."""
    collector = news_collector()
    collector.update_news_examples()
    return collector.examples
# Article summaries collected once at start-up.
examples = collect_news()
# App title; the Korean text was mis-decoded and is restored to UTF-8 here.
title = "균형잡힌 뉴스 읽기 (Balanced News Reading)"
with gr.Blocks() as demo: | |
# news = news_collector() | |
# news.update_news_examples() | |
with gr.Tab("์๊ฐ"): | |
gr.Markdown( | |
""" | |
# ๊ท ํ์กํ ๋ด์ค ์ฝ๊ธฐ (Balanced News Reading) | |
๊ธ์ ์ ์ธ ๊ธฐ์ฌ์ ๋ถ์ ์ ์ธ ๊ธฐ์ฌ์ธ์ง ํ์ธํ์ฌ ๋ด์ค๋ฅผ ์ฝ์ ์ ์์ต๋๋ค. ์ต๊ทผ ๊ฒฝ์ ๋ด์ค๊ธฐ์ฌ๋ฅผ ๊ฐ์ ธ์ Example์์ ๋ฐ๋ก ํ์ธํ ์ ์๋๋ก ๊ตฌ์ฑํ์ต๋๋ค. | |
## 1. ์ฌ์ฉ๋ฐฉ๋ฒ | |
Daum๋ด์ค์ ๊ฒฝ์ ๊ธฐ์ฌ๋ฅผ ๊ฐ์ ธ์ ๋ด์ฉ์ ์์ฝํ๊ณ `Example`์ ๊ฐ์ ธ์ต๋๋ค. ๊ฐ์ ๋ถ์์ ํ๊ณ ์ถ์ ๊ธฐ์ฌ๋ฅผ `Examples`์์ ์ ํํด์ `Submit`์ ๋๋ฅด๋ฉด `Classification`์ | |
ํด๋น ๊ธฐ์ฌ์ ๊ฐ์ ํ๊ฐ ๊ฒฐ๊ณผ๊ฐ ํ์๋ฉ๋๋ค. ๊ฐ์ ํ๊ฐ๋ ๊ฐ ์ํ์ ํ๋ฅ ์ ๋ณด์ ํจ๊ป `neutral`, `positive`, `negative` 3๊ฐ์ง๋ก ํ์๋ฉ๋๋ค. | |
## 2. ๊ตฌ์กฐ ์ค๋ช | |
๋ด์ค๊ธฐ์ฌ๋ฅผ ํฌ๋กค๋ง ๋ฐ ์์ฝ ๋ชจ๋ธ์ ์ด์ฉํ ๊ธฐ์ฌ ์์ฝ >> ๊ธฐ์ฌ ์์ฝ์ ๋ณด Example์ ์ถ๊ฐ >> ํ๊ตญ์ด fine-tunningํ ๊ฐ์ ํ๊ฐ ๋ชจ๋ธ์ ์ด์ฉํด ์ ๋ ฅ๋ ๊ธฐ์ฌ์ ๋ํ ๊ฐ์ ํ๊ฐ ์งํ | |
""") | |
with gr.Tab("๋ฐ๋ชจ"): | |
gr.load("models/gabrielyang/finance_news_classifier-KR_v7", | |
inputs = gr.Textbox( placeholder="๋ด์ค ๊ธฐ์ฌ ๋ด์ฉ์ ์ ๋ ฅํ์ธ์." ), | |
examples=examples) | |
# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()