# Hugging Face Spaces page header captured with this file -- status: Runtime error
import gradio as gr | |
from newspaper import Article | |
from newspaper import Config | |
from transformers import pipeline | |
import requests | |
from bs4 import BeautifulSoup | |
import re | |
from bs4 import BeautifulSoup as bs | |
import requests | |
from transformers import PreTrainedTokenizerFast, BartForConditionalGeneration | |
# Summarization model: loaded lazily on first use and then reused.
_SUMMARIZER_NAME = "ainize/kobart-news"
_summarizer_cache = {}


def _load_summarizer():
    """Return the cached ``(tokenizer, model)`` pair, loading it on first use."""
    if "pair" not in _summarizer_cache:
        _summarizer_cache["pair"] = (
            PreTrainedTokenizerFast.from_pretrained(_SUMMARIZER_NAME),
            BartForConditionalGeneration.from_pretrained(_SUMMARIZER_NAME),
        )
    return _summarizer_cache["pair"]


def get_summary(input_text):
    """Summarize ``input_text`` with the ``ainize/kobart-news`` BART model.

    The tokenizer and model are loaded once and shared between calls instead
    of being re-created for every article.

    Args:
        input_text: The article text to summarize.

    Returns:
        The generated summary as a string, with special tokens removed.
    """
    tokenizer, summary_model = _load_summarizer()

    # Truncate over-long articles so the encoder never receives more tokens
    # than it has position embeddings for.
    # NOTE(review): assumes config.max_position_embeddings is the longest
    # input this checkpoint accepts -- confirm against the model card.
    input_ids = tokenizer.encode(
        input_text,
        return_tensors="pt",
        truncation=True,
        max_length=summary_model.config.max_position_embeddings,
    )

    # Beam search with 4 beams; the output is kept between 56 and 142 tokens.
    summary_text_ids = summary_model.generate(
        input_ids=input_ids,
        bos_token_id=summary_model.config.bos_token_id,
        eos_token_id=summary_model.config.eos_token_id,
        length_penalty=2.0,
        max_length=142,
        min_length=56,
        num_beams=4,
    )
    return tokenizer.decode(summary_text_ids[0], skip_special_tokens=True)
# Browser-like user agent string stored on the newspaper configuration below.
USER_AGENT = (
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:78.0) '
    'Gecko/20100101 Firefox/78.0'
)

# newspaper configuration carrying the user agent and a request timeout.
config = Config()
config.request_timeout = 10  # presumably seconds -- confirm in newspaper docs
config.browser_user_agent = USER_AGENT
class news_collector:
    """Collect summarized Daum economic news articles for use as demo examples."""

    def __init__(self):
        # Summaries appended by update_news_examples().
        self.examples = []

    def get_new_parser(self, url):
        """Download and parse the article at ``url``.

        Args:
            url: Address of the news article.

        Returns:
            The ``Article`` object after ``download()`` and ``parse()``.
        """
        # Pass the module-level config so its user agent and request timeout
        # are actually applied to the download.
        article = Article(url, language='ko', config=config)
        article.download()
        article.parse()
        return article

    def get_news_links(self, page='', limit=2):
        """Return HTTPS article links from the Daum economic news listing.

        Args:
            page: Currently unused; kept so existing callers keep working.
            limit: Maximum number of links to return (defaults to 2).

        Returns:
            A list of at most ``limit`` link strings starting with ``https``.
        """
        url = "https://news.daum.net/breakingnews/economic"
        # A timeout keeps a stalled connection from hanging app start-up.
        response = requests.get(
            url,
            headers={"User-Agent": USER_AGENT},
            timeout=config.request_timeout,
        )
        soup = bs(response.text, 'html.parser')
        # .get() tolerates anchors that carry no href attribute.
        hrefs = (anchor.get('href') for anchor in soup.select("a.link_txt"))
        https_links = [href for href in hrefs if href and href.startswith('https')]
        return https_links[:limit]

    def update_news_examples(self):
        """Summarize each listed article and append the result to ``self.examples``."""
        for news_url in self.get_news_links():
            article = self.get_new_parser(news_url)
            if not article.text:
                # Nothing was extracted, so there is nothing to summarize.
                continue
            self.examples.append(get_summary(article.text))
# Page title; the same text also appears as the Markdown heading below.
title = "๊ท ํ์กํ ๋ด์ค ์ฝ๊ธฐ (Balanced News Reading)"

# Build the UI: an introduction, then the hosted classifier with a text input
# and the collected summaries offered as examples.
with gr.Blocks() as demo:
    news = news_collector()
    gr.Markdown(
        """
# ๊ท ํ์กํ ๋ด์ค ์ฝ๊ธฐ (Balanced News Reading)
๊ธ์ ์ ์ธ ๊ธฐ์ฌ์ ๋ถ์ ์ ์ธ ๊ธฐ์ฌ์ ๊ท ํ์ ๋ณด๋ฉฐ ๋ด์ค๋ฅผ ์ฝ์ ์ ์์ต๋๋ค. ๋ฐ๋ชจ๋ฅผ ์คํํ๋ฉด ๋ฐ๋ชจ ์คํ ๋ ์ง์ Daum๋ด์ค๋ฅผ `Example`์ ๊ฐ์ ธ์ต๋๋ค.
๋ชจ๋ธ์์ ์ฌ์ฉํ ์ ์๋ ๊ธธ์ด๋ณด๋ค ๊ธด ๊ธฐ์ฌ๊ฐ ์๊ธฐ ๋๋ฌธ์ ๊ธฐ์ฌ๋ด์ฉ์ ์์ฝํ ํ ์์ญ๋ ๋ด์ฉ์ `Example`์ ์ถ๊ฐํฉ๋๋ค.
๋ด์ค๊ธฐ์ฌ๋ฅผ ์ ํํ๊ณ `Submit`๋ฒํผ์ ๋๋ฅด๋ฉด ๊ธฐ์ฌ์ ๊ฐ์ ํ๊ฐ๋ฅผ ํ์ธํ ์ ์์ต๋๋ค.
"""
    )

    # Populate news.examples before it is handed to the loaded interface.
    news.update_news_examples()

    article_box = gr.Textbox(placeholder="๋ด์ค ๊ธฐ์ฌ ๋ด์ฉ์ ์ ๋ ฅํ์ธ์.")
    gr.load(
        "models/gabrielyang/finance_news_classifier-KR_v7",
        inputs=article_box,
        examples=news.examples,
    )

if __name__ == "__main__":
    demo.launch()