Spaces:
Sleeping
Sleeping
import gradio as gr | |
import selenium | |
import requests | |
from bs4 import BeautifulSoup | |
import pandas as pd | |
from selenium import webdriver | |
from selenium.webdriver.common.keys import Keys | |
import pandas as pd | |
import time | |
from transformers import pipeline | |
# Search Query | |
# Column order of the scraped table (the 'Sentiment' column is appended last).
_NEWS_COLUMNS = ["Title", "Source", "Time", "Author", "Link"]
# The original `.loc[0:5]` label slice was inclusive, i.e. it kept six rows.
_MAX_ARTICLES = 6
_REQUEST_TIMEOUT_SECONDS = 15
_SENTIMENT_MODEL = "finiteautomata/bertweet-base-sentiment-analysis"
# Lazily created by _get_sentiment_classifier() and reused across calls.
_SENTIMENT_CLASSIFIER = None


def _build_search_url(query):
    """Return the Google News search URL for the lower-cased *query*."""
    # Function-scope import so this block does not depend on the file's
    # import list being edited.
    from urllib.parse import quote

    # quote() percent-encodes every reserved character (and non-ASCII text),
    # not only the '&', '=', '+' and ' ' that the hand-rolled table handled.
    encoded_query = quote(query.lower(), safe="")
    return (
        f"https://news.google.com/search?q={encoded_query}"
        "&hl=en-US&gl=in&ceid=US%3Aen&num=3"
    )


def _parse_article(article):
    """Turn one ``<article>`` tag into a row dict, or None if it is unusable.

    The positional layout of the text fragments (0 = source, 2 = title,
    3 = time, 4 = author) mirrors what the page markup produced when this
    scraper was written; articles lacking a link or a title are skipped
    instead of raising.
    """
    from urllib.parse import urljoin

    anchor = article.find('a')
    href = anchor.get('href') if anchor is not None else None
    if not href:
        return None

    parts = article.get_text(separator='\n').split('\n')
    if len(parts) < 3:
        return None

    return {
        'Title': parts[2],
        'Source': parts[0],
        'Time': parts[3] if len(parts) > 3 else 'Missing',
        'Author': parts[4].split('By ')[-1] if len(parts) > 4 else 'Missing',
        # Resolves "./articles/..." to "https://news.google.com/articles/..."
        # and leaves already-absolute links untouched.
        'Link': urljoin("https://news.google.com/", href),
    }


def _get_sentiment_classifier():
    """Return the shared sentiment pipeline, loading it on first use."""
    global _SENTIMENT_CLASSIFIER
    if _SENTIMENT_CLASSIFIER is None:
        # Loading the model is expensive; do it once rather than per request.
        _SENTIMENT_CLASSIFIER = pipeline(model=_SENTIMENT_MODEL)
    return _SENTIMENT_CLASSIFIER


def news_and_analysis(query):
    """Search Google News for *query* and attach a sentiment to each headline.

    Args:
        query: Free-text search topic. ``None``, empty or whitespace-only
            values yield an empty table without any network access.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Title``, ``Source``,
        ``Time``, ``Author``, ``Link`` and ``Sentiment`` holding at most six
        articles. ``Sentiment`` is the string form of the classifier output
        for the article title.

    Raises:
        requests.RequestException: if the search request fails, times out or
            returns an HTTP error status.
    """
    if not query or not query.strip():
        return pd.DataFrame(columns=[*_NEWS_COLUMNS, 'Sentiment'])

    # TLS verification is left at the library default (enabled) and a timeout
    # is set so a stalled connection cannot hang the UI callback forever.
    response = requests.get(
        _build_search_url(query), timeout=_REQUEST_TIMEOUT_SECONDS
    )
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')
    rows = [
        row
        for row in map(_parse_article, soup.find_all('article'))
        if row is not None
    ]
    # Building the frame from the already-truncated list avoids assigning
    # into a slice of another DataFrame.
    news_df = pd.DataFrame(rows[:_MAX_ARTICLES], columns=_NEWS_COLUMNS)

    # NOTE: the previous version started a headless Chrome driver here that
    # was never used and never quit, leaking a browser process per call.
    if news_df.empty:
        news_df['Sentiment'] = ''
    else:
        classifier = _get_sentiment_classifier()
        news_df['Sentiment'] = [
            str(classifier(title)) for title in news_df['Title']
        ]

    print(news_df)
    return news_df
# Gradio UI: a topic text box, a submit button, and a table that shows the
# DataFrame returned by news_and_analysis for the entered topic.
with gr.Blocks() as demo:
    topic = gr.Textbox(
        label="Topic for which you want Google news and sentiment analysis"
    )
    btn = gr.Button(value="Submit")
    # Created after the button so the table is laid out below it.
    results_table = gr.Dataframe()
    btn.click(fn=news_and_analysis, inputs=topic, outputs=results_table)

demo.launch()