Spaces:

mksaad
/

NewsSummary

Sleeping

App Files Files Community

NewsSummary / app.py

mksaad

Update app.py

3ffc077 verified 8 months ago

raw

history blame contribute delete

2.03 kB

	import gradio as gr
	import requests


	import requests
	from bs4 import BeautifulSoup
	import re


	# from huggingface_hub import login
	# login()


	# Build the Hugging Face text-generation pipeline once, at module import, so
	# that summarize() reuses the same `pipe` object on every call.
	from transformers import pipeline
	# Alternative checkpoint, currently disabled:
	# pipe = pipeline("text-generation", model="openai-community/gpt2-medium")
	pipe = pipeline("text-generation", model="openai-community/gpt2")

	def get_clean_text(url, timeout=10):
	    """Fetch *url* and return its visible text with whitespace normalised.

	    Args:
	        url: Address of the page to download.
	        timeout: Seconds to wait for the server before giving up. Without
	            a timeout a stalled connection would block the caller forever.

	    Returns:
	        The page text as a single string in which every run of whitespace
	        (spaces, tabs, newlines) is collapsed to one space and leading and
	        trailing whitespace is removed, or ``None`` when the request does
	        not return HTTP 200 or any error occurs while fetching or parsing.
	    """
	    try:
	        response = requests.get(url, timeout=timeout)

	        # Anything other than a plain 200 is treated as a failed fetch.
	        if response.status_code != 200:
	            return None

	        soup = BeautifulSoup(response.text, 'html.parser')

	        # <script> and <style> bodies are code, not readable page content.
	        for tag in soup(["script", "style"]):
	            tag.decompose()

	        # str.split() with no argument splits on any whitespace run and
	        # discards empty pieces, so this single expression strips each
	        # line, drops blank lines and collapses repeated whitespace.
	        return ' '.join(soup.get_text().split())

	    except Exception as e:
	        # Deliberate best-effort boundary: callers only distinguish
	        # "got text" from "got nothing", so report the problem and signal
	        # failure with None instead of propagating.
	        print(f"An error occurred: {e}")
	        return None



	def summarize(link):
	    """Download the page at *link* and ask the text-generation model to summarise it.

	    Args:
	        link: URL of the news page to summarise.

	    Returns:
	        Whatever ``pipe`` returns for the prompt; the Gradio interface in
	        this module renders that value as JSON.

	    Raises:
	        gr.Error: If *link* is blank or no text could be retrieved from it,
	            so the UI shows a clear message instead of generating text
	            from the literal string "None".
	    """
	    if not link or not link.strip():
	        raise gr.Error("Please provide a URL to summarize.")

	    text = get_clean_text(link.strip())

	    # get_clean_text() signals failure with None; an empty string means the
	    # page had no visible text. Neither is worth sending to the model.
	    if not text:
	        raise gr.Error(f"Could not retrieve any text from {link}")

	    # NOTE(review): very long pages may exceed the model's context window;
	    # confirm whether the prompt needs truncating before generation.
	    return pipe(f"summarize the following news into bullet points {text}")


	# Build the web UI: one text input is passed to summarize(), and the value
	# it returns is rendered with the JSON output component.
	demo = gr.Interface(fn=summarize, inputs="text", outputs="json")
	# Launch immediately when this module is executed. There is no
	# `if __name__ == "__main__":` guard, so importing the module launches it too.
	demo.launch()




	# Alternative wiring with textbox components and a main guard (disabled):
	# demo = gr.Interface(fn=summarize, inputs="textbox", outputs="textbox")
	# if __name__ == "__main__":
	#     demo.launch()

	# # gr.load("models/meta-llama/Llama-3.2-1B").launch()