Spaces:
Sleeping
Sleeping
import gradio as gr | |
import requests | |
import requests | |
from bs4 import BeautifulSoup | |
import re | |
# from huggingface_hub import login | |
# login() | |
# Use a pipeline as a high-level helper | |
from transformers import pipeline | |
# Text-generation pipeline built once at module level and shared by summarize().
# Larger alternative checkpoint: model="openai-community/gpt2-medium"
pipe = pipeline(
    task="text-generation",
    model="openai-community/gpt2",
)
def get_clean_text(url, timeout=10):
    """Download *url* and return its visible text on a single line.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely on a stalled connection.

    Returns:
        The page text with ``<script>``/``<style>`` content removed and every
        run of whitespace collapsed to one space, or ``None`` when the request
        fails, the response status is not 200, or the HTML cannot be parsed.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as e:
        # Covers malformed URLs, DNS/connection failures and timeouts.
        print(f"An error occurred: {e}")
        return None

    if response.status_code != 200:
        return None

    try:
        soup = BeautifulSoup(response.text, 'html.parser')
        # Script and style bodies are not human-readable page content.
        for tag in soup(["script", "style"]):
            tag.decompose()
        raw_text = soup.get_text()
    except Exception as e:
        # Best-effort contract: callers expect None rather than an exception
        # when a page cannot be turned into text.
        print(f"An error occurred: {e}")
        return None

    # split() with no argument drops leading/trailing whitespace and treats
    # any whitespace run (spaces, tabs, newlines) as a single separator.
    return " ".join(raw_text.split())
# Prompt text placed in front of the scraped article.
_PROMPT_PREFIX = "summarize the following news into bullet points "
# Number of tokens the model is allowed to generate after the prompt.
_MAX_NEW_TOKENS = 128
# Upper bound on prompt + generated tokens; GPT-2 checkpoints use a
# 1024-token context window.
_MAX_CONTEXT_TOKENS = 1024
# Slack for token-count drift when the truncated text is decoded and then
# re-tokenized together with the prefix.
_TOKEN_SAFETY_MARGIN = 16


def summarize(link):
    """Fetch the page at *link* and run it through the text-generation pipeline.

    Args:
        link: URL of the article to summarize.

    Returns:
        The pipeline output for the prompt (a JSON-serializable structure
        holding the generated text).

    Raises:
        gr.Error: If *link* is blank or no text could be extracted from the
            page, so the failure is reported instead of prompting the model
            with the literal string "None".
    """
    url = (link or "").strip()
    if not url:
        raise gr.Error("Please provide a URL to summarize.")

    text = get_clean_text(url)
    if not text:
        raise gr.Error(f"Could not retrieve any readable text from {url}")

    # Trim the article to the model's context window; a whole web page
    # routinely exceeds it and would make generation fail.
    tokenizer = pipe.tokenizer
    context_tokens = min(tokenizer.model_max_length, _MAX_CONTEXT_TOKENS)
    prefix_tokens = len(tokenizer.encode(_PROMPT_PREFIX))
    budget = max(
        context_tokens - _MAX_NEW_TOKENS - prefix_tokens - _TOKEN_SAFETY_MARGIN,
        1,
    )
    token_ids = tokenizer.encode(text, truncation=True, max_length=budget)
    text = tokenizer.decode(token_ids)

    return pipe(f"{_PROMPT_PREFIX}{text}", max_new_tokens=_MAX_NEW_TOKENS)
# Gradio UI: a single text input is passed to summarize() and the value it
# returns is rendered as JSON.
demo = gr.Interface(
    fn=summarize,
    inputs="text",
    outputs="json",
)
demo.launch()
# demo = gr.interface(fn=summarize, inputs="textbox", outputs="textbox") | |
# if __name__ == "__main__": | |
# demo.launch() | |
# # gr.load("models/meta-llama/Llama-3.2-1B").launch() |