import gradio as gr
import requests
from bs4 import BeautifulSoup
import re

# from huggingface_hub import login
# login()

# Use a pipeline as a high-level helper
from transformers import pipeline

# pipe = pipeline("text-generation", model="openai-community/gpt2-medium")
pipe = pipeline("text-generation", model="openai-community/gpt2")


def get_clean_text(url):
    try:
        # Send a GET request to the URL
        response = requests.get(url)

        # Check if the request was successful
        if response.status_code != 200:
            return None

        # Parse the HTML content using BeautifulSoup
        soup = BeautifulSoup(response.text, 'html.parser')

        # Remove all script and style elements
        for script in soup(["script", "style"]):
            script.decompose()

        # Get the text from the HTML content
        text = soup.get_text()

        # Break the text into lines and remove leading and trailing whitespace
        lines = (line.strip() for line in text.splitlines())

        # Break multi-headlines into a line each
        chunks = (phrase.strip() for line in lines for phrase in line.split("  "))

        # Remove blank lines
        text = '\n'.join(chunk for chunk in chunks if chunk)

        # Collapse any remaining runs of whitespace into single spaces
        text = re.sub(r'\s+', ' ', text)

        return text
    except Exception as e:
        print(f"An error occurred: {e}")
        return None


def summarize(link):
    # link = "https://www.aljazeeramubasher.net/palestine/"
    text = get_clean_text(link)
    if text is None:
        return {"error": "Could not fetch or parse the page."}

    # GPT-2 has a 1024-token context window, so trim the page text
    # (rough character-based cutoff) to keep the prompt within bounds.
    text = text[:2000]

    summary = pipe(
        f"summarize the following news into bullet points {text}",
        max_new_tokens=128,
    )
    return summary


demo = gr.Interface(fn=summarize, inputs="text", outputs="json")
demo.launch()

# demo = gr.Interface(fn=summarize, inputs="textbox", outputs="textbox")
# if __name__ == "__main__":
#     demo.launch()
#
# gr.load("models/meta-llama/Llama-3.2-1B").launch()