Spaces:
Sleeping
Sleeping
File size: 1,701 Bytes
0f7edc8 de506dc 0f7edc8 de506dc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
import gradio as gr
import requests  # duplicate second `import requests` removed
from bs4 import BeautifulSoup
import re
from transformers import pipeline

# Small instruction-free Llama model used to generate the summary text.
pipe = pipeline("text-generation", model="meta-llama/Llama-3.2-1B")
def get_clean_text(url):
    """Fetch *url* and return its visible text, whitespace-normalized.

    Returns:
        str: the page text with scripts/styles removed and all whitespace
        collapsed to single spaces, or
        None: on a non-200 response or any request/parsing error.
    """
    try:
        # Bounded timeout so a stalled server cannot hang the whole app
        # (the original call could block indefinitely).
        response = requests.get(url, timeout=30)
        # Treat anything other than 200 OK as "no text available".
        if response.status_code != 200:
            return None
        soup = BeautifulSoup(response.text, 'html.parser')
        # Remove non-visible content before extracting text.
        for script in soup(["script", "style"]):
            script.decompose()
        text = soup.get_text()
        # Strip each line, split lines into phrases, and drop blanks.
        lines = (line.strip() for line in text.splitlines())
        chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
        text = '\n'.join(chunk for chunk in chunks if chunk)
        # Collapse all whitespace runs (including the newlines just added)
        # into single spaces to get one flat string for the prompt.
        text = re.sub(r'\s+', ' ', text)
        return text
    except Exception as e:
        # Best-effort scraper: report the failure and signal "no text".
        print(f"An error occurred: {e}")
        return None
def summarize(alink):
    """Summarize the news article at *alink* into bullet points.

    Args:
        alink: URL of the article to summarize; falls back to a default
            news page when empty.

    Returns:
        The pipeline's generation output, or an error string when the
        page text could not be retrieved.
    """
    # Bug fixes: the argument was previously overwritten unconditionally by
    # a hard-coded URL, and get_clean_text was never called — the function
    # object itself was interpolated into the prompt.
    if not alink:
        alink = "https://www.aljazeeramubasher.net/palestine/"
    text = get_clean_text(alink)
    if text is None:
        return "Could not retrieve text from the provided link."
    summary = pipe(f"summarize the following news into bullet points {text}")
    return summary
# Bug fix: gradio's class is `Interface` (capital I), the keyword arguments
# are `inputs`/`outputs` (plural), and the interface must be launched to
# actually serve the app.
gr.Interface(fn=summarize, inputs="text", outputs="text").launch()
# gr.load("models/meta-llama/Llama-3.2-1B").launch()