text-summarizer / app2.py
import streamlit as st
import requests
import os
# Get Hugging Face token from environment variables
huggingface_token = os.getenv('HF_TOKEN')
if not huggingface_token:
    st.error("Hugging Face token is missing. Please set it as an environment variable 'HF_TOKEN'.")
    st.stop()  # Do not continue without credentials
# Hugging Face API details
API_URL = "https://api-inference.huggingface.co/models/facebook/bart-large-cnn"
headers = {"Authorization": f"Bearer {huggingface_token}"}
# Function to query Hugging Face API
def query(payload):
    try:
        response = requests.post(API_URL, headers=headers, json=payload)
        response.raise_for_status()  # Raise an error for bad HTTP responses
        return response.json()
    except requests.exceptions.RequestException as e:
        st.error(f"Error querying Hugging Face API: {e}")
        return None
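# Note: for summarization models the Inference API usually returns a list with one
# dict per input, e.g. [{"summary_text": "..."}]; errors (missing token, model still
# loading) typically come back as a dict with an "error" key instead.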
# Function to generate chunks of text
def generate_chunks(inp_str, max_chunk=500):
    """Split the input into chunks of at most ~max_chunk words, on sentence boundaries."""
    inp_str = inp_str.replace('.', '.<eos>')
    inp_str = inp_str.replace('?', '?<eos>')
    inp_str = inp_str.replace('!', '!<eos>')
    sentences = inp_str.split('<eos>')
    chunks = []
    current_chunk = 0
    for sentence in sentences:
        if len(chunks) == current_chunk + 1:
            # Extend the current chunk while it stays within the word limit
            if len(chunks[current_chunk].split()) + len(sentence.split()) <= max_chunk:
                chunks[current_chunk] += " " + sentence
            else:
                current_chunk += 1
                chunks.append(sentence)
        else:
            # The first sentence starts the first chunk
            chunks.append(sentence)
    return [chunk.strip() for chunk in chunks if chunk.strip()]
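# Illustrative example: generate_chunks("First sentence. Second one? Third!") yields
# a single chunk holding all three sentences, since they are far below the default
# 500-word limit; a long article would instead be split into several ~500-word chunks.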
# Streamlit UI
st.title("Text Summarization App")
# Text area for user input
sentence = st.text_area('Please paste your article:', height=200)
# Sidebar for options
max_length = st.sidebar.slider('Max summary length:', 50, 500, step=10, value=150)
min_length = st.sidebar.slider('Min summary length:', 10, 450, step=10, value=50)
do_sample = st.sidebar.checkbox("Use sampling", value=False)
# Summarization button
button = st.button("Summarize", type="primary")
# Hugging Face API summarization
if button and sentence:
    with st.spinner("Summarizing..."):
        chunks = generate_chunks(sentence)
        summaries = []
        for chunk in chunks:
            output = query({
                "inputs": chunk,
                "parameters": {
                    "min_length": min_length,
                    "max_length": max_length,
                    "do_sample": do_sample
                },
            })
            # The API returns a list of results; the summary sits in the first entry
            if isinstance(output, list) and output and "summary_text" in output[0]:
                summaries.append(output[0]["summary_text"])
            else:
                st.error("Error in summarization. Please check your input or API settings.")

    # Display the combined summary
    if summaries:
        final_summary = " ".join(summaries)
        st.write("### Summary:")
        st.write(final_summary)
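# To try the app locally (assuming streamlit and requests are installed), set the
# token and launch Streamlit, e.g. on a Unix-like shell:
#   HF_TOKEN=<your token> streamlit run app2.py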