import streamlit as st
import requests
import os
# Get the Hugging Face token from environment variables
huggingface_token = os.getenv('HF_TOKEN')
if not huggingface_token:
    st.error("Hugging Face token is missing. Please set it as an environment variable 'HF_TOKEN'.")
    st.stop()  # Stop here: the API calls below would fail without a token
# Hugging Face Inference API details
API_URL = "https://api-inference.huggingface.co/models/facebook/bart-large-cnn"
headers = {"Authorization": f"Bearer {huggingface_token}"}
# Function to query the Hugging Face Inference API
def query(payload):
    try:
        response = requests.post(API_URL, headers=headers, json=payload)
        response.raise_for_status()  # Raise an error for bad HTTP responses
        return response.json()
    except requests.exceptions.RequestException as e:
        st.error(f"Error querying Hugging Face API: {e}")
        return None
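# Note (assumption about the response shape): for summarization models the
# Inference API usually returns a list of dicts on success, e.g.
# [{"summary_text": "..."}], and a plain dict such as {"error": "..."} while
# the model is still loading. The handling further down assumes this shape.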
# Function to split long input into chunks the model can handle
def generate_chunks(inp_str, max_chunk=500):
    # Mark sentence boundaries, then split on them
    inp_str = inp_str.replace('.', '.<eos>')
    inp_str = inp_str.replace('?', '?<eos>')
    inp_str = inp_str.replace('!', '!<eos>')
    sentences = inp_str.split('<eos>')
    chunks = []
    current_chunk = 0
    for sentence in sentences:
        if len(chunks) == current_chunk + 1:
            # Append the sentence to the current chunk if it still fits,
            # otherwise start a new chunk
            if len(chunks[current_chunk].split()) + len(sentence.split()) <= max_chunk:
                chunks[current_chunk] += " " + sentence
            else:
                current_chunk += 1
                chunks.append(sentence)
        else:
            chunks.append(sentence)
    # Trim whitespace and drop any empty chunks
    return [chunk.strip() for chunk in chunks if chunk.strip()]
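# Illustrative example (approximate figures, not from a real run): with
# max_chunk=500, a ~1,200-word article yields roughly three chunks of at most
# 500 words each, split on sentence boundaries, keeping each API request
# within the model's input limit.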
# Streamlit UI
st.title("Text Summarization App")
# Text area for user input
sentence = st.text_area('Please paste your article:', height=200)
# Sidebar for options
max_length = st.sidebar.slider('Max summary length:', 50, 500, step=10, value=150)
min_length = st.sidebar.slider('Min summary length:', 10, 450, step=10, value=50)
do_sample = st.sidebar.checkbox("Use sampling", value=False)
# Summarization button
button = st.button("Summarize", type="primary")
# Hugging Face API summarization
if button and sentence:
    with st.spinner("Summarizing..."):
        chunks = generate_chunks(sentence)
        summaries = []
        for chunk in chunks:
            output = query({
                "inputs": chunk,
                "parameters": {
                    "min_length": min_length,
                    "max_length": max_length,
                    "do_sample": do_sample
                },
            })
            # Successful responses arrive as a list of dicts, e.g. [{"summary_text": "..."}]
            if isinstance(output, list) and output and "summary_text" in output[0]:
                summaries.append(output[0]["summary_text"])
            else:
                st.error("Error in summarization. Please check your input or API settings.")
    # Display the combined summary
    if summaries:
        final_summary = " ".join(summaries)
        st.write("### Summary:")
        st.write(final_summary)
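# To run locally (assuming this file is saved as app.py):
#   export HF_TOKEN=<your Hugging Face access token>
#   streamlit run app.py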