"""Streamlit app that summarizes pasted text via the Hugging Face Inference API."""

import os
import re

import requests
import streamlit as st
from transformers import pipeline  # noqa: F401  (not used in this script; kept as in the original)

# Get Hugging Face token from environment variables
huggingface_token = os.getenv('HF_TOKEN')

if not huggingface_token:
    st.error("Hugging Face token is missing. Please set it as an environment variable 'HF_TOKEN'.")
    # Without a token every request would carry "Bearer None", so halt the
    # script run here instead of rendering a UI that cannot work.
    st.stop()

# Hugging Face API details
API_URL = "https://api-inference.huggingface.co/models/facebook/bart-large-cnn"
headers = {"Authorization": f"Bearer {huggingface_token}"}

# Upper bound (seconds) on a single API call so a stalled connection cannot
# hang the app indefinitely.
REQUEST_TIMEOUT_SECONDS = 60

# A sentence boundary is terminal punctuation followed by whitespace; requiring
# the whitespace keeps tokens such as "3.14" or "example.com" intact.
_SENTENCE_BOUNDARY = re.compile(r"(?<=[.!?])\s+")


def query(payload):
    """POST *payload* as JSON to ``API_URL`` and return the decoded JSON body.

    Any request failure (connection error, timeout, non-2xx status) or an
    undecodable response body is reported with ``st.error`` and results in
    ``None`` being returned instead of an exception.
    """
    try:
        response = requests.post(
            API_URL,
            headers=headers,
            json=payload,
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
        response.raise_for_status()  # Raise error for bad HTTP responses
        return response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers JSON decoding failures on requests versions where
        # the decode error is not a RequestException subclass.
        st.error(f"Error querying Hugging Face API: {e}")
        return None


def generate_chunks(inp_str, max_chunk=500):
    """Split *inp_str* into chunks of whole sentences.

    Sentences are packed greedily: a sentence is added to the current chunk
    while the chunk's whitespace-separated word count stays at or below
    *max_chunk*; otherwise a new chunk is started. A single sentence longer
    than *max_chunk* words becomes a chunk of its own.

    Returns a list of non-empty strings; empty or whitespace-only input
    yields an empty list.
    """
    sentences = [s for s in _SENTENCE_BOUNDARY.split(inp_str.strip()) if s]

    chunks = []
    current_sentences = []
    current_words = 0
    for sentence in sentences:
        sentence_words = len(sentence.split())
        # Close the running chunk when this sentence would exceed the budget.
        if current_sentences and current_words + sentence_words > max_chunk:
            chunks.append(" ".join(current_sentences))
            current_sentences = []
            current_words = 0
        current_sentences.append(sentence)
        current_words += sentence_words

    if current_sentences:
        chunks.append(" ".join(current_sentences))
    return chunks


def _extract_summary(output):
    """Return the ``summary_text`` string from an API response, or ``None``.

    Accepts both a list of result objects (the first one is used) and a bare
    result object, so either response shape is handled.
    """
    if isinstance(output, list) and output:
        output = output[0]
    if isinstance(output, dict):
        text = output.get("summary_text")
        if isinstance(text, str):
            return text
    return None


# Streamlit UI
st.title("Text Summarization App")

# Text area for user input
sentence = st.text_area('Please paste your article:', height=200)

# Sidebar for options
max_length = st.sidebar.slider('Max summary length:', 50, 500, step=10, value=150)
min_length = st.sidebar.slider('Min summary length:', 10, 450, step=10, value=50)
do_sample = st.sidebar.checkbox("Use sampling", value=False)

# Summarization button
button = st.button("Summarize", type="primary")

# Hugging Face API summarization
if button and sentence.strip():
    if min_length > max_length:
        # The two sliders are independent, so reject an inverted range here
        # rather than sending it to the API.
        st.error("Min summary length must not be greater than max summary length.")
    else:
        with st.spinner("Summarizing..."):
            summaries = []
            for chunk in generate_chunks(sentence):
                output = query({
                    "inputs": chunk,
                    "parameters": {
                        "min_length": min_length,
                        "max_length": max_length,
                        "do_sample": do_sample,
                    },
                })
                summary = _extract_summary(output)
                if summary is not None:
                    summaries.append(summary)
                else:
                    st.error("Error in summarization. Please check your input or API settings.")

        # Display the combined summary only when at least one chunk succeeded
        if summaries:
            final_summary = " ".join(summaries)
            st.write("### Summary:")
            st.write(final_summary)