File size: 2,848 Bytes
570b976
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import streamlit as st
import requests
import os
from transformers import pipeline

# Get Hugging Face token from environment variables
# NOTE(review): on a missing token the app only shows an error banner and keeps
# running — subsequent API calls will fail with 401s. Consider st.stop() here.
huggingface_token = os.getenv('HF_TOKEN')
if not huggingface_token:
    st.error("Hugging Face token is missing. Please set it as an environment variable 'HF_TOKEN'.")

# Hugging Face API details: hosted inference endpoint for the
# facebook/bart-large-cnn summarization model.
API_URL = "https://api-inference.huggingface.co/models/facebook/bart-large-cnn"
# Bearer-token auth header sent with every request in query() below.
headers = {"Authorization": f"Bearer {huggingface_token}"}

# Function to query Hugging Face API
def query(payload, timeout=60):
    """POST *payload* as JSON to the Hugging Face inference API.

    Args:
        payload: JSON-serializable dict (``inputs`` plus optional ``parameters``).
        timeout: seconds to wait before giving up on the request; without it
            a stalled endpoint would hang the Streamlit script indefinitely.

    Returns:
        The decoded JSON response (for summarization models this is a list of
        dicts with a ``summary_text`` key, or a dict with an ``error`` key),
        or ``None`` if the request failed — the error is surfaced via st.error.
    """
    try:
        response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
        response.raise_for_status()  # Raise error for bad HTTP responses
        return response.json()
    except requests.exceptions.RequestException as e:
        # Report in the UI rather than crashing the app; caller checks for None.
        st.error(f"Error querying Hugging Face API: {e}")
        return None

# Function to generate chunks of text
def generate_chunks(inp_str, max_chunk=500):
    """Split *inp_str* into chunks of at most *max_chunk* words.

    Sentences (delimited by '.', '?', '!') are greedily packed into the
    current chunk until adding the next sentence would exceed the word
    budget, at which point a new chunk is started. Note the naive marker
    replacement also splits on abbreviations and decimals (e.g. "3.14").

    Args:
        inp_str: the text to split.
        max_chunk: maximum number of whitespace-separated words per chunk.

    Returns:
        List of non-empty, stripped text chunks. Empty input yields [],
        so the caller never sends a blank request to the API.
    """
    # Tag each sentence terminator so we can split on sentence boundaries.
    for mark in ('.', '?', '!'):
        inp_str = inp_str.replace(mark, mark + '<eos>')

    chunks = []
    for sentence in inp_str.split('<eos>'):
        sentence = sentence.strip()
        if not sentence:
            continue  # skip empty fragments, e.g. the trailing split remainder
        # Merge into the last chunk while the combined word count fits.
        if chunks and len(chunks[-1].split()) + len(sentence.split()) <= max_chunk:
            chunks[-1] += " " + sentence
        else:
            chunks.append(sentence)

    return chunks

# Streamlit UI
st.title("Text Summarization App")

# Text area for user input
sentence = st.text_area('Please paste your article:', height=200)

# Sidebar for options
max_length = st.sidebar.slider('Max summary length:', 50, 500, step=10, value=150)
min_length = st.sidebar.slider('Min summary length:', 10, 450, step=10, value=50)
do_sample = st.sidebar.checkbox("Use sampling", value=False)

# Summarization button
button = st.button("Summarize", type="primary")

# Hugging Face API summarization
if button and sentence:
    with st.spinner("Summarizing..."):
        chunks = generate_chunks(sentence)
        summaries = []

        for chunk in chunks:
            output = query({
                "inputs": chunk,
                "parameters": {
                    "min_length": min_length,
                    "max_length": max_length,
                    "do_sample": do_sample
                },
            })
            # The summarization endpoint returns a LIST of dicts on success
            # ([{"summary_text": ...}]) and a dict with an "error" key on
            # failure (e.g. model still loading). The previous check tested
            # "summary_text" against the list itself, which is never true.
            if isinstance(output, list) and output and "summary_text" in output[0]:
                summaries.append(output[0]["summary_text"])
            elif isinstance(output, dict) and "error" in output:
                st.error(f"API error: {output['error']}")
            elif output is not None:
                # query() already reported request-level failures (None case).
                st.error("Error in summarization. Please check your input or API settings.")

        # Display the combined summary only if at least one chunk succeeded.
        if summaries:
            final_summary = " ".join(summaries)
            st.write("### Summary:")
            st.write(final_summary)