Spaces:
Sleeping
Sleeping
commit change
Browse files
app.md
DELETED
@@ -1,86 +0,0 @@
|
|
1 |
-
```python
|
2 |
-
import streamlit as st
|
3 |
-
import requests
|
4 |
-
import os
|
5 |
-
from transformers import pipeline
|
6 |
-
|
7 |
-
huggingface_token = os.getenv('HF_TOKEN')
|
8 |
-
|
9 |
-
|
10 |
-
API_URL = "https://api-inference.huggingface.co/models/facebook/bart-large-cnn"
|
11 |
-
headers = {"Authorization": f"Bearer {huggingface_token}"}
|
12 |
-
|
13 |
-
def query(payload):
|
14 |
-
response = requests.post(API_URL, headers=headers, json=payload)
|
15 |
-
return response.json()
|
16 |
-
|
17 |
-
output = query({
|
18 |
-
"inputs": "The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct.",
|
19 |
-
})
|
20 |
-
|
21 |
-
print(output)
|
22 |
-
|
23 |
-
|
24 |
-
st.title("Text Summarization App")
|
25 |
-
sentence = st.text_area('Please paste your article :', height=68)
|
26 |
-
output = query({
|
27 |
-
"inputs": sentence,
|
28 |
-
})
|
29 |
-
button = st.button("Summarize", type="primary")
|
30 |
-
# st.write(output[0]["summary_text"])
|
31 |
-
|
32 |
-
if button and sentence:
|
33 |
-
st.write(output[0]["summary_text"])
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
# @st.cache_data(allow_output_mutation=True)
|
40 |
-
# def load_summarizer():
|
41 |
-
# model = pipeline("summarization", device=0)
|
42 |
-
# return model
|
43 |
-
|
44 |
-
|
45 |
-
def generate_chunks(inp_str):
|
46 |
-
max_chunk = 500
|
47 |
-
inp_str = inp_str.replace('.', '.<eos>')
|
48 |
-
inp_str = inp_str.replace('?', '?<eos>')
|
49 |
-
inp_str = inp_str.replace('!', '!<eos>')
|
50 |
-
|
51 |
-
sentences = inp_str.split('<eos>')
|
52 |
-
current_chunk = 0
|
53 |
-
chunks = []
|
54 |
-
for sentence in sentences:
|
55 |
-
if len(chunks) == current_chunk + 1:
|
56 |
-
if len(chunks[current_chunk]) + len(sentence.split(' ')) <= max_chunk:
|
57 |
-
chunks[current_chunk].extend(sentence.split(' '))
|
58 |
-
else:
|
59 |
-
current_chunk += 1
|
60 |
-
chunks.append(sentence.split(' '))
|
61 |
-
else:
|
62 |
-
chunks.append(sentence.split(' '))
|
63 |
-
|
64 |
-
for chunk_id in range(len(chunks)):
|
65 |
-
chunks[chunk_id] = ' '.join(chunks[chunk_id])
|
66 |
-
return chunks
|
67 |
-
|
68 |
-
|
69 |
-
summarizer = load_summarizer()
|
70 |
-
st.title("Summarize Text")
|
71 |
-
sentence = st.text_area('Please paste your article :', height=30)
|
72 |
-
button = st.button("Summarize")
|
73 |
-
|
74 |
-
max = st.sidebar.slider('Select max', 50, 500, step=10, value=150)
|
75 |
-
min = st.sidebar.slider('Select min', 10, 450, step=10, value=50)
|
76 |
-
do_sample = st.sidebar.checkbox("Do sample", value=False)
|
77 |
-
with st.spinner("Generating Summary.."):
|
78 |
-
if button and sentence:
|
79 |
-
chunks = generate_chunks(sentence)
|
80 |
-
res = summarizer(chunks,
|
81 |
-
max_length=max,
|
82 |
-
min_length=min,
|
83 |
-
do_sample=do_sample)
|
84 |
-
text = ' '.join([summ['summary_text'] for summ in res])
|
85 |
-
# st.write(result[0]['summary_text'])
|
86 |
-
st.write(text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app.py
CHANGED
@@ -1,84 +1,86 @@
|
|
|
|
1 |
import streamlit as st
|
2 |
import requests
|
3 |
import os
|
4 |
from transformers import pipeline
|
5 |
|
6 |
-
# Get Hugging Face token from environment variables
|
7 |
huggingface_token = os.getenv('HF_TOKEN')
|
8 |
-
if not huggingface_token:
|
9 |
-
st.error("Hugging Face token is missing. Please set it as an environment variable 'HF_TOKEN'.")
|
10 |
|
11 |
-
|
12 |
API_URL = "https://api-inference.huggingface.co/models/facebook/bart-large-cnn"
|
13 |
headers = {"Authorization": f"Bearer {huggingface_token}"}
|
14 |
|
15 |
-
# Function to query Hugging Face API
|
16 |
def query(payload):
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
inp_str = inp_str.replace('.', '.<eos>')
|
28 |
inp_str = inp_str.replace('?', '?<eos>')
|
29 |
inp_str = inp_str.replace('!', '!<eos>')
|
30 |
-
|
31 |
sentences = inp_str.split('<eos>')
|
|
|
32 |
chunks = []
|
33 |
-
current_chunk = 0
|
34 |
-
|
35 |
for sentence in sentences:
|
36 |
-
if len(chunks) == current_chunk + 1:
|
37 |
-
if len(chunks[current_chunk]
|
38 |
-
chunks[current_chunk]
|
39 |
else:
|
40 |
current_chunk += 1
|
41 |
-
chunks.append(sentence)
|
42 |
else:
|
43 |
-
chunks.append(sentence)
|
44 |
-
|
45 |
-
return [chunk.strip() for chunk in chunks]
|
46 |
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
# Text area for user input
|
51 |
-
sentence = st.text_area('Please paste your article:', height=200)
|
52 |
|
53 |
-
# Sidebar for options
|
54 |
-
max_length = st.sidebar.slider('Max summary length:', 50, 500, step=10, value=150)
|
55 |
-
min_length = st.sidebar.slider('Min summary length:', 10, 450, step=10, value=50)
|
56 |
-
do_sample = st.sidebar.checkbox("Use sampling", value=False)
|
57 |
|
58 |
-
|
59 |
-
|
|
|
|
|
60 |
|
61 |
-
|
62 |
-
|
63 |
-
|
|
|
|
|
64 |
chunks = generate_chunks(sentence)
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
"max_length": max_length,
|
73 |
-
"do_sample": do_sample
|
74 |
-
},
|
75 |
-
})
|
76 |
-
if output and "summary_text" in output:
|
77 |
-
summaries.append(output["summary_text"])
|
78 |
-
else:
|
79 |
-
st.error("Error in summarization. Please check your input or API settings.")
|
80 |
-
|
81 |
-
# Display the combined summary
|
82 |
-
final_summary = " ".join(summaries)
|
83 |
-
st.write("### Summary:")
|
84 |
-
st.write(final_summary)
|
|
|
1 |
+
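# NOTE: a stray Markdown code fence ("```python") was pasted here; it is not valid Python and has been removed.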
|
2 |
import streamlit as st
|
3 |
import requests
|
4 |
import os
|
5 |
from transformers import pipeline
|
6 |
|
|
|
7 |
# Access token for the Hugging Face Inference API, read from the HF_TOKEN
# environment variable. It is None when the variable is not set.
huggingface_token = os.getenv('HF_TOKEN')

# Endpoint of the hosted facebook/bart-large-cnn model.
API_URL = "https://api-inference.huggingface.co/models/facebook/bart-large-cnn"

# Attach the Authorization header only when a token is available, so that a
# missing HF_TOKEN does not send the literal credential "Bearer None".
headers = (
    {"Authorization": f"Bearer {huggingface_token}"} if huggingface_token else {}
)
|
12 |
|
|
|
13 |
def query(payload, timeout=60):
    """POST ``payload`` as JSON to ``API_URL`` and return the decoded reply.

    Args:
        payload: JSON-serialisable request body; callers in this file pass a
            dict with an ``"inputs"`` key.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        The response body decoded from JSON, whatever the service returned.

    Raises:
        requests.exceptions.RequestException: on connection failure or timeout.
    """
    response = requests.post(
        API_URL, headers=headers, json=payload, timeout=timeout
    )
    return response.json()
|
16 |
+
|
17 |
+
# The hard-coded smoke-test request (and its print) that used to run here was
# dropped: Streamlit re-executes the script on every interaction, so it cost
# one extra API call per rerun.

st.title("Text Summarization App")
sentence = st.text_area('Please paste your article :', height=68)
button = st.button("Summarize", type="primary")

# Call the API only after the user clicks the button with non-empty text,
# instead of on every rerun (including with an empty text area).
if button and sentence:
    output = query({"inputs": sentence})
    # A successful reply is indexed as output[0]["summary_text"]; anything
    # else (presumably an error payload - confirm against the API docs) is
    # shown to the user instead of raising KeyError/TypeError.
    if isinstance(output, list) and output and "summary_text" in output[0]:
        st.write(output[0]["summary_text"])
    else:
        st.error(f"Summarization failed: {output}")
|
34 |
+
|
35 |
+
|
36 |
+
|
37 |
+
|
38 |
+
|
39 |
+
# @st.cache_data(allow_output_mutation=True)
|
40 |
+
# def load_summarizer():
|
41 |
+
# model = pipeline("summarization", device=0)
|
42 |
+
# return model
|
43 |
+
|
44 |
+
|
45 |
+
def generate_chunks(inp_str, max_chunk=500):
    """Split text into chunks of whole sentences, at most ``max_chunk`` words each.

    Sentences are delimited by ``.``, ``?`` and ``!``. Consecutive sentences
    are packed into the current chunk while the word total stays within
    ``max_chunk``; otherwise a new chunk is started. A single sentence longer
    than ``max_chunk`` words is kept whole as its own chunk.

    Args:
        inp_str: Text to split.
        max_chunk: Maximum number of whitespace-separated words per chunk.

    Returns:
        A list of chunk strings with words joined by single spaces. Empty or
        whitespace-only input yields an empty list.
    """
    # Mark every sentence terminator so the text can be cut right after it.
    for mark in ('.', '?', '!'):
        inp_str = inp_str.replace(mark, mark + '<eos>')

    chunks = []  # each entry is the list of words making up one chunk
    for sentence in inp_str.split('<eos>'):
        # split() without an argument drops the empty strings that
        # split(' ') produced for leading or repeated spaces.
        words = sentence.split()
        if not words:
            continue
        if chunks and len(chunks[-1]) + len(words) <= max_chunk:
            chunks[-1].extend(words)
        else:
            chunks.append(words)

    return [' '.join(words) for words in chunks]
|
|
|
|
|
67 |
|
|
|
|
|
|
|
|
|
68 |
|
69 |
+
@st.cache_resource
def load_summarizer():
    """Build the local summarization pipeline once and reuse it across reruns."""
    # NOTE(review): the commented-out draft passed device=0; it is omitted here
    # so the library default device is used - confirm the target hardware.
    return pipeline("summarization")


# load_summarizer was previously only present as commented-out code, so this
# call raised NameError at startup.
summarizer = load_summarizer()
st.title("Summarize Text")
# Explicit keys keep these widgets distinct from the identically labelled ones
# created earlier in the script.
# NOTE(review): height raised from 30 to 68 because recent Streamlit releases
# appear to reject text areas shorter than 68 px - confirm for the pinned version.
sentence = st.text_area(
    'Please paste your article :', height=68, key="local_article"
)
button = st.button("Summarize", key="local_summarize")

# Named max_length/min_length so the builtins max() and min() are not shadowed.
max_length = st.sidebar.slider('Select max', 50, 500, step=10, value=150)
min_length = st.sidebar.slider('Select min', 10, 450, step=10, value=50)
do_sample = st.sidebar.checkbox("Do sample", value=False)

if button and sentence:
    # Show the spinner only while the model is actually running.
    with st.spinner("Generating Summary.."):
        chunks = generate_chunks(sentence)
        res = summarizer(
            chunks,
            max_length=max_length,
            min_length=min_length,
            do_sample=do_sample,
        )
    # One summary per chunk, joined into a single paragraph.
    st.write(' '.join(summ['summary_text'] for summ in res))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app2.py
ADDED
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import requests
|
3 |
+
import os
|
4 |
+
from transformers import pipeline
|
5 |
+
|
6 |
+
# Get Hugging Face token from environment variables
huggingface_token = os.getenv('HF_TOKEN')
if not huggingface_token:
    st.error("Hugging Face token is missing. Please set it as an environment variable 'HF_TOKEN'.")
    # Halt this run: without a token every request below would be sent with
    # the header "Bearer None".
    st.stop()

# Hugging Face API details
API_URL = "https://api-inference.huggingface.co/models/facebook/bart-large-cnn"
headers = {"Authorization": f"Bearer {huggingface_token}"}
|
14 |
+
|
15 |
+
# Function to query Hugging Face API
|
16 |
+
def query(payload, timeout=60):
    """POST ``payload`` as JSON to ``API_URL`` and return the decoded reply.

    Args:
        payload: JSON-serialisable request body.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        The decoded JSON body on success, or ``None`` after reporting the
        failure through ``st.error``.
    """
    try:
        response = requests.post(
            API_URL, headers=headers, json=payload, timeout=timeout
        )
        response.raise_for_status()  # Raise error for bad HTTP responses
        return response.json()
    # ValueError covers a non-JSON body on requests versions whose decode
    # error is not a RequestException subclass.
    except (requests.exceptions.RequestException, ValueError) as e:
        st.error(f"Error querying Hugging Face API: {e}")
        return None
|
24 |
+
|
25 |
+
# Function to generate chunks of text
|
26 |
+
def generate_chunks(inp_str, max_chunk=500):
    """Split text into chunks of whole sentences, at most ``max_chunk`` words each.

    Sentences are delimited by ``.``, ``?`` and ``!``. Consecutive sentences
    are appended to the current chunk while its word total stays within
    ``max_chunk``; otherwise a new chunk is started. A single sentence longer
    than ``max_chunk`` words is kept whole as its own chunk.

    Args:
        inp_str: Text to split.
        max_chunk: Maximum number of whitespace-separated words per chunk.

    Returns:
        A list of non-empty chunk strings with sentences separated by a single
        space. Empty or whitespace-only input yields an empty list.
    """
    # Mark every sentence terminator so the text can be cut right after it.
    for mark in ('.', '?', '!'):
        inp_str = inp_str.replace(mark, mark + '<eos>')

    chunks = []
    word_counts = []  # running word total per chunk, avoids re-splitting it
    for sentence in inp_str.split('<eos>'):
        sentence = sentence.strip()
        n_words = len(sentence.split())
        if not n_words:
            # Skip the empty tail after the final terminator and blank input,
            # so no empty chunk is ever produced.
            continue
        if chunks and word_counts[-1] + n_words <= max_chunk:
            chunks[-1] += " " + sentence
            word_counts[-1] += n_words
        else:
            chunks.append(sentence)
            word_counts.append(n_words)

    return chunks
|
46 |
+
|
47 |
+
# Streamlit UI
st.title("Text Summarization App")

# Text area for user input
sentence = st.text_area('Please paste your article:', height=200)

# Sidebar for options
max_length = st.sidebar.slider('Max summary length:', 50, 500, step=10, value=150)
min_length = st.sidebar.slider('Min summary length:', 10, 450, step=10, value=50)
do_sample = st.sidebar.checkbox("Use sampling", value=False)

# Summarization button
button = st.button("Summarize", type="primary")


def _extract_summary(output):
    """Return the ``summary_text`` string from an API reply, or None if absent."""
    # NOTE(review): summarization replies appear to arrive as a list holding
    # one dict; a bare dict is accepted too - confirm against the API docs.
    if isinstance(output, list) and output:
        output = output[0]
    if isinstance(output, dict):
        return output.get("summary_text")
    return None


# Hugging Face API summarization
if button and sentence:
    if min_length > max_length:
        # The two sliders are independent, so an inverted range is possible.
        st.error("Min summary length must not exceed max summary length.")
    else:
        summaries = []
        with st.spinner("Summarizing..."):
            for chunk in generate_chunks(sentence):
                if not chunk:
                    # Nothing to summarize in an empty chunk.
                    continue
                output = query({
                    "inputs": chunk,
                    "parameters": {
                        "min_length": min_length,
                        "max_length": max_length,
                        "do_sample": do_sample,
                    },
                })
                summary = _extract_summary(output)
                if summary:
                    summaries.append(summary)
                else:
                    st.error("Error in summarization. Please check your input or API settings.")

        # Display the combined summary only when at least one chunk succeeded.
        if summaries:
            st.write("### Summary:")
            st.write(" ".join(summaries))
|