Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -84,12 +84,31 @@ def process_excel(uploaded_file):
|
|
84 |
st.error(f"Error processing file: {str(e)}")
|
85 |
return None
|
86 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
87 |
def generate_summary(text, model, tokenizer):
|
88 |
"""Generate summary for single abstract"""
|
89 |
if not isinstance(text, str) or not text.strip():
|
90 |
return "No abstract available to summarize."
|
91 |
|
92 |
-
|
|
|
|
|
|
|
93 |
|
94 |
with torch.no_grad():
|
95 |
summary_ids = model.generate(
|
|
|
84 |
st.error(f"Error processing file: {str(e)}")
|
85 |
return None
|
86 |
|
87 |
+
def preprocess_text(text):
    """Put each sentence of *text* on its own line before summarization.

    Sentence detection is deliberately basic: every ". " (a period followed
    by a space) is treated as a sentence boundary, and existing newlines are
    kept as boundaries too. Each resulting piece is stripped of surrounding
    whitespace and empty pieces are discarded.

    Args:
        text: The raw text to format.

    Returns:
        The sentences joined with single newlines. If *text* is not a
        string, or is empty / whitespace-only, it is returned unchanged.
    """
    # Nothing to format: hand the value back untouched.
    if not isinstance(text, str) or not text.strip():
        return text

    # Rewriting ". " as ".\n" lets one split on "\n" handle both the sentence
    # boundaries found here and any line breaks already present in the input.
    pieces = text.replace('. ', '.\n').split('\n')

    # Strip each piece, then drop the ones that end up empty (blank lines,
    # or the remainder after a trailing ". ").
    stripped_pieces = (piece.strip() for piece in pieces)
    return '\n'.join(piece for piece in stripped_pieces if piece)
|
102 |
+
|
103 |
def generate_summary(text, model, tokenizer):
|
104 |
"""Generate summary for single abstract"""
|
105 |
if not isinstance(text, str) or not text.strip():
|
106 |
return "No abstract available to summarize."
|
107 |
|
108 |
+
# Preprocess the text first
|
109 |
+
formatted_text = preprocess_text(text)
|
110 |
+
|
111 |
+
inputs = tokenizer(formatted_text, return_tensors="pt", max_length=1024, truncation=True)
|
112 |
|
113 |
with torch.no_grad():
|
114 |
summary_ids = model.generate(
|