pendar02 committed on
Commit
d16f597
·
verified ·
1 Parent(s): 6cd4890

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -1
app.py CHANGED
@@ -84,12 +84,31 @@ def process_excel(uploaded_file):
84
  st.error(f"Error processing file: {str(e)}")
85
  return None
86
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  def generate_summary(text, model, tokenizer):
88
  """Generate summary for single abstract"""
89
  if not isinstance(text, str) or not text.strip():
90
  return "No abstract available to summarize."
91
 
92
- inputs = tokenizer(text, return_tensors="pt", max_length=1024, truncation=True)
 
 
 
93
 
94
  with torch.no_grad():
95
  summary_ids = model.generate(
 
84
  st.error(f"Error processing file: {str(e)}")
85
  return None
86
 
87
+ def preprocess_text(text):
88
+ """Preprocess text to add appropriate formatting before summarization"""
89
+ if not isinstance(text, str) or not text.strip():
90
+ return text
91
+
92
+ # Split text into sentences (basic implementation)
93
+ sentences = [s.strip() for s in text.replace('. ', '.\n').split('\n')]
94
+
95
+ # Remove empty sentences
96
+ sentences = [s for s in sentences if s]
97
+
98
+ # Join with proper line breaks
99
+ formatted_text = '\n'.join(sentences)
100
+
101
+ return formatted_text
102
+
103
  def generate_summary(text, model, tokenizer):
104
  """Generate summary for single abstract"""
105
  if not isinstance(text, str) or not text.strip():
106
  return "No abstract available to summarize."
107
 
108
+ # Preprocess the text first
109
+ formatted_text = preprocess_text(text)
110
+
111
+ inputs = tokenizer(formatted_text, return_tensors="pt", max_length=1024, truncation=True)
112
 
113
  with torch.no_grad():
114
  summary_ids = model.generate(