pendar02 committed on
Commit
6cd4890
·
verified ·
1 Parent(s): d1c4e7d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -51
app.py CHANGED
@@ -31,7 +31,6 @@ def load_model(model_type):
31
  "facebook/bart-large-cnn",
32
  cache_dir="./models"
33
  )
34
- # Load scientific lay summarizer model
35
  model = PeftModel.from_pretrained(
36
  base_model,
37
  "pendar02/results",
@@ -48,7 +47,6 @@ def load_model(model_type):
48
  "GanjinZero/biobart-base",
49
  cache_dir="./models"
50
  )
51
- # Load biobart fine-tuned model
52
  model = PeftModel.from_pretrained(
53
  base_model,
54
  "pendar02/biobart-finetune",
@@ -61,30 +59,11 @@ def load_model(model_type):
61
  cache_dir="./models"
62
  )
63
 
64
- # Ensure model is in evaluation mode
65
  model.eval()
66
  return model, tokenizer
67
-
68
  except Exception as e:
69
  st.error(f"Error loading model: {str(e)}")
70
  raise
71
-
72
- # Ensure model is in evaluation mode
73
- model.eval()
74
- return model, tokenizer
75
-
76
- except Exception as e:
77
- # Fallback to base model if PEFT loading fails
78
- st.warning(f"Error loading PEFT model: {str(e)}. Falling back to base model.")
79
- if model_type == "summarize":
80
- model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn")
81
- tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")
82
- else:
83
- model = AutoModelForSeq2SeqLM.from_pretrained("GanjinZero/biobart-base")
84
- tokenizer = AutoTokenizer.from_pretrained("GanjinZero/biobart-base")
85
-
86
- model.eval()
87
- return model, tokenizer
88
 
89
  @st.cache_data
90
  def process_excel(uploaded_file):
@@ -92,14 +71,14 @@ def process_excel(uploaded_file):
92
  try:
93
  df = pd.read_excel(uploaded_file)
94
  required_columns = ['Abstract', 'Article Title', 'Authors',
95
- 'Source Title', 'Publication Year', 'DOI']
96
 
97
  # Check required columns
98
  missing_columns = [col for col in required_columns if col not in df.columns]
99
  if missing_columns:
100
  st.error(f"Missing required columns: {', '.join(missing_columns)}")
101
  return None
102
-
103
  return df[required_columns]
104
  except Exception as e:
105
  st.error(f"Error processing file: {str(e)}")
@@ -107,6 +86,9 @@ def process_excel(uploaded_file):
107
 
108
  def generate_summary(text, model, tokenizer):
109
  """Generate summary for single abstract"""
 
 
 
110
  inputs = tokenizer(text, return_tensors="pt", max_length=1024, truncation=True)
111
 
112
  with torch.no_grad():
@@ -175,11 +157,11 @@ def main():
175
  with st.spinner("Processing file..."):
176
  df = process_excel(uploaded_file)
177
  if df is not None:
178
- st.session_state.processed_data = df
179
 
180
  if st.session_state.processed_data is not None:
181
  df = st.session_state.processed_data
182
- st.write(f"📊 Loaded {len(df)} papers")
183
 
184
  # Individual Summaries Section
185
  st.header("📝 Individual Paper Summaries")
@@ -224,22 +206,7 @@ def main():
224
  sorted_df = display_df.sort_values(by=sort_column, ascending=ascending)
225
 
226
  # Show interactive table
227
- st.dataframe(
228
- sorted_df,
229
- column_config={
230
- "Abstract": st.column_config.TextColumn(
231
- "Abstract",
232
- width="medium",
233
- help="Original abstract text"
234
- ),
235
- "Summary": st.column_config.TextColumn(
236
- "Summary",
237
- width="medium",
238
- help="Generated summary"
239
- )
240
- },
241
- hide_index=True
242
- )
243
 
244
  # Question-focused Summary Section
245
  st.header("❓ Question-focused Summary")
@@ -255,17 +222,13 @@ def main():
255
  top_k=5
256
  )
257
 
258
- # Show spell-check suggestion if needed
259
- if results['processed_question']['original'] != results['processed_question']['corrected']:
260
- st.info(f"Did you mean: {results['processed_question']['corrected']}?")
261
-
262
  # Load question-focused model
263
  model, tokenizer = load_model("question_focused")
264
 
265
  # Get relevant abstracts and generate summary
266
  relevant_abstracts = df['Abstract'].iloc[results['top_indices']].tolist()
267
  focused_summary = generate_focused_summary(
268
- results['processed_question']['corrected'],
269
  relevant_abstracts,
270
  model,
271
  tokenizer
@@ -283,10 +246,6 @@ def main():
283
  relevant_papers['Relevance Score'] = results['scores']
284
  st.dataframe(relevant_papers, hide_index=True)
285
 
286
- # Show identified medical terms
287
- st.subheader("Identified Medical Terms")
288
- st.write(", ".join(results['processed_question']['medical_entities']))
289
-
290
  # Clear GPU memory
291
  del model
292
  del tokenizer
@@ -297,4 +256,4 @@ def main():
297
  st.error(f"Error generating focused summary: {str(e)}")
298
 
299
  if __name__ == "__main__":
300
- main()
 
31
  "facebook/bart-large-cnn",
32
  cache_dir="./models"
33
  )
 
34
  model = PeftModel.from_pretrained(
35
  base_model,
36
  "pendar02/results",
 
47
  "GanjinZero/biobart-base",
48
  cache_dir="./models"
49
  )
 
50
  model = PeftModel.from_pretrained(
51
  base_model,
52
  "pendar02/biobart-finetune",
 
59
  cache_dir="./models"
60
  )
61
 
 
62
  model.eval()
63
  return model, tokenizer
 
64
  except Exception as e:
65
  st.error(f"Error loading model: {str(e)}")
66
  raise
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
68
  @st.cache_data
69
  def process_excel(uploaded_file):
 
71
  try:
72
  df = pd.read_excel(uploaded_file)
73
  required_columns = ['Abstract', 'Article Title', 'Authors',
74
+ 'Source Title', 'Publication Year', 'DOI']
75
 
76
  # Check required columns
77
  missing_columns = [col for col in required_columns if col not in df.columns]
78
  if missing_columns:
79
  st.error(f"Missing required columns: {', '.join(missing_columns)}")
80
  return None
81
+
82
  return df[required_columns]
83
  except Exception as e:
84
  st.error(f"Error processing file: {str(e)}")
 
86
 
87
  def generate_summary(text, model, tokenizer):
88
  """Generate summary for single abstract"""
89
+ if not isinstance(text, str) or not text.strip():
90
+ return "No abstract available to summarize."
91
+
92
  inputs = tokenizer(text, return_tensors="pt", max_length=1024, truncation=True)
93
 
94
  with torch.no_grad():
 
157
  with st.spinner("Processing file..."):
158
  df = process_excel(uploaded_file)
159
  if df is not None:
160
+ st.session_state.processed_data = df.dropna(subset=["Abstract"])
161
 
162
  if st.session_state.processed_data is not None:
163
  df = st.session_state.processed_data
164
+ st.write(f"📊 Loaded {len(df)} papers with abstracts")
165
 
166
  # Individual Summaries Section
167
  st.header("📝 Individual Paper Summaries")
 
206
  sorted_df = display_df.sort_values(by=sort_column, ascending=ascending)
207
 
208
  # Show interactive table
209
+ st.dataframe(sorted_df, hide_index=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
210
 
211
  # Question-focused Summary Section
212
  st.header("❓ Question-focused Summary")
 
222
  top_k=5
223
  )
224
 
 
 
 
 
225
  # Load question-focused model
226
  model, tokenizer = load_model("question_focused")
227
 
228
  # Get relevant abstracts and generate summary
229
  relevant_abstracts = df['Abstract'].iloc[results['top_indices']].tolist()
230
  focused_summary = generate_focused_summary(
231
+ question,
232
  relevant_abstracts,
233
  model,
234
  tokenizer
 
246
  relevant_papers['Relevance Score'] = results['scores']
247
  st.dataframe(relevant_papers, hide_index=True)
248
 
 
 
 
 
249
  # Clear GPU memory
250
  del model
251
  del tokenizer
 
256
  st.error(f"Error generating focused summary: {str(e)}")
257
 
258
  if __name__ == "__main__":
259
+ main()