Update app.py
app.py CHANGED
@@ -31,7 +31,6 @@ def load_model(model_type):
                 "facebook/bart-large-cnn",
                 cache_dir="./models"
             )
-            # Load scientific lay summarizer model
             model = PeftModel.from_pretrained(
                 base_model,
                 "pendar02/results",
@@ -48,7 +47,6 @@ def load_model(model_type):
                 "GanjinZero/biobart-base",
                 cache_dir="./models"
             )
-            # Load biobart fine-tuned model
             model = PeftModel.from_pretrained(
                 base_model,
                 "pendar02/biobart-finetune",
@@ -61,30 +59,11 @@ def load_model(model_type):
                 cache_dir="./models"
             )
 
-        # Ensure model is in evaluation mode
         model.eval()
         return model, tokenizer
-
     except Exception as e:
         st.error(f"Error loading model: {str(e)}")
         raise
-
-        # Ensure model is in evaluation mode
-        model.eval()
-        return model, tokenizer
-
-    except Exception as e:
-        # Fallback to base model if PEFT loading fails
-        st.warning(f"Error loading PEFT model: {str(e)}. Falling back to base model.")
-        if model_type == "summarize":
-            model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn")
-            tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")
-        else:
-            model = AutoModelForSeq2SeqLM.from_pretrained("GanjinZero/biobart-base")
-            tokenizer = AutoTokenizer.from_pretrained("GanjinZero/biobart-base")
-
-        model.eval()
-        return model, tokenizer
 
 @st.cache_data
 def process_excel(uploaded_file):
@@ -92,14 +71,14 @@ def process_excel(uploaded_file):
     try:
         df = pd.read_excel(uploaded_file)
         required_columns = ['Abstract', 'Article Title', 'Authors',
-
+                            'Source Title', 'Publication Year', 'DOI']
 
         # Check required columns
         missing_columns = [col for col in required_columns if col not in df.columns]
         if missing_columns:
             st.error(f"Missing required columns: {', '.join(missing_columns)}")
             return None
-
+
         return df[required_columns]
     except Exception as e:
         st.error(f"Error processing file: {str(e)}")
@@ -107,6 +86,9 @@ def process_excel(uploaded_file):
 
 def generate_summary(text, model, tokenizer):
     """Generate summary for single abstract"""
+    if not isinstance(text, str) or not text.strip():
+        return "No abstract available to summarize."
+
     inputs = tokenizer(text, return_tensors="pt", max_length=1024, truncation=True)
 
     with torch.no_grad():
@@ -175,11 +157,11 @@ def main():
         with st.spinner("Processing file..."):
             df = process_excel(uploaded_file)
             if df is not None:
-                st.session_state.processed_data = df
+                st.session_state.processed_data = df.dropna(subset=["Abstract"])
 
     if st.session_state.processed_data is not None:
         df = st.session_state.processed_data
-        st.write(f"π Loaded {len(df)} papers")
+        st.write(f"π Loaded {len(df)} papers with abstracts")
 
         # Individual Summaries Section
         st.header("π Individual Paper Summaries")
@@ -224,22 +206,7 @@ def main():
         sorted_df = display_df.sort_values(by=sort_column, ascending=ascending)
 
         # Show interactive table
-        st.dataframe(
-            sorted_df,
-            column_config={
-                "Abstract": st.column_config.TextColumn(
-                    "Abstract",
-                    width="medium",
-                    help="Original abstract text"
-                ),
-                "Summary": st.column_config.TextColumn(
-                    "Summary",
-                    width="medium",
-                    help="Generated summary"
-                )
-            },
-            hide_index=True
-        )
+        st.dataframe(sorted_df, hide_index=True)
 
         # Question-focused Summary Section
         st.header("β Question-focused Summary")
@@ -255,17 +222,13 @@ def main():
                     top_k=5
                 )
 
-                # Show spell-check suggestion if needed
-                if results['processed_question']['original'] != results['processed_question']['corrected']:
-                    st.info(f"Did you mean: {results['processed_question']['corrected']}?")
-
                 # Load question-focused model
                 model, tokenizer = load_model("question_focused")
 
                 # Get relevant abstracts and generate summary
                 relevant_abstracts = df['Abstract'].iloc[results['top_indices']].tolist()
                 focused_summary = generate_focused_summary(
-
+                    question,
                     relevant_abstracts,
                     model,
                     tokenizer
@@ -283,10 +246,6 @@ def main():
                 relevant_papers['Relevance Score'] = results['scores']
                 st.dataframe(relevant_papers, hide_index=True)
 
-                # Show identified medical terms
-                st.subheader("Identified Medical Terms")
-                st.write(", ".join(results['processed_question']['medical_entities']))
-
                 # Clear GPU memory
                 del model
                 del tokenizer
@@ -297,4 +256,4 @@ def main():
             st.error(f"Error generating focused summary: {str(e)}")
 
 if __name__ == "__main__":
-    main()
+    main()
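The two defensive changes in this commit work together: dropna(subset=["Abstract"]) at load time removes rows with missing abstracts, and the new guard inside generate_summary catches anything that survives as a non-string or whitespace-only value, since tokenizer(text) raises on such input. A minimal sketch of that interplay, with an invented toy DataFrame and a string-truncation stand-in for the real BART summarization call:

import pandas as pd

def generate_summary(text):
    # Guard from this commit: bail out early on anything that is
    # not a non-empty string, before it reaches the tokenizer.
    if not isinstance(text, str) or not text.strip():
        return "No abstract available to summarize."
    return text[:60] + "..."  # stand-in for the real model call

df = pd.DataFrame({
    "Abstract": ["Aspirin reduces cardiovascular risk in older adults.", None, "   "],
    "Article Title": ["Paper A", "Paper B", "Paper C"],
})

# Mirrors the new load step: rows with no abstract are dropped up front,
df = df.dropna(subset=["Abstract"])

# and the per-row guard still catches whitespace-only strings.
print(df["Abstract"].map(generate_summary).tolist())

Dropping empty rows up front also keeps the "Loaded ... papers with abstracts" count honest, while the in-function guard covers abstracts that pass dropna but are effectively empty.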