Update app.py
app.py CHANGED
@@ -31,7 +31,6 @@ def load_model(model_type):
                 "facebook/bart-large-cnn",
                 cache_dir="./models"
             )
-            # Load scientific lay summarizer model
             model = PeftModel.from_pretrained(
                 base_model,
                 "pendar02/results",
@@ -48,7 +47,6 @@ def load_model(model_type):
                 "GanjinZero/biobart-base",
                 cache_dir="./models"
             )
-            # Load biobart fine-tuned model
             model = PeftModel.from_pretrained(
                 base_model,
                 "pendar02/biobart-finetune",
@@ -61,30 +59,11 @@ def load_model(model_type):
                 cache_dir="./models"
             )
 
-        # Ensure model is in evaluation mode
         model.eval()
         return model, tokenizer
-
     except Exception as e:
         st.error(f"Error loading model: {str(e)}")
         raise
-
-        # Ensure model is in evaluation mode
-        model.eval()
-        return model, tokenizer
-
-    except Exception as e:
-        # Fallback to base model if PEFT loading fails
-        st.warning(f"Error loading PEFT model: {str(e)}. Falling back to base model.")
-        if model_type == "summarize":
-            model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn")
-            tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")
-        else:
-            model = AutoModelForSeq2SeqLM.from_pretrained("GanjinZero/biobart-base")
-            tokenizer = AutoTokenizer.from_pretrained("GanjinZero/biobart-base")
-
-        model.eval()
-        return model, tokenizer
 
 @st.cache_data
 def process_excel(uploaded_file):
@@ -92,14 +71,14 @@ def process_excel(uploaded_file):
     try:
         df = pd.read_excel(uploaded_file)
         required_columns = ['Abstract', 'Article Title', 'Authors',
-
+                            'Source Title', 'Publication Year', 'DOI']
 
         # Check required columns
         missing_columns = [col for col in required_columns if col not in df.columns]
         if missing_columns:
             st.error(f"Missing required columns: {', '.join(missing_columns)}")
             return None
-
+
         return df[required_columns]
     except Exception as e:
         st.error(f"Error processing file: {str(e)}")
@@ -107,6 +86,9 @@ def process_excel(uploaded_file):
 
 def generate_summary(text, model, tokenizer):
     """Generate summary for single abstract"""
+    if not isinstance(text, str) or not text.strip():
+        return "No abstract available to summarize."
+
     inputs = tokenizer(text, return_tensors="pt", max_length=1024, truncation=True)
 
     with torch.no_grad():
@@ -175,11 +157,11 @@ def main():
         with st.spinner("Processing file..."):
             df = process_excel(uploaded_file)
             if df is not None:
-                st.session_state.processed_data = df
+                st.session_state.processed_data = df.dropna(subset=["Abstract"])
 
     if st.session_state.processed_data is not None:
         df = st.session_state.processed_data
-        st.write(f"π Loaded {len(df)} papers")
+        st.write(f"π Loaded {len(df)} papers with abstracts")
 
         # Individual Summaries Section
         st.header("π Individual Paper Summaries")
@@ -224,22 +206,7 @@ def main():
         sorted_df = display_df.sort_values(by=sort_column, ascending=ascending)
 
         # Show interactive table
-        st.dataframe(
-            sorted_df,
-            column_config={
-                "Abstract": st.column_config.TextColumn(
-                    "Abstract",
-                    width="medium",
-                    help="Original abstract text"
-                ),
-                "Summary": st.column_config.TextColumn(
-                    "Summary",
-                    width="medium",
-                    help="Generated summary"
-                )
-            },
-            hide_index=True
-        )
+        st.dataframe(sorted_df, hide_index=True)
 
         # Question-focused Summary Section
         st.header("β Question-focused Summary")
@@ -255,17 +222,13 @@ def main():
                     top_k=5
                 )
 
-                # Show spell-check suggestion if needed
-                if results['processed_question']['original'] != results['processed_question']['corrected']:
-                    st.info(f"Did you mean: {results['processed_question']['corrected']}?")
-
                 # Load question-focused model
                 model, tokenizer = load_model("question_focused")
 
                 # Get relevant abstracts and generate summary
                 relevant_abstracts = df['Abstract'].iloc[results['top_indices']].tolist()
                 focused_summary = generate_focused_summary(
-
+                    question,
                     relevant_abstracts,
                     model,
                     tokenizer
@@ -283,10 +246,6 @@ def main():
                 relevant_papers['Relevance Score'] = results['scores']
                 st.dataframe(relevant_papers, hide_index=True)
 
-                # Show identified medical terms
-                st.subheader("Identified Medical Terms")
-                st.write(", ".join(results['processed_question']['medical_entities']))
-
                 # Clear GPU memory
                 del model
                 del tokenizer
@@ -297,4 +256,4 @@ def main():
             st.error(f"Error generating focused summary: {str(e)}")
 
 if __name__ == "__main__":
-    main()
+    main()
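The two defensive changes in this commit work together: dropna(subset=["Abstract"]) at load time removes rows with missing abstracts, and the new guard inside generate_summary catches anything that survives as a non-string or whitespace-only value, since tokenizer(text) raises on such input. A minimal sketch of that interplay, with an invented toy DataFrame and a string-truncation stand-in for the real BART summarization call:

import pandas as pd

def generate_summary(text):
    # Guard from this commit: bail out early on anything that is
    # not a non-empty string, before it reaches the tokenizer.
    if not isinstance(text, str) or not text.strip():
        return "No abstract available to summarize."
    return text[:60] + "..."  # stand-in for the real model call

df = pd.DataFrame({
    "Abstract": ["Aspirin reduces cardiovascular risk in older adults.", None, "   "],
    "Article Title": ["Paper A", "Paper B", "Paper C"],
})

# Mirrors the new load step: rows with no abstract are dropped up front,
df = df.dropna(subset=["Abstract"])

# and the per-row guard still catches whitespace-only strings.
print(df["Abstract"].map(generate_summary).tolist())

Dropping empty rows up front also keeps the "Loaded ... papers with abstracts" count honest, while the in-function guard covers abstracts that pass dropna but are effectively empty.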