Spaces:
Sleeping
Sleeping
Erva Ulusoy
committed on
Commit
·
9a145bd
1
Parent(s):
988f84d
Added hyperlinks to UniProt and GO IDs
Browse files
- ProtHGT_app.py +27 -12
- run_prothgt_app.py +2 -2
ProtHGT_app.py
CHANGED
|
@@ -331,12 +331,17 @@ if st.session_state.submitted:
|
|
| 331 |
col1, col2, col3, col4 = st.columns(4)
|
| 332 |
|
| 333 |
with col1:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 334 |
# Protein filter
|
| 335 |
selected_protein = st.selectbox(
|
| 336 |
"Filter by Protein",
|
| 337 |
-
options=['All'] + sorted(
|
| 338 |
)
|
| 339 |
-
|
| 340 |
with col2:
|
| 341 |
# GO category filter
|
| 342 |
selected_category = st.selectbox(
|
|
@@ -374,13 +379,13 @@ if st.session_state.submitted:
|
|
| 374 |
filtered_df = st.session_state.predictions_df.copy()
|
| 375 |
|
| 376 |
if selected_protein != 'All':
|
| 377 |
-
filtered_df = filtered_df[filtered_df['
|
| 378 |
-
|
| 379 |
if selected_category != 'All':
|
| 380 |
filtered_df = filtered_df[filtered_df['GO_category'] == selected_category]
|
| 381 |
|
| 382 |
if go_term_filter:
|
| 383 |
-
filtered_df = filtered_df[filtered_df['
|
| 384 |
|
| 385 |
filtered_df = filtered_df[(filtered_df['Probability'] >= min_probability_threshold) &
|
| 386 |
(filtered_df['Probability'] <= max_probability_threshold)]
|
|
@@ -429,12 +434,23 @@ if st.session_state.submitted:
|
|
| 429 |
start_idx = st.session_state.page_number * rows_per_page
|
| 430 |
end_idx = min(start_idx + rows_per_page, total_rows)
|
| 431 |
|
| 432 |
-
# Display the paginated dataframe with increased width
|
| 433 |
st.dataframe(
|
| 434 |
filtered_df.iloc[start_idx:end_idx],
|
| 435 |
hide_index=True,
|
| 436 |
-
use_container_width=True,
|
| 437 |
column_config={
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 438 |
"Probability": st.column_config.ProgressColumn(
|
| 439 |
"Probability",
|
| 440 |
format="%.2f",
|
|
@@ -443,7 +459,7 @@ if st.session_state.submitted:
|
|
| 443 |
),
|
| 444 |
"Protein": st.column_config.TextColumn(
|
| 445 |
"Protein",
|
| 446 |
-
help="
|
| 447 |
),
|
| 448 |
"GO_category": st.column_config.TextColumn(
|
| 449 |
"GO Category",
|
|
@@ -451,15 +467,14 @@ if st.session_state.submitted:
|
|
| 451 |
),
|
| 452 |
"GO_term": st.column_config.TextColumn(
|
| 453 |
"GO Term",
|
| 454 |
-
help="Gene Ontology Term
|
| 455 |
),
|
| 456 |
}
|
| 457 |
)
|
| 458 |
-
|
| 459 |
# Pagination controls with better layout
|
| 460 |
col1, col2, col3 = st.columns([1, 3, 1])
|
| 461 |
with col1:
|
| 462 |
-
if st.button("
|
| 463 |
st.session_state.page_number -= 1
|
| 464 |
st.rerun()
|
| 465 |
|
|
@@ -472,7 +487,7 @@ if st.session_state.submitted:
|
|
| 472 |
""", unsafe_allow_html=True)
|
| 473 |
|
| 474 |
with col3:
|
| 475 |
-
if st.button("Next
|
| 476 |
st.session_state.page_number += 1
|
| 477 |
st.rerun()
|
| 478 |
|
|
|
|
| 331 |
col1, col2, col3, col4 = st.columns(4)
|
| 332 |
|
| 333 |
with col1:
|
| 334 |
+
# Extract UniProt IDs from URLs for the selectbox
|
| 335 |
+
uniprot_ids = st.session_state.predictions_df['UniProt_ID'].apply(
|
| 336 |
+
lambda x: x.split('/')[-2] # Gets the ID part from the URL
|
| 337 |
+
).unique().tolist()
|
| 338 |
+
|
| 339 |
# Protein filter
|
| 340 |
selected_protein = st.selectbox(
|
| 341 |
"Filter by Protein",
|
| 342 |
+
options=['All'] + sorted(uniprot_ids)
|
| 343 |
)
|
| 344 |
+
|
| 345 |
with col2:
|
| 346 |
# GO category filter
|
| 347 |
selected_category = st.selectbox(
|
|
|
|
| 379 |
filtered_df = st.session_state.predictions_df.copy()
|
| 380 |
|
| 381 |
if selected_protein != 'All':
|
| 382 |
+
filtered_df = filtered_df[filtered_df['UniProt_ID'].str.contains(selected_protein)]
|
| 383 |
+
|
| 384 |
if selected_category != 'All':
|
| 385 |
filtered_df = filtered_df[filtered_df['GO_category'] == selected_category]
|
| 386 |
|
| 387 |
if go_term_filter:
|
| 388 |
+
filtered_df = filtered_df[filtered_df['GO_ID'].str.contains(go_term_filter, case=False, na=False)]
|
| 389 |
|
| 390 |
filtered_df = filtered_df[(filtered_df['Probability'] >= min_probability_threshold) &
|
| 391 |
(filtered_df['Probability'] <= max_probability_threshold)]
|
|
|
|
| 434 |
start_idx = st.session_state.page_number * rows_per_page
|
| 435 |
end_idx = min(start_idx + rows_per_page, total_rows)
|
| 436 |
|
|
|
|
| 437 |
st.dataframe(
|
| 438 |
filtered_df.iloc[start_idx:end_idx],
|
| 439 |
hide_index=True,
|
| 440 |
+
use_container_width=True,
|
| 441 |
column_config={
|
| 442 |
+
"UniProt_ID": st.column_config.LinkColumn(
|
| 443 |
+
"UniProt ID",
|
| 444 |
+
help="Click to view protein in UniProt",
|
| 445 |
+
validate="^https://www\\.uniprot\\.org/uniprotkb/[A-Z0-9]+/entry$",
|
| 446 |
+
display_text="^https://www\\.uniprot\\.org/uniprotkb/([A-Z0-9]+)/entry$"
|
| 447 |
+
),
|
| 448 |
+
"GO_ID": st.column_config.LinkColumn(
|
| 449 |
+
"GO ID",
|
| 450 |
+
help="Click to view GO term in QuickGO",
|
| 451 |
+
validate="^https://www\\.ebi\\.ac\\.uk/QuickGO/term/GO:[0-9]+$",
|
| 452 |
+
display_text="^https://www\\.ebi\\.ac\\.uk/QuickGO/term/(GO:[0-9]+)$"
|
| 453 |
+
),
|
| 454 |
"Probability": st.column_config.ProgressColumn(
|
| 455 |
"Probability",
|
| 456 |
format="%.2f",
|
|
|
|
| 459 |
),
|
| 460 |
"Protein": st.column_config.TextColumn(
|
| 461 |
"Protein",
|
| 462 |
+
help="Protein Name",
|
| 463 |
),
|
| 464 |
"GO_category": st.column_config.TextColumn(
|
| 465 |
"GO Category",
|
|
|
|
| 467 |
),
|
| 468 |
"GO_term": st.column_config.TextColumn(
|
| 469 |
"GO Term",
|
| 470 |
+
help="Gene Ontology Term Name",
|
| 471 |
),
|
| 472 |
}
|
| 473 |
)
|
|
|
|
| 474 |
# Pagination controls with better layout
|
| 475 |
col1, col2, col3 = st.columns([1, 3, 1])
|
| 476 |
with col1:
|
| 477 |
+
if st.button("Previous", disabled=st.session_state.page_number == 0):
|
| 478 |
st.session_state.page_number -= 1
|
| 479 |
st.rerun()
|
| 480 |
|
|
|
|
| 487 |
""", unsafe_allow_html=True)
|
| 488 |
|
| 489 |
with col3:
|
| 490 |
+
if st.button("Next", disabled=st.session_state.page_number >= total_pages - 1):
|
| 491 |
st.session_state.page_number += 1
|
| 492 |
st.rerun()
|
| 493 |
|
run_prothgt_app.py
CHANGED
|
@@ -130,9 +130,9 @@ def _create_prediction_df(predictions, heterodata, protein_ids, go_category):
|
|
| 130 |
|
| 131 |
# Create DataFrame
|
| 132 |
prediction_df = pd.DataFrame({
|
| 133 |
-
'UniProt_ID': all_proteins,
|
| 134 |
'Protein': all_protein_names,
|
| 135 |
-
'GO_ID': all_go_terms,
|
| 136 |
'GO_term': all_go_term_names,
|
| 137 |
'GO_category': all_categories,
|
| 138 |
'Probability': all_probabilities
|
|
|
|
| 130 |
|
| 131 |
# Create DataFrame
|
| 132 |
prediction_df = pd.DataFrame({
|
| 133 |
+
'UniProt_ID': [f"https://www.uniprot.org/uniprotkb/{pid}/entry" for pid in all_proteins],
|
| 134 |
'Protein': all_protein_names,
|
| 135 |
+
'GO_ID': [f"https://www.ebi.ac.uk/QuickGO/term/{go_id}" for go_id in all_go_terms],
|
| 136 |
'GO_term': all_go_term_names,
|
| 137 |
'GO_category': all_categories,
|
| 138 |
'Probability': all_probabilities
|