Upload app.py
Browse files
app.py
CHANGED
@@ -36,7 +36,7 @@ class PDBSearchAssistant:
|
|
36 |
Extract specific search parameters from the protein-related query:
|
37 |
1. Protein name or type
|
38 |
2. Resolution cutoff (in Å)
|
39 |
-
3.
|
40 |
4. Specific PDB ID
|
41 |
5. Experimental method (X-RAY, EM, NMR)
|
42 |
6. Organism/Species information
|
@@ -58,6 +58,46 @@ class PDBSearchAssistant:
|
|
58 |
PDB_ID: none
|
59 |
Method: X-RAY
|
60 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
Now analyze:
|
62 |
Query: {query}
|
63 |
"""
|
@@ -380,52 +420,65 @@ class PDBSearchAssistant:
|
|
380 |
print(f"Error in direct API call for PDB ID {pdb_id}: {str(e)}")
|
381 |
return []
|
382 |
|
383 |
-
def
|
384 |
-
"""
|
385 |
-
|
386 |
-
|
387 |
-
|
388 |
-
|
389 |
-
|
390 |
-
|
391 |
-
|
392 |
-
|
393 |
-
|
394 |
-
|
395 |
-
|
396 |
-
|
397 |
-
|
398 |
-
|
399 |
-
|
400 |
-
if
|
401 |
-
|
402 |
-
|
|
|
|
|
|
|
|
|
403 |
return {
|
404 |
"type": "sequence",
|
405 |
-
"
|
406 |
}
|
407 |
-
|
408 |
-
|
409 |
-
return {
|
410 |
-
"type": "structure",
|
411 |
-
"query": query
|
412 |
-
}
|
413 |
-
|
414 |
-
def process_query(self, query):
|
415 |
-
"""Process query and return appropriate results"""
|
416 |
-
query_info = self.analyze_query_type(query)
|
417 |
-
|
418 |
-
if query_info["type"] == "sequence":
|
419 |
-
return {
|
420 |
-
"type": "sequence",
|
421 |
-
"results": self.get_sequences_by_pdb_id(query_info["pdb_id"])
|
422 |
-
}
|
423 |
-
else:
|
424 |
return {
|
425 |
"type": "structure",
|
426 |
-
"results": self.search_pdb(
|
427 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
428 |
|
|
|
429 |
|
430 |
def create_interactive_table(df):
|
431 |
if df.empty:
|
@@ -610,8 +663,7 @@ app_ui = ui.page_fluid(
|
|
610 |
ui.tags.ul(
|
611 |
ui.tags.li("Human hemoglobin C resolution better than 2.5Å"),
|
612 |
ui.tags.li("Find structures containing sequence MNIFEMLRIDEGLRLKIYKDTEGYYTIGIGHLLTKSPSLNAAKSELDKAIGRNTNGVITKDEAEKLFNQDVDAAVRGILRNAKLKPVYDSLDAVRRAALINMVFQMGETGVAGFTNSLRMLQQKRWDEAAVNLAKSRWYNQTPNRAKRVITTFRTGTWDAYKNL"),
|
613 |
-
ui.tags.li("Sequence of PDB ID 8ET6")
|
614 |
-
ui.tags.li("Get sequence 7BZ5")
|
615 |
)
|
616 |
)
|
617 |
)
|
@@ -707,8 +759,6 @@ def server(input, output, session):
|
|
707 |
@render.text
|
708 |
def sequence_output():
|
709 |
current_results = results_store.get()
|
710 |
-
print(current_results["type"])
|
711 |
-
print(current_results["results"])
|
712 |
if current_results["type"] == "sequence":
|
713 |
sequences = current_results["results"]
|
714 |
if not sequences:
|
|
|
36 |
Extract specific search parameters from the protein-related query:
|
37 |
1. Protein name or type
|
38 |
2. Resolution cutoff (in Å)
|
39 |
+
3. Protein sequence information
|
40 |
4. Specific PDB ID
|
41 |
5. Experimental method (X-RAY, EM, NMR)
|
42 |
6. Organism/Species information
|
|
|
58 |
PDB_ID: none
|
59 |
Method: X-RAY
|
60 |
|
61 |
+
Query: "Get sequence of PDB ID 8ET6"
|
62 |
+
Protein: none
|
63 |
+
Organism: none
|
64 |
+
Resolution: none
|
65 |
+
Sequence: none
|
66 |
+
PDB_ID: 8ET6
|
67 |
+
Method: none
|
68 |
+
|
69 |
+
Query: "Sequence of 7BZ5"
|
70 |
+
Protein: none
|
71 |
+
Organism: none
|
72 |
+
Resolution: none
|
73 |
+
Sequence: none
|
74 |
+
PDB_ID: 7BZ5
|
75 |
+
Method: none
|
76 |
+
|
77 |
+
Query: "7BZ5"
|
78 |
+
Protein: none
|
79 |
+
Organism: none
|
80 |
+
Resolution: none
|
81 |
+
Sequence: none
|
82 |
+
PDB_ID: 7BZ5
|
83 |
+
Method: none
|
84 |
+
|
85 |
+
Query: "6KAO"
|
86 |
+
Protein: none
|
87 |
+
Organism: none
|
88 |
+
Resolution: none
|
89 |
+
Sequence: none
|
90 |
+
PDB_ID: 6KAO
|
91 |
+
Method: none
|
92 |
+
|
93 |
+
Query: "Find structures containing sequence MNIFEMLRIDEGLRLKIYKDTEGYYTIGIGHLLTKSPSLNAAKSELDKAIGRNTNGVITKDEAEKLFNQDVDAAVRGILRNAKLKPVYDSLDAVRRAALINMVFQMGETGVAGFTNSLRMLQQKRWDEAAVNLAKSRWYNQTPNRAKRVITTFRTGTWDAYKNL"
|
94 |
+
Protein: none
|
95 |
+
Organism: none
|
96 |
+
Resolution: none
|
97 |
+
Sequence: MNIFEMLRIDEGLRLKIYKDTEGYYTIGIGHLLTKSPSLNAAKSELDKAIGRNTNGVITKDEAEKLFNQDVDAAVRGILRNAKLKPVYDSLDAVRRAALINMVFQMGETGVAGFTNSLRMLQQKRWDEAAVNLAKSRWYNQTPNRAKRVITTFRTGTWDAYKNL
|
98 |
+
PDB_ID: none
|
99 |
+
Method: none
|
100 |
+
|
101 |
Now analyze:
|
102 |
Query: {query}
|
103 |
"""
|
|
|
420 |
print(f"Error in direct API call for PDB ID {pdb_id}: {str(e)}")
|
421 |
return []
|
422 |
|
423 |
+
def process_query(self, query):
    """Route a free-text query to a sequence lookup or a structure search.

    The query is formatted into ``self.prompt_template`` and sent through
    ``self.pipe``; the generated text is scanned for a ``PDB_ID:`` line.
    When a PDB ID is found and the query mentions a sequence, the result
    of ``self.get_sequences_by_pdb_id`` is returned; otherwise the query
    is handed to ``self.search_pdb``.

    Args:
        query: The user's free-text query string.

    Returns:
        A dict with two keys: ``"type"`` (``"sequence"`` or
        ``"structure"``) and ``"results"`` (whatever the selected helper
        returned, or ``[]`` if anything raised along the way).
    """
    # Local import: the file's import block is not visible from this block.
    import re

    try:
        # Get search parameters from LLM
        formatted_prompt = self.prompt_template.format(query=query)
        response = self.pipe(formatted_prompt)[0]['generated_text']
        print("Generated parameters:", response)

        # NOTE(review): some generation pipelines return the prompt followed
        # by the completion. The prompt's few-shot examples contain their own
        # "PDB_ID:" lines, so drop an echoed prompt before parsing; this is a
        # no-op when the pipeline returns only the completion.
        if response.startswith(formatted_prompt):
            response = response[len(formatted_prompt):]

        # Parse LLM response for PDB ID. Only a well-formed 4-character ID
        # (digit 1-9 followed by three alphanumerics) is accepted, so values
        # such as "none", "n/a", or an ID wrapped in extra words/punctuation
        # never reach the sequence lookup as a malformed identifier.
        pdb_id = None
        for line in response.split('\n'):
            if 'PDB_ID:' not in line:
                continue
            value = line.split('PDB_ID:')[1].strip()
            id_match = re.search(r'\b[1-9][A-Za-z0-9]{3}\b', value)
            if id_match:
                pdb_id = id_match.group(0).upper()
                break

        # Check if query is asking for sequence ('seq' also covers 'sequence')
        is_sequence_query = 'seq' in query.lower()

        if is_sequence_query and pdb_id:
            # Get sequences for the PDB ID
            return {
                "type": "sequence",
                "results": self.get_sequences_by_pdb_id(pdb_id)
            }

        # If not a sequence query or no PDB ID found, proceed with normal structure search
        return {
            "type": "structure",
            "results": self.search_pdb(query)
        }

    except Exception as e:
        # Deliberate best-effort boundary: report the problem and fall back
        # to an empty structure result instead of propagating the error.
        print(f"Error processing query: {str(e)}")
        return {"type": "structure", "results": []}
|
461 |
+
|
462 |
+
def pdbsummary(name):
    """Build a plain-text report of the structures found for ``name``.

    A ``ProteinQuery`` for ``name`` with ``max_resolution=5.0`` is run
    through a fresh ``ProteinSearchEngine``. Each returned structure
    contributes a numbered entry (starting at 1) listing its PDB ID,
    resolution, method, title, release date, sequence length and sequence.

    Args:
        name: Passed as the first positional argument to ``ProteinQuery``.

    Returns:
        The concatenated report text; an empty string when the search
        yields no structures.
    """
    engine = ProteinSearchEngine()
    hits = engine.search(ProteinQuery(name, max_resolution=5.0))

    # Collect the pieces in order and join once at the end.
    pieces = []
    for rank, entry in enumerate(hits, start=1):
        pieces.append(f"\n{rank}. PDB ID : {entry.pdb_id}\n")
        pieces.append(f"\nResolution : {entry.resolution:.2f} A \n")
        pieces.append(f"Method : {entry.method}\n Title : {entry.title}\n")
        pieces.append(
            f"Release Date : {entry.release_date}\n Sequence length: {len(entry.sequence)} aa\n"
        )
        pieces.append(f" Sequence:\n {entry.sequence}\n")

    return "".join(pieces)
|
482 |
|
483 |
def create_interactive_table(df):
|
484 |
if df.empty:
|
|
|
663 |
ui.tags.ul(
|
664 |
ui.tags.li("Human hemoglobin C resolution better than 2.5Å"),
|
665 |
ui.tags.li("Find structures containing sequence MNIFEMLRIDEGLRLKIYKDTEGYYTIGIGHLLTKSPSLNAAKSELDKAIGRNTNGVITKDEAEKLFNQDVDAAVRGILRNAKLKPVYDSLDAVRRAALINMVFQMGETGVAGFTNSLRMLQQKRWDEAAVNLAKSRWYNQTPNRAKRVITTFRTGTWDAYKNL"),
|
666 |
+
ui.tags.li("Sequence of PDB ID 8ET6")
|
|
|
667 |
)
|
668 |
)
|
669 |
)
|
|
|
759 |
@render.text
|
760 |
def sequence_output():
|
761 |
current_results = results_store.get()
|
|
|
|
|
762 |
if current_results["type"] == "sequence":
|
763 |
sequences = current_results["results"]
|
764 |
if not sequences:
|