Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
@@ -107,26 +107,178 @@ def respond(
|
|
107 |
return [{"role": "user", "content": message},
|
108 |
{"role": "assistant", "content": f"์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {str(e)}"}]
|
109 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
110 |
def process_chat(message, history):
|
111 |
try:
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
|
|
|
|
|
|
|
|
116 |
|
117 |
-
#
|
118 |
-
|
|
|
|
|
|
|
|
|
|
|
119 |
|
120 |
-
#
|
121 |
-
|
|
|
|
|
|
|
|
|
|
|
122 |
|
123 |
-
#
|
124 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
125 |
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
|
|
130 |
else:
|
131 |
return history + [
|
132 |
{"role": "user", "content": message},
|
@@ -135,27 +287,9 @@ def process_chat(message, history):
|
|
135 |
except Exception as e:
|
136 |
return history + [
|
137 |
{"role": "user", "content": message},
|
138 |
-
{"role": "assistant", "content": f"์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {str(e)}"}
|
139 |
]
|
140 |
|
141 |
-
def search_protein_data(query):
|
142 |
-
# ๋ฐ์ดํฐ์
์์ ๊ด๋ จ ํญ๋ชฉ ๊ฒ์
|
143 |
-
relevant_entries = []
|
144 |
-
for entry in ds['train']:
|
145 |
-
if any(keyword in entry['sequence'].lower() for keyword in query.lower().split()):
|
146 |
-
relevant_entries.append(entry)
|
147 |
-
return relevant_entries
|
148 |
-
|
149 |
-
def extract_parameters(llm_response, dataset_info):
|
150 |
-
# LLM ์๋ต์์ ํ๋ผ๋ฏธํฐ ์ถ์ถ
|
151 |
-
params = {
|
152 |
-
'sequence_length': 100, # ๊ธฐ๋ณธ๊ฐ
|
153 |
-
'helix_bias': 0.02,
|
154 |
-
'strand_bias': 0.02,
|
155 |
-
'loop_bias': 0.1,
|
156 |
-
'hydrophobic_target_score': 0
|
157 |
-
}
|
158 |
-
return params
|
159 |
|
160 |
def generate_protein(params):
|
161 |
# ๊ธฐ์กด protein_diffusion_model ํจ์ ํธ์ถ
|
|
|
107 |
return [{"role": "user", "content": message},
|
108 |
{"role": "assistant", "content": f"์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {str(e)}"}]
|
109 |
|
110 |
+
def analyze_prompt(message):
|
111 |
+
"""LLM์ ์ฌ์ฉํ์ฌ ํ๋กฌํํธ ๋ถ์"""
|
112 |
+
try:
|
113 |
+
analysis_prompt = f"""
|
114 |
+
๋ค์ ์์ฒญ์ ๋ถ์ํ์ฌ ๋จ๋ฐฑ์ง ์ค๊ณ์ ํ์ํ ์ฃผ์ ํน์ฑ์ ์ถ์ถํ์ธ์:
|
115 |
+
์์ฒญ: {message}
|
116 |
+
|
117 |
+
๋ค์ ํญ๋ชฉ๋ค์ ๋ถ์ํด์ฃผ์ธ์:
|
118 |
+
1. ์ฃผ์ ๊ธฐ๋ฅ (์: ์น๋ฃ, ๊ฒฐํฉ, ์ด๋งค ๋ฑ)
|
119 |
+
2. ๋ชฉํ ํ๊ฒฝ (์: ์ธํฌ๋ง, ์์ฉ์ฑ, ๋ฑ)
|
120 |
+
3. ํ์ํ ๊ตฌ์กฐ์ ํน์ง
|
121 |
+
4. ํฌ๊ธฐ ๋ฐ ๋ณต์ก๋ ์๊ตฌ์ฌํญ
|
122 |
+
"""
|
123 |
+
|
124 |
+
response = client.chat.completions.create(
|
125 |
+
model="CohereForAI/c4ai-command-r-plus-08-2024",
|
126 |
+
messages=[{"role": "user", "content": analysis_prompt}],
|
127 |
+
temperature=0.7
|
128 |
+
)
|
129 |
+
|
130 |
+
return response.choices[0].message.content
|
131 |
+
except Exception as e:
|
132 |
+
print(f"ํ๋กฌํํธ ๋ถ์ ์ค ์ค๋ฅ: {str(e)}")
|
133 |
+
return None
|
134 |
+
|
135 |
+
def search_protein_data(analysis, dataset):
|
136 |
+
"""๋ถ์ ๊ฒฐ๊ณผ๋ฅผ ๋ฐํ์ผ๋ก ๋ฐ์ดํฐ์
์์ ์ ์ฌํ ๊ตฌ์กฐ ๊ฒ์"""
|
137 |
+
try:
|
138 |
+
# ๋ถ์์์ ํค์๋ ์ถ์ถ
|
139 |
+
keywords = extract_keywords(analysis)
|
140 |
+
|
141 |
+
# ์ ์ฌ๋ ์ ์ ๊ณ์ฐ
|
142 |
+
scored_entries = []
|
143 |
+
for entry in dataset['train']:
|
144 |
+
score = calculate_similarity(keywords, entry)
|
145 |
+
scored_entries.append((score, entry))
|
146 |
+
|
147 |
+
# ์์ ์ ์ฌ ๊ตฌ์กฐ ๋ฐํ
|
148 |
+
scored_entries.sort(reverse=True)
|
149 |
+
return scored_entries[:3] # ์์ 3๊ฐ ๊ตฌ์กฐ ๋ฐํ
|
150 |
+
except Exception as e:
|
151 |
+
print(f"๋ฐ์ดํฐ ๊ฒ์ ์ค ์ค๋ฅ: {str(e)}")
|
152 |
+
return []
|
153 |
+
|
154 |
+
def extract_parameters(analysis, similar_structures):
|
155 |
+
"""๋ถ์ ๊ฒฐ๊ณผ์ ์ ์ฌ ๊ตฌ์กฐ๋ฅผ ๋ฐํ์ผ๋ก ์์ฑ ํ๋ผ๋ฏธํฐ ๊ฒฐ์ """
|
156 |
+
try:
|
157 |
+
# ๊ธฐ๋ณธ ํ๋ผ๋ฏธํฐ ํ
ํ๋ฆฟ
|
158 |
+
params = {
|
159 |
+
'sequence_length': 100,
|
160 |
+
'helix_bias': 0.02,
|
161 |
+
'strand_bias': 0.02,
|
162 |
+
'loop_bias': 0.1,
|
163 |
+
'hydrophobic_target_score': 0
|
164 |
+
}
|
165 |
+
|
166 |
+
# ๋ถ์ ๊ฒฐ๊ณผ์์ ๊ตฌ์กฐ์ ์๊ตฌ์ฌํญ ํ์
|
167 |
+
if "๋ง ํฌ๊ณผ" in analysis or "์์์ฑ" in analysis:
|
168 |
+
params['hydrophobic_target_score'] = -2
|
169 |
+
params['helix_bias'] = 0.03
|
170 |
+
elif "์์ฉ์ฑ" in analysis or "๊ฐ์ฉ์ฑ" in analysis:
|
171 |
+
params['hydrophobic_target_score'] = 2
|
172 |
+
params['loop_bias'] = 0.15
|
173 |
+
|
174 |
+
# ์ ์ฌ ๊ตฌ์กฐ๋ค์ ํน์ฑ ๋ฐ์
|
175 |
+
if similar_structures:
|
176 |
+
avg_length = sum(len(s[1]['sequence']) for s in similar_structures) / len(similar_structures)
|
177 |
+
params['sequence_length'] = int(avg_length)
|
178 |
+
|
179 |
+
# ๊ตฌ์กฐ์ ํน์ฑ ๋ถ์ ๋ฐ ๋ฐ์
|
180 |
+
for _, structure in similar_structures:
|
181 |
+
if 'secondary_structure' in structure:
|
182 |
+
helix_ratio = structure['secondary_structure'].count('H') / len(structure['secondary_structure'])
|
183 |
+
sheet_ratio = structure['secondary_structure'].count('E') / len(structure['secondary_structure'])
|
184 |
+
params['helix_bias'] = max(0.01, min(0.05, helix_ratio))
|
185 |
+
params['strand_bias'] = max(0.01, min(0.05, sheet_ratio))
|
186 |
+
|
187 |
+
return params
|
188 |
+
except Exception as e:
|
189 |
+
print(f"ํ๋ผ๋ฏธํฐ ์ถ์ถ ์ค ์ค๋ฅ: {str(e)}")
|
190 |
+
return None
|
191 |
+
|
192 |
def process_chat(message, history):
|
193 |
try:
|
194 |
+
if any(keyword in message.lower() for keyword in ['protein', 'generate', '๋จ๋ฐฑ์ง', '์์ฑ', '์น๋ฃ']):
|
195 |
+
# 1. LLM์ ์ฌ์ฉํ ํ๋กฌํํธ ๋ถ์
|
196 |
+
analysis = analyze_prompt(message)
|
197 |
+
if not analysis:
|
198 |
+
return history + [
|
199 |
+
{"role": "user", "content": message},
|
200 |
+
{"role": "assistant", "content": "์์ฒญ ๋ถ์์ ์คํจํ์ต๋๋ค."}
|
201 |
+
]
|
202 |
|
203 |
+
# 2. ์ ์ฌ ๊ตฌ์กฐ ๊ฒ์
|
204 |
+
similar_structures = search_protein_data(analysis, ds)
|
205 |
+
if not similar_structures:
|
206 |
+
return history + [
|
207 |
+
{"role": "user", "content": message},
|
208 |
+
{"role": "assistant", "content": "์ ํฉํ ์ฐธ์กฐ ๊ตฌ์กฐ๋ฅผ ์ฐพ์ง ๋ชปํ์ต๋๋ค."}
|
209 |
+
]
|
210 |
|
211 |
+
# 3. ์์ฑ ํ๋ผ๋ฏธํฐ ๊ฒฐ์
|
212 |
+
params = extract_parameters(analysis, similar_structures)
|
213 |
+
if not params:
|
214 |
+
return history + [
|
215 |
+
{"role": "user", "content": message},
|
216 |
+
{"role": "assistant", "content": "ํ๋ผ๋ฏธํฐ ์ค์ ์ ์คํจํ์ต๋๋ค."}
|
217 |
+
]
|
218 |
|
219 |
+
# 4. ๋จ๋ฐฑ์ง ์์ฑ
|
220 |
+
try:
|
221 |
+
protein_result = protein_diffusion_model(
|
222 |
+
sequence=None,
|
223 |
+
seq_len=params['sequence_length'],
|
224 |
+
helix_bias=params['helix_bias'],
|
225 |
+
strand_bias=params['strand_bias'],
|
226 |
+
loop_bias=params['loop_bias'],
|
227 |
+
secondary_structure=None,
|
228 |
+
aa_bias=None,
|
229 |
+
aa_bias_potential=None,
|
230 |
+
num_steps="25",
|
231 |
+
noise="normal",
|
232 |
+
hydrophobic_target_score=str(params['hydrophobic_target_score']),
|
233 |
+
hydrophobic_potential="2",
|
234 |
+
contigs=None,
|
235 |
+
pssm=None,
|
236 |
+
seq_mask=None,
|
237 |
+
str_mask=None,
|
238 |
+
rewrite_pdb=None
|
239 |
+
)
|
240 |
+
|
241 |
+
output_seq, output_pdb, structure_view, plddt_plot = next(protein_result)
|
242 |
+
|
243 |
+
# 5. ๊ฒฐ๊ณผ ์ค๋ช
์์ฑ
|
244 |
+
explanation = f"""
|
245 |
+
์์ฒญํ์ ๊ธฐ๋ฅ์ ๋ง๋ ๋จ๋ฐฑ์ง์ ์์ฑํ์ต๋๋ค:
|
246 |
+
|
247 |
+
๋ถ์๋ ์๊ตฌ์ฌํญ:
|
248 |
+
{analysis}
|
249 |
+
|
250 |
+
์ค๊ณ๋ ๊ตฌ์กฐ์ ํน์ง:
|
251 |
+
- ๊ธธ์ด: {params['sequence_length']} ์๋ฏธ๋
ธ์ฐ
|
252 |
+
- ์ํ ํฌ๋ฆญ์ค ๋น์จ: {params['helix_bias']*100:.1f}%
|
253 |
+
- ๋ฒ ํ ์ํธ ๋น์จ: {params['strand_bias']*100:.1f}%
|
254 |
+
- ๋ฃจํ ๊ตฌ์กฐ ๋น์จ: {params['loop_bias']*100:.1f}%
|
255 |
+
- ์์์ฑ ์ ์: {params['hydrophobic_target_score']}
|
256 |
+
|
257 |
+
์ฐธ์กฐ๋ ์ ์ฌ ๊ตฌ์กฐ: {len(similar_structures)}๊ฐ
|
258 |
+
|
259 |
+
์์ฑ๋ ๋จ๋ฐฑ์ง์ 3D ๊ตฌ์กฐ์ ์ํ์ค๋ฅผ ํ์ธํ์ค ์ ์์ต๋๋ค.
|
260 |
+
"""
|
261 |
+
|
262 |
+
# 6. ๊ฒฐ๊ณผ ์ ์ฅ
|
263 |
+
global current_protein_result
|
264 |
+
current_protein_result = {
|
265 |
+
'sequence': output_seq,
|
266 |
+
'pdb': output_pdb,
|
267 |
+
'structure_view': structure_view,
|
268 |
+
'plddt_plot': plddt_plot,
|
269 |
+
'params': params
|
270 |
+
}
|
271 |
+
|
272 |
+
return history + [
|
273 |
+
{"role": "user", "content": message},
|
274 |
+
{"role": "assistant", "content": explanation}
|
275 |
+
]
|
276 |
|
277 |
+
except Exception as e:
|
278 |
+
return history + [
|
279 |
+
{"role": "user", "content": message},
|
280 |
+
{"role": "assistant", "content": f"๋จ๋ฐฑ์ง ์์ฑ ์ค ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {str(e)}"}
|
281 |
+
]
|
282 |
else:
|
283 |
return history + [
|
284 |
{"role": "user", "content": message},
|
|
|
287 |
except Exception as e:
|
288 |
return history + [
|
289 |
{"role": "user", "content": message},
|
290 |
+
{"role": "assistant", "content": f"์ฒ๋ฆฌ ์ค ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {str(e)}"}
|
291 |
]
|
292 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
293 |
|
294 |
def generate_protein(params):
|
295 |
# ๊ธฐ์กด protein_diffusion_model ํจ์ ํธ์ถ
|