aiqcamp committed on
Commit
1b8d3ac
·
verified ·
1 Parent(s): 33bb1d2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +167 -33
app.py CHANGED
@@ -107,26 +107,178 @@ def respond(
107
  return [{"role": "user", "content": message},
108
  {"role": "assistant", "content": f"์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}"}]
109
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  def process_chat(message, history):
111
  try:
112
- # ๋‹จ๋ฐฑ์งˆ ์ƒ์„ฑ ๊ด€๋ จ ํ‚ค์›Œ๋“œ ํ™•์ธ
113
- if any(keyword in message.lower() for keyword in ['protein', 'generate', '๋‹จ๋ฐฑ์งˆ', '์ƒ์„ฑ']):
114
- # ๋ฐ์ดํ„ฐ์…‹์—์„œ ๊ด€๋ จ ์ •๋ณด ๊ฒ€์ƒ‰
115
- relevant_data = search_protein_data(message)
 
 
 
 
116
 
117
- # ์ƒ์„ฑ ํŒŒ๋ผ๋ฏธํ„ฐ ์ถ”์ถœ
118
- params = extract_parameters(message, relevant_data)
 
 
 
 
 
119
 
120
- # ๋‹จ๋ฐฑ์งˆ ์ƒ์„ฑ
121
- protein_result = generate_protein(params)
 
 
 
 
 
122
 
123
- # ๊ฒฐ๊ณผ ์„ค๋ช… ์ƒ์„ฑ
124
- explanation = generate_explanation(protein_result, params)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
 
126
- return history + [
127
- {"role": "user", "content": message},
128
- {"role": "assistant", "content": explanation}
129
- ]
 
130
  else:
131
  return history + [
132
  {"role": "user", "content": message},
@@ -135,27 +287,9 @@ def process_chat(message, history):
135
  except Exception as e:
136
  return history + [
137
  {"role": "user", "content": message},
138
- {"role": "assistant", "content": f"์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}"}
139
  ]
140
 
141
- def search_protein_data(query):
142
- # ๋ฐ์ดํ„ฐ์…‹์—์„œ ๊ด€๋ จ ํ•ญ๋ชฉ ๊ฒ€์ƒ‰
143
- relevant_entries = []
144
- for entry in ds['train']:
145
- if any(keyword in entry['sequence'].lower() for keyword in query.lower().split()):
146
- relevant_entries.append(entry)
147
- return relevant_entries
148
-
149
- def extract_parameters(llm_response, dataset_info):
150
- # LLM ์‘๋‹ต์—์„œ ํŒŒ๋ผ๋ฏธํ„ฐ ์ถ”์ถœ
151
- params = {
152
- 'sequence_length': 100, # ๊ธฐ๋ณธ๊ฐ’
153
- 'helix_bias': 0.02,
154
- 'strand_bias': 0.02,
155
- 'loop_bias': 0.1,
156
- 'hydrophobic_target_score': 0
157
- }
158
- return params
159
 
160
  def generate_protein(params):
161
  # ๊ธฐ์กด protein_diffusion_model ํ•จ์ˆ˜ ํ˜ธ์ถœ
 
107
  return [{"role": "user", "content": message},
108
  {"role": "assistant", "content": f"์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}"}]
109
 
110
def analyze_prompt(message):
    """Ask the chat LLM to break a protein-design request into design traits.

    The request is embedded in a Korean instruction prompt that asks for four
    items: main function, target environment, required structural features,
    and size/complexity requirements. The prompt is sent as a single user
    message through the module-level ``client``.

    Args:
        message: The user's free-text request.

    Returns:
        The text of the first completion choice, or ``None`` when the call
        fails or the completion is empty or whitespace-only.
    """
    try:
        # Assembled line by line so the prompt carries no source-code
        # indentation or leading/trailing blank lines into the request.
        analysis_prompt = "\n".join([
            "다음 요청을 분석하여 단백질 설계에 필요한 주요 특성을 추출하세요:",
            f"요청: {message}",
            "",
            "다음 항목들을 분석해주세요:",
            "1. 주요 기능 (예: 치료, 결합, 촉매 등)",
            "2. 목표 환경 (예: 세포막, 수용성, 등)",
            "3. 필요한 구조적 특징",
            "4. 크기 및 복잡도 요구사항",
        ])

        response = client.chat.completions.create(
            model="CohereForAI/c4ai-command-r-plus-08-2024",
            messages=[{"role": "user", "content": analysis_prompt}],
            temperature=0.7,
        )

        content = response.choices[0].message.content
        # A blank completion carries no analysis; report it as a failure so
        # it is not passed on as if it were a usable result.
        if not content or not content.strip():
            return None
        return content
    except Exception as e:
        # Best-effort by design: any failure is logged and reported as None.
        print(f"프롬프트 분석 중 오류: {str(e)}")
        return None
134
+
135
def search_protein_data(analysis, dataset):
    """Return the three dataset entries that score highest against an analysis.

    Keywords are obtained from ``extract_keywords(analysis)`` and every entry
    of ``dataset['train']`` is scored with ``calculate_similarity``; both
    helpers are defined elsewhere in this module.

    Args:
        analysis: Analysis text produced for the user's request.
        dataset: Mapping with a ``'train'`` iterable of entries.

    Returns:
        A list of at most three ``(score, entry)`` tuples, highest score
        first. Entries with equal scores keep their dataset order. An empty
        list is returned when anything fails.
    """
    try:
        keywords = extract_keywords(analysis)

        scored_entries = [
            (calculate_similarity(keywords, entry), entry)
            for entry in dataset['train']
        ]

        # Sort on the score alone. Sorting the bare tuples falls back to
        # comparing the entries whenever two scores tie, which raises
        # TypeError for dict entries and made the whole search return [].
        scored_entries.sort(key=lambda pair: pair[0], reverse=True)
        return scored_entries[:3]
    except Exception as e:
        # Best-effort by design: the failure is logged and reported as "no
        # matches" rather than raised.
        print(f"데이터 검색 중 오류: {str(e)}")
        return []
153
+
154
def extract_parameters(analysis, similar_structures):
    """Derive generation parameters from the analysis text and reference hits.

    Starts from a fixed template, adjusts hydrophobicity-related values when
    the analysis mentions membrane permeation / hydrophobicity or water
    solubility, then adapts length and secondary-structure biases to the
    reference structures.

    Args:
        analysis: Analysis text (Korean keywords are matched by substring).
        similar_structures: Sequence of ``(score, entry)`` tuples; each entry
            must provide ``'sequence'`` and may provide
            ``'secondary_structure'`` (a string where ``'H'`` marks helix and
            ``'E'`` marks strand positions).

    Returns:
        A dict with ``sequence_length``, ``helix_bias``, ``strand_bias``,
        ``loop_bias`` and ``hydrophobic_target_score``, or ``None`` when the
        inputs cannot be processed.
    """
    try:
        # Default parameter template.
        params = {
            'sequence_length': 100,
            'helix_bias': 0.02,
            'strand_bias': 0.02,
            'loop_bias': 0.1,
            'hydrophobic_target_score': 0,
        }

        # Structural requirements inferred from keywords in the analysis.
        if "막 투과" in analysis or "소수성" in analysis:
            params['hydrophobic_target_score'] = -2
            params['helix_bias'] = 0.03
        elif "수용성" in analysis or "가용성" in analysis:
            params['hydrophobic_target_score'] = 2
            params['loop_bias'] = 0.15

        if similar_structures:
            entries = [entry for _, entry in similar_structures]

            avg_length = sum(len(entry['sequence']) for entry in entries) / len(entries)
            # Keep the template length if the references carry no residues;
            # a zero-length target is not a usable generation request.
            if avg_length >= 1:
                params['sequence_length'] = int(avg_length)

            helix_ratios = []
            strand_ratios = []
            for entry in entries:
                if 'secondary_structure' not in entry:
                    continue
                annotation = entry['secondary_structure']
                # Skip missing/empty annotations instead of dividing by zero,
                # which previously aborted the whole parameter extraction.
                if not annotation:
                    continue
                helix_ratios.append(annotation.count('H') / len(annotation))
                strand_ratios.append(annotation.count('E') / len(annotation))

            # Average over all annotated references so every hit contributes,
            # rather than letting the last (lowest-ranked) one overwrite the
            # rest. Values are clamped to the 0.01-0.05 range.
            if helix_ratios:
                mean_helix = sum(helix_ratios) / len(helix_ratios)
                mean_strand = sum(strand_ratios) / len(strand_ratios)
                params['helix_bias'] = max(0.01, min(0.05, mean_helix))
                params['strand_bias'] = max(0.01, min(0.05, mean_strand))

        return params
    except Exception as e:
        # Best-effort by design: the failure is logged and reported as None.
        print(f"파라미터 추출 중 오류: {str(e)}")
        return None
191
+
192
  def process_chat(message, history):
193
  try:
194
+ if any(keyword in message.lower() for keyword in ['protein', 'generate', '๋‹จ๋ฐฑ์งˆ', '์ƒ์„ฑ', '์น˜๋ฃŒ']):
195
+ # 1. LLM์„ ์‚ฌ์šฉํ•œ ํ”„๋กฌํ”„ํŠธ ๋ถ„์„
196
+ analysis = analyze_prompt(message)
197
+ if not analysis:
198
+ return history + [
199
+ {"role": "user", "content": message},
200
+ {"role": "assistant", "content": "์š”์ฒญ ๋ถ„์„์— ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค."}
201
+ ]
202
 
203
+ # 2. ์œ ์‚ฌ ๊ตฌ์กฐ ๊ฒ€์ƒ‰
204
+ similar_structures = search_protein_data(analysis, ds)
205
+ if not similar_structures:
206
+ return history + [
207
+ {"role": "user", "content": message},
208
+ {"role": "assistant", "content": "์ ํ•ฉํ•œ ์ฐธ์กฐ ๊ตฌ์กฐ๋ฅผ ์ฐพ์ง€ ๋ชปํ–ˆ์Šต๋‹ˆ๋‹ค."}
209
+ ]
210
 
211
+ # 3. ์ƒ์„ฑ ํŒŒ๋ผ๋ฏธํ„ฐ ๊ฒฐ์ •
212
+ params = extract_parameters(analysis, similar_structures)
213
+ if not params:
214
+ return history + [
215
+ {"role": "user", "content": message},
216
+ {"role": "assistant", "content": "ํŒŒ๋ผ๋ฏธํ„ฐ ์„ค์ •์— ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค."}
217
+ ]
218
 
219
+ # 4. ๋‹จ๋ฐฑ์งˆ ์ƒ์„ฑ
220
+ try:
221
+ protein_result = protein_diffusion_model(
222
+ sequence=None,
223
+ seq_len=params['sequence_length'],
224
+ helix_bias=params['helix_bias'],
225
+ strand_bias=params['strand_bias'],
226
+ loop_bias=params['loop_bias'],
227
+ secondary_structure=None,
228
+ aa_bias=None,
229
+ aa_bias_potential=None,
230
+ num_steps="25",
231
+ noise="normal",
232
+ hydrophobic_target_score=str(params['hydrophobic_target_score']),
233
+ hydrophobic_potential="2",
234
+ contigs=None,
235
+ pssm=None,
236
+ seq_mask=None,
237
+ str_mask=None,
238
+ rewrite_pdb=None
239
+ )
240
+
241
+ output_seq, output_pdb, structure_view, plddt_plot = next(protein_result)
242
+
243
+ # 5. ๊ฒฐ๊ณผ ์„ค๋ช… ์ƒ์„ฑ
244
+ explanation = f"""
245
+ ์š”์ฒญํ•˜์‹  ๊ธฐ๋Šฅ์— ๋งž๋Š” ๋‹จ๋ฐฑ์งˆ์„ ์ƒ์„ฑํ–ˆ์Šต๋‹ˆ๋‹ค:
246
+
247
+ ๋ถ„์„๋œ ์š”๊ตฌ์‚ฌํ•ญ:
248
+ {analysis}
249
+
250
+ ์„ค๊ณ„๋œ ๊ตฌ์กฐ์  ํŠน์ง•:
251
+ - ๊ธธ์ด: {params['sequence_length']} ์•„๋ฏธ๋…ธ์‚ฐ
252
+ - ์•ŒํŒŒ ํ—ฌ๋ฆญ์Šค ๋น„์œจ: {params['helix_bias']*100:.1f}%
253
+ - ๋ฒ ํƒ€ ์‹œํŠธ ๋น„์œจ: {params['strand_bias']*100:.1f}%
254
+ - ๋ฃจํ”„ ๊ตฌ์กฐ ๋น„์œจ: {params['loop_bias']*100:.1f}%
255
+ - ์†Œ์ˆ˜์„ฑ ์ ์ˆ˜: {params['hydrophobic_target_score']}
256
+
257
+ ์ฐธ์กฐ๋œ ์œ ์‚ฌ ๊ตฌ์กฐ: {len(similar_structures)}๊ฐœ
258
+
259
+ ์ƒ์„ฑ๋œ ๋‹จ๋ฐฑ์งˆ์˜ 3D ๊ตฌ์กฐ์™€ ์‹œํ€€์Šค๋ฅผ ํ™•์ธํ•˜์‹ค ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.
260
+ """
261
+
262
+ # 6. ๊ฒฐ๊ณผ ์ €์žฅ
263
+ global current_protein_result
264
+ current_protein_result = {
265
+ 'sequence': output_seq,
266
+ 'pdb': output_pdb,
267
+ 'structure_view': structure_view,
268
+ 'plddt_plot': plddt_plot,
269
+ 'params': params
270
+ }
271
+
272
+ return history + [
273
+ {"role": "user", "content": message},
274
+ {"role": "assistant", "content": explanation}
275
+ ]
276
 
277
+ except Exception as e:
278
+ return history + [
279
+ {"role": "user", "content": message},
280
+ {"role": "assistant", "content": f"๋‹จ๋ฐฑ์งˆ ์ƒ์„ฑ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}"}
281
+ ]
282
  else:
283
  return history + [
284
  {"role": "user", "content": message},
 
287
  except Exception as e:
288
  return history + [
289
  {"role": "user", "content": message},
290
+ {"role": "assistant", "content": f"์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}"}
291
  ]
292
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
293
 
294
  def generate_protein(params):
295
  # ๊ธฐ์กด protein_diffusion_model ํ•จ์ˆ˜ ํ˜ธ์ถœ