ginipick committed on
Commit
b9810ca
·
verified ·
1 Parent(s): d59bb1e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -21
app.py CHANGED
@@ -146,20 +146,36 @@ def text_to_parquet(text: str) -> Tuple[str, str, str]:
146
 
147
  def preprocess_text_with_llm(input_text: str) -> str:
148
  # LLM에게 입력 텍스트를 전처리하도록 요청
149
- system_prompt = """당신은 입력된 긴 텍스트를 데이터셋 형식에 맞게 전처리하는 역할을 합니다.
150
- - 데이터셋 형식은 id,text,label,metadata입니다.
151
- - 각 행은 쉼표로 구분되며, **텍스트나 다른 필드 내에 쉼표가 있을 경우 해당 필드를 반드시 큰따옴표(")로 감쌉니다.**
152
- - **필드 내에 큰따옴표가 포함된 경우, 해당 큰따옴표 앞에 백슬래시(\\)를 추가하여 이스케이프 처리합니다.**
153
- - 텍스트를 의미 단위로 분할하고, 적절히 문장을 재구성하고 편집하여 최적화된 문장으로 만듭니다.
154
- - 각 문장에 대해 id를 부여하고, 적절한 label(카테고리)을 지정합니다.
155
- - metadata에는 출처나 날짜 등의 추가 정보를 포함할 수 있습니다.
156
- - 최종 결과는 각 행이 'id,text,label,metadata' 형식의 CSV 형태가 되도록 합니다.
157
- """
158
- prompt = system_prompt + "\n\n입력 텍스트:\n" + input_text + "\n\n전처리된 데이터셋:"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
  try:
160
  response = ""
161
  stream = hf_client.text_generation(
162
- prompt=prompt,
163
  max_new_tokens=2000,
164
  temperature=0.5,
165
  top_p=0.9,
@@ -168,7 +184,8 @@ def preprocess_text_with_llm(input_text: str) -> str:
168
  for msg in stream:
169
  if msg:
170
  response += msg
171
- # 응답에서 전처리된 데이터셋 부분만 추출
 
172
  processed_text = response.strip()
173
  return processed_text
174
  except Exception as e:
@@ -176,6 +193,7 @@ def preprocess_text_with_llm(input_text: str) -> str:
176
  print(error_message)
177
  return error_message
178
 
 
179
  # CSS 설정
180
  css = """
181
  footer {
@@ -262,15 +280,7 @@ with gr.Blocks(css=css) as demo:
262
 
263
  send_data_upload.click(
264
  handle_message_data_upload,
265
- inputs=[
266
- msg_data_upload,
267
- chatbot_data_upload,
268
- system_message,
269
- max_tokens,
270
- temperature,
271
- top_p,
272
- parquet_data_state, # parquet_data_state를 사용하여 업로드된 데이터를 전달
273
- ],
274
  outputs=[chatbot_data_upload, msg_data_upload],
275
  queue=True
276
  )
@@ -406,3 +416,5 @@ if __name__ == "__main__":
406
 
407
 
408
 
 
 
 
146
 
147
  def preprocess_text_with_llm(input_text: str) -> str:
148
  # LLM에게 입력 텍스트를 전처리하도록 요청
149
+ system_prompt = """당신은 데이터 전처리 전문가입니다. 입력된 긴 텍스트를 아래와 같은 데이터셋 형식으로 전처리하세요:
150
+
151
+ - **데이터셋 형식:** `id,text,label,metadata`
152
+ - **각 행은 새로운 줄로 구분되고**, 필드는 쉼표로 구분됩니다.
153
+ - **텍스트나 다른 필드 내에 쉼표가 있을 경우**, 해당 필드를 큰따옴표(")로 감싸세요.
154
+ - **필드 내에 큰따옴표가 있을 경우**, 백슬래시(\\)로 이스케이프 처리하세요. 예: \\"
155
+ - 텍스트를 **의미 단위로 분할**하고, 각 문장에 대해 **1부터 시작하는 연속된 id**를 부여하세요.
156
+ - 각 문장에 대해 **적절한 label(카테고리)**을 지정하세요. 예: "기술", "사회", "경제"
157
+ - **metadata**에는 출처나 날짜 등의 추가 정보를 포함하세요.
158
+ - 최종 결과는 **각 행이 `id,text,label,metadata` 형식의 CSV**가 되도록 하세요.
159
+
160
+ **예시:**
161
+
162
+ 입력 텍스트:
163
+
164
+ "오늘은 날씨가 좋다. 내일은 비가 올 예정이다."
165
+
166
+ 전처리된 데이터셋:
167
+ 1,"오늘은 날씨가 좋다.","날씨","2023-10-05"
168
+ 2,"내일은 비가 올 예정이다.","날씨","2023-10-05"
169
+
170
+ **이제 아래의 입력 텍스트를 처리하세요:**
171
+
172
+ """ + input_text
173
+
174
+ # LLM 호출 및 응답 처리
175
  try:
176
  response = ""
177
  stream = hf_client.text_generation(
178
+ prompt=system_prompt,
179
  max_new_tokens=2000,
180
  temperature=0.5,
181
  top_p=0.9,
 
184
  for msg in stream:
185
  if msg:
186
  response += msg
187
+ # 디버깅: LLM의 응답 출력
188
+ print("LLM 응답:\n", response)
189
  processed_text = response.strip()
190
  return processed_text
191
  except Exception as e:
 
193
  print(error_message)
194
  return error_message
195
 
196
+
197
  # CSS 설정
198
  css = """
199
  footer {
 
280
 
281
  send_data_upload.click(
282
  handle_message_data_upload,
283
+ inputs=[ msg_data_upload, chatbot_data_upload, system_message, max_tokens, temperature, top_p, parquet_data_state, # parquet_data_state를 사용하여 업로드된 데이터를 전달 ],
 
 
 
 
 
 
 
 
284
  outputs=[chatbot_data_upload, msg_data_upload],
285
  queue=True
286
  )
 
416
 
417
 
418
 
419
+
420
+