Update app.py
Browse files
app.py
CHANGED
@@ -156,14 +156,95 @@ def upload_parquet(file_path: str) -> Tuple[str, str, str]:
|
|
156 |
try:
|
157 |
# Parquet ํ์ผ ์ฝ๊ธฐ
|
158 |
df = pd.read_parquet(file_path, engine='pyarrow')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
159 |
# Markdown์ผ๋ก ๋ณํํ์ฌ ๋ฏธ๋ฆฌ๋ณด๊ธฐ
|
160 |
-
parquet_content =
|
161 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
162 |
parquet_json = df.to_json(orient='records', force_ascii=False)
|
|
|
163 |
return "Parquet ํ์ผ์ด ์ฑ๊ณต์ ์ผ๋ก ์
๋ก๋๋์์ต๋๋ค.", parquet_content, parquet_json
|
164 |
except Exception as e:
|
165 |
return f"Parquet ํ์ผ ์
๋ก๋ ์ค ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {str(e)}", "", ""
|
166 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
167 |
def text_to_parquet(text: str) -> Tuple[str, str, str]:
|
168 |
try:
|
169 |
from io import StringIO
|
|
|
156 |
try:
|
157 |
# Parquet ํ์ผ ์ฝ๊ธฐ
|
158 |
df = pd.read_parquet(file_path, engine='pyarrow')
|
159 |
+
|
160 |
+
# ๋ฐ์ดํฐ ๊ฒ์ฆ ๋ฐ ์ ์ฒ๋ฆฌ
|
161 |
+
if 'text' not in df.columns or 'label' not in df.columns:
|
162 |
+
return "ํ์ ์ปฌ๋ผ(text, label)์ด ์์ต๋๋ค.", "", ""
|
163 |
+
|
164 |
+
# ๋ฐ์ดํฐ ์์ฝ ์ ๋ณด ์์ฑ
|
165 |
+
data_info = {
|
166 |
+
"์ด ๋ ์ฝ๋ ์": len(df),
|
167 |
+
"๋ ์ด๋ธ ๋ถํฌ": df['label'].value_counts().to_dict(),
|
168 |
+
"์ปฌ๋ผ ๋ชฉ๋ก": list(df.columns)
|
169 |
+
}
|
170 |
+
|
171 |
# Markdown์ผ๋ก ๋ณํํ์ฌ ๋ฏธ๋ฆฌ๋ณด๊ธฐ
|
172 |
+
parquet_content = f"""### ๋ฐ์ดํฐ์
์ ๋ณด:
|
173 |
+
- ์ด ๋ ์ฝ๋ ์: {data_info['์ด ๋ ์ฝ๋ ์']}
|
174 |
+
- ๋ ์ด๋ธ ๋ถํฌ: {data_info['๋ ์ด๋ธ ๋ถํฌ']}
|
175 |
+
- ์ปฌ๋ผ: {', '.join(data_info['์ปฌ๋ผ ๋ชฉ๋ก'])}
|
176 |
+
|
177 |
+
### ๋ฐ์ดํฐ ๋ฏธ๋ฆฌ๋ณด๊ธฐ:
|
178 |
+
{df.head(10).to_markdown(index=False)}
|
179 |
+
"""
|
180 |
+
# DataFrame์ JSON ๋ฌธ์์ด๋ก ๋ณํ (Q&A์์ ์ฌ์ฉ)
|
181 |
parquet_json = df.to_json(orient='records', force_ascii=False)
|
182 |
+
|
183 |
return "Parquet ํ์ผ์ด ์ฑ๊ณต์ ์ผ๋ก ์
๋ก๋๋์์ต๋๋ค.", parquet_content, parquet_json
|
184 |
except Exception as e:
|
185 |
return f"Parquet ํ์ผ ์
๋ก๋ ์ค ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {str(e)}", "", ""
|
186 |
|
187 |
+
def _build_dataset_prompt(parquet_data: str, preview_rows: int = 50) -> str:
    """Build the system prompt that grounds the model in the uploaded dataset.

    Args:
        parquet_data: JSON string that ``pd.read_json`` can parse into a
            DataFrame (presumably the records-oriented JSON returned by
            ``upload_parquet`` -- confirm against the caller).
        preview_rows: Number of leading rows rendered into the prompt.

    Returns:
        The complete system prompt, including a dataset summary and a
        plain-text preview of the first ``preview_rows`` rows.

    Raises:
        ValueError: Propagated from ``pd.read_json`` when the JSON is invalid.
    """
    # Local import, mirroring the style used by text_to_parquet in this file.
    from io import StringIO

    df = pd.read_json(StringIO(parquet_data))

    # NOTE(review): the Korean literals below were reconstructed from
    # mis-encoded text -- confirm them against the original file.
    context_lines = [
        "",
        "현재 업로드된 데이터셋 정보:",
        f"- 총 {len(df)} 개의 레코드",
    ]
    # The label column may be absent or non-string (read_json infers ints for
    # numeric labels), so guard the lookup and stringify before joining.
    if "label" in df.columns:
        label_names = ", ".join(str(value) for value in df["label"].unique())
        context_lines.append(f"- 레이블 종류: {label_names}")
    column_names = ", ".join(str(column) for column in df.columns)
    context_lines.append(f"- 데이터 필드: {column_names}")
    context_lines.append("")
    context_lines.append("다음 데이터를 기반으로 질문에 답변하세요:")
    # Only the first rows are supplied as context to keep the prompt small.
    context_lines.append(df.head(preview_rows).to_string())
    context_lines.append("")
    data_context = "\n".join(context_lines)

    prompt_lines = [
        "당신은 업로드된 데이터셋을 분석하고 질문에 답변하는 AI 어시스턴트입니다.",
        "",
        "주요 지침:",
        "1. 반드시 한글로 답변할 것",
        "2. 데이터셋의 내용을 기반으로 정확하게 답변할 것",
        "3. 데이터에 없는 내용은 추측하지 말 것",
        "4. 답변은 간단명료하게 할 것",
        "5. 데이터 프라이버시를 고려하여 답변할 것",
        "",
        "데이터셋 컨텍스트:",
        data_context,
        "",
    ]
    return "\n".join(prompt_lines)


def respond(message: str, history: List[Dict[str, str]], system_message: str = "", max_tokens: int = 4000, temperature: float = 0.5, top_p: float = 0.9, parquet_data: str = None) -> str:
    """Stream a chat answer, optionally grounded in an uploaded dataset.

    This is a generator: each yield is the whole answer accumulated so far,
    passed through ``clean_response``. (The ``-> str`` annotation is kept for
    interface compatibility; the yielded items are strings.)

    Args:
        message: The user's current message.
        history: Earlier turns as dicts with ``"role"`` and ``"content"``
            keys; only the last three entries are forwarded.
        system_message: System prompt used when no dataset is supplied.
        max_tokens: Forwarded to the completion request.
        temperature: Forwarded to the completion request.
        top_p: Forwarded to the completion request.
        parquet_data: JSON string of the uploaded dataset. When truthy, the
            system prompt is built from the dataset and ``system_message``
            is not used.

    Yields:
        The cleaned partial response after every non-empty streamed delta, or
        a single error message if anything fails.
    """
    try:
        if parquet_data:
            system_prompt = _build_dataset_prompt(parquet_data)
        else:
            # NOTE(review): literal reconstructed from mis-encoded text.
            system_prompt = system_message or "너는 AI 조언자 역할이다."

        messages = [{"role": "system", "content": system_prompt}]

        # Forward only role/content of the three most recent turns.
        messages.extend(
            {"role": turn["role"], "content": turn["content"]}
            for turn in (history or [])[-3:]
        )
        messages.append({"role": "user", "content": message})

        stream = client.chat.completions.create(
            model="gpt-4-0125-preview",
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            stream=True,
        )

        full_response = ""
        for chunk in stream:
            # Some stream chunks carry no choices; skip them instead of
            # failing on choices[0].
            if not chunk.choices:
                continue
            delta_text = chunk.choices[0].delta.content
            if delta_text:
                full_response += delta_text
                yield clean_response(full_response)

    except Exception as e:
        # Top-level boundary for the UI: log the traceback and surface a
        # readable message instead of crashing the stream.
        error_message = f"응답 생성 중 오류 발생: {str(e)}"
        print(f"{error_message}\n{traceback.format_exc()}")
        yield error_message
|
247 |
+
|
248 |
def text_to_parquet(text: str) -> Tuple[str, str, str]:
|
249 |
try:
|
250 |
from io import StringIO
|