Spaces:
aiqcamp
/
No application file

aiqcamp committed on
Commit
98f999d
·
verified ·
1 Parent(s): f77297e

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -1505
app.py DELETED
@@ -1,1505 +0,0 @@
1
- import os, json, re, logging, requests, markdown, time, io
2
- from datetime import datetime
3
- import random
4
- import base64
5
- from io import BytesIO
6
- from PIL import Image
7
-
8
- import streamlit as st
9
- from openai import OpenAI
10
-
11
- from gradio_client import Client
12
- import pandas as pd
13
- import PyPDF2 # For handling PDF files
14
- import kagglehub
15
-
16
# ──────────────────────────────── Environment Variables / Constants ─────────────────────────
# All credentials are read from the environment and default to "" so the app
# can still start; missing keys fail later with explicit error messages.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
# NOTE(review): despite the env-var name, this holds the *Brave Search* key;
# the SERPHOUSE_API_KEY name is kept deliberately (see original comment).
BRAVE_KEY = os.getenv("SERPHOUSE_API_KEY", "") # Keep this name
BRAVE_ENDPOINT = "https://api.search.brave.com/res/v1/web/search"
BRAVE_VIDEO_ENDPOINT = "https://api.search.brave.com/res/v1/videos/search"
BRAVE_NEWS_ENDPOINT = "https://api.search.brave.com/res/v1/news/search"
IMAGE_API_URL = "http://211.233.58.201:7896"  # external image-generation service (hard-coded IP)
MAX_TOKENS = 7999  # token budget used for LLM completions
KAGGLE_API_KEY = os.getenv("KDATA_API", "")

# Set Kaggle API key so kagglehub can authenticate dataset downloads.
# NOTE(review): KAGGLE_KEY is set even when KDATA_API is empty — confirm
# kagglehub tolerates an empty value, or guard this assignment.
os.environ["KAGGLE_KEY"] = KAGGLE_API_KEY
28
-
29
# Analysis modes and style definitions.
# Keys are internal mode identifiers used by get_system_prompt(); values are
# the Korean UI labels describing each analysis mode.
ANALYSIS_MODES = {
    "price_forecast": "농산물 가격 예츑과 μ‹œμž₯ 뢄석",
    "market_trend": "μ‹œμž₯ 동ν–₯ 및 μˆ˜μš” νŒ¨ν„΄ 뢄석",
    "production_analysis": "μƒμ‚°λŸ‰ 뢄석 및 μ‹λŸ‰ μ•ˆλ³΄ 전망",
    "agricultural_policy": "농업 μ •μ±… 및 규제 영ν–₯ 뢄석",
    "climate_impact": "κΈ°ν›„ λ³€ν™”κ°€ 농업에 λ―ΈμΉ˜λŠ” 영ν–₯ 뢄석"
}

# Response-tone presets (Korean labels): professional / simple / detailed /
# action-oriented. Keys mirror the style names accepted by get_system_prompt().
RESPONSE_STYLES = {
    "professional": "전문적이고 ν•™μˆ μ μΈ 뢄석",
    "simple": "μ‰½κ²Œ 이해할 수 μžˆλŠ” κ°„κ²°ν•œ μ„€λͺ…",
    "detailed": "μƒμ„Έν•œ 톡계 기반 깊이 μžˆλŠ” 뢄석",
    "action_oriented": "μ‹€ν–‰ κ°€λŠ₯ν•œ μ‘°μ–Έκ³Ό μΆ”μ²œ 쀑심"
}

# Example search queries offered in the UI (Korean sample prompts).
EXAMPLE_QUERIES = {
    "example1": "μŒ€ 가격 μΆ”μ„Έ 및 ν–₯ν›„ 6κ°œμ›” 전망을 λΆ„μ„ν•΄μ£Όμ„Έμš”",
    "example2": "κΈ°ν›„ λ³€ν™”λ‘œ ν•œκ΅­ 과일 생산 μ „λž΅κ³Ό μˆ˜μš” 예츑 λ³΄κ³ μ„œλ₯Ό μž‘μ„±ν•˜λΌ.",
    "example3": "2025λ…„λΆ€ν„° 2030λ…„κΉŒμ§€ 좩뢁 μ¦ν‰κ΅°μ—μ„œ μž¬λ°°ν•˜λ©΄ μœ λ§ν•œ μž‘λ¬Όμ€? μˆ˜μ΅μ„±κ³Ό 관리성이 μ’‹μ•„μ•Όν•œλ‹€"
}
51
-
52
# ──────────────────────────────── Logging ────────────────────────────────
# Root-logger configuration; the module-level logging.info/error calls in the
# dataset helpers below rely on this being executed at import time.
logging.basicConfig(level=logging.INFO,
                    format="%(asctime)s - %(levelname)s - %(message)s")
55
-
56
- # ──────────────────────────────── OpenAI Client ──────────────────────────
57
-
58
@st.cache_resource
def get_openai_client():
    """Build and cache a single shared OpenAI client.

    Raises:
        RuntimeError: if the OPENAI_API_KEY environment variable is unset.
    """
    if not OPENAI_API_KEY:
        raise RuntimeError("⚠️ OPENAI_API_KEY ν™˜κ²½ λ³€μˆ˜κ°€ μ„€μ •λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€.")
    # 60-second request timeout, up to 3 automatic retries.
    client_settings = {
        "api_key": OPENAI_API_KEY,
        "timeout": 60.0,
        "max_retries": 3,
    }
    return OpenAI(**client_settings)
68
-
69
- # ────────────────────────────── Kaggle Dataset Access ──────────────────────
70
@st.cache_resource
def load_agriculture_dataset():
    """Download the UN global food & agriculture statistics dataset from Kaggle
    and index its CSV files.

    Returns:
        dict with 'base_path' (download dir) and 'files' (list of dicts with
        'name', 'path', 'size_mb'), or None if the download/scan fails.
    """
    try:
        base_dir = kagglehub.dataset_download("unitednations/global-food-agriculture-statistics")
        logging.info(f"Kaggle dataset downloaded to: {base_dir}")

        # Walk the download tree once and catalog every CSV with its size in MB.
        catalog = [
            {
                'name': fname,
                'path': os.path.join(dirpath, fname),
                'size_mb': round(os.path.getsize(os.path.join(dirpath, fname)) / (1024 * 1024), 2),
            }
            for dirpath, _subdirs, filenames in os.walk(base_dir)
            for fname in filenames
            if fname.endswith('.csv')
        ]

        return {
            'base_path': base_dir,
            'files': catalog,
        }
    except Exception as e:
        logging.error(f"Error loading Kaggle dataset: {e}")
        return None
97
-
98
- # New function to load Advanced Soybean Agricultural Dataset
99
@st.cache_resource
def load_soybean_dataset():
    """Download the Advanced Soybean Agricultural Dataset (2025) from Kaggle
    and index its CSV/XLSX files.

    Returns:
        dict with 'base_path' and 'files' ('name', 'path', 'size_mb' each),
        or None on failure.
    """
    try:
        base_dir = kagglehub.dataset_download("wisam1985/advanced-soybean-agricultural-dataset-2025")
        logging.info(f"Soybean dataset downloaded to: {base_dir}")

        # Catalog both CSV and Excel files found anywhere under the download dir.
        catalog = [
            {
                'name': fname,
                'path': os.path.join(dirpath, fname),
                'size_mb': round(os.path.getsize(os.path.join(dirpath, fname)) / (1024 * 1024), 2),
            }
            for dirpath, _subdirs, filenames in os.walk(base_dir)
            for fname in filenames
            if fname.endswith(('.csv', '.xlsx'))
        ]

        return {
            'base_path': base_dir,
            'files': catalog,
        }
    except Exception as e:
        logging.error(f"Error loading Soybean dataset: {e}")
        return None
125
-
126
- # Function to load Crop Recommendation Dataset
127
@st.cache_resource
def load_crop_recommendation_dataset():
    """Download the soil/environment crop-recommendation dataset from Kaggle
    and index its CSV/XLSX files.

    Returns:
        dict with 'base_path' and 'files' ('name', 'path', 'size_mb' each),
        or None on failure.
    """
    try:
        base_dir = kagglehub.dataset_download("agriinnovate/agricultural-crop-dataset")
        logging.info(f"Crop recommendation dataset downloaded to: {base_dir}")

        # Catalog both CSV and Excel files found anywhere under the download dir.
        catalog = [
            {
                'name': fname,
                'path': os.path.join(dirpath, fname),
                'size_mb': round(os.path.getsize(os.path.join(dirpath, fname)) / (1024 * 1024), 2),
            }
            for dirpath, _subdirs, filenames in os.walk(base_dir)
            for fname in filenames
            if fname.endswith(('.csv', '.xlsx'))
        ]

        return {
            'base_path': base_dir,
            'files': catalog,
        }
    except Exception as e:
        logging.error(f"Error loading Crop recommendation dataset: {e}")
        return None
153
-
154
- # Function to load Climate Change Impact Dataset
155
@st.cache_resource
def load_climate_impact_dataset():
    """Download the climate-change impact-on-agriculture dataset from Kaggle
    and index its CSV/XLSX files.

    Returns:
        dict with 'base_path' and 'files' ('name', 'path', 'size_mb' each),
        or None on failure.
    """
    try:
        base_dir = kagglehub.dataset_download("waqi786/climate-change-impact-on-agriculture")
        logging.info(f"Climate impact dataset downloaded to: {base_dir}")

        # Catalog both CSV and Excel files found anywhere under the download dir.
        catalog = [
            {
                'name': fname,
                'path': os.path.join(dirpath, fname),
                'size_mb': round(os.path.getsize(os.path.join(dirpath, fname)) / (1024 * 1024), 2),
            }
            for dirpath, _subdirs, filenames in os.walk(base_dir)
            for fname in filenames
            if fname.endswith(('.csv', '.xlsx'))
        ]

        return {
            'base_path': base_dir,
            'files': catalog,
        }
    except Exception as e:
        logging.error(f"Error loading Climate impact dataset: {e}")
        return None
181
-
182
def get_dataset_summary():
    """Produce a Korean-language markdown overview of the UN agriculture
    dataset: file listing (first 10), a 5-row sample of the first CSV, and a
    per-column placeholder section. Returns an English failure message when
    the dataset cannot be loaded."""
    dataset_info = load_agriculture_dataset()
    if not dataset_info:
        return "Failed to load the UN global food and agriculture statistics dataset."

    files = dataset_info['files']
    parts = ["# UN κΈ€λ‘œλ²Œ μ‹λŸ‰ 및 농업 톡계 데이터셋\n\n"]
    parts.append(f"총 {len(files)}개의 CSV 파일이 ν¬ν•¨λ˜μ–΄ μžˆμŠ΅λ‹ˆλ‹€.\n\n")

    # File listing, capped at the first 10 entries.
    parts.append("## μ‚¬μš© κ°€λŠ₯ν•œ 데이터 파일:\n\n")
    for idx, entry in enumerate(files[:10], 1):
        parts.append(f"{idx}. **{entry['name']}** ({entry['size_mb']} MB)\n")

    if len(files) > 10:
        parts.append(f"\n...μ™Έ {len(files) - 10}개 파일\n")

    # Sample the first file to illustrate the data structure; errors here are
    # logged and reported inline rather than aborting the summary.
    try:
        if files:
            df = pd.read_csv(files[0]['path'], nrows=5)
            parts.append("\n## 데이터 μƒ˜ν”Œ ꡬ쑰:\n\n")
            parts.append(df.head(5).to_markdown() + "\n\n")
            parts.append("## 데이터셋 λ³€μˆ˜ μ„€λͺ…:\n\n")
            for col in df.columns:
                parts.append(f"- **{col}**: [λ³€μˆ˜ μ„€λͺ… ν•„μš”]\n")
    except Exception as e:
        logging.error(f"Error generating dataset sample: {e}")
        parts.append("\n데이터 μƒ˜ν”Œμ„ μƒμ„±ν•˜λŠ” 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€.\n")

    return "".join(parts)
215
-
216
def analyze_dataset_for_query(query):
    """Build a Korean markdown report over UN agriculture CSVs relevant to *query*.

    Selection: Korean keywords found in the query are expanded to English
    terms and matched against file names; if nothing matches, the first 5
    files are used as a fallback. At most 3 files are actually read.
    Returns the report string (never raises; per-file errors are inlined).
    """
    dataset_info = load_agriculture_dataset()
    if not dataset_info:
        return "데이터셋을 뢈러올 수 μ—†μŠ΅λ‹ˆλ‹€. Kaggle API 연결을 ν™•μΈν•΄μ£Όμ„Έμš”."

    # Extract key terms from the query
    query_lower = query.lower()

    # Korean query term -> English filename search terms.
    keywords = {
        "μŒ€": ["rice", "grain"],
        "λ°€": ["wheat", "grain"],
        "μ˜₯수수": ["corn", "maize", "grain"],
        "μ±„μ†Œ": ["vegetable", "produce"],
        "과일": ["fruit", "produce"],
        "가격": ["price", "cost", "value"],
        "생산": ["production", "yield", "harvest"],
        "수좜": ["export", "trade"],
        "μˆ˜μž…": ["import", "trade"],
        "μ†ŒλΉ„": ["consumption", "demand"]
    }

    # Find relevant files based on the query
    relevant_files = []

    # First check for Korean keywords in the query; keep both the Korean term
    # and its English expansions for filename matching.
    found_keywords = []
    for k_term, e_terms in keywords.items():
        if k_term in query_lower:
            found_keywords.extend([k_term] + e_terms)

    # If no Korean keywords found, check for English terms in the filenames
    if not found_keywords:
        # Generic search through all files
        relevant_files = dataset_info['files'][:5] # Take first 5 files as default
    else:
        # Search for files related to the found keywords (first match wins per file).
        for file_info in dataset_info['files']:
            file_name_lower = file_info['name'].lower()
            for keyword in found_keywords:
                if keyword.lower() in file_name_lower:
                    relevant_files.append(file_info)
                    break

    # If still no relevant files, take the first 5 files
    if not relevant_files:
        relevant_files = dataset_info['files'][:5]

    # Read and analyze the relevant files
    analysis_result = "# 농업 데이터 뢄석 κ²°κ³Ό\n\n"
    analysis_result += f"쿼리: '{query}'에 λŒ€ν•œ 뢄석을 μˆ˜ν–‰ν–ˆμŠ΅λ‹ˆλ‹€.\n\n"

    if found_keywords:
        # NOTE(review): join over a set — keyword display order is nondeterministic.
        analysis_result += f"## 뢄석 ν‚€μ›Œλ“œ: {', '.join(set(found_keywords))}\n\n"

    # Process each relevant file
    for file_info in relevant_files[:3]: # Limit to 3 files for performance
        try:
            analysis_result += f"## 파일: {file_info['name']}\n\n"

            # Read the CSV file (whole file — assumed small enough; TODO confirm)
            df = pd.read_csv(file_info['path'])

            # Basic file stats
            analysis_result += f"- ν–‰ 수: {len(df)}\n"
            analysis_result += f"- μ—΄ 수: {len(df.columns)}\n"
            analysis_result += f"- μ—΄ λͺ©λ‘: {', '.join(df.columns.tolist())}\n\n"

            # Sample data
            analysis_result += "### 데이터 μƒ˜ν”Œ:\n\n"
            analysis_result += df.head(5).to_markdown() + "\n\n"

            # Statistical summary of numeric columns
            numeric_cols = df.select_dtypes(include=['number']).columns
            if len(numeric_cols) > 0:
                analysis_result += "### κΈ°λ³Έ 톡계:\n\n"
                stats_df = df[numeric_cols].describe()
                analysis_result += stats_df.to_markdown() + "\n\n"

            # Time series analysis if possible (presence of a year/date column
            # only adds a note — no actual time-series computation is done).
            time_cols = [col for col in df.columns if 'year' in col.lower() or 'date' in col.lower()]
            if time_cols:
                analysis_result += "### μ‹œκ³„μ—΄ νŒ¨ν„΄:\n\n"
                analysis_result += "데이터셋에 μ‹œκ°„ κ΄€λ ¨ 열이 μžˆμ–΄ μ‹œκ³„μ—΄ 뢄석이 κ°€λŠ₯ν•©λ‹ˆλ‹€.\n\n"

        except Exception as e:
            # Per-file failures are reported inline so the other files still render.
            logging.error(f"Error analyzing file {file_info['name']}: {e}")
            analysis_result += f"이 파일 뢄석 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}\n\n"

    analysis_result += "## 농산물 가격 예츑 및 μˆ˜μš” 뢄석에 λŒ€ν•œ μΈμ‚¬μ΄νŠΈ\n\n"
    analysis_result += "λ°μ΄ν„°μ…‹μ—μ„œ μΆ”μΆœν•œ 정보λ₯Ό λ°”νƒ•μœΌλ‘œ λ‹€μŒ μΈμ‚¬μ΄νŠΈλ₯Ό μ œκ³΅ν•©λ‹ˆλ‹€:\n\n"
    analysis_result += "1. 데이터 기반 뢄석 (기본적인 μš”μ•½)\n"
    analysis_result += "2. μ£Όμš” 가격 및 μˆ˜μš” 동ν–₯\n"
    analysis_result += "3. μƒμ‚°λŸ‰ 및 무역 νŒ¨ν„΄\n\n"

    analysis_result += "이 뢄석은 UN κΈ€λ‘œλ²Œ μ‹λŸ‰ 및 농업 톡계 데이터셋을 기반으둜 ν•©λ‹ˆλ‹€.\n\n"

    return analysis_result
315
-
316
- # Function to analyze crop recommendation dataset
317
def analyze_crop_recommendation_dataset(query):
    """Build a Korean markdown analysis of the crop-recommendation dataset.

    Reads at most the first 2 CSV/XLSX files, summarizes crops and
    environmental factors, and highlights crops whose names match terms in
    *query*. Returns the report string; errors are logged and reported
    inline (the function never raises).

    Fix vs. original: `numeric_factors` was only assigned inside the
    `if env_factors:` branch but read later in the relevant-crops section,
    causing a NameError whenever env_factors was empty. It is now
    initialized up front.
    """
    try:
        dataset_info = load_crop_recommendation_dataset()
        if not dataset_info or not dataset_info['files']:
            return "μž‘λ¬Ό μΆ”μ²œ 데이터셋을 뢈러올 수 μ—†μŠ΅λ‹ˆλ‹€."

        analysis_result = "# ν† μ–‘ 및 ν™˜κ²½ λ³€μˆ˜ 기반 μž‘λ¬Ό μΆ”μ²œ 데이터 뢄석\n\n"

        # Process main files (limit to the first 2 for performance).
        for file_info in dataset_info['files'][:2]:
            try:
                analysis_result += f"## 파일: {file_info['name']}\n\n"

                if file_info['name'].endswith('.csv'):
                    df = pd.read_csv(file_info['path'])
                elif file_info['name'].endswith('.xlsx'):
                    df = pd.read_excel(file_info['path'])
                else:
                    continue

                # Basic dataset info.
                analysis_result += f"- 데이터 크기: {len(df)} ν–‰ Γ— {len(df.columns)} μ—΄\n"
                analysis_result += f"- ν¬ν•¨λœ μž‘λ¬Ό μ’…λ₯˜: "

                # Locate the crop-label column (English 'crop' or Korean 'μž‘λ¬Ό').
                crop_cols = [col for col in df.columns if 'crop' in col.lower() or 'μž‘λ¬Ό' in col.lower()]
                if crop_cols:
                    main_crop_col = crop_cols[0]
                    unique_crops = df[main_crop_col].unique()
                    analysis_result += f"{len(unique_crops)}μ’… ({', '.join(str(c) for c in unique_crops[:10])})\n\n"
                else:
                    analysis_result += "μž‘λ¬Ό 정보 열을 찾을 수 μ—†μŒ\n\n"

                # Everything that is not an obvious label/id column is treated
                # as an environmental factor.
                env_factors = [col for col in df.columns if col.lower() not in ['crop', 'label', 'id', 'index']]
                if env_factors:
                    analysis_result += f"- 고렀된 ν™˜κ²½ μš”μ†Œ: {', '.join(env_factors)}\n\n"

                # Sample data.
                analysis_result += "### 데이터 μƒ˜ν”Œ:\n\n"
                analysis_result += df.head(5).to_markdown() + "\n\n"

                # FIX: always defined, so the relevant-crops section below can
                # safely test it even when env_factors is empty.
                numeric_factors = []
                if env_factors:
                    numeric_factors = df[env_factors].select_dtypes(include=['number']).columns
                    if len(numeric_factors) > 0:
                        analysis_result += "### ν™˜κ²½ μš”μ†Œ 톡계:\n\n"
                        stats_df = df[numeric_factors].describe().round(2)
                        analysis_result += stats_df.to_markdown() + "\n\n"

                # Check for query-specific crops (substring match on crop names).
                query_terms = query.lower().split()
                relevant_crops = []

                if crop_cols:
                    for crop in df[main_crop_col].unique():
                        crop_str = str(crop).lower()
                        if any(term in crop_str for term in query_terms):
                            relevant_crops.append(crop)

                if relevant_crops:
                    analysis_result += f"### 쿼리 κ΄€λ ¨ μž‘λ¬Ό 뢄석: {', '.join(str(c) for c in relevant_crops)}\n\n"
                    for crop in relevant_crops[:3]:  # Limit to 3 crops
                        crop_data = df[df[main_crop_col] == crop]
                        analysis_result += f"#### {crop} μž‘λ¬Ό μš”μ•½:\n\n"
                        analysis_result += f"- μƒ˜ν”Œ 수: {len(crop_data)}개\n"

                        if len(numeric_factors) > 0:
                            crop_stats = crop_data[numeric_factors].describe().round(2)
                            analysis_result += f"- 평균 ν™˜κ²½ 쑰건:\n"
                            for factor in numeric_factors[:5]:  # Limit to 5 factors
                                analysis_result += f" * {factor}: {crop_stats.loc['mean', factor]}\n"
                            analysis_result += "\n"

            except Exception as e:
                # Per-file failures are reported inline so other files still render.
                logging.error(f"Error analyzing crop recommendation file {file_info['name']}: {e}")
                analysis_result += f"뢄석 였λ₯˜: {str(e)}\n\n"

        analysis_result += "## μž‘λ¬Ό μΆ”μ²œ μΈμ‚¬μ΄νŠΈ\n\n"
        analysis_result += "ν† μ–‘ 및 ν™˜κ²½ λ³€μˆ˜ 데이터셋 뢄석 κ²°κ³Ό, λ‹€μŒκ³Ό 같은 μ£Όμš” μΈμ‚¬μ΄νŠΈλ₯Ό μ œκ³΅ν•©λ‹ˆλ‹€:\n\n"
        analysis_result += "1. μ§€μ—­ ν™˜κ²½μ— μ ν•©ν•œ μž‘λ¬Ό μΆ”μ²œ\n"
        analysis_result += "2. μž‘λ¬Ό 생산성에 영ν–₯을 λ―ΈμΉ˜λŠ” μ£Όμš” ν™˜κ²½ μš”μΈ\n"
        analysis_result += "3. 지속 κ°€λŠ₯ν•œ 농업을 μœ„ν•œ 졜적의 μž‘λ¬Ό 선택 κΈ°μ€€\n\n"

        return analysis_result

    except Exception as e:
        logging.error(f"Crop recommendation dataset analysis error: {e}")
        return "μž‘λ¬Ό μΆ”μ²œ 데이터셋 뢄석 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€."
407
-
408
- # Function to analyze climate impact dataset
409
def analyze_climate_impact_dataset(query):
    """Build a Korean markdown analysis of the climate-impact dataset.

    Reads at most the first 2 CSV/XLSX files, summarizes region, climate,
    and crop columns, and reports climate-vs-crop correlations where numeric
    columns of both kinds exist. Returns the report string; errors are
    logged and reported inline (never raises).

    Fix vs. original: the correlation section used a bare `except:`, which
    also swallows SystemExit/KeyboardInterrupt — narrowed to `Exception`.
    (*query* is accepted for interface symmetry with the sibling analyzers
    but is not consulted here.)
    """
    try:
        dataset_info = load_climate_impact_dataset()
        if not dataset_info or not dataset_info['files']:
            return "κΈ°ν›„ λ³€ν™” 영ν–₯ 데이터셋을 뢈러올 수 μ—†μŠ΅λ‹ˆλ‹€."

        analysis_result = "# κΈ°ν›„ λ³€ν™”κ°€ 농업에 λ―ΈμΉ˜λŠ” 영ν–₯ 데이터 뢄석\n\n"

        # Process main files (limit to first 2 for performance).
        for file_info in dataset_info['files'][:2]:
            try:
                analysis_result += f"## 파일: {file_info['name']}\n\n"

                if file_info['name'].endswith('.csv'):
                    df = pd.read_csv(file_info['path'])
                elif file_info['name'].endswith('.xlsx'):
                    df = pd.read_excel(file_info['path'])
                else:
                    continue

                # Basic dataset info.
                analysis_result += f"- 데이터 크기: {len(df)} ν–‰ Γ— {len(df.columns)} μ—΄\n"

                # Region/country column, if any.
                region_cols = [col for col in df.columns if 'region' in col.lower() or 'country' in col.lower() or 'μ§€μ—­' in col.lower()]
                if region_cols:
                    main_region_col = region_cols[0]
                    regions = df[main_region_col].unique()
                    analysis_result += f"- ν¬ν•¨λœ μ§€μ—­: {len(regions)}개 ({', '.join(str(r) for r in regions[:5])})\n"

                # Identify climate- and crop-related columns by name heuristics.
                climate_cols = [col for col in df.columns if any(term in col.lower() for term in
                                                                 ['temp', 'rainfall', 'precipitation', 'climate', 'weather', '기온', 'κ°•μˆ˜λŸ‰'])]
                crop_cols = [col for col in df.columns if any(term in col.lower() for term in
                                                              ['yield', 'production', 'crop', 'harvest', 'μˆ˜ν™•λŸ‰', 'μƒμ‚°λŸ‰'])]

                if climate_cols:
                    analysis_result += f"- κΈ°ν›„ κ΄€λ ¨ λ³€μˆ˜: {', '.join(climate_cols)}\n"
                if crop_cols:
                    analysis_result += f"- μž‘λ¬Ό κ΄€λ ¨ λ³€μˆ˜: {', '.join(crop_cols)}\n\n"

                # Sample data.
                analysis_result += "### 데이터 μƒ˜ν”Œ:\n\n"
                analysis_result += df.head(5).to_markdown() + "\n\n"

                # Time-series note only — no actual time-series computation.
                year_cols = [col for col in df.columns if 'year' in col.lower() or 'date' in col.lower() or '연도' in col.lower()]
                if year_cols:
                    analysis_result += "### μ‹œκ³„μ—΄ κΈ°ν›„ 영ν–₯ νŒ¨ν„΄:\n\n"
                    analysis_result += "이 데이터셋은 μ‹œκ°„μ— λ”°λ₯Έ κΈ°ν›„ 변화와 농업 생산성 κ°„μ˜ 관계λ₯Ό 뢄석할 수 μžˆμŠ΅λ‹ˆλ‹€.\n\n"

                # Statistical summary of the identified key variables.
                key_vars = climate_cols + crop_cols
                numeric_vars = df[key_vars].select_dtypes(include=['number']).columns
                if len(numeric_vars) > 0:
                    analysis_result += "### μ£Όμš” λ³€μˆ˜ 톡계:\n\n"
                    stats_df = df[numeric_vars].describe().round(2)
                    analysis_result += stats_df.to_markdown() + "\n\n"

                # Correlations between climate and crop variables (2 of each max).
                if len(climate_cols) > 0 and len(crop_cols) > 0:
                    numeric_climate = df[climate_cols].select_dtypes(include=['number']).columns
                    numeric_crop = df[crop_cols].select_dtypes(include=['number']).columns

                    if len(numeric_climate) > 0 and len(numeric_crop) > 0:
                        analysis_result += "### 기후와 μž‘λ¬Ό 생산 κ°„μ˜ 상관관계:\n\n"
                        try:
                            corr_vars = list(numeric_climate)[:2] + list(numeric_crop)[:2]
                            corr_df = df[corr_vars].corr().round(3)
                            analysis_result += corr_df.to_markdown() + "\n\n"
                            analysis_result += "μœ„ 상관관계 ν‘œλŠ” κΈ°ν›„ λ³€μˆ˜μ™€ μž‘λ¬Ό 생산성 κ°„μ˜ 관계 강도λ₯Ό λ³΄μ—¬μ€λ‹ˆλ‹€.\n\n"
                        except Exception:  # FIX: was a bare `except:`
                            analysis_result += "상관관계 계산 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€.\n\n"

            except Exception as e:
                # Per-file failures are reported inline so other files still render.
                logging.error(f"Error analyzing climate impact file {file_info['name']}: {e}")
                analysis_result += f"뢄석 였λ₯˜: {str(e)}\n\n"

        analysis_result += "## κΈ°ν›„ λ³€ν™” 영ν–₯ μΈμ‚¬μ΄νŠΈ\n\n"
        analysis_result += "κΈ°ν›„ λ³€ν™”κ°€ 농업에 λ―ΈμΉ˜λŠ” 영ν–₯ 데이터 뢄석 κ²°κ³Ό, λ‹€μŒκ³Ό 같은 μΈμ‚¬μ΄νŠΈλ₯Ό μ œκ³΅ν•©λ‹ˆλ‹€:\n\n"
        analysis_result += "1. 기온 변화에 λ”°λ₯Έ μž‘λ¬Ό 생산성 변동 νŒ¨ν„΄\n"
        analysis_result += "2. κ°•μˆ˜λŸ‰ λ³€ν™”κ°€ 농업 μˆ˜ν™•λŸ‰μ— λ―ΈμΉ˜λŠ” 영ν–₯\n"
        analysis_result += "3. κΈ°ν›„ 변화에 λŒ€μ‘ν•˜κΈ° μœ„ν•œ 농업 μ „λž΅ μ œμ•ˆ\n"
        analysis_result += "4. 지역별 κΈ°ν›„ μ·¨μ•½μ„± 및 적응 λ°©μ•ˆ\n\n"

        return analysis_result

    except Exception as e:
        logging.error(f"Climate impact dataset analysis error: {e}")
        return "κΈ°ν›„ λ³€ν™” 영ν–₯ 데이터셋 뢄석 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€."
500
-
501
- # Function to analyze soybean dataset if selected
502
def analyze_soybean_dataset(query):
    """Build a Korean markdown analysis of the advanced soybean dataset.

    Reads at most the first 2 CSV/XLSX files, summarizes location, yield,
    and environmental columns, and reports environment-vs-yield correlations
    where numeric columns of both kinds exist. Returns the report string;
    errors are logged and reported inline (never raises).

    Fix vs. original: the correlation section used a bare `except:`, which
    also swallows SystemExit/KeyboardInterrupt — narrowed to `Exception`.
    (*query* is accepted for interface symmetry with the sibling analyzers
    but is not consulted here.)
    """
    try:
        dataset_info = load_soybean_dataset()
        if not dataset_info or not dataset_info['files']:
            return "λŒ€λ‘ 농업 데이터셋을 뢈러올 수 μ—†μŠ΅λ‹ˆλ‹€."

        analysis_result = "# κ³ κΈ‰ λŒ€λ‘ 농업 데이터 뢄석\n\n"

        # Process main files (limit to the first 2 for performance).
        for file_info in dataset_info['files'][:2]:
            try:
                analysis_result += f"## 파일: {file_info['name']}\n\n"

                if file_info['name'].endswith('.csv'):
                    df = pd.read_csv(file_info['path'])
                elif file_info['name'].endswith('.xlsx'):
                    df = pd.read_excel(file_info['path'])
                else:
                    continue

                # Basic file stats.
                analysis_result += f"- 데이터 크기: {len(df)} ν–‰ Γ— {len(df.columns)} μ—΄\n"

                # Region/location columns, if any.
                location_cols = [col for col in df.columns if any(term in col.lower() for term in
                                                                  ['region', 'location', 'area', 'country', 'μ§€μ—­'])]
                if location_cols:
                    main_loc_col = location_cols[0]
                    locations = df[main_loc_col].unique()
                    analysis_result += f"- ν¬ν•¨λœ μ§€μ—­: {len(locations)}개 ({', '.join(str(loc) for loc in locations[:5])})\n"

                # Yield/production columns.
                yield_cols = [col for col in df.columns if any(term in col.lower() for term in
                                                               ['yield', 'production', 'harvest', 'μˆ˜ν™•λŸ‰', 'μƒμ‚°λŸ‰'])]
                if yield_cols:
                    analysis_result += f"- 생산성 κ΄€λ ¨ λ³€μˆ˜: {', '.join(yield_cols)}\n"

                # Environmental-factor columns.
                env_cols = [col for col in df.columns if any(term in col.lower() for term in
                                                             ['temp', 'rainfall', 'soil', 'fertilizer', 'nutrient', 'irrigation',
                                                              '기온', 'κ°•μˆ˜λŸ‰', 'ν† μ–‘', 'λΉ„λ£Œ', 'κ΄€κ°œ'])]
                if env_cols:
                    analysis_result += f"- ν™˜κ²½ κ΄€λ ¨ λ³€μˆ˜: {', '.join(env_cols)}\n\n"

                # Sample data.
                analysis_result += "### 데이터 μƒ˜ν”Œ:\n\n"
                analysis_result += df.head(5).to_markdown() + "\n\n"

                # Statistical summary of the identified key variables.
                key_vars = yield_cols + env_cols
                numeric_vars = df[key_vars].select_dtypes(include=['number']).columns
                if len(numeric_vars) > 0:
                    analysis_result += "### μ£Όμš” λ³€μˆ˜ 톡계:\n\n"
                    stats_df = df[numeric_vars].describe().round(2)
                    analysis_result += stats_df.to_markdown() + "\n\n"

                # Time-series note only — no actual time-series computation.
                year_cols = [col for col in df.columns if 'year' in col.lower() or 'date' in col.lower() or '연도' in col.lower()]
                if year_cols:
                    analysis_result += "### μ‹œκ³„μ—΄ 생산성 νŒ¨ν„΄:\n\n"
                    analysis_result += "이 데이터셋은 μ‹œκ°„μ— λ”°λ₯Έ λŒ€λ‘ μƒμ‚°μ„±μ˜ λ³€ν™”λ₯Ό 좔적할 수 μžˆμŠ΅λ‹ˆλ‹€.\n\n"

                # Correlations between environmental factors and yield (3/2 max).
                if len(env_cols) > 0 and len(yield_cols) > 0:
                    numeric_env = df[env_cols].select_dtypes(include=['number']).columns
                    numeric_yield = df[yield_cols].select_dtypes(include=['number']).columns

                    if len(numeric_env) > 0 and len(numeric_yield) > 0:
                        analysis_result += "### ν™˜κ²½ μš”μ†Œμ™€ λŒ€λ‘ 생산성 κ°„μ˜ 상관관계:\n\n"
                        try:
                            corr_vars = list(numeric_env)[:3] + list(numeric_yield)[:2]
                            corr_df = df[corr_vars].corr().round(3)
                            analysis_result += corr_df.to_markdown() + "\n\n"
                        except Exception:  # FIX: was a bare `except:`
                            analysis_result += "상관관계 계산 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€.\n\n"

            except Exception as e:
                # Per-file failures are reported inline so other files still render.
                logging.error(f"Error analyzing soybean file {file_info['name']}: {e}")
                analysis_result += f"뢄석 였λ₯˜: {str(e)}\n\n"

        analysis_result += "## λŒ€λ‘ 농업 μΈμ‚¬μ΄νŠΈ\n\n"
        analysis_result += "κ³ κΈ‰ λŒ€λ‘ 농업 데이터셋 뢄석 κ²°κ³Ό, λ‹€μŒκ³Ό 같은 μΈμ‚¬μ΄νŠΈλ₯Ό μ œκ³΅ν•©λ‹ˆλ‹€:\n\n"
        analysis_result += "1. 졜적의 λŒ€λ‘ 생산을 μœ„ν•œ ν™˜κ²½ 쑰건\n"
        analysis_result += "2. 지역별 λŒ€λ‘ 생산성 λ³€ν™” νŒ¨ν„΄\n"
        analysis_result += "3. 생산성 ν–₯상을 μœ„ν•œ 농업 기술 및 접근법\n"
        analysis_result += "4. μ‹œμž₯ μˆ˜μš”μ— λ§žλŠ” λŒ€λ‘ ν’ˆμ’… 선택 κ°€μ΄λ“œ\n\n"

        return analysis_result

    except Exception as e:
        logging.error(f"Soybean dataset analysis error: {e}")
        return "λŒ€λ‘ 농업 데이터셋 뢄석 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€."
595
-
596
- # ──────────────────────────────── System Prompt ─────────────────────────
597
def get_system_prompt(mode="price_forecast", style="professional", include_search_results=True, include_uploaded_files=False) -> str:
    """
    Generate a system prompt for the 'Agricultural Price & Demand Forecast AI Assistant' interface based on:
    - The selected analysis mode and style
    - Guidelines for using agricultural datasets, web search results and uploaded files

    Args:
        mode: key into ANALYSIS_MODES; unknown keys silently add no mode section.
        style: key into RESPONSE_STYLES; unknown keys silently add no style line.
        include_search_results: append the web-search usage guide when True.
        include_uploaded_files: append the uploaded-file usage guide when True.

    Returns:
        The assembled Korean system prompt as a single string. Also reads
        st.session_state['use_soybean_dataset'] to optionally add the soybean guide.
    """
    # Core persona and mission statement shared by every mode.
    base_prompt = """
당신은 농업 데이터 μ „λ¬Έκ°€λ‘œμ„œ 농산물 가격 예츑과 μˆ˜μš” 뢄석을 μˆ˜ν–‰ν•˜λŠ” AI μ–΄μ‹œμŠ€ν„΄νŠΈμž…λ‹ˆλ‹€.

μ£Όμš” μž„λ¬΄:
1. UN κΈ€λ‘œλ²Œ μ‹λŸ‰ 및 농업 톡계 데이터셋을 기반으둜 농산물 μ‹œμž₯ 뢄석
2. 농산물 가격 μΆ”μ„Έ 예츑 및 μˆ˜μš” νŒ¨ν„΄ 뢄석
3. 데이터λ₯Ό λ°”νƒ•μœΌλ‘œ λͺ…ν™•ν•˜κ³  κ·Όκ±° μžˆλŠ” 뢄석 제곡
4. κ΄€λ ¨ 정보와 μΈμ‚¬μ΄νŠΈλ₯Ό μ²΄κ³„μ μœΌλ‘œ κ΅¬μ„±ν•˜μ—¬ μ œμ‹œ
5. μ‹œκ°μ  이해λ₯Ό 돕기 μœ„ν•΄ 차트, κ·Έλž˜ν”„ 등을 적절히 ν™œμš©
6. ν† μ–‘ 및 ν™˜κ²½ λ³€μˆ˜ 기반 μž‘λ¬Ό μΆ”μ²œ λ°μ΄ν„°μ…‹μ—μ„œ μΆ”μΆœν•œ μΈμ‚¬μ΄νŠΈ 적용
7. κΈ°ν›„ λ³€ν™”κ°€ 농업에 λ―ΈμΉ˜λŠ” 영ν–₯ 데이터셋을 ν†΅ν•œ ν™˜κ²½ λ³€ν™” μ‹œλ‚˜λ¦¬μ˜€ 뢄석

μ€‘μš” κ°€μ΄λ“œλΌμΈ:
- 데이터에 κΈ°λ°˜ν•œ 객관적 뢄석을 μ œκ³΅ν•˜μ„Έμš”
- 뢄석 κ³Όμ •κ³Ό 방법둠을 λͺ…ν™•νžˆ μ„€λͺ…ν•˜μ„Έμš”
- 톡계적 μ‹ λ’°μ„±κ³Ό ν•œκ³„μ μ„ 투λͺ…ν•˜κ²Œ μ œμ‹œν•˜μ„Έμš”
- μ΄ν•΄ν•˜κΈ° μ‰¬μš΄ μ‹œκ°μ  μš”μ†Œλ‘œ 뢄석 κ²°κ³Όλ₯Ό λ³΄μ™„ν•˜μ„Έμš”
- λ§ˆν¬λ‹€μš΄μ„ ν™œμš©ν•΄ 응닡을 μ²΄κ³„μ μœΌλ‘œ κ΅¬μ„±ν•˜μ„Έμš”
"""

    # Per-mode focus sections; keys mirror ANALYSIS_MODES.
    mode_prompts = {
        "price_forecast": """
농산물 가격 예츑 및 μ‹œμž₯ 뢄석에 μ§‘μ€‘ν•©λ‹ˆλ‹€:
- κ³Όκ±° 가격 데이터 νŒ¨ν„΄μ— κΈ°λ°˜ν•œ 예츑 제곡
- 가격 변동성 μš”μΈ 뢄석(κ³„μ ˆμ„±, 날씨, μ •μ±… λ“±)
- 단기 및 쀑μž₯κΈ° 가격 전망 μ œμ‹œ
- 가격에 영ν–₯을 λ―ΈμΉ˜λŠ” κ΅­λ‚΄μ™Έ μš”μΈ 식별
- μ‹œμž₯ λΆˆν™•μ‹€μ„±κ³Ό 리슀크 μš”μ†Œ κ°•μ‘°
""",
        "market_trend": """
μ‹œμž₯ 동ν–₯ 및 μˆ˜μš” νŒ¨ν„΄ 뢄석에 μ§‘μ€‘ν•©λ‹ˆλ‹€:
- μ£Όμš” 농산물 μˆ˜μš” λ³€ν™” νŒ¨ν„΄ 식별
- μ†ŒλΉ„μž μ„ ν˜Έλ„ 및 ꡬ맀 행동 뢄석
- μ‹œμž₯ μ„Έκ·Έλ¨ΌνŠΈ 및 ν‹ˆμƒˆμ‹œμž₯ 기회 탐색
- μ‹œμž₯ ν™•λŒ€/μΆ•μ†Œ νŠΈλ Œλ“œ 평가
- μˆ˜μš” 탄λ ₯μ„± 및 가격 민감도 뢄석
""",
        "production_analysis": """
μƒμ‚°λŸ‰ 뢄석 및 μ‹λŸ‰ μ•ˆλ³΄ 전망에 μ§‘μ€‘ν•©λ‹ˆλ‹€:
- μž‘λ¬Ό μƒμ‚°λŸ‰ μΆ”μ„Έ 및 변동 μš”μΈ 뢄석
- μ‹λŸ‰ 생산과 인ꡬ μ„±μž₯ κ°„μ˜ 관계 평가
- κ΅­κ°€/지역별 생산 μ—­λŸ‰ 비ꡐ
- μ‹λŸ‰ μ•ˆλ³΄ μœ„ν˜‘ μš”μ†Œ 및 취약점 식별
- 생산성 ν–₯상 μ „λž΅ 및 기회 μ œμ•ˆ
""",
        "agricultural_policy": """
농업 μ •μ±… 및 규제 영ν–₯ 뢄석에 μ§‘μ€‘ν•©λ‹ˆλ‹€:
- μ •λΆ€ μ •μ±…κ³Ό, 보쑰금, 규제의 μ‹œμž₯ 영ν–₯ 뢄석
- ꡭ제 무역 μ •μ±…κ³Ό κ΄€μ„Έμ˜ 농산물 가격 영ν–₯ 평가
- 농업 지원 ν”„λ‘œκ·Έλž¨μ˜ νš¨κ³Όμ„± κ²€ν† 
- 규제 ν™˜κ²½ 변화에 λ”°λ₯Έ μ‹œμž₯ μ‘°μ • 예츑
- 정책적 κ°œμž…μ˜ μ˜λ„λœ/μ˜λ„μΉ˜ μ•Šμ€ κ²°κ³Ό 뢄석
""",
        "climate_impact": """
κΈ°ν›„ λ³€ν™”κ°€ 농업에 λ―ΈμΉ˜λŠ” 영ν–₯ 뢄석에 μ§‘μ€‘ν•©λ‹ˆλ‹€:
- κΈ°ν›„ 변화와 농산물 μƒμ‚°λŸ‰/ν’ˆμ§ˆ κ°„μ˜ 상관관계 뢄석
- 기상 이변이 가격 변동성에 λ―ΈμΉ˜λŠ” 영ν–₯ 평가
- μž₯기적 κΈ°ν›„ 좔세에 λ”°λ₯Έ 농업 νŒ¨ν„΄ λ³€ν™” 예츑
- κΈ°ν›„ 회볡λ ₯ μžˆλŠ” 농업 μ‹œμŠ€ν…œ μ „λž΅ μ œμ•ˆ
- 지역별 κΈ°ν›„ μœ„ν—˜ λ…ΈμΆœλ„ 및 μ·¨μ•½μ„± λ§€ν•‘
"""
    }

    # Tone instructions; keys mirror RESPONSE_STYLES.
    style_guides = {
        "professional": "전문적이고 ν•™μˆ μ μΈ μ–΄μ‘°λ₯Ό μ‚¬μš©ν•˜μ„Έμš”. 기술적 μš©μ–΄λ₯Ό 적절히 μ‚¬μš©ν•˜κ³  체계적인 데이터 뢄석을 μ œκ³΅ν•˜μ„Έμš”.",
        "simple": "쉽고 κ°„κ²°ν•œ μ–Έμ–΄λ‘œ μ„€λͺ…ν•˜μ„Έμš”. μ „λ¬Έ μš©μ–΄λŠ” μ΅œμ†Œν™”ν•˜κ³  핡심 κ°œλ…μ„ 일상적인 ν‘œν˜„μœΌλ‘œ μ „λ‹¬ν•˜μ„Έμš”.",
        "detailed": "μƒμ„Έν•˜κ³  포괄적인 뢄석을 μ œκ³΅ν•˜μ„Έμš”. λ‹€μ–‘ν•œ 데이터 포인트, 톡계적 λ‰˜μ•™μŠ€, 그리고 μ—¬λŸ¬ μ‹œλ‚˜λ¦¬μ˜€λ₯Ό κ³ λ €ν•œ 심측 뢄석을 μ œμ‹œν•˜μ„Έμš”.",
        "action_oriented": "μ‹€ν–‰ κ°€λŠ₯ν•œ μΈμ‚¬μ΄νŠΈμ™€ ꡬ체적인 ꢌμž₯사항에 μ΄ˆμ μ„ λ§žμΆ”μ„Έμš”. 'λ‹€μŒ 단계' 및 'μ‹€μ§ˆμ  μ‘°μ–Έ' μ„Ήμ…˜μ„ ν¬ν•¨ν•˜μ„Έμš”."
    }

    # Always-on guidance for the three core datasets.
    dataset_guide = """
농업 데이터셋 ν™œμš© μ§€μΉ¨:
- UN κΈ€λ‘œλ²Œ μ‹λŸ‰ 및 농업 톡계 데이터셋을 κΈ°λ³Έ λΆ„μ„μ˜ 근거둜 μ‚¬μš©ν•˜μ„Έμš”
- ν† μ–‘ 및 ν™˜κ²½ λ³€μˆ˜ 기반 μž‘λ¬Ό μΆ”μ²œ λ°μ΄ν„°μ…‹μ˜ μΈμ‚¬μ΄νŠΈλ₯Ό μž‘λ¬Ό 선택 및 재배 쑰건 뢄석에 ν†΅ν•©ν•˜μ„Έμš”
- κΈ°ν›„ λ³€ν™”κ°€ 농업에 λ―ΈμΉ˜λŠ” 영ν–₯ λ°μ΄ν„°μ…‹μ˜ 정보λ₯Ό 지속 κ°€λŠ₯μ„± 및 미래 전망 뢄석에 ν™œμš©ν•˜μ„Έμš”
- λ°μ΄ν„°μ˜ μΆœμ²˜μ™€ 연도λ₯Ό λͺ…ν™•νžˆ μΈμš©ν•˜μ„Έμš”
- 데이터셋 λ‚΄ μ£Όμš” λ³€μˆ˜ κ°„μ˜ 관계λ₯Ό λΆ„μ„ν•˜μ—¬ μΈμ‚¬μ΄νŠΈλ₯Ό λ„μΆœν•˜μ„Έμš”
- λ°μ΄ν„°μ˜ ν•œκ³„μ™€ λΆˆν™•μ‹€μ„±μ„ 투λͺ…ν•˜κ²Œ μ–ΈκΈ‰ν•˜μ„Έμš”
- ν•„μš”μ‹œ 데이터 격차λ₯Ό μ‹λ³„ν•˜κ³  μΆ”κ°€ 연ꡬ가 ν•„μš”ν•œ μ˜μ—­μ„ μ œμ•ˆν•˜μ„Έμš”
"""

    # NOTE(review): the last bullet below contains corrupted characters
    # ("접���법" — almost certainly "접근법") from a bad encoding round-trip;
    # left byte-identical here, restore in a follow-up behavior change.
    soybean_guide = """
κ³ κΈ‰ λŒ€λ‘ 농업 데이터셋 ν™œμš© μ§€μΉ¨:
- λŒ€λ‘ 생산 쑰건 및 μˆ˜ν™•λŸ‰ νŒ¨ν„΄μ„ λ‹€λ₯Έ μž‘λ¬Όκ³Ό λΉ„κ΅ν•˜μ—¬ λΆ„μ„ν•˜μ„Έμš”
- λŒ€λ‘ λ†μ—…μ˜ 경제적 κ°€μΉ˜μ™€ μ‹œμž₯ κΈ°νšŒμ— λŒ€ν•œ μΈμ‚¬μ΄νŠΈλ₯Ό μ œκ³΅ν•˜μ„Έμš”
- λŒ€λ‘ 생산성에 영ν–₯을 λ―ΈμΉ˜λŠ” μ£Όμš” ν™˜κ²½ μš”μΈμ„ κ°•μ‘°ν•˜μ„Έμš”
- λŒ€λ‘ 재배 기술 ν˜μ‹ κ³Ό μˆ˜μ΅μ„± ν–₯상 λ°©μ•ˆμ„ μ œμ•ˆν•˜μ„Έμš”
- 지속 κ°€λŠ₯ν•œ λŒ€λ‘ 농업을 μœ„ν•œ μ‹€μ§ˆμ μΈ 접���법을 κ³΅μœ ν•˜μ„Έμš”
"""

    crop_recommendation_guide = """
ν† μ–‘ 및 ν™˜κ²½ λ³€μˆ˜ 기반 μž‘λ¬Ό μΆ”μ²œ ν™œμš© μ§€μΉ¨:
- μ§€μ—­ νŠΉμ„±μ— λ§žλŠ” 졜적의 μž‘λ¬Ό 선택 기쀀을 μ œμ‹œν•˜μ„Έμš”
- ν† μ–‘ 쑰건과 μž‘λ¬Ό 적합성 κ°„μ˜ 상관관계λ₯Ό λΆ„μ„ν•˜μ„Έμš”
- ν™˜κ²½ λ³€μˆ˜μ— λ”°λ₯Έ μž‘λ¬Ό 생산성 예츑 λͺ¨λΈμ„ ν™œμš©ν•˜μ„Έμš”
- 농업 생산성과 μˆ˜μ΅μ„± ν–₯상을 μœ„ν•œ μž‘λ¬Ό 선택 μ „λž΅μ„ μ œμ•ˆν•˜μ„Έμš”
- 지속 κ°€λŠ₯ν•œ 농업을 μœ„ν•œ μž‘λ¬Ό λ‹€μ–‘ν™” 접근법을 ꢌμž₯ν•˜μ„Έμš”
"""

    climate_impact_guide = """
κΈ°ν›„ λ³€ν™”κ°€ 농업에 λ―ΈμΉ˜λŠ” 영ν–₯ 데이터셋 ν™œμš© μ§€μΉ¨:
- κΈ°ν›„ λ³€ν™” μ‹œλ‚˜λ¦¬μ˜€μ— λ”°λ₯Έ μž‘λ¬Ό 생산성 λ³€ν™”λ₯Ό μ˜ˆμΈ‘ν•˜μ„Έμš”
- κΈ°ν›„ μ μ‘ν˜• 농업 기술 및 μ „λž΅μ„ μ œμ•ˆν•˜μ„Έμš”
- 지역별 κΈ°ν›„ μœ„ν—˜ μš”μ†Œμ™€ λŒ€μ‘ λ°©μ•ˆμ„ λΆ„μ„ν•˜μ„Έμš”
- κΈ°ν›„ 변화에 λŒ€μ‘ν•˜κΈ° μœ„ν•œ μž‘λ¬Ό 선택 및 재배 μ‹œκΈ° μ‘°μ • λ°©μ•ˆμ„ μ œμ‹œν•˜μ„Έμš”
- κΈ°ν›„ λ³€ν™”κ°€ 농산물 가격 및 μ‹œμž₯ 동ν–₯에 λ―ΈμΉ˜λŠ” 영ν–₯을 ν‰κ°€ν•˜μ„Έμš”
"""

    search_guide = """
μ›Ή 검색 κ²°κ³Ό ν™œμš© μ§€μΉ¨:
- 데이터셋 뢄석을 λ³΄μ™„ν•˜λŠ” μ΅œμ‹  μ‹œμž₯ μ •λ³΄λ‘œ 검색 κ²°κ³Όλ₯Ό ν™œμš©ν•˜μ„Έμš”
- 각 μ •λ³΄μ˜ 좜처λ₯Ό λ§ˆν¬λ‹€μš΄ 링크둜 ν¬ν•¨ν•˜μ„Έμš”: [좜처λͺ…](URL)
- μ£Όμš” μ£Όμž₯μ΄λ‚˜ 데이터 ν¬μΈνŠΈλ§ˆλ‹€ 좜처λ₯Ό ν‘œμ‹œν•˜μ„Έμš”
- μΆœμ²˜κ°€ 상좩할 경우, λ‹€μ–‘ν•œ 관점과 신뒰도λ₯Ό μ„€λͺ…ν•˜μ„Έμš”
- κ΄€λ ¨ λ™μ˜μƒ λ§ν¬λŠ” [λΉ„λ””μ˜€: 제λͺ©](video_url) ν˜•μ‹μœΌλ‘œ ν¬ν•¨ν•˜μ„Έμš”
- 검색 정보λ₯Ό μΌκ΄€λ˜κ³  체계적인 μ‘λ‹΅μœΌλ‘œ ν†΅ν•©ν•˜μ„Έμš”
- λͺ¨λ“  μ£Όμš” 좜처λ₯Ό λ‚˜μ—΄ν•œ "μ°Έκ³  자료" μ„Ήμ…˜μ„ λ§ˆμ§€λ§‰μ— ν¬ν•¨ν•˜μ„Έμš”
"""

    upload_guide = """
μ—…λ‘œλ“œλœ 파일 ν™œμš© μ§€μΉ¨:
- μ—…λ‘œλ“œλœ νŒŒμΌμ„ μ‘λ‹΅μ˜ μ£Όμš” μ •λ³΄μ›μœΌλ‘œ ν™œμš©ν•˜μ„Έμš”
- 쿼리와 직접 κ΄€λ ¨λœ 파일 정보λ₯Ό μΆ”μΆœν•˜κ³  κ°•μ‘°ν•˜μ„Έμš”
- κ΄€λ ¨ κ΅¬μ ˆμ„ μΈμš©ν•˜κ³  νŠΉμ • νŒŒμΌμ„ 좜처둜 μΈμš©ν•˜μ„Έμš”
- CSV 파일의 수치 λ°μ΄ν„°λŠ” μš”μ•½ λ¬Έμž₯으둜 λ³€ν™˜ν•˜μ„Έμš”
- PDF μ½˜ν…μΈ λŠ” νŠΉμ • μ„Ήμ…˜μ΄λ‚˜ νŽ˜μ΄μ§€λ₯Ό μ°Έμ‘°ν•˜μ„Έμš”
- 파일 정보λ₯Ό μ›Ή 검색 결과와 μ›ν™œν•˜κ²Œ ν†΅ν•©ν•˜μ„Έμš”
- 정보가 상좩할 경우, 일반적인 μ›Ή 결과보닀 파일 μ½˜ν…μΈ λ₯Ό μš°μ„ μ‹œν•˜μ„Έμš”
"""

    # Base prompt
    final_prompt = base_prompt

    # Add mode-specific guidance
    if mode in mode_prompts:
        final_prompt += "\n" + mode_prompts[mode]

    # Style
    if style in style_guides:
        final_prompt += f"\n\n뢄석 μŠ€νƒ€μΌ: {style_guides[style]}"

    # Always include dataset guides
    final_prompt += f"\n\n{dataset_guide}"
    final_prompt += f"\n\n{crop_recommendation_guide}"
    final_prompt += f"\n\n{climate_impact_guide}"

    # Conditionally add soybean dataset guide if selected in UI
    if st.session_state.get('use_soybean_dataset', False):
        final_prompt += f"\n\n{soybean_guide}"

    if include_search_results:
        final_prompt += f"\n\n{search_guide}"

    if include_uploaded_files:
        final_prompt += f"\n\n{upload_guide}"

    # Formatting contract appended to every prompt variant.
    final_prompt += """
\n\n응닡 ν˜•μ‹ μš”κ΅¬μ‚¬ν•­:
- λ§ˆν¬λ‹€μš΄ 제λͺ©(## 및 ###)을 μ‚¬μš©ν•˜μ—¬ 응닡을 μ²΄κ³„μ μœΌλ‘œ κ΅¬μ„±ν•˜μ„Έμš”
- μ€‘μš”ν•œ 점은 ꡡ은 ν…μŠ€νŠΈ(**ν…μŠ€νŠΈ**)둜 κ°•μ‘°ν•˜μ„Έμš”
- 3-5개의 후속 μ§ˆλ¬Έμ„ ν¬ν•¨ν•œ "κ΄€λ ¨ 질문" μ„Ήμ…˜μ„ λ§ˆμ§€λ§‰μ— μΆ”κ°€ν•˜μ„Έμš”
- μ μ ˆν•œ 간격과 단락 κ΅¬λΆ„μœΌλ‘œ 응닡을 μ„œμ‹ν™”ν•˜μ„Έμš”
- λͺ¨λ“  λ§ν¬λŠ” λ§ˆν¬λ‹€μš΄ ν˜•μ‹μœΌλ‘œ 클릭 κ°€λŠ₯ν•˜κ²Œ λ§Œλ“œμ„Έμš”: [ν…μŠ€νŠΈ](url)
- κ°€λŠ₯ν•œ 경우 데이터λ₯Ό μ‹œκ°μ μœΌλ‘œ ν‘œν˜„(ν‘œ, κ·Έλž˜ν”„ λ“±μ˜ μ„€λͺ…)ν•˜μ„Έμš”
"""
    return final_prompt
769
-
770
- # ──────────────────────────────── Brave Search API ────────────────────────
771
@st.cache_data(ttl=3600)
def brave_search(query: str, count: int = 10):
    """Query the Brave web-search API and return a normalised list of hits.

    Each hit is a dict with index/title/link/snippet/displayed_link keys.
    Makes up to three attempts (5 s apart) and returns [] when every
    attempt fails. Results are cached for one hour via st.cache_data.
    """
    if not BRAVE_KEY:
        raise RuntimeError("⚠️ SERPHOUSE_API_KEY (Brave API Key) environment variable is empty.")

    request_headers = {
        "Accept": "application/json",
        "Accept-Encoding": "gzip",
        "X-Subscription-Token": BRAVE_KEY,
    }
    request_params = {"q": query + " 농산물 가격 동ν–₯ 농업 데이터", "count": str(count)}

    for attempt in range(3):
        try:
            resp = requests.get(BRAVE_ENDPOINT, headers=request_headers,
                                params=request_params, timeout=15)
            resp.raise_for_status()
            payload = resp.json()

            # Brave nests web hits under "web"; fall back to a flat "results" list.
            hits = payload.get("web", {}).get("results") or payload.get("results", [])
            if not hits:
                logging.warning(f"No Brave search results found. Response: {payload}")
                # Raising here routes the empty case through the shared retry path.
                raise ValueError("No search results found.")

            articles = []
            for idx, hit in enumerate(hits[:count], 1):
                link = hit.get("url", hit.get("link", ""))
                # Strip scheme and optional "www." to show a bare domain.
                domain = re.sub(r"https?://(www\.)?", "", link).split("/")[0]
                articles.append({
                    "index": idx,
                    "title": hit.get("title", "No title"),
                    "link": link,
                    "snippet": hit.get("description", hit.get("text", "No snippet")),
                    "displayed_link": domain,
                })
            return articles

        except Exception as e:
            logging.error(f"Brave search failure (attempt {attempt+1}/3): {e}")
            if attempt < 2:  # back off before the next try
                time.sleep(5)

    return []
810
-
811
@st.cache_data(ttl=3600)
def brave_video_search(query: str, count: int = 3):
    """Fetch up to *count* Brave video results for the query.

    Retries up to three times (5 s apart); returns [] when all attempts
    fail. Cached for one hour via st.cache_data.
    """
    if not BRAVE_KEY:
        raise RuntimeError("⚠️ SERPHOUSE_API_KEY (Brave API Key) environment variable is empty.")

    hdrs = {"Accept": "application/json", "Accept-Encoding": "gzip", "X-Subscription-Token": BRAVE_KEY}
    qparams = {"q": query + " 농산물 가격 농업 μ‹œμž₯", "count": str(count)}

    for attempt in range(3):
        try:
            resp = requests.get(BRAVE_VIDEO_ENDPOINT, headers=hdrs, params=qparams, timeout=15)
            resp.raise_for_status()
            payload = resp.json()

            # Normalise each raw video entry into a small flat dict.
            return [
                {
                    "index": pos,
                    "title": item.get("title", "Video"),
                    "video_url": item.get("url", ""),
                    "thumbnail_url": item.get("thumbnail", {}).get("src", ""),
                    "source": item.get("provider", {}).get("name", "Unknown source"),
                }
                for pos, item in enumerate(payload.get("results", [])[:count], 1)
            ]

        except Exception as e:
            logging.error(f"Brave video search failure (attempt {attempt+1}/3): {e}")
            if attempt < 2:  # pause before retrying
                time.sleep(5)

    return []
843
-
844
@st.cache_data(ttl=3600)
def brave_news_search(query: str, count: int = 3):
    """Fetch up to *count* Brave news results for the query.

    Same retry policy as the other Brave helpers: three attempts with a
    5-second pause, [] on total failure. Cached for one hour.
    """
    if not BRAVE_KEY:
        raise RuntimeError("⚠️ SERPHOUSE_API_KEY (Brave API Key) environment variable is empty.")

    hdrs = {"Accept": "application/json", "Accept-Encoding": "gzip", "X-Subscription-Token": BRAVE_KEY}
    qparams = {"q": query + " 농산물 가격 동ν–₯ 농업", "count": str(count)}

    for attempt in range(3):
        try:
            resp = requests.get(BRAVE_NEWS_ENDPOINT, headers=hdrs, params=qparams, timeout=15)
            resp.raise_for_status()
            payload = resp.json()

            items = []
            for pos, entry in enumerate(payload.get("results", [])[:count], 1):
                items.append({
                    "index": pos,
                    "title": entry.get("title", "News article"),
                    "url": entry.get("url", ""),
                    "description": entry.get("description", ""),
                    "source": entry.get("source", "Unknown source"),
                    # Brave reports recency in the "age" field.
                    "date": entry.get("age", "Unknown date"),
                })
            return items

        except Exception as e:
            logging.error(f"Brave news search failure (attempt {attempt+1}/3): {e}")
            if attempt < 2:  # pause before retrying
                time.sleep(5)

    return []
877
-
878
def mock_results(query: str) -> str:
    """Return placeholder markdown used when the search API fails or is empty.

    The text instructs the model to answer from prior knowledge instead of
    live search data, stamped with the generation time.
    """
    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    pieces = [
        f"# λŒ€μ²΄ 검색 μ½˜ν…μΈ  (생성 μ‹œκ°„: {stamp})\n\n",
        f"'{query}'에 λŒ€ν•œ 검색 API μš”μ²­μ΄ μ‹€νŒ¨ν–ˆκ±°λ‚˜ κ²°κ³Όκ°€ μ—†μŠ΅λ‹ˆλ‹€. ",
        "κΈ°μ‘΄ 지식을 기반으둜 응닡을 μƒμ„±ν•΄μ£Όμ„Έμš”.\n\n",
        "λ‹€μŒ 사항을 κ³ λ €ν•˜μ„Έμš”:\n\n",
        f"- {query}에 κ΄€ν•œ κΈ°λ³Έ κ°œλ…κ³Ό μ€‘μš”μ„±\n",
        "- 일반적으둜 μ•Œλ €μ§„ κ΄€λ ¨ ν†΅κ³„λ‚˜ μΆ”μ„Έ\n",
        "- 이 μ£Όμ œμ— λŒ€ν•œ μ „λ¬Έκ°€ 의견\n",
        "- λ…μžκ°€ κ°€μ§ˆ 수 μžˆλŠ” 질문\n\n",
        "μ°Έκ³ : μ΄λŠ” μ‹€μ‹œκ°„ 데이터가 μ•„λ‹Œ λŒ€μ²΄ μ§€μΉ¨μž…λ‹ˆλ‹€.\n\n",
    ]
    return "".join(pieces)
889
-
890
def do_web_search(query: str) -> str:
    """Build one markdown document from Brave web, news and video results.

    Falls back to mock_results() when the web search yields nothing or any
    step of the pipeline raises.
    """
    try:
        web_hits = brave_search(query, 10)
        if not web_hits:
            logging.warning("No search results, using fallback content")
            return mock_results(query)

        vids = brave_video_search(query, 2)
        news_items = brave_news_search(query, 3)

        chunks = [
            "# μ›Ή 검색 κ²°κ³Ό\nλ‹€μŒ κ²°κ³Όλ₯Ό ν™œμš©ν•˜μ—¬ 데이터셋 뢄석을 λ³΄μ™„ν•˜λŠ” 포괄적인 닡변을 μ œκ³΅ν•˜μ„Έμš”.\n\n",
            "## μ›Ή κ²°κ³Ό\n\n",
        ]
        # Only the top five web hits are rendered into the prompt.
        for hit in web_hits[:5]:
            chunks.append(f"### κ²°κ³Ό {hit['index']}: {hit['title']}\n\n{hit['snippet']}\n\n")
            chunks.append(f"**좜처**: [{hit['displayed_link']}]({hit['link']})\n\n---\n")

        if news_items:
            chunks.append("## λ‰΄μŠ€ κ²°κ³Ό\n\n")
            for item in news_items:
                chunks.append(f"### {item['title']}\n\n{item['description']}\n\n")
                chunks.append(f"**좜처**: [{item['source']}]({item['url']}) - {item['date']}\n\n---\n")

        if vids:
            chunks.append("## λΉ„λ””μ˜€ κ²°κ³Ό\n\n")
            for vid in vids:
                chunks.append(f"### {vid['title']}\n\n")
                if vid.get('thumbnail_url'):
                    chunks.append(f"![썸넀일]({vid['thumbnail_url']})\n\n")
                chunks.append(f"**μ‹œμ²­**: [{vid['source']}]({vid['video_url']})\n\n")

        return "".join(chunks)

    except Exception as e:
        logging.error(f"Web search process failed: {str(e)}")
        return mock_results(query)
926
-
927
- # ──────────────────────────────── File Upload Handling ─────────────────────
928
def process_text_file(file):
    """Render an uploaded text file as a markdown section.

    Decodes as UTF-8 (ignoring bad bytes), truncates contents longer than
    10,000 characters, and rewinds the file so later readers see it intact.
    Returns an error string instead of raising.
    """
    try:
        raw = file.read()
        file.seek(0)  # leave the upload readable for subsequent consumers

        body = raw.decode('utf-8', errors='ignore')
        if len(body) > 10000:
            body = body[:9700] + "...(truncated)..."

        return f"## ν…μŠ€νŠΈ 파일: {file.name}\n\n" + body
    except Exception as e:
        logging.error(f"Error processing text file: {str(e)}")
        return f"ν…μŠ€νŠΈ 파일 처리 였λ₯˜: {str(e)}"
942
-
943
def process_csv_file(file):
    """Summarise an uploaded CSV as markdown: shape, preview and basic stats.

    The preview/statistics tables use DataFrame.to_markdown (which needs the
    optional 'tabulate' package); each conversion failure degrades gracefully
    to plain text. The file handle is rewound after reading.
    """
    try:
        raw = file.read()
        file.seek(0)

        frame = pd.read_csv(io.BytesIO(raw))
        lines = [f"## CSV 파일: {file.name}\n\n"]
        lines.append(f"- ν–‰: {len(frame)}\n")
        lines.append(f"- μ—΄: {len(frame.columns)}\n")
        lines.append(f"- μ—΄ 이름: {', '.join(frame.columns.tolist())}\n\n")
        lines.append("### 데이터 미리보기\n\n")

        head = frame.head(10)
        try:
            table = head.to_markdown(index=False)  # requires optional 'tabulate'
            if table:
                lines.append(table + "\n\n")
            else:
                lines.append("CSV 데이터λ₯Ό ν‘œμ‹œν•  수 μ—†μŠ΅λ‹ˆλ‹€.\n\n")
        except Exception as e:
            logging.error(f"Markdown table conversion error: {e}")
            lines.append("ν…μŠ€νŠΈλ‘œ 데이터 ν‘œμ‹œ:\n\n" + str(head) + "\n\n")

        numeric_cols = frame.select_dtypes(include=['number']).columns
        if len(numeric_cols) > 0:
            lines.append("### κΈ°λ³Έ 톡계 정보\n\n")
            try:
                summary = frame[numeric_cols].describe().round(2).to_markdown()
                if summary:
                    lines.append(summary + "\n\n")
                else:
                    lines.append("톡계 정보λ₯Ό ν‘œμ‹œν•  수 μ—†μŠ΅λ‹ˆλ‹€.\n\n")
            except Exception as e:
                logging.error(f"Statistical info conversion error: {e}")
                lines.append("톡계 정보λ₯Ό 생성할 수 μ—†μŠ΅λ‹ˆλ‹€.\n\n")

        return "".join(lines)
    except Exception as e:
        logging.error(f"CSV file processing error: {str(e)}")
        return f"CSV 파일 처리 였λ₯˜: {str(e)}"
984
-
985
def process_pdf_file(file):
    """Render up to the first five pages of an uploaded PDF as markdown.

    Each page's extracted text is capped at 1,500 characters; extraction
    stops early once the accumulated text exceeds 8,000 characters.
    Per-page extraction errors are recorded inline rather than aborting,
    and the file handle is rewound after reading. Returns an error section
    instead of raising on total failure.
    """
    try:
        data = file.read()
        file.seek(0)

        reader = PyPDF2.PdfReader(io.BytesIO(data), strict=False)
        page_total = len(reader.pages)
        header = f"## PDF 파일: {file.name}\n\n- 총 νŽ˜μ΄μ§€: {page_total}\n\n"

        page_limit = min(5, page_total)
        collected = ""

        for page_no in range(page_limit):
            try:
                text = reader.pages[page_no].extract_text()
                section = f"### νŽ˜μ΄μ§€ {page_no+1}\n\n"
                if text and len(text.strip()) > 0:
                    if len(text) > 1500:
                        section += text[:1500] + "...(좕약됨)...\n\n"
                    else:
                        section += text + "\n\n"
                else:
                    section += "(ν…μŠ€νŠΈλ₯Ό μΆ”μΆœν•  수 μ—†μŒ)\n\n"

                collected += section

                # Stop once the combined extract grows past the soft budget.
                if len(collected) > 8000:
                    collected += "...(λ‚˜λ¨Έμ§€ νŽ˜μ΄μ§€ 좕약됨)...\n\n"
                    break

            except Exception as page_err:
                logging.error(f"Error processing PDF page {page_no+1}: {str(page_err)}")
                collected += f"### νŽ˜μ΄μ§€ {page_no+1}\n\n(λ‚΄μš© μΆ”μΆœ 였λ₯˜: {str(page_err)})\n\n"

        if page_total > page_limit:
            collected += f"\nμ°Έκ³ : 처음 {page_limit} νŽ˜μ΄μ§€λ§Œ ν‘œμ‹œλ©λ‹ˆλ‹€.\n\n"

        return header + "### PDF λ‚΄μš©\n\n" + collected

    except Exception as e:
        logging.error(f"PDF file processing error: {str(e)}")
        return f"## PDF 파일: {file.name}\n\n였λ₯˜: {str(e)}\n\nμ²˜λ¦¬ν•  수 μ—†μŠ΅λ‹ˆλ‹€."
1030
-
1031
def process_uploaded_files(files):
    """Concatenate markdown renderings of every uploaded file.

    Dispatches on file extension (txt/csv/pdf); unsupported extensions get
    a placeholder section and per-file failures are recorded inline.
    Returns None when no files were supplied.
    """
    if not files:
        return None

    # Extension -> renderer dispatch table.
    handlers = {
        'txt': process_text_file,
        'csv': process_csv_file,
        'pdf': process_pdf_file,
    }

    parts = ["# μ—…λ‘œλ“œλœ 파일 λ‚΄μš©\n\nμ‚¬μš©μžκ°€ μ œκ³΅ν•œ 파일의 λ‚΄μš©μž…λ‹ˆλ‹€.\n\n"]
    for file in files:
        try:
            ext = file.name.split('.')[-1].lower()
            handler = handlers.get(ext)
            if handler is not None:
                parts.append(handler(file) + "\n\n---\n\n")
            else:
                parts.append(f"### μ§€μ›λ˜μ§€ μ•ŠλŠ” 파일: {file.name}\n\n---\n\n")
        except Exception as e:
            logging.error(f"File processing error {file.name}: {e}")
            parts.append(f"### 파일 처리 였λ₯˜: {file.name}\n\n였λ₯˜: {e}\n\n---\n\n")

    return "".join(parts)
1052
-
1053
- # ──────────────────────────────── Image & Utility ─────────────────────────
1054
-
1055
def generate_image(prompt, w=768, h=768, g=3.5, steps=30, seed=3):
    """Generate an image through the external Gradio endpoint.

    Returns (image, "Seed: N") on success, or (None, error_message) when
    the prompt is empty or the remote call fails.
    """
    if not prompt:
        return None, "Insufficient prompt"
    try:
        api_client = Client(IMAGE_API_URL)
        output = api_client.predict(
            prompt=prompt, width=w, height=h, guidance=g,
            inference_steps=steps, seed=seed,
            do_img2img=False, init_image=None,
            image2image_strength=0.8, resize_img=True,
            api_name="/generate_image"
        )
        # The endpoint returns (image, seed); surface the seed as a caption.
        return output[0], f"Seed: {output[1]}"
    except Exception as e:
        logging.error(e)
        return None, str(e)
1070
-
1071
def extract_image_prompt(response_text: str, topic: str):
    """Derive a one-line English image prompt from a generated answer.

    Asks the chat model to condense the topic and response into a single
    English prompt line; on any failure a generic agriculture prompt is
    returned instead.
    """
    client = get_openai_client()
    chat_messages = [
        {"role": "system", "content": "농업 및 농산물에 κ΄€ν•œ 이미지 ν”„λ‘¬ν”„νŠΈλ₯Ό μƒμ„±ν•©λ‹ˆλ‹€. ν•œ μ€„μ˜ μ˜μ–΄λ‘œ 된 ν”„λ‘¬ν”„νŠΈλ§Œ λ°˜ν™˜ν•˜μ„Έμš”, λ‹€λ₯Έ ν…μŠ€νŠΈλŠ” ν¬ν•¨ν•˜μ§€ λ§ˆμ„Έμš”."},
        {"role": "user", "content": f"주제: {topic}\n\n---\n{response_text}\n\n---"},
    ]
    try:
        completion = client.chat.completions.create(
            model="gpt-4.1-mini",
            messages=chat_messages,
            temperature=1,
            max_tokens=80,
            top_p=1
        )
        return completion.choices[0].message.content.strip()
    except Exception as e:
        logging.error(f"OpenAI image prompt generation error: {e}")
        return f"A professional photograph of agricultural produce and farm fields, data visualization of crop prices and trends, high quality"
1088
-
1089
def md_to_html(md: str, title="농산물 μˆ˜μš” 예츑 뢄석 κ²°κ³Ό"):
    """Wrap rendered markdown in a minimal standalone HTML document."""
    rendered = markdown.markdown(md)
    return (
        f"<!DOCTYPE html><html><head><title>{title}</title>"
        f"<meta charset='utf-8'></head><body>{rendered}</body></html>"
    )
1091
-
1092
- def keywords(text: str, top=5):
1093
- cleaned = re.sub(r"[^κ°€-힣a-zA-Z0-9\s]", "", text)
1094
- return " ".join(cleaned.split()[:top])
1095
-
1096
- # ──────────────────────────────── Streamlit UI ────────────────────────────
1097
def agricultural_price_forecast_app():
    """Render the main Streamlit page: sidebar settings, file upload, chat.

    Fix vs. original: the always-enabled dataset list shown via sb.info
    contained a mojibake sequence ("οΏ½οΏ½οΏ½λ¬Ό"); restored to "μž‘λ¬Ό" (crop),
    matching the crop-recommendation dataset referenced elsewhere.
    """
    st.title("농산물 μˆ˜μš” 및 가격 예츑 AI μ–΄μ‹œμŠ€ν„΄νŠΈ")
    st.markdown("UN κΈ€λ‘œλ²Œ μ‹λŸ‰ 및 농업 톡계 데이터셋 뢄석 기반의 농산물 μ‹œμž₯ 예츑")

    # Seed every session-state key this page (and process_input) reads.
    if "ai_model" not in st.session_state:
        st.session_state.ai_model = "gpt-4.1-mini"
    if "messages" not in st.session_state:
        st.session_state.messages = []
    if "auto_save" not in st.session_state:
        st.session_state.auto_save = True
    if "generate_image" not in st.session_state:
        st.session_state.generate_image = False
    if "web_search_enabled" not in st.session_state:
        st.session_state.web_search_enabled = True
    if "analysis_mode" not in st.session_state:
        st.session_state.analysis_mode = "price_forecast"
    if "response_style" not in st.session_state:
        st.session_state.response_style = "professional"
    if "use_soybean_dataset" not in st.session_state:
        st.session_state.use_soybean_dataset = False

    sb = st.sidebar
    sb.title("뢄석 μ„€μ •")

    # Optional Kaggle dataset info panel.
    if sb.checkbox("데이터셋 정보 ν‘œμ‹œ", value=False):
        st.info("UN κΈ€λ‘œλ²Œ μ‹λŸ‰ 및 농업 톡계 데이터셋을 λΆˆλŸ¬μ˜€λŠ” 쀑...")
        dataset_info = load_agriculture_dataset()
        if dataset_info:
            st.success(f"데이터셋 λ‘œλ“œ μ™„λ£Œ: {len(dataset_info['files'])}개 파일")

            with st.expander("데이터셋 미리보기", expanded=False):
                for file_info in dataset_info['files'][:5]:
                    st.write(f"**{file_info['name']}** ({file_info['size_mb']} MB)")
        else:
            st.error("데이터셋을 λΆˆλŸ¬μ˜€λŠ”λ° μ‹€νŒ¨ν–ˆμŠ΅λ‹ˆλ‹€. Kaggle API 섀정을 ν™•μΈν•˜μ„Έμš”.")

    # Analysis configuration: widgets write directly into session_state keys.
    sb.subheader("뢄석 ꡬ성")
    sb.selectbox(
        "뢄석 λͺ¨λ“œ",
        options=list(ANALYSIS_MODES.keys()),
        format_func=lambda x: ANALYSIS_MODES[x],
        key="analysis_mode"
    )

    sb.selectbox(
        "응닡 μŠ€νƒ€μΌ",
        options=list(RESPONSE_STYLES.keys()),
        format_func=lambda x: RESPONSE_STYLES[x],
        key="response_style"
    )

    # Dataset selection (the soybean dataset is opt-in).
    sb.subheader("데이터셋 선택")
    sb.checkbox(
        "κ³ κΈ‰ λŒ€λ‘ 농업 데이터셋 μ‚¬μš©",
        key="use_soybean_dataset",
        help="λŒ€λ‘(콩) κ΄€λ ¨ μ§ˆλ¬Έμ— 더 μ •ν™•ν•œ 정보λ₯Ό μ œκ³΅ν•©λ‹ˆλ‹€."
    )

    # Always-enabled datasets info (mojibake fixed: μž‘λ¬Ό).
    sb.info("κΈ°λ³Έ ν™œμ„±ν™”λœ 데이터셋:\n- UN κΈ€λ‘œλ²Œ μ‹λŸ‰ 및 농업 톡계\n- ν† μ–‘ 및 ν™˜κ²½ λ³€μˆ˜ 기반 μž‘λ¬Ό μΆ”μ²œ\n- κΈ°ν›„ λ³€ν™”κ°€ 농업에 λ―ΈμΉ˜λŠ” 영ν–₯")

    # Example query shortcut buttons.
    sb.subheader("μ˜ˆμ‹œ 질문")
    c1, c2, c3 = sb.columns(3)
    if c1.button("μŒ€ 가격 전망", key="ex1"):
        process_example(EXAMPLE_QUERIES["example1"])
    if c2.button("κΈ°ν›„ 영ν–₯", key="ex2"):
        process_example(EXAMPLE_QUERIES["example2"])
    if c3.button("증평ꡰ μž‘λ¬Ό", key="ex3"):
        process_example(EXAMPLE_QUERIES["example3"])

    sb.subheader("기타 μ„€μ •")
    sb.toggle("μžλ™ μ €μž₯", key="auto_save")
    sb.toggle("이미지 μžλ™ 생성", key="generate_image")

    web_search_enabled = sb.toggle("μ›Ή 검색 μ‚¬μš©", value=st.session_state.web_search_enabled)
    st.session_state.web_search_enabled = web_search_enabled

    if web_search_enabled:
        st.sidebar.info("βœ… μ›Ή 검색 κ²°κ³Όκ°€ 응닡에 ν†΅ν•©λ©λ‹ˆλ‹€.")

    # Offer the most recent non-empty assistant reply for download.
    latest_response = next(
        (m["content"] for m in reversed(st.session_state.messages)
         if m["role"] == "assistant" and m["content"].strip()),
        None
    )
    if latest_response:
        # Use the first markdown H1 as the filename, else the first line.
        title_match = re.search(r"# (.*?)(\n|$)", latest_response)
        if title_match:
            title = title_match.group(1).strip()
        else:
            first_line = latest_response.split('\n', 1)[0].strip()
            title = first_line[:40] + "..." if len(first_line) > 40 else first_line

        sb.subheader("μ΅œμ‹  응닡 λ‹€μš΄λ‘œλ“œ")
        d1, d2 = sb.columns(2)
        d1.download_button("λ§ˆν¬λ‹€μš΄μœΌλ‘œ λ‹€μš΄λ‘œλ“œ", latest_response,
                           file_name=f"{title}.md", mime="text/markdown")
        d2.download_button("HTML둜 λ‹€μš΄λ‘œλ“œ", md_to_html(latest_response, title),
                           file_name=f"{title}.html", mime="text/html")

    # Restore a previously saved conversation from a JSON upload.
    up = sb.file_uploader("λŒ€ν™” 기둝 뢈러였기 (.json)", type=["json"], key="json_uploader")
    if up:
        try:
            st.session_state.messages = json.load(up)
            sb.success("λŒ€ν™” 기둝을 μ„±κ³΅μ μœΌλ‘œ λΆˆλŸ¬μ™”μŠ΅λ‹ˆλ‹€")
        except Exception as e:
            sb.error(f"뢈러였기 μ‹€νŒ¨: {e}")

    # Export the current conversation as JSON.
    if sb.button("λŒ€ν™” 기둝을 JSON으둜 λ‹€μš΄λ‘œλ“œ"):
        sb.download_button(
            "μ €μž₯",
            data=json.dumps(st.session_state.messages, ensure_ascii=False, indent=2),
            file_name="conversation_history.json",
            mime="application/json"
        )

    # Reference-file upload area (txt/csv/pdf).
    st.subheader("파일 μ—…λ‘œλ“œ")
    uploaded_files = st.file_uploader(
        "μ°Έκ³  자료둜 μ‚¬μš©ν•  파일 μ—…λ‘œλ“œ (txt, csv, pdf)",
        type=["txt", "csv", "pdf"],
        accept_multiple_files=True,
        key="file_uploader"
    )

    if uploaded_files:
        file_count = len(uploaded_files)
        st.success(f"{file_count}개 파일이 μ—…λ‘œλ“œλ˜μ—ˆμŠ΅λ‹ˆλ‹€. μ§ˆμ˜μ— λŒ€ν•œ μ†ŒμŠ€λ‘œ μ‚¬μš©λ©λ‹ˆλ‹€.")

        with st.expander("μ—…λ‘œλ“œλœ 파일 미리보기", expanded=False):
            for idx, file in enumerate(uploaded_files):
                st.write(f"**파일λͺ…:** {file.name}")
                ext = file.name.split('.')[-1].lower()

                if ext == 'txt':
                    preview = file.read(1000).decode('utf-8', errors='ignore')
                    file.seek(0)  # rewind so process_text_file reads from the start
                    st.text_area(
                        f"{file.name} 미리보기",
                        preview + ("..." if len(preview) >= 1000 else ""),
                        height=150
                    )
                elif ext == 'csv':
                    try:
                        df = pd.read_csv(file)
                        file.seek(0)
                        st.write("CSV 미리보기 (μ΅œλŒ€ 5ν–‰)")
                        st.dataframe(df.head(5))
                    except Exception as e:
                        st.error(f"CSV 미리보기 μ‹€νŒ¨: {e}")
                elif ext == 'pdf':
                    try:
                        file_bytes = file.read()
                        file.seek(0)

                        pdf_file = io.BytesIO(file_bytes)
                        reader = PyPDF2.PdfReader(pdf_file, strict=False)

                        pc = len(reader.pages)
                        st.write(f"PDF 파일: {pc}νŽ˜μ΄μ§€")

                        if pc > 0:
                            try:
                                page_text = reader.pages[0].extract_text()
                                preview = page_text[:500] if page_text else "(ν…μŠ€νŠΈ μΆ”μΆœ λΆˆκ°€)"
                                st.text_area("첫 νŽ˜μ΄μ§€ 미리보기", preview + "...", height=150)
                            except Exception:
                                st.warning("첫 νŽ˜μ΄μ§€ ν…μŠ€νŠΈ μΆ”μΆœ μ‹€νŒ¨")
                    except Exception as e:
                        st.error(f"PDF 미리보기 μ‹€νŒ¨: {e}")

                if idx < file_count - 1:
                    st.divider()

    # Replay the conversation so far, including any attached videos.
    for m in st.session_state.messages:
        with st.chat_message(m["role"]):
            st.markdown(m["content"], unsafe_allow_html=True)

            if "videos" in m and m["videos"]:
                st.subheader("κ΄€λ ¨ λΉ„λ””μ˜€")
                for video in m["videos"]:
                    video_title = video.get('title', 'κ΄€λ ¨ λΉ„λ””μ˜€')
                    video_url = video.get('url', '')
                    thumbnail = video.get('thumbnail', '')

                    if thumbnail:
                        col1, col2 = st.columns([1, 3])
                        with col1:
                            st.write("🎬")
                        with col2:
                            st.markdown(f"**[{video_title}]({video_url})**")
                            st.write(f"좜처: {video.get('source', 'μ•Œ 수 μ—†μŒ')}")
                    else:
                        st.markdown(f"🎬 **[{video_title}]({video_url})**")
                        st.write(f"좜처: {video.get('source', 'μ•Œ 수 μ—†μŒ')}")

    # Chat entry point: every submitted query goes through process_input.
    query = st.chat_input("농산물 가격, μˆ˜μš” λ˜λŠ” μ‹œμž₯ 동ν–₯ κ΄€λ ¨ μ§ˆλ¬Έμ„ μž…λ ₯ν•˜μ„Έμš”.")
    if query:
        process_input(query, uploaded_files)

    sb.markdown("---")
    sb.markdown("Created by Vidraft | [Community](https://discord.gg/openfreeai)")
1308
-
1309
def process_example(topic):
    """Run a canned example query through the normal input pipeline (no files)."""
    process_input(topic, [])
1311
-
1312
def process_input(query: str, uploaded_files):
    """Answer one chat query end-to-end inside the Streamlit page.

    Pipeline: record the user message, run the always-on dataset analyses
    (plus the opt-in soybean one), optionally gather web/video/news search
    results and uploaded-file content, assemble a single user prompt, stream
    the OpenAI completion into the chat, then offer image generation,
    downloads, and auto-save. Errors are surfaced in-page and appended to
    the conversation instead of propagating.
    """
    # Avoid duplicating the same user message (e.g. on Streamlit reruns).
    if not any(m["role"] == "user" and m["content"] == query for m in st.session_state.messages):
        st.session_state.messages.append({"role": "user", "content": query})

    with st.chat_message("user"):
        st.markdown(query)

    with st.chat_message("assistant"):
        # placeholder is only written to on error; message_placeholder holds
        # the streaming answer text.
        placeholder = st.empty()
        message_placeholder = st.empty()
        full_response = ""

        use_web_search = st.session_state.web_search_enabled
        has_uploaded_files = bool(uploaded_files) and len(uploaded_files) > 0

        try:
            status = st.status("μ§ˆλ¬Έμ— λ‹΅λ³€ μ€€λΉ„ 쀑...")
            status.update(label="ν΄λΌμ΄μ–ΈνŠΈ μ΄ˆκΈ°ν™” 쀑...")

            client = get_openai_client()

            search_content = None
            video_results = []
            news_results = []

            # Always-on dataset analyses for every query.
            status.update(label="농업 데이터셋 뢄석 쀑...")
            with st.spinner("데이터셋 뢄석 쀑..."):
                dataset_analysis = analyze_dataset_for_query(query)

                # These two analyses are unconditional as well.
                crop_recommendation_analysis = analyze_crop_recommendation_dataset(query)
                climate_impact_analysis = analyze_climate_impact_dataset(query)

            # Opt-in soybean dataset analysis (sidebar checkbox).
            soybean_analysis = None
            if st.session_state.use_soybean_dataset:
                status.update(label="λŒ€λ‘ 농업 데이터셋 뢄석 쀑...")
                with st.spinner("λŒ€λ‘ 데이터셋 뢄석 쀑..."):
                    soybean_analysis = analyze_soybean_dataset(query)

            if use_web_search:
                # Search runs quietly behind a generic spinner.
                # NOTE(review): news_results is collected but never used below;
                # do_web_search() already embeds its own news section.
                with st.spinner("정보 μˆ˜μ§‘ 쀑..."):
                    search_content = do_web_search(keywords(query, top=5))
                    video_results = brave_video_search(query, 2)
                    news_results = brave_news_search(query, 3)

            file_content = None
            if has_uploaded_files:
                status.update(label="μ—…λ‘œλ“œλœ 파일 처리 쀑...")
                with st.spinner("파일 뢄석 쀑..."):
                    file_content = process_uploaded_files(uploaded_files)

            # Keep only videos with a plausible absolute URL.
            valid_videos = []
            for vid in video_results:
                url = vid.get('video_url')
                if url and url.startswith('http'):
                    valid_videos.append({
                        'url': url,
                        'title': vid.get('title', 'λΉ„λ””μ˜€'),
                        'thumbnail': vid.get('thumbnail_url', ''),
                        'source': vid.get('source', 'λΉ„λ””μ˜€ 좜처')
                    })

            status.update(label="μ’…ν•© 뢄석 μ€€λΉ„ 쀑...")
            sys_prompt = get_system_prompt(
                mode=st.session_state.analysis_mode,
                style=st.session_state.response_style,
                include_search_results=use_web_search,
                include_uploaded_files=has_uploaded_files
            )

            api_messages = [
                {"role": "system", "content": sys_prompt}
            ]

            # Build the user message: query plus every gathered context block.
            user_content = query
            # Base dataset analyses are always appended.
            user_content += "\n\n" + dataset_analysis
            user_content += "\n\n" + crop_recommendation_analysis
            user_content += "\n\n" + climate_impact_analysis

            # Conditional extras.
            if soybean_analysis:
                user_content += "\n\n" + soybean_analysis

            if search_content:
                user_content += "\n\n" + search_content
            if file_content:
                user_content += "\n\n" + file_content

            if valid_videos:
                user_content += "\n\n# κ΄€λ ¨ λ™μ˜μƒ\n"
                for i, vid in enumerate(valid_videos):
                    user_content += f"\n{i+1}. **{vid['title']}** - [{vid['source']}]({vid['url']})\n"

            api_messages.append({"role": "user", "content": user_content})

            try:
                # Stream the completion token-by-token into the placeholder.
                stream = client.chat.completions.create(
                    model="gpt-4.1-mini",
                    messages=api_messages,
                    temperature=1,
                    max_tokens=MAX_TOKENS,
                    top_p=1,
                    stream=True
                )

                for chunk in stream:
                    if chunk.choices and len(chunk.choices) > 0 and chunk.choices[0].delta.content is not None:
                        content_delta = chunk.choices[0].delta.content
                        full_response += content_delta
                        # Trailing block cursor signals "still streaming".
                        message_placeholder.markdown(full_response + "β–Œ", unsafe_allow_html=True)

                # Final render without the cursor.
                message_placeholder.markdown(full_response, unsafe_allow_html=True)

                if valid_videos:
                    st.subheader("κ΄€λ ¨ λΉ„λ””μ˜€")
                    for video in valid_videos:
                        video_title = video.get('title', 'κ΄€λ ¨ λΉ„λ””μ˜€')
                        video_url = video.get('url', '')

                        st.markdown(f"🎬 **[{video_title}]({video_url})**")
                        st.write(f"좜처: {video.get('source', 'μ•Œ 수 μ—†μŒ')}")

                status.update(label="응닡 μ™„λ£Œ!", state="complete")

                # Persist the answer (with its videos) into the conversation.
                st.session_state.messages.append({
                    "role": "assistant",
                    "content": full_response,
                    "videos": valid_videos
                })

            except Exception as api_error:
                error_message = str(api_error)
                logging.error(f"API 였λ₯˜: {error_message}")
                status.update(label=f"였λ₯˜: {error_message}", state="error")
                # Re-raise so the outer handler records a fallback answer.
                raise Exception(f"응닡 생성 였λ₯˜: {error_message}")

            # Optional companion image, derived from the finished answer.
            if st.session_state.generate_image and full_response:
                with st.spinner("λ§žμΆ€ν˜• 이미지 생성 쀑..."):
                    try:
                        ip = extract_image_prompt(full_response, query)
                        img, cap = generate_image(ip)
                        if img:
                            st.subheader("AI 생성 이미지")
                            st.image(img, caption=cap, use_container_width=True)
                    except Exception as img_error:
                        logging.error(f"이미지 생성 였λ₯˜: {str(img_error)}")
                        st.warning("λ§žμΆ€ν˜• 이미지 생성에 μ‹€νŒ¨ν–ˆμŠ΅λ‹ˆλ‹€.")

            # Per-answer download buttons (markdown and HTML).
            if full_response:
                st.subheader("이 응닡 λ‹€μš΄λ‘œλ“œ")
                c1, c2 = st.columns(2)
                c1.download_button(
                    "λ§ˆν¬λ‹€μš΄",
                    data=full_response,
                    file_name=f"{query[:30]}.md",
                    mime="text/markdown"
                )
                c2.download_button(
                    "HTML",
                    data=md_to_html(full_response, query[:30]),
                    file_name=f"{query[:30]}.html",
                    mime="text/html"
                )

            # Best-effort auto-save of the whole conversation to a local file.
            if st.session_state.auto_save and st.session_state.messages:
                try:
                    fn = f"conversation_history_auto_{datetime.now():%Y%m%d_%H%M%S}.json"
                    with open(fn, "w", encoding="utf-8") as fp:
                        json.dump(st.session_state.messages, fp, ensure_ascii=False, indent=2)
                except Exception as e:
                    logging.error(f"μžλ™ μ €μž₯ μ‹€νŒ¨: {e}")

        except Exception as e:
            # Top-level failure: show the error and record a fallback answer.
            error_message = str(e)
            placeholder.error(f"였λ₯˜ λ°œμƒ: {error_message}")
            logging.error(f"μž…λ ₯ 처리 였λ₯˜: {error_message}")
            ans = f"μš”μ²­ 처리 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {error_message}"
            st.session_state.messages.append({"role": "assistant", "content": ans})
1497
-
1498
- # ──────────────────────────────── main ────────────────────────────────────
1499
def main():
    """Entry point: print the launch timestamp and render the app."""
    launched_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    st.write("==== μ• ν”Œλ¦¬μΌ€μ΄μ…˜ μ‹œμž‘ μ‹œκ°„:", launched_at, "=====")
    agricultural_price_forecast_app()


if __name__ == "__main__":
    main()
1505
-