Spaces: aiqcamp

aiqcamp committed · verified
Commit a2cc18f · 1 Parent(s): 3dba369

Upload 3 files

Files changed (3)
  1. app (35).py +1504 -0
  2. requirements (16).txt +17 -0
  3. stanard_map (1).csv +0 -0
app (35).py ADDED
@@ -0,0 +1,1504 @@
+ import os, json, re, logging, requests, markdown, time, io
+ from datetime import datetime
+ import random
+ import base64
+ from io import BytesIO
+ from PIL import Image
+
+ import streamlit as st
+ from openai import OpenAI
+
+ from gradio_client import Client
+ import pandas as pd
+ import PyPDF2  # For handling PDF files
+ import kagglehub
+
+ # ──────────────────────────────── Environment Variables / Constants ─────────────────────────
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
+ BRAVE_KEY = os.getenv("SERPHOUSE_API_KEY", "")  # Keep this name
+ BRAVE_ENDPOINT = "https://api.search.brave.com/res/v1/web/search"
+ BRAVE_VIDEO_ENDPOINT = "https://api.search.brave.com/res/v1/videos/search"
+ BRAVE_NEWS_ENDPOINT = "https://api.search.brave.com/res/v1/news/search"
+ IMAGE_API_URL = "http://211.233.58.201:7896"
+ MAX_TOKENS = 7999
+ KAGGLE_API_KEY = os.getenv("KDATA_API", "")
+
+ # Set Kaggle API key
+ os.environ["KAGGLE_KEY"] = KAGGLE_API_KEY
+
+ # Analysis modes and style definitions
+ ANALYSIS_MODES = {
+     "price_forecast": "Agricultural price forecasting and market analysis",
+     "market_trend": "Market trend and demand pattern analysis",
+     "production_analysis": "Production analysis and food security outlook",
+     "agricultural_policy": "Agricultural policy and regulatory impact analysis",
+     "climate_impact": "Analysis of climate change impacts on agriculture"
+ }
+
+ RESPONSE_STYLES = {
+     "professional": "Professional, academic analysis",
+     "simple": "Simple, easy-to-understand explanations",
+     "detailed": "Detailed, statistics-based in-depth analysis",
+     "action_oriented": "Focused on actionable advice and recommendations"
+ }
+
+ # Example search queries
+ EXAMPLE_QUERIES = {
+     "example1": "Analyze rice price trends and the outlook for the next 6 months",
+     "example2": "Write a report on Korean fruit production strategy and demand forecasts under climate change.",
+     "example3": "Which crops would be promising to grow in Jeungpyeong-gun, Chungbuk from 2025 to 2030? They should offer good profitability and manageability."
+ }
+
+ # ──────────────────────────────── Logging ────────────────────────────────
+ logging.basicConfig(level=logging.INFO,
+                     format="%(asctime)s - %(levelname)s - %(message)s")
+
+ # ──────────────────────────────── OpenAI Client ──────────────────────────
+
+ @st.cache_resource
+ def get_openai_client():
+     """Create an OpenAI client with timeout and retry settings."""
+     if not OPENAI_API_KEY:
+         raise RuntimeError("⚠️ The OPENAI_API_KEY environment variable is not set.")
+     return OpenAI(
+         api_key=OPENAI_API_KEY,
+         timeout=60.0,
+         max_retries=3
+     )
+
+ # ────────────────────────────── Kaggle Dataset Access ──────────────────────
+ @st.cache_resource
+ def load_agriculture_dataset():
+     """Download and load the UN agriculture dataset from Kaggle"""
+     try:
+         path = kagglehub.dataset_download("unitednations/global-food-agriculture-statistics")
+         logging.info(f"Kaggle dataset downloaded to: {path}")
+
+         # Load metadata about available files
+         available_files = []
+         for root, dirs, files in os.walk(path):
+             for file in files:
+                 if file.endswith('.csv'):
+                     file_path = os.path.join(root, file)
+                     file_size = os.path.getsize(file_path) / (1024 * 1024)  # Size in MB
+                     available_files.append({
+                         'name': file,
+                         'path': file_path,
+                         'size_mb': round(file_size, 2)
+                     })
+
+         return {
+             'base_path': path,
+             'files': available_files
+         }
+     except Exception as e:
+         logging.error(f"Error loading Kaggle dataset: {e}")
+         return None
+
+ # New function to load Advanced Soybean Agricultural Dataset
+ @st.cache_resource
+ def load_soybean_dataset():
+     """Download and load the Advanced Soybean Agricultural Dataset from Kaggle"""
+     try:
+         path = kagglehub.dataset_download("wisam1985/advanced-soybean-agricultural-dataset-2025")
+         logging.info(f"Soybean dataset downloaded to: {path}")
+
+         available_files = []
+         for root, dirs, files in os.walk(path):
+             for file in files:
+                 if file.endswith(('.csv', '.xlsx')):
+                     file_path = os.path.join(root, file)
+                     file_size = os.path.getsize(file_path) / (1024 * 1024)  # Size in MB
+                     available_files.append({
+                         'name': file,
+                         'path': file_path,
+                         'size_mb': round(file_size, 2)
+                     })
+
+         return {
+             'base_path': path,
+             'files': available_files
+         }
+     except Exception as e:
+         logging.error(f"Error loading Soybean dataset: {e}")
+         return None
+
+ # Function to load Crop Recommendation Dataset
+ @st.cache_resource
+ def load_crop_recommendation_dataset():
+     """Download and load the Soil and Environmental Variables Crop Recommendation Dataset"""
+     try:
+         path = kagglehub.dataset_download("agriinnovate/agricultural-crop-dataset")
+         logging.info(f"Crop recommendation dataset downloaded to: {path}")
+
+         available_files = []
+         for root, dirs, files in os.walk(path):
+             for file in files:
+                 if file.endswith(('.csv', '.xlsx')):
+                     file_path = os.path.join(root, file)
+                     file_size = os.path.getsize(file_path) / (1024 * 1024)  # Size in MB
+                     available_files.append({
+                         'name': file,
+                         'path': file_path,
+                         'size_mb': round(file_size, 2)
+                     })
+
+         return {
+             'base_path': path,
+             'files': available_files
+         }
+     except Exception as e:
+         logging.error(f"Error loading Crop recommendation dataset: {e}")
+         return None
+
+ # Function to load Climate Change Impact Dataset
+ @st.cache_resource
+ def load_climate_impact_dataset():
+     """Download and load the Climate Change Impact on Agriculture Dataset"""
+     try:
+         path = kagglehub.dataset_download("waqi786/climate-change-impact-on-agriculture")
+         logging.info(f"Climate impact dataset downloaded to: {path}")
+
+         available_files = []
+         for root, dirs, files in os.walk(path):
+             for file in files:
+                 if file.endswith(('.csv', '.xlsx')):
+                     file_path = os.path.join(root, file)
+                     file_size = os.path.getsize(file_path) / (1024 * 1024)  # Size in MB
+                     available_files.append({
+                         'name': file,
+                         'path': file_path,
+                         'size_mb': round(file_size, 2)
+                     })
+
+         return {
+             'base_path': path,
+             'files': available_files
+         }
+     except Exception as e:
+         logging.error(f"Error loading Climate impact dataset: {e}")
+         return None
+
+ def get_dataset_summary():
+     """Generate a summary of the available agriculture datasets"""
+     dataset_info = load_agriculture_dataset()
+     if not dataset_info:
+         return "Failed to load the UN global food and agriculture statistics dataset."
+
+     summary = "# UN Global Food and Agriculture Statistics Dataset\n\n"
+     summary += f"It contains {len(dataset_info['files'])} CSV files in total.\n\n"
+
+     # List files with sizes
+     summary += "## Available data files:\n\n"
+     for i, file_info in enumerate(dataset_info['files'][:10], 1):  # Limit to first 10 files
+         summary += f"{i}. **{file_info['name']}** ({file_info['size_mb']} MB)\n"
+
+     if len(dataset_info['files']) > 10:
+         summary += f"\n...and {len(dataset_info['files']) - 10} more files\n"
+
+     # Add example of data structure
+     try:
+         if dataset_info['files']:
+             sample_file = dataset_info['files'][0]['path']
+             df = pd.read_csv(sample_file, nrows=5)
+             summary += "\n## Sample data structure:\n\n"
+             summary += df.head(5).to_markdown() + "\n\n"
+
+             summary += "## Dataset variable descriptions:\n\n"
+             for col in df.columns:
+                 summary += f"- **{col}**: [description needed]\n"
+     except Exception as e:
+         logging.error(f"Error generating dataset sample: {e}")
+         summary += "\nAn error occurred while generating the data sample.\n"
+
+     return summary
+
+ def analyze_dataset_for_query(query):
+     """Find and analyze relevant data from the dataset based on the query"""
+     dataset_info = load_agriculture_dataset()
+     if not dataset_info:
+         return "Could not load the dataset. Please check the Kaggle API connection."
+
+     # Extract key terms from the query
+     query_lower = query.lower()
+
+     # Korean keywords to look for in the query, mapped to English dataset terms
+     keywords = {
+         "쌀": ["rice", "grain"],
+         "밀": ["wheat", "grain"],
+         "옥수수": ["corn", "maize", "grain"],
+         "채소": ["vegetable", "produce"],
+         "과일": ["fruit", "produce"],
+         "가격": ["price", "cost", "value"],
+         "생산": ["production", "yield", "harvest"],
+         "수출": ["export", "trade"],
+         "수입": ["import", "trade"],
+         "소비": ["consumption", "demand"]
+     }
+
+     # Find relevant files based on the query
+     relevant_files = []
+
+     # First check for Korean keywords in the query
+     found_keywords = []
+     for k_term, e_terms in keywords.items():
+         if k_term in query_lower:
+             found_keywords.extend([k_term] + e_terms)
+
+     # If no Korean keywords found, check for English terms in the filenames
+     if not found_keywords:
+         # Generic search through all files
+         relevant_files = dataset_info['files'][:5]  # Take first 5 files as default
+     else:
+         # Search for files related to the found keywords
+         for file_info in dataset_info['files']:
+             file_name_lower = file_info['name'].lower()
+             for keyword in found_keywords:
+                 if keyword.lower() in file_name_lower:
+                     relevant_files.append(file_info)
+                     break
+
+         # If still no relevant files, take the first 5 files
+         if not relevant_files:
+             relevant_files = dataset_info['files'][:5]
+
+     # Read and analyze the relevant files
+     analysis_result = "# Agricultural Data Analysis Results\n\n"
+     analysis_result += f"Analysis performed for the query: '{query}'.\n\n"
+
+     if found_keywords:
+         analysis_result += f"## Analysis keywords: {', '.join(set(found_keywords))}\n\n"
+
+     # Process each relevant file
+     for file_info in relevant_files[:3]:  # Limit to 3 files for performance
+         try:
+             analysis_result += f"## File: {file_info['name']}\n\n"
+
+             # Read the CSV file
+             df = pd.read_csv(file_info['path'])
+
+             # Basic file stats
+             analysis_result += f"- Rows: {len(df)}\n"
+             analysis_result += f"- Columns: {len(df.columns)}\n"
+             analysis_result += f"- Column list: {', '.join(df.columns.tolist())}\n\n"
+
+             # Sample data
+             analysis_result += "### Data sample:\n\n"
+             analysis_result += df.head(5).to_markdown() + "\n\n"
+
+             # Statistical summary of numeric columns
+             numeric_cols = df.select_dtypes(include=['number']).columns
+             if len(numeric_cols) > 0:
+                 analysis_result += "### Basic statistics:\n\n"
+                 stats_df = df[numeric_cols].describe()
+                 analysis_result += stats_df.to_markdown() + "\n\n"
+
+             # Time series analysis if possible
+             time_cols = [col for col in df.columns if 'year' in col.lower() or 'date' in col.lower()]
+             if time_cols:
+                 analysis_result += "### Time-series patterns:\n\n"
+                 analysis_result += "The dataset contains time-related columns, so time-series analysis is possible.\n\n"
+
+         except Exception as e:
+             logging.error(f"Error analyzing file {file_info['name']}: {e}")
+             analysis_result += f"An error occurred while analyzing this file: {str(e)}\n\n"
+
+     analysis_result += "## Insights for Agricultural Price Forecasting and Demand Analysis\n\n"
+     analysis_result += "Based on the information extracted from the dataset, the following insights are provided:\n\n"
+     analysis_result += "1. Data-driven analysis (a basic summary)\n"
+     analysis_result += "2. Key price and demand trends\n"
+     analysis_result += "3. Production and trade patterns\n\n"
+
+     analysis_result += "This analysis is based on the UN global food and agriculture statistics dataset.\n\n"
+
+     return analysis_result
+
+ # Function to analyze crop recommendation dataset
+ def analyze_crop_recommendation_dataset(query):
+     """Find and analyze crop recommendation data based on the query"""
+     try:
+         dataset_info = load_crop_recommendation_dataset()
+         if not dataset_info or not dataset_info['files']:
+             return "Could not load the crop recommendation dataset."
+
+         analysis_result = "# Crop Recommendation Data Analysis Based on Soil and Environmental Variables\n\n"
+
+         # Process main files
+         for file_info in dataset_info['files'][:2]:  # Limit to the first 2 files
+             try:
+                 analysis_result += f"## File: {file_info['name']}\n\n"
+
+                 if file_info['name'].endswith('.csv'):
+                     df = pd.read_csv(file_info['path'])
+                 elif file_info['name'].endswith('.xlsx'):
+                     df = pd.read_excel(file_info['path'])
+                 else:
+                     continue
+
+                 # Basic dataset info
+                 analysis_result += f"- Data size: {len(df)} rows × {len(df.columns)} columns\n"
+                 analysis_result += "- Crop types included: "
+
+                 # Check if a crop column exists ('작물' is the Korean column name for "crop")
+                 crop_cols = [col for col in df.columns if 'crop' in col.lower() or '작물' in col.lower()]
+                 if crop_cols:
+                     main_crop_col = crop_cols[0]
+                     unique_crops = df[main_crop_col].unique()
+                     analysis_result += f"{len(unique_crops)} types ({', '.join(str(c) for c in unique_crops[:10])})\n\n"
+                 else:
+                     analysis_result += "No crop information column found\n\n"
+
+                 # Extract environmental factors
+                 env_factors = [col for col in df.columns if col.lower() not in ['crop', 'label', 'id', 'index']]
+                 if env_factors:
+                     analysis_result += f"- Environmental factors considered: {', '.join(env_factors)}\n\n"
+
+                 # Sample data
+                 analysis_result += "### Data sample:\n\n"
+                 analysis_result += df.head(5).to_markdown() + "\n\n"
+
+                 # Summary statistics for environmental factors
+                 numeric_factors = []  # default so the later checks are safe when env_factors is empty
+                 if env_factors:
+                     numeric_factors = df[env_factors].select_dtypes(include=['number']).columns
+                     if len(numeric_factors) > 0:
+                         analysis_result += "### Environmental factor statistics:\n\n"
+                         stats_df = df[numeric_factors].describe().round(2)
+                         analysis_result += stats_df.to_markdown() + "\n\n"
+
+                 # Check for query-specific crops
+                 query_terms = query.lower().split()
+                 relevant_crops = []
+
+                 if crop_cols:
+                     for crop in df[main_crop_col].unique():
+                         crop_str = str(crop).lower()
+                         if any(term in crop_str for term in query_terms):
+                             relevant_crops.append(crop)
+
+                     if relevant_crops:
+                         analysis_result += f"### Query-relevant crop analysis: {', '.join(str(c) for c in relevant_crops)}\n\n"
+                         for crop in relevant_crops[:3]:  # Limit to 3 crops
+                             crop_data = df[df[main_crop_col] == crop]
+                             analysis_result += f"#### Summary for {crop}:\n\n"
+                             analysis_result += f"- Number of samples: {len(crop_data)}\n"
+
+                             if len(numeric_factors) > 0:
+                                 crop_stats = crop_data[numeric_factors].describe().round(2)
+                                 analysis_result += "- Average environmental conditions:\n"
+                                 for factor in numeric_factors[:5]:  # Limit to 5 factors
+                                     analysis_result += f"  * {factor}: {crop_stats.loc['mean', factor]}\n"
+                                 analysis_result += "\n"
+
+             except Exception as e:
+                 logging.error(f"Error analyzing crop recommendation file {file_info['name']}: {e}")
+                 analysis_result += f"Analysis error: {str(e)}\n\n"
+
+         analysis_result += "## Crop Recommendation Insights\n\n"
+         analysis_result += "Analysis of the soil and environmental variables dataset yields the following key insights:\n\n"
+         analysis_result += "1. Crop recommendations suited to the regional environment\n"
+         analysis_result += "2. Key environmental factors affecting crop productivity\n"
+         analysis_result += "3. Criteria for selecting optimal crops for sustainable agriculture\n\n"
+
+         return analysis_result
+
+     except Exception as e:
+         logging.error(f"Crop recommendation dataset analysis error: {e}")
+         return "An error occurred while analyzing the crop recommendation dataset."
+
+ # Function to analyze climate impact dataset
+ def analyze_climate_impact_dataset(query):
+     """Find and analyze climate impact on agriculture data based on the query"""
+     try:
+         dataset_info = load_climate_impact_dataset()
+         if not dataset_info or not dataset_info['files']:
+             return "Could not load the climate change impact dataset."
+
+         analysis_result = "# Data Analysis of Climate Change Impacts on Agriculture\n\n"
+
+         # Process main files
+         for file_info in dataset_info['files'][:2]:  # Limit to first 2 files
+             try:
+                 analysis_result += f"## File: {file_info['name']}\n\n"
+
+                 if file_info['name'].endswith('.csv'):
+                     df = pd.read_csv(file_info['path'])
+                 elif file_info['name'].endswith('.xlsx'):
+                     df = pd.read_excel(file_info['path'])
+                 else:
+                     continue
+
+                 # Basic dataset info
+                 analysis_result += f"- Data size: {len(df)} rows × {len(df.columns)} columns\n"
+
+                 # Check for a region column ('지역' is the Korean column name for "region")
+                 region_cols = [col for col in df.columns if 'region' in col.lower() or 'country' in col.lower() or '지역' in col.lower()]
+                 if region_cols:
+                     main_region_col = region_cols[0]
+                     regions = df[main_region_col].unique()
+                     analysis_result += f"- Regions included: {len(regions)} ({', '.join(str(r) for r in regions[:5])})\n"
+
+                 # Identify climate- and crop-related columns (Korean terms: 기온 temperature, 강수량 rainfall, 수확량 yield, 생산량 production)
+                 climate_cols = [col for col in df.columns if any(term in col.lower() for term in
+                                 ['temp', 'rainfall', 'precipitation', 'climate', 'weather', '기온', '강수량'])]
+                 crop_cols = [col for col in df.columns if any(term in col.lower() for term in
+                              ['yield', 'production', 'crop', 'harvest', '수확량', '생산량'])]
+
+                 if climate_cols:
+                     analysis_result += f"- Climate-related variables: {', '.join(climate_cols)}\n"
+                 if crop_cols:
+                     analysis_result += f"- Crop-related variables: {', '.join(crop_cols)}\n\n"
+
+                 # Sample data
+                 analysis_result += "### Data sample:\n\n"
+                 analysis_result += df.head(5).to_markdown() + "\n\n"
+
+                 # Time series pattern if available ('연도' is the Korean column name for "year")
+                 year_cols = [col for col in df.columns if 'year' in col.lower() or 'date' in col.lower() or '연도' in col.lower()]
+                 if year_cols:
+                     analysis_result += "### Time-series climate impact patterns:\n\n"
+                     analysis_result += "This dataset supports analyzing the relationship between climate change and agricultural productivity over time.\n\n"
+
+                 # Statistical summary of key variables
+                 key_vars = climate_cols + crop_cols
+                 numeric_vars = df[key_vars].select_dtypes(include=['number']).columns
+                 if len(numeric_vars) > 0:
+                     analysis_result += "### Key variable statistics:\n\n"
+                     stats_df = df[numeric_vars].describe().round(2)
+                     analysis_result += stats_df.to_markdown() + "\n\n"
+
+                 # Check for correlations between climate and crop variables
+                 if len(climate_cols) > 0 and len(crop_cols) > 0:
+                     numeric_climate = df[climate_cols].select_dtypes(include=['number']).columns
+                     numeric_crop = df[crop_cols].select_dtypes(include=['number']).columns
+
+                     if len(numeric_climate) > 0 and len(numeric_crop) > 0:
+                         analysis_result += "### Correlations between climate and crop production:\n\n"
+                         try:
+                             corr_vars = list(numeric_climate)[:2] + list(numeric_crop)[:2]  # Limit to 2 of each type
+                             corr_df = df[corr_vars].corr().round(3)
+                             analysis_result += corr_df.to_markdown() + "\n\n"
+                             analysis_result += "The correlation table above shows the strength of the relationship between climate variables and crop productivity.\n\n"
+                         except Exception:
+                             analysis_result += "An error occurred while computing correlations.\n\n"
+
+             except Exception as e:
+                 logging.error(f"Error analyzing climate impact file {file_info['name']}: {e}")
+                 analysis_result += f"Analysis error: {str(e)}\n\n"
+
+         analysis_result += "## Climate Change Impact Insights\n\n"
+         analysis_result += "Analysis of the climate change impact data yields the following insights:\n\n"
+         analysis_result += "1. Patterns of crop productivity variation driven by temperature change\n"
+         analysis_result += "2. Effects of changing rainfall on agricultural yields\n"
+         analysis_result += "3. Proposed agricultural strategies for responding to climate change\n"
+         analysis_result += "4. Regional climate vulnerability and adaptation measures\n\n"
+
+         return analysis_result
+
+     except Exception as e:
+         logging.error(f"Climate impact dataset analysis error: {e}")
+         return "An error occurred while analyzing the climate change impact dataset."
+
+ # Function to analyze the soybean dataset if selected
+ def analyze_soybean_dataset(query):
+     """Find and analyze soybean agriculture data based on the query"""
+     try:
+         dataset_info = load_soybean_dataset()
+         if not dataset_info or not dataset_info['files']:
+             return "Could not load the soybean agriculture dataset."
+
+         analysis_result = "# Advanced Soybean Agriculture Data Analysis\n\n"
+
+         # Process main files
+         for file_info in dataset_info['files'][:2]:  # Limit to the first 2 files
+             try:
+                 analysis_result += f"## File: {file_info['name']}\n\n"
+
+                 if file_info['name'].endswith('.csv'):
+                     df = pd.read_csv(file_info['path'])
+                 elif file_info['name'].endswith('.xlsx'):
+                     df = pd.read_excel(file_info['path'])
+                 else:
+                     continue
+
+                 # Basic file stats
+                 analysis_result += f"- Data size: {len(df)} rows × {len(df.columns)} columns\n"
+
+                 # Check for region/location columns ('지역' is the Korean column name for "region")
+                 location_cols = [col for col in df.columns if any(term in col.lower() for term in
+                                  ['region', 'location', 'area', 'country', '지역'])]
+                 if location_cols:
+                     main_loc_col = location_cols[0]
+                     locations = df[main_loc_col].unique()
+                     analysis_result += f"- Regions included: {len(locations)} ({', '.join(str(loc) for loc in locations[:5])})\n"
+
+                 # Identify yield and production columns (수확량 yield, 생산량 production)
+                 yield_cols = [col for col in df.columns if any(term in col.lower() for term in
+                               ['yield', 'production', 'harvest', '수확량', '생산량'])]
+                 if yield_cols:
+                     analysis_result += f"- Productivity-related variables: {', '.join(yield_cols)}\n"
+
+                 # Identify environmental factors (기온 temperature, 강수량 rainfall, 토양 soil, 비료 fertilizer, 관개 irrigation)
+                 env_cols = [col for col in df.columns if any(term in col.lower() for term in
+                             ['temp', 'rainfall', 'soil', 'fertilizer', 'nutrient', 'irrigation',
+                              '기온', '강수량', '토양', '비료', '관개'])]
+                 if env_cols:
+                     analysis_result += f"- Environment-related variables: {', '.join(env_cols)}\n\n"
+
+                 # Sample data
+                 analysis_result += "### Data sample:\n\n"
+                 analysis_result += df.head(5).to_markdown() + "\n\n"
+
+                 # Statistical summary of key variables
+                 key_vars = yield_cols + env_cols
+                 numeric_vars = df[key_vars].select_dtypes(include=['number']).columns
+                 if len(numeric_vars) > 0:
+                     analysis_result += "### Key variable statistics:\n\n"
+                     stats_df = df[numeric_vars].describe().round(2)
+                     analysis_result += stats_df.to_markdown() + "\n\n"
+
+                 # Time series analysis if possible ('연도' is the Korean column name for "year")
+                 year_cols = [col for col in df.columns if 'year' in col.lower() or 'date' in col.lower() or '연도' in col.lower()]
+                 if year_cols:
+                     analysis_result += "### Time-series productivity patterns:\n\n"
+                     analysis_result += "This dataset can track changes in soybean productivity over time.\n\n"
+
+                 # Check for correlations between environmental factors and yield
+                 if len(env_cols) > 0 and len(yield_cols) > 0:
+                     numeric_env = df[env_cols].select_dtypes(include=['number']).columns
+                     numeric_yield = df[yield_cols].select_dtypes(include=['number']).columns
+
+                     if len(numeric_env) > 0 and len(numeric_yield) > 0:
+                         analysis_result += "### Correlations between environmental factors and soybean productivity:\n\n"
+                         try:
+                             corr_vars = list(numeric_env)[:3] + list(numeric_yield)[:2]  # Limit variables
+                             corr_df = df[corr_vars].corr().round(3)
+                             analysis_result += corr_df.to_markdown() + "\n\n"
+                         except Exception:
+                             analysis_result += "An error occurred while computing correlations.\n\n"
+
+             except Exception as e:
+                 logging.error(f"Error analyzing soybean file {file_info['name']}: {e}")
+                 analysis_result += f"Analysis error: {str(e)}\n\n"
+
+         analysis_result += "## Soybean Agriculture Insights\n\n"
+         analysis_result += "Analysis of the advanced soybean agriculture dataset yields the following insights:\n\n"
+         analysis_result += "1. Environmental conditions for optimal soybean production\n"
+         analysis_result += "2. Regional patterns of change in soybean productivity\n"
+         analysis_result += "3. Agricultural techniques and approaches for improving productivity\n"
+         analysis_result += "4. Guidance on selecting soybean varieties that match market demand\n\n"
+
+         return analysis_result
+
+     except Exception as e:
+         logging.error(f"Soybean dataset analysis error: {e}")
+         return "An error occurred while analyzing the soybean dataset."
+
+ # ──────────────────────────────── System Prompt ─────────────────────────
+ def get_system_prompt(mode="price_forecast", style="professional", include_search_results=True, include_uploaded_files=False) -> str:
+     """
+     Generate a system prompt for the 'Agricultural Price & Demand Forecast AI Assistant' interface based on:
+     - The selected analysis mode and style
+     - Guidelines for using agricultural datasets, web search results and uploaded files
+     """
+     base_prompt = """
+ You are an AI assistant acting as an agricultural data expert, performing agricultural price forecasting and demand analysis.
+
+ Core duties:
+ 1. Analyze agricultural markets based on the UN global food and agriculture statistics dataset
+ 2. Forecast agricultural price trends and analyze demand patterns
+ 3. Provide clear, well-grounded analysis based on the data
+ 4. Organize and present relevant information and insights systematically
+ 5. Use charts, graphs, and similar aids appropriately to support visual understanding
+ 6. Apply insights extracted from the soil- and environment-based crop recommendation dataset
+ 7. Analyze environmental change scenarios using the climate change impact on agriculture dataset
+
+ Key guidelines:
+ - Provide objective, data-driven analysis
+ - Clearly explain the analysis process and methodology
+ - Be transparent about statistical reliability and limitations
+ - Supplement the analysis with easy-to-understand visual elements
+ - Structure responses systematically using Markdown
+ """
+
+     mode_prompts = {
+         "price_forecast": """
+ Focus on agricultural price forecasting and market analysis:
+ - Provide forecasts based on historical price data patterns
+ - Analyze drivers of price volatility (seasonality, weather, policy, etc.)
+ - Present short-term and medium-to-long-term price outlooks
+ - Identify domestic and international factors affecting prices
+ - Highlight market uncertainty and risk factors
+ """,
+         "market_trend": """
+ Focus on market trends and demand pattern analysis:
+ - Identify changing demand patterns for key agricultural products
+ - Analyze consumer preferences and purchasing behavior
+ - Explore market segments and niche opportunities
+ - Assess market expansion/contraction trends
+ - Analyze demand elasticity and price sensitivity
+ """,
+         "production_analysis": """
+ Focus on production analysis and food security outlook:
+ - Analyze crop production trends and drivers of variation
+ - Assess the relationship between food production and population growth
+ - Compare production capacity across countries/regions
+ - Identify food security threats and vulnerabilities
+ - Propose strategies and opportunities for improving productivity
+ """,
+         "agricultural_policy": """
+ Focus on agricultural policy and regulatory impact analysis:
+ - Analyze the market impact of government policies, subsidies, and regulations
+ - Assess how international trade policy and tariffs affect agricultural prices
+ - Review the effectiveness of agricultural support programs
+ - Forecast market adjustments to changes in the regulatory environment
+ - Analyze intended and unintended consequences of policy interventions
+ """,
+         "climate_impact": """
+ Focus on analyzing the impact of climate change on agriculture:
+ - Analyze correlations between climate change and crop yields/quality
+ - Assess how extreme weather affects price volatility
+ - Forecast shifts in agricultural patterns under long-term climate trends
+ - Propose strategies for climate-resilient agricultural systems
+ - Map regional climate risk exposure and vulnerability
+ """
+     }
+
+     style_guides = {
+         "professional": "Use a professional, academic tone. Use technical terms appropriately and provide systematic data analysis.",
+         "simple": "Explain in simple, concise language. Minimize jargon and convey key concepts in everyday terms.",
+         "detailed": "Provide detailed, comprehensive analysis. Present in-depth analysis that considers multiple data points, statistical nuances, and alternative scenarios.",
+         "action_oriented": "Focus on actionable insights and concrete recommendations. Include 'Next steps' and 'Practical advice' sections."
+     }
+
+     dataset_guide = """
+ Guidelines for using the agriculture datasets:
+ - Use the UN global food and agriculture statistics dataset as the basis of your analysis
+ - Integrate insights from the soil- and environment-based crop recommendation dataset into crop selection and growing-condition analysis
+ - Use the climate change impact dataset for sustainability and future outlook analysis
+ - Clearly cite the source and year of the data
+ - Derive insights by analyzing relationships between key variables in the datasets
+ - Be transparent about the limitations and uncertainty of the data
+ - Where necessary, identify data gaps and suggest areas needing further research
+ """
+
+     soybean_guide = """
+ Guidelines for using the advanced soybean agriculture dataset:
+ - Analyze soybean production conditions and yield patterns in comparison with other crops
+ - Provide insights into the economic value and market opportunities of soybean farming
+ - Highlight the key environmental factors affecting soybean productivity
+ - Suggest innovations in soybean cultivation techniques and ways to improve profitability
+ - Share practical approaches to sustainable soybean farming
+ """
+
+     crop_recommendation_guide = """
+ Guidelines for soil- and environment-based crop recommendations:
+ - Present criteria for selecting optimal crops suited to regional characteristics
+ - Analyze the correlation between soil conditions and crop suitability
+ - Apply crop productivity prediction models based on environmental variables
+ - Propose crop selection strategies to improve agricultural productivity and profitability
+ - Recommend crop diversification approaches for sustainable agriculture
+ """
+
+     climate_impact_guide = """
+ Guidelines for using the climate change impact dataset:
+ - Predict changes in crop productivity under climate change scenarios
+ - Suggest climate-adaptive agricultural technologies and strategies
+ - Analyze regional climate risk factors and countermeasures
+ - Propose crop choices and planting-schedule adjustments to cope with climate change
+ - Assess how climate change affects agricultural prices and market trends
+ """
+
+     search_guide = """
+ Guidelines for using web search results:
+ - Use search results as up-to-date market information that complements the dataset analysis
+ - Include the source of each piece of information as a Markdown link: [source name](URL)
+ - Cite a source for every major claim or data point
+ - When sources conflict, explain the differing perspectives and their reliability
+ - Include relevant video links in the form [Video: title](video_url)
+ - Integrate the search information into a consistent, well-structured response
+ - End with a "References" section listing all major sources
+ """
+
+     upload_guide = """
+ Guidelines for using uploaded files:
+ - Use the uploaded files as the primary information source for the response
+ - Extract and highlight file information directly relevant to the query
+ - Quote relevant passages and cite the specific file as the source
+ - Turn numeric data from CSV files into summary sentences
+ - Reference specific sections or pages for PDF content
+ - Integrate file information seamlessly with the web search results
+ - When information conflicts, prioritize file content over general web results
+ """
+
+     # Base prompt
+     final_prompt = base_prompt
+
+     # Add mode-specific guidance
+     if mode in mode_prompts:
+         final_prompt += "\n" + mode_prompts[mode]
+
+     # Style
+     if style in style_guides:
+         final_prompt += f"\n\nAnalysis style: {style_guides[style]}"
+
+     # Always include dataset guides
+     final_prompt += f"\n\n{dataset_guide}"
+     final_prompt += f"\n\n{crop_recommendation_guide}"
+     final_prompt += f"\n\n{climate_impact_guide}"
+
+     # Conditionally add the soybean dataset guide if selected in the UI
+     if st.session_state.get('use_soybean_dataset', False):
+         final_prompt += f"\n\n{soybean_guide}"
+
+     if include_search_results:
+         final_prompt += f"\n\n{search_guide}"
+
+     if include_uploaded_files:
+         final_prompt += f"\n\n{upload_guide}"
+
+     final_prompt += """
+ \n\nResponse format requirements:
+ - Structure the response with Markdown headings (## and ###)
+ - Emphasize important points in bold (**text**)
+ - End with a "Related questions" section containing 3-5 follow-up questions
+ - Format the response with appropriate spacing and paragraph breaks
+ - Make all links clickable Markdown: [text](url)
+ - Where possible, represent data visually (describing tables, graphs, etc.)
+ """
+     return final_prompt
+
+ # ──────────────────────────────── Brave Search API ────────────────────────
+ @st.cache_data(ttl=3600)
+ def brave_search(query: str, count: int = 10):
+     if not BRAVE_KEY:
+         raise RuntimeError("⚠️ SERPHOUSE_API_KEY (Brave API Key) environment variable is empty.")
+
+     headers = {"Accept": "application/json", "Accept-Encoding": "gzip", "X-Subscription-Token": BRAVE_KEY}
+     # Bias the search toward agricultural price/market content
+     params = {"q": query + " agricultural price trends agriculture data", "count": str(count)}
+
+     for attempt in range(3):
+         try:
+             r = requests.get(BRAVE_ENDPOINT, headers=headers, params=params, timeout=15)
+             r.raise_for_status()
+             data = r.json()
+
+             raw = data.get("web", {}).get("results") or data.get("results", [])
+             if not raw:
+                 logging.warning(f"No Brave search results found. Response: {data}")
+                 raise ValueError("No search results found.")
+
+             arts = []
+             for i, res in enumerate(raw[:count], 1):
+                 url = res.get("url", res.get("link", ""))
+                 host = re.sub(r"https?://(www\.)?", "", url).split("/")[0]
+                 arts.append({
+                     "index": i,
+                     "title": res.get("title", "No title"),
+                     "link": url,
+                     "snippet": res.get("description", res.get("text", "No snippet")),
+                     "displayed_link": host
+                 })
+
+             return arts
+
+         except Exception as e:
+             logging.error(f"Brave search failure (attempt {attempt+1}/3): {e}")
+             if attempt < 2:
+                 time.sleep(5)
+
+     return []
+
+ @st.cache_data(ttl=3600)
+ def brave_video_search(query: str, count: int = 3):
+     if not BRAVE_KEY:
+         raise RuntimeError("⚠️ SERPHOUSE_API_KEY (Brave API Key) environment variable is empty.")
+
+     headers = {"Accept": "application/json", "Accept-Encoding": "gzip", "X-Subscription-Token": BRAVE_KEY}
+     params = {"q": query + " agricultural prices agriculture market", "count": str(count)}
+
+     for attempt in range(3):
+         try:
+             r = requests.get(BRAVE_VIDEO_ENDPOINT, headers=headers, params=params, timeout=15)
+             r.raise_for_status()
+             data = r.json()
+
+             results = []
+             for i, vid in enumerate(data.get("results", [])[:count], 1):
+                 results.append({
+                     "index": i,
+                     "title": vid.get("title", "Video"),
+                     "video_url": vid.get("url", ""),
+                     "thumbnail_url": vid.get("thumbnail", {}).get("src", ""),
+                     "source": vid.get("provider", {}).get("name", "Unknown source")
+                 })
+
+             return results
+
+         except Exception as e:
+             logging.error(f"Brave video search failure (attempt {attempt+1}/3): {e}")
+             if attempt < 2:
+                 time.sleep(5)
+
+     return []
+
+ @st.cache_data(ttl=3600)
+ def brave_news_search(query: str, count: int = 3):
+     if not BRAVE_KEY:
+         raise RuntimeError("⚠️ SERPHOUSE_API_KEY (Brave API Key) environment variable is empty.")
+
+     headers = {"Accept": "application/json", "Accept-Encoding": "gzip", "X-Subscription-Token": BRAVE_KEY}
+     params = {"q": query + " agricultural price trends agriculture", "count": str(count)}
+
+     for attempt in range(3):
+         try:
+             r = requests.get(BRAVE_NEWS_ENDPOINT, headers=headers, params=params, timeout=15)
+             r.raise_for_status()
+             data = r.json()
+
+             results = []
+             for i, news in enumerate(data.get("results", [])[:count], 1):
+                 results.append({
+                     "index": i,
+                     "title": news.get("title", "News article"),
+                     "url": news.get("url", ""),
+                     "description": news.get("description", ""),
+                     "source": news.get("source", "Unknown source"),
+                     "date": news.get("age", "Unknown date")
+                 })
+
+             return results
+
+         except Exception as e:
+             logging.error(f"Brave news search failure (attempt {attempt+1}/3): {e}")
+             if attempt < 2:
+                 time.sleep(5)
+
+     return []
+
+ def mock_results(query: str) -> str:
+     ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+     return (f"# Fallback Search Content (generated: {ts})\n\n"
+             f"The search API request for '{query}' failed or returned no results. "
+             f"Please generate a response based on existing knowledge.\n\n"
+             f"Consider the following:\n\n"
+             f"- Basic concepts and significance of {query}\n"
+             f"- Commonly known related statistics or trends\n"
+             f"- Expert opinions on this topic\n"
+             f"- Questions readers might have\n\n"
+             f"Note: this is fallback guidance, not real-time data.\n\n")
+
+ def do_web_search(query: str) -> str:
+     try:
+         arts = brave_search(query, 10)
+         if not arts:
+             logging.warning("No search results, using fallback content")
+             return mock_results(query)
+
+         videos = brave_video_search(query, 2)
+         news = brave_news_search(query, 3)
+
+         result = "# Web Search Results\nUse the following results to provide a comprehensive answer that complements the dataset analysis.\n\n"
+
+         result += "## Web Results\n\n"
+         for a in arts[:5]:
+             result += f"### Result {a['index']}: {a['title']}\n\n{a['snippet']}\n\n"
+             result += f"**Source**: [{a['displayed_link']}]({a['link']})\n\n---\n"
+
+         if news:
+             result += "## News Results\n\n"
+             for n in news:
+                 result += f"### {n['title']}\n\n{n['description']}\n\n"
+                 result += f"**Source**: [{n['source']}]({n['url']}) - {n['date']}\n\n---\n"
+
+         if videos:
+             result += "## Video Results\n\n"
+             for vid in videos:
+                 result += f"### {vid['title']}\n\n"
+                 if vid.get('thumbnail_url'):
+                     result += f"![Thumbnail]({vid['thumbnail_url']})\n\n"
+                 result += f"**Watch**: [{vid['source']}]({vid['video_url']})\n\n"
+
+         return result
+
+     except Exception as e:
+         logging.error(f"Web search process failed: {str(e)}")
+         return mock_results(query)
+
+ # ──────────────────────────────── File Upload Handling ─────────────────────
+ def process_text_file(file):
+     try:
+         content = file.read()
+         file.seek(0)
+
+         text = content.decode('utf-8', errors='ignore')
+         if len(text) > 10000:
+             text = text[:9700] + "...(truncated)..."
+
+         result = f"## Text file: {file.name}\n\n" + text
+         return result
+     except Exception as e:
+         logging.error(f"Error processing text file: {str(e)}")
+         return f"Text file processing error: {str(e)}"
+
+ def process_csv_file(file):
+     try:
+         content = file.read()
+         file.seek(0)
+
+         df = pd.read_csv(io.BytesIO(content))
+         result = f"## CSV file: {file.name}\n\n"
+         result += f"- Rows: {len(df)}\n"
+         result += f"- Columns: {len(df.columns)}\n"
+         result += f"- Column names: {', '.join(df.columns.tolist())}\n\n"
+
+         result += "### Data preview\n\n"
+         preview_df = df.head(10)
+         try:
+             markdown_table = preview_df.to_markdown(index=False)
+             if markdown_table:
+                 result += markdown_table + "\n\n"
+             else:
+                 result += "The CSV data could not be displayed.\n\n"
+         except Exception as e:
+             logging.error(f"Markdown table conversion error: {e}")
+             result += "Displaying the data as text:\n\n" + str(preview_df) + "\n\n"
+
+         num_cols = df.select_dtypes(include=['number']).columns
+         if len(num_cols) > 0:
+             result += "### Basic statistics\n\n"
+             try:
+                 stats_df = df[num_cols].describe().round(2)
+                 stats_markdown = stats_df.to_markdown()
+                 if stats_markdown:
+                     result += stats_markdown + "\n\n"
+                 else:
+                     result += "The statistics could not be displayed.\n\n"
+             except Exception as e:
+                 logging.error(f"Statistical info conversion error: {e}")
+                 result += "The statistics could not be generated.\n\n"
+
+         return result
+     except Exception as e:
+         logging.error(f"CSV file processing error: {str(e)}")
+         return f"CSV file processing error: {str(e)}"
+
+ def process_pdf_file(file):
+     try:
+         file_bytes = file.read()
+         file.seek(0)
+
+         pdf_file = io.BytesIO(file_bytes)
+         reader = PyPDF2.PdfReader(pdf_file, strict=False)
+
+         result = f"## PDF file: {file.name}\n\n- Total pages: {len(reader.pages)}\n\n"
+
+         max_pages = min(5, len(reader.pages))
+         all_text = ""
+
+         for i in range(max_pages):
+             try:
+                 page = reader.pages[i]
+                 page_text = page.extract_text()
+                 current_page_text = f"### Page {i+1}\n\n"
+                 if page_text and len(page_text.strip()) > 0:
+                     if len(page_text) > 1500:
+                         current_page_text += page_text[:1500] + "...(truncated)...\n\n"
+                     else:
+                         current_page_text += page_text + "\n\n"
+                 else:
+                     current_page_text += "(no text could be extracted)\n\n"
+
+                 all_text += current_page_text
+
+                 if len(all_text) > 8000:
+                     all_text += "...(remaining pages truncated)...\n\n"
+                     break
+
+             except Exception as page_err:
+                 logging.error(f"Error processing PDF page {i+1}: {str(page_err)}")
+                 all_text += f"### Page {i+1}\n\n(content extraction error: {str(page_err)})\n\n"
+
+         if len(reader.pages) > max_pages:
+             all_text += f"\nNote: only the first {max_pages} pages are shown.\n\n"
+
+         result += "### PDF content\n\n" + all_text
+         return result
+
+     except Exception as e:
+         logging.error(f"PDF file processing error: {str(e)}")
+         return f"## PDF file: {file.name}\n\nError: {str(e)}\n\nThe file could not be processed."
+
+ def process_uploaded_files(files):
+     if not files:
+         return None
+
+     result = "# Uploaded File Contents\n\nContents of the files provided by the user.\n\n"
+     for file in files:
+         try:
+             ext = file.name.split('.')[-1].lower()
+             if ext == 'txt':
+                 result += process_text_file(file) + "\n\n---\n\n"
+             elif ext == 'csv':
+                 result += process_csv_file(file) + "\n\n---\n\n"
+             elif ext == 'pdf':
+                 result += process_pdf_file(file) + "\n\n---\n\n"
+             else:
+                 result += f"### Unsupported file: {file.name}\n\n---\n\n"
+         except Exception as e:
+             logging.error(f"File processing error {file.name}: {e}")
+             result += f"### File processing error: {file.name}\n\nError: {e}\n\n---\n\n"
+
+     return result
+
+ # ──────────────────────────────── Image & Utility ─────────────────────────
+
+ def generate_image(prompt, w=768, h=768, g=3.5, steps=30, seed=3):
+     if not prompt:
+         return None, "Insufficient prompt"
+     try:
+         res = Client(IMAGE_API_URL).predict(
+             prompt=prompt, width=w, height=h, guidance=g,
+             inference_steps=steps, seed=seed,
+             do_img2img=False, init_image=None,
+             image2image_strength=0.8, resize_img=True,
+             api_name="/generate_image"
+         )
+         return res[0], f"Seed: {res[1]}"
+     except Exception as e:
+         logging.error(e)
+         return None, str(e)
+
+ def extract_image_prompt(response_text: str, topic: str):
+     client = get_openai_client()
+     try:
+         response = client.chat.completions.create(
+             model="gpt-4.1-mini",
+             messages=[
+                 {"role": "system", "content": "Generate an image prompt about agriculture and agricultural products. Return only a single-line English prompt, with no other text."},
+                 {"role": "user", "content": f"Topic: {topic}\n\n---\n{response_text}\n\n---"}
+             ],
+             temperature=1,
+             max_tokens=80,
+             top_p=1
+         )
+         return response.choices[0].message.content.strip()
+     except Exception as e:
+         logging.error(f"OpenAI image prompt generation error: {e}")
+         return "A professional photograph of agricultural produce and farm fields, data visualization of crop prices and trends, high quality"
+
+ def md_to_html(md: str, title="Agricultural Demand Forecast Analysis Results"):
+     return f"<!DOCTYPE html><html><head><title>{title}</title><meta charset='utf-8'></head><body>{markdown.markdown(md)}</body></html>"
+
+ def keywords(text: str, top=5):
+     # Keep Hangul (가-힣) as well as alphanumerics when extracting keywords
+     cleaned = re.sub(r"[^가-힣a-zA-Z0-9\s]", "", text)
+     return " ".join(cleaned.split()[:top])
+
+ # ──────────────────────────────── Streamlit UI ────────────────────────────
+ def agricultural_price_forecast_app():
+     st.title("Agricultural Demand & Price Forecast AI Assistant")
+     st.markdown("Agricultural market forecasting based on analysis of the UN global food and agriculture statistics dataset")
+
+     if "ai_model" not in st.session_state:
+         st.session_state.ai_model = "gpt-4.1-mini"
+     if "messages" not in st.session_state:
+         st.session_state.messages = []
+     if "auto_save" not in st.session_state:
+         st.session_state.auto_save = True
+     if "generate_image" not in st.session_state:
+         st.session_state.generate_image = False
+     if "web_search_enabled" not in st.session_state:
+         st.session_state.web_search_enabled = True
+     if "analysis_mode" not in st.session_state:
+         st.session_state.analysis_mode = "price_forecast"
+     if "response_style" not in st.session_state:
+         st.session_state.response_style = "professional"
+     if "use_soybean_dataset" not in st.session_state:
+         st.session_state.use_soybean_dataset = False
+
+     sb = st.sidebar
+     sb.title("Analysis Settings")
+
+     # Kaggle dataset info display
+     if sb.checkbox("Show dataset info", value=False):
+         st.info("Loading the UN global food and agriculture statistics dataset...")
+         dataset_info = load_agriculture_dataset()
+         if dataset_info:
+             st.success(f"Dataset loaded: {len(dataset_info['files'])} files")
+
+             with st.expander("Dataset preview", expanded=False):
+                 for file_info in dataset_info['files'][:5]:
+                     st.write(f"**{file_info['name']}** ({file_info['size_mb']} MB)")
+         else:
+             st.error("Failed to load the dataset. Check your Kaggle API settings.")
+
+     sb.subheader("Analysis Configuration")
+     sb.selectbox(
+         "Analysis mode",
+         options=list(ANALYSIS_MODES.keys()),
+         format_func=lambda x: ANALYSIS_MODES[x],
+         key="analysis_mode"
+     )
+
+     sb.selectbox(
+         "Response style",
+         options=list(RESPONSE_STYLES.keys()),
+         format_func=lambda x: RESPONSE_STYLES[x],
+         key="response_style"
+     )
+
+     # Dataset selection
+     sb.subheader("Dataset Selection")
+     sb.checkbox(
+         "Use the advanced soybean agriculture dataset",
+         key="use_soybean_dataset",
+         help="Provides more accurate answers to soybean-related questions."
+     )
+
+     # Info on the always-enabled datasets
+     sb.info("Datasets enabled by default:\n- UN global food and agriculture statistics\n- Soil- and environment-based crop recommendation\n- Climate change impact on agriculture")
+
+     # Example queries
+     sb.subheader("Example Questions")
+     c1, c2, c3 = sb.columns(3)
+     if c1.button("Rice price outlook", key="ex1"):
+         process_example(EXAMPLE_QUERIES["example1"])
+     if c2.button("Climate impact", key="ex2"):
+         process_example(EXAMPLE_QUERIES["example2"])
+     if c3.button("Jeungpyeong crops", key="ex3"):
+         process_example(EXAMPLE_QUERIES["example3"])
+
+     sb.subheader("Other Settings")
+     sb.toggle("Auto save", key="auto_save")
+     sb.toggle("Auto-generate image", key="generate_image")
+
+     web_search_enabled = sb.toggle("Use web search", value=st.session_state.web_search_enabled)
+     st.session_state.web_search_enabled = web_search_enabled
+
+     if web_search_enabled:
+         st.sidebar.info("✅ Web search results will be integrated into the response.")
+
+     # Download the latest response
+     latest_response = next(
+         (m["content"] for m in reversed(st.session_state.messages)
+          if m["role"] == "assistant" and m["content"].strip()),
+         None
+     )
+     if latest_response:
+         title_match = re.search(r"# (.*?)(\n|$)", latest_response)
+         if title_match:
+             title = title_match.group(1).strip()
+         else:
+             first_line = latest_response.split('\n', 1)[0].strip()
+             title = first_line[:40] + "..." if len(first_line) > 40 else first_line
+
+         sb.subheader("Download Latest Response")
+         d1, d2 = sb.columns(2)
+         d1.download_button("Download as Markdown", latest_response,
+                            file_name=f"{title}.md", mime="text/markdown")
+         d2.download_button("Download as HTML", md_to_html(latest_response, title),
+                            file_name=f"{title}.html", mime="text/html")
+
+     # JSON conversation record upload
+     up = sb.file_uploader("Load conversation history (.json)", type=["json"], key="json_uploader")
+     if up:
+         try:
+             st.session_state.messages = json.load(up)
+             sb.success("Conversation history loaded successfully")
+         except Exception as e:
+             sb.error(f"Load failed: {e}")
+
+     # JSON conversation record download
+     if sb.button("Download conversation history as JSON"):
+         sb.download_button(
+             "Save",
+             data=json.dumps(st.session_state.messages, ensure_ascii=False, indent=2),
+             file_name="conversation_history.json",
+             mime="application/json"
+         )
+
+     # File Upload
+     st.subheader("File Upload")
+     uploaded_files = st.file_uploader(
+         "Upload files to use as reference material (txt, csv, pdf)",
+         type=["txt", "csv", "pdf"],
+         accept_multiple_files=True,
+         key="file_uploader"
+     )
+
+     if uploaded_files:
+         file_count = len(uploaded_files)
+         st.success(f"{file_count} file(s) uploaded. They will be used as sources for the query.")
+
+         with st.expander("Uploaded file preview", expanded=False):
+             for idx, file in enumerate(uploaded_files):
+                 st.write(f"**File name:** {file.name}")
+                 ext = file.name.split('.')[-1].lower()
+
+                 if ext == 'txt':
+                     preview = file.read(1000).decode('utf-8', errors='ignore')
+                     file.seek(0)
+                     st.text_area(
+                         f"Preview of {file.name}",
+                         preview + ("..." if len(preview) >= 1000 else ""),
+                         height=150
+                     )
+                 elif ext == 'csv':
+                     try:
+                         df = pd.read_csv(file)
+                         file.seek(0)
+                         st.write("CSV preview (up to 5 rows)")
+                         st.dataframe(df.head(5))
+                     except Exception as e:
+                         st.error(f"CSV preview failed: {e}")
+                 elif ext == 'pdf':
+                     try:
+                         file_bytes = file.read()
+                         file.seek(0)
+
+                         pdf_file = io.BytesIO(file_bytes)
+                         reader = PyPDF2.PdfReader(pdf_file, strict=False)
+
+                         pc = len(reader.pages)
+                         st.write(f"PDF file: {pc} pages")
+
+                         if pc > 0:
+                             try:
+                                 page_text = reader.pages[0].extract_text()
+                                 preview = page_text[:500] if page_text else "(no text could be extracted)"
+                                 st.text_area("First page preview", preview + "...", height=150)
+                             except Exception:
+                                 st.warning("Failed to extract text from the first page")
+                     except Exception as e:
+                         st.error(f"PDF preview failed: {e}")
+
+                 if idx < file_count - 1:
+                     st.divider()
+
+     # Display existing messages
+     for m in st.session_state.messages:
+         with st.chat_message(m["role"]):
+             st.markdown(m["content"], unsafe_allow_html=True)
+
+             # Videos
+             if "videos" in m and m["videos"]:
+                 st.subheader("Related Videos")
+                 for video in m["videos"]:
+                     video_title = video.get('title', 'Related video')
+                     video_url = video.get('url', '')
+                     thumbnail = video.get('thumbnail', '')
+
+                     if thumbnail:
+                         col1, col2 = st.columns([1, 3])
+                         with col1:
+                             st.write("🎬")
+                         with col2:
+                             st.markdown(f"**[{video_title}]({video_url})**")
+                             st.write(f"Source: {video.get('source', 'Unknown')}")
+                     else:
+                         st.markdown(f"🎬 **[{video_title}]({video_url})**")
+                         st.write(f"Source: {video.get('source', 'Unknown')}")
+
+     # User input
+     query = st.chat_input("Enter a question about agricultural prices, demand, or market trends.")
+     if query:
+         process_input(query, uploaded_files)
+
+     sb.markdown("---")
+     sb.markdown("Created by Vidraft | [Community](https://discord.gg/openfreeai)")
+
+ def process_example(topic):
+     process_input(topic, [])
+
+ def process_input(query: str, uploaded_files):
+     if not any(m["role"] == "user" and m["content"] == query for m in st.session_state.messages):
+         st.session_state.messages.append({"role": "user", "content": query})
+
+     with st.chat_message("user"):
+         st.markdown(query)
+
+     with st.chat_message("assistant"):
+         placeholder = st.empty()
+         message_placeholder = st.empty()
+         full_response = ""
+
+         use_web_search = st.session_state.web_search_enabled
+         has_uploaded_files = bool(uploaded_files) and len(uploaded_files) > 0
+
+         try:
+             status = st.status("Preparing to answer the question...")
+             status.update(label="Initializing client...")
+
+             client = get_openai_client()
+
+             search_content = None
+             video_results = []
+             news_results = []
+
+             # Fetch the agriculture dataset analysis results
+             status.update(label="Analyzing agriculture datasets...")
+             with st.spinner("Analyzing datasets..."):
+                 dataset_analysis = analyze_dataset_for_query(query)
+
+                 # Additional dataset analyses that are always included
+                 crop_recommendation_analysis = analyze_crop_recommendation_dataset(query)
+                 climate_impact_analysis = analyze_climate_impact_dataset(query)
+
+             # Conditional dataset analysis
+             soybean_analysis = None
+             if st.session_state.use_soybean_dataset:
+                 status.update(label="Analyzing the soybean agriculture dataset...")
+                 with st.spinner("Analyzing the soybean dataset..."):
+                     soybean_analysis = analyze_soybean_dataset(query)
+
+             if use_web_search:
+                 # Run the web search quietly without exposing intermediate steps
+                 with st.spinner("Gathering information..."):
+                     search_content = do_web_search(keywords(query, top=5))
+                     video_results = brave_video_search(query, 2)
+                     news_results = brave_news_search(query, 3)
+
+             file_content = None
+             if has_uploaded_files:
+                 status.update(label="Processing uploaded files...")
+                 with st.spinner("Analyzing files..."):
+                     file_content = process_uploaded_files(uploaded_files)
+
+             valid_videos = []
+             for vid in video_results:
+                 url = vid.get('video_url')
+                 if url and url.startswith('http'):
+                     valid_videos.append({
+                         'url': url,
+                         'title': vid.get('title', 'Video'),
+                         'thumbnail': vid.get('thumbnail_url', ''),
+                         'source': vid.get('source', 'Video source')
+                     })
+
+             status.update(label="Preparing the comprehensive analysis...")
+             sys_prompt = get_system_prompt(
+                 mode=st.session_state.analysis_mode,
+                 style=st.session_state.response_style,
+                 include_search_results=use_web_search,
+                 include_uploaded_files=has_uploaded_files
+             )
+
+             api_messages = [
+                 {"role": "system", "content": sys_prompt}
+             ]
+
+             user_content = query
+             # Always include the base dataset analysis results
+             user_content += "\n\n" + dataset_analysis
+             user_content += "\n\n" + crop_recommendation_analysis
+             user_content += "\n\n" + climate_impact_analysis
+
+             # Include conditional dataset results
+             if soybean_analysis:
+                 user_content += "\n\n" + soybean_analysis
+
+             if search_content:
+                 user_content += "\n\n" + search_content
+             if file_content:
+                 user_content += "\n\n" + file_content
+
+             if valid_videos:
+                 user_content += "\n\n# Related Videos\n"
+                 for i, vid in enumerate(valid_videos):
+                     user_content += f"\n{i+1}. **{vid['title']}** - [{vid['source']}]({vid['url']})\n"
+
+             api_messages.append({"role": "user", "content": user_content})
+
+             try:
+                 stream = client.chat.completions.create(
+                     model="gpt-4.1-mini",
+                     messages=api_messages,
+                     temperature=1,
+                     max_tokens=MAX_TOKENS,
+                     top_p=1,
+                     stream=True
+                 )
+
+                 for chunk in stream:
+                     if chunk.choices and len(chunk.choices) > 0 and chunk.choices[0].delta.content is not None:
+                         content_delta = chunk.choices[0].delta.content
+                         full_response += content_delta
+                         message_placeholder.markdown(full_response + "▌", unsafe_allow_html=True)
+
+                 message_placeholder.markdown(full_response, unsafe_allow_html=True)
+
+                 if valid_videos:
+                     st.subheader("Related Videos")
+                     for video in valid_videos:
+                         video_title = video.get('title', 'Related video')
+                         video_url = video.get('url', '')
+
+                         st.markdown(f"🎬 **[{video_title}]({video_url})**")
+                         st.write(f"Source: {video.get('source', 'Unknown')}")
+
+                 status.update(label="Response complete!", state="complete")
+
+                 st.session_state.messages.append({
+                     "role": "assistant",
+                     "content": full_response,
+                     "videos": valid_videos
+                 })
+
+             except Exception as api_error:
+                 error_message = str(api_error)
+                 logging.error(f"API error: {error_message}")
+                 status.update(label=f"Error: {error_message}", state="error")
+                 raise Exception(f"Response generation error: {error_message}")
+
+             if st.session_state.generate_image and full_response:
+                 with st.spinner("Generating a custom image..."):
+                     try:
+                         ip = extract_image_prompt(full_response, query)
+                         img, cap = generate_image(ip)
+                         if img:
+                             st.subheader("AI-Generated Image")
+                             st.image(img, caption=cap, use_container_width=True)
+                     except Exception as img_error:
+                         logging.error(f"Image generation error: {str(img_error)}")
+                         st.warning("Failed to generate a custom image.")
+
+             if full_response:
+                 st.subheader("Download This Response")
+                 c1, c2 = st.columns(2)
+                 c1.download_button(
+                     "Markdown",
+                     data=full_response,
+                     file_name=f"{query[:30]}.md",
+                     mime="text/markdown"
+                 )
+                 c2.download_button(
+                     "HTML",
+                     data=md_to_html(full_response, query[:30]),
+                     file_name=f"{query[:30]}.html",
+                     mime="text/html"
+                 )
+
+             if st.session_state.auto_save and st.session_state.messages:
+                 try:
+                     fn = f"conversation_history_auto_{datetime.now():%Y%m%d_%H%M%S}.json"
+                     with open(fn, "w", encoding="utf-8") as fp:
+                         json.dump(st.session_state.messages, fp, ensure_ascii=False, indent=2)
+                 except Exception as e:
+                     logging.error(f"Auto-save failed: {e}")
+
+         except Exception as e:
+             error_message = str(e)
+             placeholder.error(f"An error occurred: {error_message}")
+             logging.error(f"Input processing error: {error_message}")
+             ans = f"An error occurred while processing the request: {error_message}"
+             st.session_state.messages.append({"role": "assistant", "content": ans})
+
+ # ──────────────────────────────── main ────────────────────────────────────
+ def main():
+     st.write("==== Application start time:", datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "=====")
+     agricultural_price_forecast_app()
+
+ if __name__ == "__main__":
+     main()
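
A note for anyone reproducing this Space outside Hugging Face: `app (35).py` reads three secrets at startup — `OPENAI_API_KEY`, `SERPHOUSE_API_KEY` (the Brave Search key kept under its legacy name), and `KDATA_API`, which it copies into `KAGGLE_KEY` for kagglehub. Below is a minimal sketch of that credential setup; the username placeholder is hypothetical and not part of the commit, since kagglehub typically pairs `KAGGLE_KEY` with `KAGGLE_USERNAME` when authenticating via environment variables (public datasets may also download without credentials).

```python
# Minimal sketch: replicate the Kaggle credential setup assumed by app (35).py.
# "your-kaggle-username" is a hypothetical placeholder, not from the commit.
import os

os.environ.setdefault("KAGGLE_USERNAME", "your-kaggle-username")
os.environ.setdefault("KAGGLE_KEY", os.getenv("KDATA_API", ""))  # mirrors the app

import kagglehub

# The same public dataset the app downloads at startup; the returned path is a
# local cache directory that load_agriculture_dataset() then walks for CSV files.
path = kagglehub.dataset_download("unitednations/global-food-agriculture-statistics")
print("dataset cached at:", path)
```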
requirements (16).txt ADDED
@@ -0,0 +1,17 @@
+ streamlit
+ anthropic
+ gradio
+ gradio-client
+ httpx
+ requests
+ python-dotenv
+ markdown
+ weasyprint
+ beautifulsoup4>=4.12.0
+ lxml>=4.9.0
+ pillow
+ pandas
+ PyPDF2
+ openai
+ tabulate
+ kagglehub
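
One easy-to-miss entry is `tabulate`: `pandas.DataFrame.to_markdown()`, which the app calls throughout its dataset summaries and CSV previews, delegates to the `tabulate` package and raises an ImportError when it is missing. A quick sanity check:

```python
# to_markdown() depends on the optional 'tabulate' package, hence its
# presence in the requirements file. The values below are illustrative only.
import pandas as pd

df = pd.DataFrame({"crop": ["rice", "wheat"], "price": [410, 280]})
print(df.to_markdown(index=False))
```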
stanard_map (1).csv ADDED
The diff for this file is too large to render.