import os, json, re, logging, requests, markdown, time, io
from datetime import datetime
import random
import base64
from io import BytesIO
from PIL import Image

import streamlit as st
from openai import OpenAI

from gradio_client import Client
import pandas as pd
import PyPDF2
import kagglehub


OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
BRAVE_KEY = os.getenv("SERPHOUSE_API_KEY", "")
BRAVE_ENDPOINT = "https://api.search.brave.com/res/v1/web/search"
BRAVE_VIDEO_ENDPOINT = "https://api.search.brave.com/res/v1/videos/search"
BRAVE_NEWS_ENDPOINT = "https://api.search.brave.com/res/v1/news/search"
IMAGE_API_URL = "http://211.233.58.201:7896"
MAX_TOKENS = 7999
KAGGLE_API_KEY = os.getenv("KDATA_API", "")

os.environ["KAGGLE_KEY"] = KAGGLE_API_KEY
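# Note: kagglehub normally reads credentials from ~/.kaggle/kaggle.json or the
# KAGGLE_USERNAME / KAGGLE_KEY environment variables. Only KAGGLE_KEY is exported
# above (from KDATA_API), so make sure a matching KAGGLE_USERNAME is also configured
# in the environment if the dataset downloads below fail to authenticate.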


ANALYSIS_MODES = {
    "price_forecast": "Crop price forecasting and market analysis",
    "market_trend": "Market trend and demand pattern analysis",
    "production_analysis": "Production analysis and food security outlook",
    "agricultural_policy": "Agricultural policy and regulatory impact analysis",
    "climate_impact": "Analysis of climate change impacts on agriculture"
}

RESPONSE_STYLES = {
    "professional": "Professional and academic analysis",
    "simple": "Concise, easy-to-understand explanations",
    "detailed": "Detailed, statistics-based in-depth analysis",
    "action_oriented": "Actionable advice and recommendations"
}

EXAMPLE_QUERIES = {
    "example1": "Analyze rice price trends and the outlook for the next 6 months",
    "example2": "Write a report on fruit production strategy and demand forecasts for Korea under climate change.",
    "example3": "Which crops would be promising to grow in Jeungpyeong-gun, Chungcheongbuk-do from 2025 to 2030? They should offer good profitability and be easy to manage."
}


logging.basicConfig(level=logging.INFO,
                    format="%(asctime)s - %(levelname)s - %(message)s")


@st.cache_resource
def get_openai_client():
    """Create an OpenAI client with timeout and retry settings."""
    if not OPENAI_API_KEY:
        raise RuntimeError("⚠️ The OPENAI_API_KEY environment variable is not set.")
    return OpenAI(
        api_key=OPENAI_API_KEY,
        timeout=60.0,
        max_retries=3
    )


@st.cache_resource
def load_agriculture_dataset():
    """Download and load the UN agriculture dataset from Kaggle"""
    try:
        path = kagglehub.dataset_download("unitednations/global-food-agriculture-statistics")
        logging.info(f"Kaggle dataset downloaded to: {path}")

        available_files = []
        for root, dirs, files in os.walk(path):
            for file in files:
                if file.endswith('.csv'):
                    file_path = os.path.join(root, file)
                    file_size = os.path.getsize(file_path) / (1024 * 1024)
                    available_files.append({
                        'name': file,
                        'path': file_path,
                        'size_mb': round(file_size, 2)
                    })

        return {
            'base_path': path,
            'files': available_files
        }
    except Exception as e:
        logging.error(f"Error loading Kaggle dataset: {e}")
        return None


@st.cache_resource
def load_soybean_dataset():
    """Download and load the Advanced Soybean Agricultural Dataset from Kaggle"""
    try:
        path = kagglehub.dataset_download("wisam1985/advanced-soybean-agricultural-dataset-2025")
        logging.info(f"Soybean dataset downloaded to: {path}")

        available_files = []
        for root, dirs, files in os.walk(path):
            for file in files:
                if file.endswith(('.csv', '.xlsx')):
                    file_path = os.path.join(root, file)
                    file_size = os.path.getsize(file_path) / (1024 * 1024)
                    available_files.append({
                        'name': file,
                        'path': file_path,
                        'size_mb': round(file_size, 2)
                    })

        return {
            'base_path': path,
            'files': available_files
        }
    except Exception as e:
        logging.error(f"Error loading Soybean dataset: {e}")
        return None


@st.cache_resource
def load_crop_recommendation_dataset():
    """Download and load the Soil and Environmental Variables Crop Recommendation Dataset"""
    try:
        path = kagglehub.dataset_download("agriinnovate/agricultural-crop-dataset")
        logging.info(f"Crop recommendation dataset downloaded to: {path}")

        available_files = []
        for root, dirs, files in os.walk(path):
            for file in files:
                if file.endswith(('.csv', '.xlsx')):
                    file_path = os.path.join(root, file)
                    file_size = os.path.getsize(file_path) / (1024 * 1024)
                    available_files.append({
                        'name': file,
                        'path': file_path,
                        'size_mb': round(file_size, 2)
                    })

        return {
            'base_path': path,
            'files': available_files
        }
    except Exception as e:
        logging.error(f"Error loading Crop recommendation dataset: {e}")
        return None


@st.cache_resource
def load_climate_impact_dataset():
    """Download and load the Climate Change Impact on Agriculture Dataset"""
    try:
        path = kagglehub.dataset_download("waqi786/climate-change-impact-on-agriculture")
        logging.info(f"Climate impact dataset downloaded to: {path}")

        available_files = []
        for root, dirs, files in os.walk(path):
            for file in files:
                if file.endswith(('.csv', '.xlsx')):
                    file_path = os.path.join(root, file)
                    file_size = os.path.getsize(file_path) / (1024 * 1024)
                    available_files.append({
                        'name': file,
                        'path': file_path,
                        'size_mb': round(file_size, 2)
                    })

        return {
            'base_path': path,
            'files': available_files
        }
    except Exception as e:
        logging.error(f"Error loading Climate impact dataset: {e}")
        return None
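
# All four loaders above follow the same pattern: download via kagglehub, walk the
# download directory for data files, and return their names, paths and sizes. Because
# they are wrapped in st.cache_resource, each dataset is downloaded at most once per
# Streamlit server process and the file inventory is reused across reruns.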

def get_dataset_summary():
    """Generate a summary of the available agriculture datasets"""
    dataset_info = load_agriculture_dataset()
    if not dataset_info:
        return "Failed to load the UN global food and agriculture statistics dataset."

    summary = "# UN Global Food and Agriculture Statistics Dataset\n\n"
    summary += f"It contains a total of {len(dataset_info['files'])} CSV files.\n\n"

    summary += "## Available data files:\n\n"
    for i, file_info in enumerate(dataset_info['files'][:10], 1):
        summary += f"{i}. **{file_info['name']}** ({file_info['size_mb']} MB)\n"

    if len(dataset_info['files']) > 10:
        summary += f"\n...and {len(dataset_info['files']) - 10} more files\n"

    try:
        if dataset_info['files']:
            sample_file = dataset_info['files'][0]['path']
            df = pd.read_csv(sample_file, nrows=5)
            summary += "\n## Sample data structure:\n\n"
            summary += df.head(5).to_markdown() + "\n\n"

            summary += "## Dataset variable descriptions:\n\n"
            for col in df.columns:
                summary += f"- **{col}**: [description needed]\n"
    except Exception as e:
        logging.error(f"Error generating dataset sample: {e}")
        summary += "\nAn error occurred while generating the data sample.\n"

    return summary


def analyze_dataset_for_query(query):
    """Find and analyze relevant data from the dataset based on the query"""
    dataset_info = load_agriculture_dataset()
    if not dataset_info:
        return "Could not load the dataset. Please check the Kaggle API connection."

    query_lower = query.lower()

    # Map query keywords to related terms that may appear in dataset file names.
    keywords = {
        "rice": ["rice", "grain"],
        "wheat": ["wheat", "grain"],
        "corn": ["corn", "maize", "grain"],
        "vegetable": ["vegetable", "produce"],
        "fruit": ["fruit", "produce"],
        "price": ["price", "cost", "value"],
        "production": ["production", "yield", "harvest"],
        "export": ["export", "trade"],
        "import": ["import", "trade"],
        "consumption": ["consumption", "demand"]
    }

    relevant_files = []

    found_keywords = []
    for k_term, e_terms in keywords.items():
        if k_term in query_lower:
            found_keywords.extend([k_term] + e_terms)

    if not found_keywords:
        relevant_files = dataset_info['files'][:5]
    else:
        for file_info in dataset_info['files']:
            file_name_lower = file_info['name'].lower()
            for keyword in found_keywords:
                if keyword.lower() in file_name_lower:
                    relevant_files.append(file_info)
                    break

        if not relevant_files:
            relevant_files = dataset_info['files'][:5]

    analysis_result = "# Agricultural Data Analysis Results\n\n"
    analysis_result += f"Analysis performed for the query: '{query}'.\n\n"

    if found_keywords:
        analysis_result += f"## Analysis keywords: {', '.join(set(found_keywords))}\n\n"

    for file_info in relevant_files[:3]:
        try:
            analysis_result += f"## File: {file_info['name']}\n\n"

            df = pd.read_csv(file_info['path'])

            analysis_result += f"- Rows: {len(df)}\n"
            analysis_result += f"- Columns: {len(df.columns)}\n"
            analysis_result += f"- Column list: {', '.join(df.columns.tolist())}\n\n"

            analysis_result += "### Data sample:\n\n"
            analysis_result += df.head(5).to_markdown() + "\n\n"

            numeric_cols = df.select_dtypes(include=['number']).columns
            if len(numeric_cols) > 0:
                analysis_result += "### Basic statistics:\n\n"
                stats_df = df[numeric_cols].describe()
                analysis_result += stats_df.to_markdown() + "\n\n"

            time_cols = [col for col in df.columns if 'year' in col.lower() or 'date' in col.lower()]
            if time_cols:
                analysis_result += "### Time series patterns:\n\n"
                analysis_result += "The dataset contains time-related columns, so time series analysis is possible.\n\n"

        except Exception as e:
            logging.error(f"Error analyzing file {file_info['name']}: {e}")
            analysis_result += f"An error occurred while analyzing this file: {str(e)}\n\n"

    analysis_result += "## Insights for Crop Price Forecasting and Demand Analysis\n\n"
    analysis_result += "The following insights are provided based on information extracted from the dataset:\n\n"
    analysis_result += "1. Data-driven analysis (basic summary)\n"
    analysis_result += "2. Key price and demand trends\n"
    analysis_result += "3. Production and trade patterns\n\n"

    analysis_result += "This analysis is based on the UN global food and agriculture statistics dataset.\n\n"

    return analysis_result


def analyze_crop_recommendation_dataset(query):
    """Find and analyze crop recommendation data based on the query"""
    try:
        dataset_info = load_crop_recommendation_dataset()
        if not dataset_info or not dataset_info['files']:
            return "Could not load the crop recommendation dataset."

        analysis_result = "# Crop Recommendation Analysis Based on Soil and Environmental Variables\n\n"

        for file_info in dataset_info['files'][:2]:
            try:
                analysis_result += f"## File: {file_info['name']}\n\n"

                if file_info['name'].endswith('.csv'):
                    df = pd.read_csv(file_info['path'])
                elif file_info['name'].endswith('.xlsx'):
                    df = pd.read_excel(file_info['path'])
                else:
                    continue

                analysis_result += f"- Data size: {len(df)} rows x {len(df.columns)} columns\n"
                analysis_result += f"- Crop types included: "

                crop_cols = [col for col in df.columns if 'crop' in col.lower()]
                if crop_cols:
                    main_crop_col = crop_cols[0]
                    unique_crops = df[main_crop_col].unique()
                    analysis_result += f"{len(unique_crops)} types ({', '.join(str(c) for c in unique_crops[:10])})\n\n"
                else:
                    analysis_result += "No crop information column found\n\n"

                env_factors = [col for col in df.columns if col.lower() not in ['crop', 'label', 'id', 'index']]
                if env_factors:
                    analysis_result += f"- Environmental factors considered: {', '.join(env_factors)}\n\n"

                analysis_result += "### Data sample:\n\n"
                analysis_result += df.head(5).to_markdown() + "\n\n"

                numeric_factors = []  # keep defined even when there are no environmental columns
                if env_factors:
                    numeric_factors = df[env_factors].select_dtypes(include=['number']).columns
                    if len(numeric_factors) > 0:
                        analysis_result += "### Environmental factor statistics:\n\n"
                        stats_df = df[numeric_factors].describe().round(2)
                        analysis_result += stats_df.to_markdown() + "\n\n"

                query_terms = query.lower().split()
                relevant_crops = []

                if crop_cols:
                    for crop in df[main_crop_col].unique():
                        crop_str = str(crop).lower()
                        if any(term in crop_str for term in query_terms):
                            relevant_crops.append(crop)

                if relevant_crops:
                    analysis_result += f"### Query-related crop analysis: {', '.join(str(c) for c in relevant_crops)}\n\n"
                    for crop in relevant_crops[:3]:
                        crop_data = df[df[main_crop_col] == crop]
                        analysis_result += f"#### {crop} summary:\n\n"
                        analysis_result += f"- Number of samples: {len(crop_data)}\n"

                        if len(numeric_factors) > 0:
                            crop_stats = crop_data[numeric_factors].describe().round(2)
                            analysis_result += f"- Average environmental conditions:\n"
                            for factor in numeric_factors[:5]:
                                analysis_result += f"  * {factor}: {crop_stats.loc['mean', factor]}\n"
                            analysis_result += "\n"

            except Exception as e:
                logging.error(f"Error analyzing crop recommendation file {file_info['name']}: {e}")
                analysis_result += f"Analysis error: {str(e)}\n\n"

        analysis_result += "## Crop Recommendation Insights\n\n"
        analysis_result += "Based on the soil and environmental variables dataset, the following key insights are provided:\n\n"
        analysis_result += "1. Crops recommended for the regional environment\n"
        analysis_result += "2. Key environmental factors affecting crop productivity\n"
        analysis_result += "3. Optimal crop selection criteria for sustainable agriculture\n\n"

        return analysis_result

    except Exception as e:
        logging.error(f"Crop recommendation dataset analysis error: {e}")
        return "An error occurred while analyzing the crop recommendation dataset."


def analyze_climate_impact_dataset(query):
    """Find and analyze climate impact on agriculture data based on the query"""
    try:
        dataset_info = load_climate_impact_dataset()
        if not dataset_info or not dataset_info['files']:
            return "Could not load the climate change impact dataset."

        analysis_result = "# Climate Change Impact on Agriculture Data Analysis\n\n"

        for file_info in dataset_info['files'][:2]:
            try:
                analysis_result += f"## File: {file_info['name']}\n\n"

                if file_info['name'].endswith('.csv'):
                    df = pd.read_csv(file_info['path'])
                elif file_info['name'].endswith('.xlsx'):
                    df = pd.read_excel(file_info['path'])
                else:
                    continue

                analysis_result += f"- Data size: {len(df)} rows x {len(df.columns)} columns\n"

                region_cols = [col for col in df.columns if 'region' in col.lower() or 'country' in col.lower()]
                if region_cols:
                    main_region_col = region_cols[0]
                    regions = df[main_region_col].unique()
                    analysis_result += f"- Regions included: {len(regions)} ({', '.join(str(r) for r in regions[:5])})\n"

                climate_cols = [col for col in df.columns if any(term in col.lower() for term in
                                                                 ['temp', 'rainfall', 'precipitation', 'climate', 'weather'])]
                crop_cols = [col for col in df.columns if any(term in col.lower() for term in
                                                              ['yield', 'production', 'crop', 'harvest'])]

                if climate_cols:
                    analysis_result += f"- Climate-related variables: {', '.join(climate_cols)}\n"
                if crop_cols:
                    analysis_result += f"- Crop-related variables: {', '.join(crop_cols)}\n\n"

                analysis_result += "### Data sample:\n\n"
                analysis_result += df.head(5).to_markdown() + "\n\n"

                year_cols = [col for col in df.columns if 'year' in col.lower() or 'date' in col.lower()]
                if year_cols:
                    analysis_result += "### Time series climate impact patterns:\n\n"
                    analysis_result += "This dataset allows analysis of the relationship between climate change and agricultural productivity over time.\n\n"

                key_vars = climate_cols + crop_cols
                numeric_vars = df[key_vars].select_dtypes(include=['number']).columns
                if len(numeric_vars) > 0:
                    analysis_result += "### Key variable statistics:\n\n"
                    stats_df = df[numeric_vars].describe().round(2)
                    analysis_result += stats_df.to_markdown() + "\n\n"

                if len(climate_cols) > 0 and len(crop_cols) > 0:
                    numeric_climate = df[climate_cols].select_dtypes(include=['number']).columns
                    numeric_crop = df[crop_cols].select_dtypes(include=['number']).columns

                    if len(numeric_climate) > 0 and len(numeric_crop) > 0:
                        analysis_result += "### Correlation between climate and crop production:\n\n"
                        try:
                            corr_vars = list(numeric_climate)[:2] + list(numeric_crop)[:2]
                            corr_df = df[corr_vars].corr().round(3)
                            analysis_result += corr_df.to_markdown() + "\n\n"
                            analysis_result += "The correlation table above shows the strength of the relationship between climate variables and crop productivity.\n\n"
                        except Exception:
                            analysis_result += "An error occurred while computing correlations.\n\n"

            except Exception as e:
                logging.error(f"Error analyzing climate impact file {file_info['name']}: {e}")
                analysis_result += f"Analysis error: {str(e)}\n\n"

        analysis_result += "## Climate Change Impact Insights\n\n"
        analysis_result += "Based on the climate change impact on agriculture data, the following insights are provided:\n\n"
        analysis_result += "1. Crop productivity patterns under temperature change\n"
        analysis_result += "2. The impact of rainfall changes on agricultural yields\n"
        analysis_result += "3. Agricultural strategies for adapting to climate change\n"
        analysis_result += "4. Regional climate vulnerability and adaptation measures\n\n"

        return analysis_result

    except Exception as e:
        logging.error(f"Climate impact dataset analysis error: {e}")
        return "An error occurred while analyzing the climate change impact dataset."


def analyze_soybean_dataset(query):
    """Find and analyze soybean agriculture data based on the query"""
    try:
        dataset_info = load_soybean_dataset()
        if not dataset_info or not dataset_info['files']:
            return "Could not load the soybean agriculture dataset."

        analysis_result = "# Advanced Soybean Agriculture Data Analysis\n\n"

        for file_info in dataset_info['files'][:2]:
            try:
                analysis_result += f"## File: {file_info['name']}\n\n"

                if file_info['name'].endswith('.csv'):
                    df = pd.read_csv(file_info['path'])
                elif file_info['name'].endswith('.xlsx'):
                    df = pd.read_excel(file_info['path'])
                else:
                    continue

                analysis_result += f"- Data size: {len(df)} rows x {len(df.columns)} columns\n"

                location_cols = [col for col in df.columns if any(term in col.lower() for term in
                                                                  ['region', 'location', 'area', 'country'])]
                if location_cols:
                    main_loc_col = location_cols[0]
                    locations = df[main_loc_col].unique()
                    analysis_result += f"- Regions included: {len(locations)} ({', '.join(str(loc) for loc in locations[:5])})\n"

                yield_cols = [col for col in df.columns if any(term in col.lower() for term in
                                                               ['yield', 'production', 'harvest'])]
                if yield_cols:
                    analysis_result += f"- Productivity-related variables: {', '.join(yield_cols)}\n"

                env_cols = [col for col in df.columns if any(term in col.lower() for term in
                                                             ['temp', 'rainfall', 'soil', 'fertilizer', 'nutrient', 'irrigation'])]
                if env_cols:
                    analysis_result += f"- Environment-related variables: {', '.join(env_cols)}\n\n"

                analysis_result += "### Data sample:\n\n"
                analysis_result += df.head(5).to_markdown() + "\n\n"

                key_vars = yield_cols + env_cols
                numeric_vars = df[key_vars].select_dtypes(include=['number']).columns
                if len(numeric_vars) > 0:
                    analysis_result += "### Key variable statistics:\n\n"
                    stats_df = df[numeric_vars].describe().round(2)
                    analysis_result += stats_df.to_markdown() + "\n\n"

                year_cols = [col for col in df.columns if 'year' in col.lower() or 'date' in col.lower()]
                if year_cols:
                    analysis_result += "### Time series productivity patterns:\n\n"
                    analysis_result += "This dataset allows tracking of changes in soybean productivity over time.\n\n"

                if len(env_cols) > 0 and len(yield_cols) > 0:
                    numeric_env = df[env_cols].select_dtypes(include=['number']).columns
                    numeric_yield = df[yield_cols].select_dtypes(include=['number']).columns

                    if len(numeric_env) > 0 and len(numeric_yield) > 0:
                        analysis_result += "### Correlation between environmental factors and soybean productivity:\n\n"
                        try:
                            corr_vars = list(numeric_env)[:3] + list(numeric_yield)[:2]
                            corr_df = df[corr_vars].corr().round(3)
                            analysis_result += corr_df.to_markdown() + "\n\n"
                        except Exception:
                            analysis_result += "An error occurred while computing correlations.\n\n"

            except Exception as e:
                logging.error(f"Error analyzing soybean file {file_info['name']}: {e}")
                analysis_result += f"Analysis error: {str(e)}\n\n"

        analysis_result += "## Soybean Agriculture Insights\n\n"
        analysis_result += "Based on the advanced soybean agriculture dataset, the following insights are provided:\n\n"
        analysis_result += "1. Optimal environmental conditions for soybean production\n"
        analysis_result += "2. Regional patterns in soybean productivity\n"
        analysis_result += "3. Agricultural techniques and approaches for improving productivity\n"
        analysis_result += "4. Guidance on selecting soybean varieties to match market demand\n\n"

        return analysis_result

    except Exception as e:
        logging.error(f"Soybean dataset analysis error: {e}")
        return "An error occurred while analyzing the soybean agriculture dataset."


def get_system_prompt(mode="price_forecast", style="professional", include_search_results=True, include_uploaded_files=False) -> str:
    """
    Generate a system prompt for the 'Agricultural Price & Demand Forecast AI Assistant' interface based on:
    - The selected analysis mode and style
    - Guidelines for using agricultural datasets, web search results and uploaded files
    """
    base_prompt = """
You are an AI assistant acting as an agricultural data expert, performing crop price forecasting and demand analysis.

Main responsibilities:
1. Analyze agricultural markets based on the UN global food and agriculture statistics dataset
2. Forecast crop price trends and analyze demand patterns
3. Provide clear, well-grounded analysis based on the data
4. Organize relevant information and insights systematically
5. Use charts, tables, and similar elements where appropriate to aid visual understanding
6. Apply insights extracted from the soil and environmental variables crop recommendation dataset
7. Analyze environmental change scenarios using the climate change impact on agriculture dataset

Important guidelines:
- Provide objective analysis grounded in the data
- Explain the analysis process and methodology clearly
- Present statistical reliability and limitations transparently
- Supplement analysis results with easy-to-understand visual elements
- Structure responses systematically using markdown
"""

    mode_prompts = {
        "price_forecast": """
Focus on crop price forecasting and market analysis:
- Provide forecasts based on historical price data patterns
- Analyze drivers of price volatility (seasonality, weather, policy, etc.)
- Present short-term and mid/long-term price outlooks
- Identify domestic and international factors affecting prices
- Emphasize market uncertainty and risk factors
""",
        "market_trend": """
Focus on market trends and demand pattern analysis:
- Identify demand shifts for key agricultural products
- Analyze consumer preferences and purchasing behavior
- Identify market segments and niche market opportunities
- Evaluate market expansion/contraction trends
- Analyze demand elasticity and price sensitivity
""",
        "production_analysis": """
Focus on production analysis and food security outlook:
- Analyze crop production trends and drivers of variability
- Evaluate the relationship between food production and population growth
- Compare production capacity across countries/regions
- Identify food security risks and vulnerabilities
- Suggest productivity improvement strategies and opportunities
""",
        "agricultural_policy": """
Focus on agricultural policy and regulatory impact analysis:
- Analyze the market impact of government policies, subsidies, and regulations
- Evaluate the effect of international trade policy and tariffs on crop prices
- Review the effectiveness of agricultural support programs
- Forecast market adjustments to changes in the regulatory environment
- Analyze intended and unintended consequences of policy interventions
""",
        "climate_impact": """
Focus on analyzing climate change impacts on agriculture:
- Analyze correlations between climate change and crop yields/quality
- Evaluate how extreme weather events affect price volatility
- Project changes in agricultural patterns under long-term climate trends
- Suggest strategies for climate-resilient agricultural systems
- Map regional climate risk exposure and vulnerability
"""
    }

    style_guides = {
        "professional": "Use a professional, academic tone. Use technical terms appropriately and provide systematic data analysis.",
        "simple": "Explain in plain, concise language. Minimize jargon and convey core concepts in everyday terms.",
        "detailed": "Provide detailed, comprehensive analysis. Present in-depth analysis that considers multiple data points, statistical nuances, and several scenarios.",
        "action_oriented": "Focus on actionable insights and concrete recommendations. Include 'Next steps' and 'Practical advice' sections."
    }

    dataset_guide = """
Agricultural dataset usage guidelines:
- Use the UN global food and agriculture statistics dataset as the basis of your analysis
- Integrate insights from the soil and environmental variables crop recommendation dataset into crop selection and growing-condition analysis
- Use the climate change impact on agriculture dataset for sustainability and future outlook analysis
- Cite data sources and years clearly
- Derive insights by analyzing relationships between key variables within the datasets
- Mention data limitations and uncertainty transparently
- Identify data gaps where necessary and suggest areas that need further research
"""

    soybean_guide = """
Advanced soybean agriculture dataset usage guidelines:
- Analyze soybean production conditions and yield patterns in comparison with other crops
- Provide insights on the economic value and market opportunities of soybean farming
- Highlight the key environmental factors affecting soybean productivity
- Suggest innovations in soybean cultivation techniques and ways to improve profitability
- Share practical approaches for sustainable soybean farming
"""

    crop_recommendation_guide = """
Soil and environmental variables crop recommendation usage guidelines:
- Present optimal crop selection criteria suited to regional characteristics
- Analyze the correlation between soil conditions and crop suitability
- Use crop productivity predictions driven by environmental variables
- Suggest crop selection strategies for improving agricultural productivity and profitability
- Recommend crop diversification approaches for sustainable agriculture
"""

    climate_impact_guide = """
Climate change impact on agriculture dataset usage guidelines:
- Project changes in crop productivity under climate change scenarios
- Suggest climate-adaptive agricultural techniques and strategies
- Analyze regional climate risk factors and response measures
- Suggest crop selection and planting-schedule adjustments to cope with climate change
- Evaluate how climate change affects crop prices and market trends
"""

    search_guide = """
Web search result usage guidelines:
- Use search results as up-to-date market information that supplements the dataset analysis
- Include the source of each piece of information as a markdown link: [source name](URL)
- Cite a source for every major claim or data point
- When sources conflict, explain the different perspectives and their reliability
- Include links to relevant videos in the format [Video: title](video_url)
- Integrate search information into a consistent, well-organized response
- Include a final "References" section listing all major sources
"""

    upload_guide = """
Uploaded file usage guidelines:
- Use uploaded files as a primary source of information for the response
- Extract and emphasize file information directly relevant to the query
- Quote relevant passages and cite the specific file as the source
- Convert numeric data from CSV files into summary sentences
- Reference specific sections or pages for PDF content
- Integrate file information seamlessly with web search results
- When information conflicts, prioritize file content over generic web results
"""

    final_prompt = base_prompt

    if mode in mode_prompts:
        final_prompt += "\n" + mode_prompts[mode]

    if style in style_guides:
        final_prompt += f"\n\nAnalysis style: {style_guides[style]}"

    final_prompt += f"\n\n{dataset_guide}"
    final_prompt += f"\n\n{crop_recommendation_guide}"
    final_prompt += f"\n\n{climate_impact_guide}"

    if st.session_state.get('use_soybean_dataset', False):
        final_prompt += f"\n\n{soybean_guide}"

    if include_search_results:
        final_prompt += f"\n\n{search_guide}"

    if include_uploaded_files:
        final_prompt += f"\n\n{upload_guide}"

    final_prompt += """
\n\nResponse format requirements:
- Structure the response systematically using markdown headings (## and ###)
- Emphasize important points with bold text (**text**)
- Add a "Related questions" section with 3-5 follow-up questions at the end
- Finish the response with appropriate spacing and paragraph breaks
- Make all links clickable markdown: [text](url)
- Where possible, represent data visually (describe tables, charts, etc.)
"""
    return final_prompt


@st.cache_data(ttl=3600)
def brave_search(query: str, count: int = 10):
    if not BRAVE_KEY:
        raise RuntimeError("⚠️ SERPHOUSE_API_KEY (Brave API Key) environment variable is empty.")

    headers = {"Accept": "application/json", "Accept-Encoding": "gzip", "X-Subscription-Token": BRAVE_KEY}
    params = {"q": query + " agricultural product price trends agriculture data", "count": str(count)}

    for attempt in range(3):
        try:
            r = requests.get(BRAVE_ENDPOINT, headers=headers, params=params, timeout=15)
            r.raise_for_status()
            data = r.json()

            raw = data.get("web", {}).get("results") or data.get("results", [])
            if not raw:
                logging.warning(f"No Brave search results found. Response: {data}")
                raise ValueError("No search results found.")

            arts = []
            for i, res in enumerate(raw[:count], 1):
                url = res.get("url", res.get("link", ""))
                host = re.sub(r"https?://(www\.)?", "", url).split("/")[0]
                arts.append({
                    "index": i,
                    "title": res.get("title", "No title"),
                    "link": url,
                    "snippet": res.get("description", res.get("text", "No snippet")),
                    "displayed_link": host
                })

            return arts

        except Exception as e:
            logging.error(f"Brave search failure (attempt {attempt+1}/3): {e}")
            if attempt < 2:
                time.sleep(5)

    return []


@st.cache_data(ttl=3600)
def brave_video_search(query: str, count: int = 3):
    if not BRAVE_KEY:
        raise RuntimeError("⚠️ SERPHOUSE_API_KEY (Brave API Key) environment variable is empty.")

    headers = {"Accept": "application/json", "Accept-Encoding": "gzip", "X-Subscription-Token": BRAVE_KEY}
    params = {"q": query + " agricultural product price agriculture market", "count": str(count)}

    for attempt in range(3):
        try:
            r = requests.get(BRAVE_VIDEO_ENDPOINT, headers=headers, params=params, timeout=15)
            r.raise_for_status()
            data = r.json()

            results = []
            for i, vid in enumerate(data.get("results", [])[:count], 1):
                results.append({
                    "index": i,
                    "title": vid.get("title", "Video"),
                    "video_url": vid.get("url", ""),
                    "thumbnail_url": vid.get("thumbnail", {}).get("src", ""),
                    "source": vid.get("provider", {}).get("name", "Unknown source")
                })

            return results

        except Exception as e:
            logging.error(f"Brave video search failure (attempt {attempt+1}/3): {e}")
            if attempt < 2:
                time.sleep(5)

    return []


@st.cache_data(ttl=3600)
def brave_news_search(query: str, count: int = 3):
    if not BRAVE_KEY:
        raise RuntimeError("⚠️ SERPHOUSE_API_KEY (Brave API Key) environment variable is empty.")

    headers = {"Accept": "application/json", "Accept-Encoding": "gzip", "X-Subscription-Token": BRAVE_KEY}
    params = {"q": query + " agricultural product price trends agriculture", "count": str(count)}

    for attempt in range(3):
        try:
            r = requests.get(BRAVE_NEWS_ENDPOINT, headers=headers, params=params, timeout=15)
            r.raise_for_status()
            data = r.json()

            results = []
            for i, news in enumerate(data.get("results", [])[:count], 1):
                results.append({
                    "index": i,
                    "title": news.get("title", "News article"),
                    "url": news.get("url", ""),
                    "description": news.get("description", ""),
                    "source": news.get("source", "Unknown source"),
                    "date": news.get("age", "Unknown date")
                })

            return results

        except Exception as e:
            logging.error(f"Brave news search failure (attempt {attempt+1}/3): {e}")
            if attempt < 2:
                time.sleep(5)

    return []
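
# The three Brave helpers above share the same request/retry pattern: up to three
# attempts with a 5-second pause between failures, and an hour of result caching via
# st.cache_data(ttl=3600), so repeated questions on the same topic do not re-hit the API.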


def mock_results(query: str) -> str:
    ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    return (f"# Fallback search content (generated at: {ts})\n\n"
            f"The search API request for '{query}' failed or returned no results. "
            f"Please generate a response based on existing knowledge.\n\n"
            f"Consider the following points:\n\n"
            f"- Basic concepts related to {query} and their importance\n"
            f"- Commonly known related statistics or trends\n"
            f"- Expert opinions on this topic\n"
            f"- Questions the reader might have\n\n"
            f"Note: these are fallback guidelines, not real-time data.\n\n")


def do_web_search(query: str) -> str:
    try:
        arts = brave_search(query, 10)
        if not arts:
            logging.warning("No search results, using fallback content")
            return mock_results(query)

        videos = brave_video_search(query, 2)
        news = brave_news_search(query, 3)

        result = "# Web Search Results\nUse the following results to provide a comprehensive answer that complements the dataset analysis.\n\n"

        result += "## Web results\n\n"
        for a in arts[:5]:
            result += f"### Result {a['index']}: {a['title']}\n\n{a['snippet']}\n\n"
            result += f"**Source**: [{a['displayed_link']}]({a['link']})\n\n---\n"

        if news:
            result += "## News results\n\n"
            for n in news:
                result += f"### {n['title']}\n\n{n['description']}\n\n"
                result += f"**Source**: [{n['source']}]({n['url']}) - {n['date']}\n\n---\n"

        if videos:
            result += "## Video results\n\n"
            for vid in videos:
                result += f"### {vid['title']}\n\n"
                if vid.get('thumbnail_url'):
                    result += f"![{vid['title']}]({vid['thumbnail_url']})\n\n"
                result += f"**Watch**: [{vid['source']}]({vid['video_url']})\n\n"

        return result

    except Exception as e:
        logging.error(f"Web search process failed: {str(e)}")
        return mock_results(query)


def process_text_file(file):
    try:
        content = file.read()
        file.seek(0)

        text = content.decode('utf-8', errors='ignore')
        if len(text) > 10000:
            text = text[:9700] + "...(truncated)..."

        result = f"## Text file: {file.name}\n\n" + text
        return result
    except Exception as e:
        logging.error(f"Error processing text file: {str(e)}")
        return f"Error processing text file: {str(e)}"


def process_csv_file(file):
    try:
        content = file.read()
        file.seek(0)

        df = pd.read_csv(io.BytesIO(content))
        result = f"## CSV file: {file.name}\n\n"
        result += f"- Rows: {len(df)}\n"
        result += f"- Columns: {len(df.columns)}\n"
        result += f"- Column names: {', '.join(df.columns.tolist())}\n\n"

        result += "### Data preview\n\n"
        preview_df = df.head(10)
        try:
            markdown_table = preview_df.to_markdown(index=False)
            if markdown_table:
                result += markdown_table + "\n\n"
            else:
                result += "Unable to display the CSV data.\n\n"
        except Exception as e:
            logging.error(f"Markdown table conversion error: {e}")
            result += "Displaying data as text:\n\n" + str(preview_df) + "\n\n"

        num_cols = df.select_dtypes(include=['number']).columns
        if len(num_cols) > 0:
            result += "### Basic statistics\n\n"
            try:
                stats_df = df[num_cols].describe().round(2)
                stats_markdown = stats_df.to_markdown()
                if stats_markdown:
                    result += stats_markdown + "\n\n"
                else:
                    result += "Unable to display statistics.\n\n"
            except Exception as e:
                logging.error(f"Statistical info conversion error: {e}")
                result += "Unable to generate statistics.\n\n"

        return result
    except Exception as e:
        logging.error(f"CSV file processing error: {str(e)}")
        return f"Error processing CSV file: {str(e)}"


def process_pdf_file(file):
    try:
        file_bytes = file.read()
        file.seek(0)

        pdf_file = io.BytesIO(file_bytes)
        reader = PyPDF2.PdfReader(pdf_file, strict=False)

        result = f"## PDF file: {file.name}\n\n- Total pages: {len(reader.pages)}\n\n"

        max_pages = min(5, len(reader.pages))
        all_text = ""

        for i in range(max_pages):
            try:
                page = reader.pages[i]
                page_text = page.extract_text()
                current_page_text = f"### Page {i+1}\n\n"
                if page_text and len(page_text.strip()) > 0:
                    if len(page_text) > 1500:
                        current_page_text += page_text[:1500] + "...(truncated)...\n\n"
                    else:
                        current_page_text += page_text + "\n\n"
                else:
                    current_page_text += "(Unable to extract text)\n\n"

                all_text += current_page_text

                if len(all_text) > 8000:
                    all_text += "...(remaining pages truncated)...\n\n"
                    break

            except Exception as page_err:
                logging.error(f"Error processing PDF page {i+1}: {str(page_err)}")
                all_text += f"### Page {i+1}\n\n(Content extraction error: {str(page_err)})\n\n"

        if len(reader.pages) > max_pages:
            all_text += f"\nNote: only the first {max_pages} pages are shown.\n\n"

        result += "### PDF content\n\n" + all_text
        return result

    except Exception as e:
        logging.error(f"PDF file processing error: {str(e)}")
        return f"## PDF file: {file.name}\n\nError: {str(e)}\n\nUnable to process the file."


def process_uploaded_files(files):
    if not files:
        return None

    result = "# Uploaded File Contents\n\nContents of the files provided by the user.\n\n"
    for file in files:
        try:
            ext = file.name.split('.')[-1].lower()
            if ext == 'txt':
                result += process_text_file(file) + "\n\n---\n\n"
            elif ext == 'csv':
                result += process_csv_file(file) + "\n\n---\n\n"
            elif ext == 'pdf':
                result += process_pdf_file(file) + "\n\n---\n\n"
            else:
                result += f"### Unsupported file: {file.name}\n\n---\n\n"
        except Exception as e:
            logging.error(f"File processing error {file.name}: {e}")
            result += f"### File processing error: {file.name}\n\nError: {e}\n\n---\n\n"

    return result


def generate_image(prompt, w=768, h=768, g=3.5, steps=30, seed=3):
    if not prompt:
        return None, "Insufficient prompt"
    try:
        res = Client(IMAGE_API_URL).predict(
            prompt=prompt, width=w, height=h, guidance=g,
            inference_steps=steps, seed=seed,
            do_img2img=False, init_image=None,
            image2image_strength=0.8, resize_img=True,
            api_name="/generate_image"
        )
        return res[0], f"Seed: {res[1]}"
    except Exception as e:
        logging.error(e)
        return None, str(e)


def extract_image_prompt(response_text: str, topic: str):
    client = get_openai_client()
    try:
        response = client.chat.completions.create(
            model="gpt-4.1-mini",
            messages=[
                {"role": "system", "content": "Generate an image prompt about agriculture and agricultural products. Return only a one-line English prompt, with no other text."},
                {"role": "user", "content": f"Topic: {topic}\n\n---\n{response_text}\n\n---"}
            ],
            temperature=1,
            max_tokens=80,
            top_p=1
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        logging.error(f"OpenAI image prompt generation error: {e}")
        return f"A professional photograph of agricultural produce and farm fields, data visualization of crop prices and trends, high quality"


def md_to_html(md: str, title="Agricultural Demand Forecast Analysis Results"):
    return f"<!DOCTYPE html><html><head><title>{title}</title><meta charset='utf-8'></head><body>{markdown.markdown(md)}</body></html>"


def keywords(text: str, top=5):
    cleaned = re.sub(r"[^가-힣a-zA-Z0-9\s]", "", text)
    return " ".join(cleaned.split()[:top])
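
# Example (illustrative): keywords("Analyze rice price trends for 2025!") returns
# "Analyze rice price trends for". Punctuation is stripped (Korean syllables, Latin
# letters, digits and whitespace are kept) and only the first `top` words are used
# as the web-search query string.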


def agricultural_price_forecast_app():
    st.title("Agricultural Demand & Price Forecast AI Assistant")
    st.markdown("Agricultural market forecasting based on analysis of the UN global food and agriculture statistics dataset")

    if "ai_model" not in st.session_state:
        st.session_state.ai_model = "gpt-4.1-mini"
    if "messages" not in st.session_state:
        st.session_state.messages = []
    if "auto_save" not in st.session_state:
        st.session_state.auto_save = True
    if "generate_image" not in st.session_state:
        st.session_state.generate_image = False
    if "web_search_enabled" not in st.session_state:
        st.session_state.web_search_enabled = True
    if "analysis_mode" not in st.session_state:
        st.session_state.analysis_mode = "price_forecast"
    if "response_style" not in st.session_state:
        st.session_state.response_style = "professional"
    if "use_soybean_dataset" not in st.session_state:
        st.session_state.use_soybean_dataset = False

    sb = st.sidebar
    sb.title("Analysis Settings")

    if sb.checkbox("Show dataset info", value=False):
        st.info("Loading the UN global food and agriculture statistics dataset...")
        dataset_info = load_agriculture_dataset()
        if dataset_info:
            st.success(f"Dataset loaded: {len(dataset_info['files'])} files")

            with st.expander("Dataset preview", expanded=False):
                for file_info in dataset_info['files'][:5]:
                    st.write(f"**{file_info['name']}** ({file_info['size_mb']} MB)")
        else:
            st.error("Failed to load the dataset. Please check your Kaggle API settings.")

    sb.subheader("Analysis Configuration")
    sb.selectbox(
        "Analysis mode",
        options=list(ANALYSIS_MODES.keys()),
        format_func=lambda x: ANALYSIS_MODES[x],
        key="analysis_mode"
    )

    sb.selectbox(
        "Response style",
        options=list(RESPONSE_STYLES.keys()),
        format_func=lambda x: RESPONSE_STYLES[x],
        key="response_style"
    )

    sb.subheader("Dataset Selection")
    sb.checkbox(
        "Use the advanced soybean agriculture dataset",
        key="use_soybean_dataset",
        help="Provides more accurate information for soybean-related questions."
    )

    sb.info("Datasets enabled by default:\n- UN global food and agriculture statistics\n- Soil and environmental variables crop recommendation\n- Climate change impact on agriculture")

    sb.subheader("Example Questions")
    c1, c2, c3 = sb.columns(3)
    if c1.button("Rice price outlook", key="ex1"):
        process_example(EXAMPLE_QUERIES["example1"])
    if c2.button("Climate impact", key="ex2"):
        process_example(EXAMPLE_QUERIES["example2"])
    if c3.button("Jeungpyeong crops", key="ex3"):
        process_example(EXAMPLE_QUERIES["example3"])

    sb.subheader("Other Settings")
    sb.toggle("Auto save", key="auto_save")
    sb.toggle("Auto-generate image", key="generate_image")

    web_search_enabled = sb.toggle("Use web search", value=st.session_state.web_search_enabled)
    st.session_state.web_search_enabled = web_search_enabled

    if web_search_enabled:
        st.sidebar.info("✅ Web search results will be integrated into the response.")

    latest_response = next(
        (m["content"] for m in reversed(st.session_state.messages)
         if m["role"] == "assistant" and m["content"].strip()),
        None
    )
    if latest_response:
        title_match = re.search(r"# (.*?)(\n|$)", latest_response)
        if title_match:
            title = title_match.group(1).strip()
        else:
            first_line = latest_response.split('\n', 1)[0].strip()
            title = first_line[:40] + "..." if len(first_line) > 40 else first_line

        sb.subheader("Download Latest Response")
        d1, d2 = sb.columns(2)
        d1.download_button("Download as Markdown", latest_response,
                           file_name=f"{title}.md", mime="text/markdown")
        d2.download_button("Download as HTML", md_to_html(latest_response, title),
                           file_name=f"{title}.html", mime="text/html")

    up = sb.file_uploader("Load conversation history (.json)", type=["json"], key="json_uploader")
    if up:
        try:
            st.session_state.messages = json.load(up)
            sb.success("Conversation history loaded successfully")
        except Exception as e:
            sb.error(f"Failed to load: {e}")

    if sb.button("Download conversation history as JSON"):
        sb.download_button(
            "Save",
            data=json.dumps(st.session_state.messages, ensure_ascii=False, indent=2),
            file_name="conversation_history.json",
            mime="application/json"
        )

    st.subheader("File Upload")
    uploaded_files = st.file_uploader(
        "Upload files to use as reference material (txt, csv, pdf)",
        type=["txt", "csv", "pdf"],
        accept_multiple_files=True,
        key="file_uploader"
    )

    if uploaded_files:
        file_count = len(uploaded_files)
        st.success(f"{file_count} file(s) uploaded. They will be used as sources for your question.")

        with st.expander("Uploaded file preview", expanded=False):
            for idx, file in enumerate(uploaded_files):
                st.write(f"**File name:** {file.name}")
                ext = file.name.split('.')[-1].lower()

                if ext == 'txt':
                    preview = file.read(1000).decode('utf-8', errors='ignore')
                    file.seek(0)
                    st.text_area(
                        f"{file.name} preview",
                        preview + ("..." if len(preview) >= 1000 else ""),
                        height=150
                    )
                elif ext == 'csv':
                    try:
                        df = pd.read_csv(file)
                        file.seek(0)
                        st.write("CSV preview (up to 5 rows)")
                        st.dataframe(df.head(5))
                    except Exception as e:
                        st.error(f"CSV preview failed: {e}")
                elif ext == 'pdf':
                    try:
                        file_bytes = file.read()
                        file.seek(0)

                        pdf_file = io.BytesIO(file_bytes)
                        reader = PyPDF2.PdfReader(pdf_file, strict=False)

                        pc = len(reader.pages)
                        st.write(f"PDF file: {pc} pages")

                        if pc > 0:
                            try:
                                page_text = reader.pages[0].extract_text()
                                preview = page_text[:500] if page_text else "(Unable to extract text)"
                                st.text_area("First page preview", preview + "...", height=150)
                            except Exception:
                                st.warning("Failed to extract text from the first page")
                    except Exception as e:
                        st.error(f"PDF preview failed: {e}")

                if idx < file_count - 1:
                    st.divider()

    for m in st.session_state.messages:
        with st.chat_message(m["role"]):
            st.markdown(m["content"], unsafe_allow_html=True)

            if "videos" in m and m["videos"]:
                st.subheader("Related Videos")
                for video in m["videos"]:
                    video_title = video.get('title', 'Related video')
                    video_url = video.get('url', '')
                    thumbnail = video.get('thumbnail', '')

                    if thumbnail:
                        col1, col2 = st.columns([1, 3])
                        with col1:
                            st.write("🎬")
                        with col2:
                            st.markdown(f"**[{video_title}]({video_url})**")
                            st.write(f"Source: {video.get('source', 'Unknown')}")
                    else:
                        st.markdown(f"🎬 **[{video_title}]({video_url})**")
                        st.write(f"Source: {video.get('source', 'Unknown')}")

    query = st.chat_input("Enter a question about crop prices, demand, or market trends.")
    if query:
        process_input(query, uploaded_files)

    sb.markdown("---")
    sb.markdown("Created by Vidraft | [Community](https://discord.gg/openfreeai)")


def process_example(topic):
    process_input(topic, [])


def process_input(query: str, uploaded_files):
    if not any(m["role"] == "user" and m["content"] == query for m in st.session_state.messages):
        st.session_state.messages.append({"role": "user", "content": query})

    with st.chat_message("user"):
        st.markdown(query)

    with st.chat_message("assistant"):
        placeholder = st.empty()
        message_placeholder = st.empty()
        full_response = ""

        use_web_search = st.session_state.web_search_enabled
        has_uploaded_files = bool(uploaded_files) and len(uploaded_files) > 0

        try:
            status = st.status("Preparing to answer the question...")
            status.update(label="Initializing client...")

            client = get_openai_client()

            search_content = None
            video_results = []
            news_results = []

            status.update(label="Analyzing agricultural datasets...")
            with st.spinner("Analyzing datasets..."):
                dataset_analysis = analyze_dataset_for_query(query)

                crop_recommendation_analysis = analyze_crop_recommendation_dataset(query)
                climate_impact_analysis = analyze_climate_impact_dataset(query)

            soybean_analysis = None
            if st.session_state.use_soybean_dataset:
                status.update(label="Analyzing the soybean agriculture dataset...")
                with st.spinner("Analyzing the soybean dataset..."):
                    soybean_analysis = analyze_soybean_dataset(query)

            if use_web_search:
                with st.spinner("Collecting information..."):
                    search_content = do_web_search(keywords(query, top=5))
                    video_results = brave_video_search(query, 2)
                    news_results = brave_news_search(query, 3)

            file_content = None
            if has_uploaded_files:
                status.update(label="Processing uploaded files...")
                with st.spinner("Analyzing files..."):
                    file_content = process_uploaded_files(uploaded_files)

            valid_videos = []
            for vid in video_results:
                url = vid.get('video_url')
                if url and url.startswith('http'):
                    valid_videos.append({
                        'url': url,
                        'title': vid.get('title', 'Video'),
                        'thumbnail': vid.get('thumbnail_url', ''),
                        'source': vid.get('source', 'Video source')
                    })

            status.update(label="Preparing the comprehensive analysis...")
            sys_prompt = get_system_prompt(
                mode=st.session_state.analysis_mode,
                style=st.session_state.response_style,
                include_search_results=use_web_search,
                include_uploaded_files=has_uploaded_files
            )

            api_messages = [
                {"role": "system", "content": sys_prompt}
            ]

            user_content = query

            user_content += "\n\n" + dataset_analysis
            user_content += "\n\n" + crop_recommendation_analysis
            user_content += "\n\n" + climate_impact_analysis

            if soybean_analysis:
                user_content += "\n\n" + soybean_analysis

            if search_content:
                user_content += "\n\n" + search_content
            if file_content:
                user_content += "\n\n" + file_content

            if valid_videos:
                user_content += "\n\n# Related Videos\n"
                for i, vid in enumerate(valid_videos):
                    user_content += f"\n{i+1}. **{vid['title']}** - [{vid['source']}]({vid['url']})\n"

            api_messages.append({"role": "user", "content": user_content})

            try:
                stream = client.chat.completions.create(
                    model="gpt-4.1-mini",
                    messages=api_messages,
                    temperature=1,
                    max_tokens=MAX_TOKENS,
                    top_p=1,
                    stream=True
                )

                for chunk in stream:
                    if chunk.choices and len(chunk.choices) > 0 and chunk.choices[0].delta.content is not None:
                        content_delta = chunk.choices[0].delta.content
                        full_response += content_delta
                        message_placeholder.markdown(full_response + "▌", unsafe_allow_html=True)

                message_placeholder.markdown(full_response, unsafe_allow_html=True)

                if valid_videos:
                    st.subheader("Related Videos")
                    for video in valid_videos:
                        video_title = video.get('title', 'Related video')
                        video_url = video.get('url', '')

                        st.markdown(f"🎬 **[{video_title}]({video_url})**")
                        st.write(f"Source: {video.get('source', 'Unknown')}")

                status.update(label="Response complete!", state="complete")

                st.session_state.messages.append({
                    "role": "assistant",
                    "content": full_response,
                    "videos": valid_videos
                })

            except Exception as api_error:
                error_message = str(api_error)
                logging.error(f"API error: {error_message}")
                status.update(label=f"Error: {error_message}", state="error")
                raise Exception(f"Response generation error: {error_message}")

            if st.session_state.generate_image and full_response:
                with st.spinner("Generating a custom image..."):
                    try:
                        ip = extract_image_prompt(full_response, query)
                        img, cap = generate_image(ip)
                        if img:
                            st.subheader("AI-Generated Image")
                            st.image(img, caption=cap, use_container_width=True)
                    except Exception as img_error:
                        logging.error(f"Image generation error: {str(img_error)}")
                        st.warning("Custom image generation failed.")

            if full_response:
                st.subheader("Download This Response")
                c1, c2 = st.columns(2)
                c1.download_button(
                    "Markdown",
                    data=full_response,
                    file_name=f"{query[:30]}.md",
                    mime="text/markdown"
                )
                c2.download_button(
                    "HTML",
                    data=md_to_html(full_response, query[:30]),
                    file_name=f"{query[:30]}.html",
                    mime="text/html"
                )

            if st.session_state.auto_save and st.session_state.messages:
                try:
                    fn = f"conversation_history_auto_{datetime.now():%Y%m%d_%H%M%S}.json"
                    with open(fn, "w", encoding="utf-8") as fp:
                        json.dump(st.session_state.messages, fp, ensure_ascii=False, indent=2)
                except Exception as e:
                    logging.error(f"Auto save failed: {e}")

        except Exception as e:
            error_message = str(e)
            placeholder.error(f"An error occurred: {error_message}")
            logging.error(f"Input processing error: {error_message}")
            ans = f"An error occurred while processing the request: {error_message}"
            st.session_state.messages.append({"role": "assistant", "content": ans})


def main():
    st.write("==== Application start time:", datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "=====")
    agricultural_price_forecast_app()


if __name__ == "__main__":
    main()
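
# To run the app locally (assuming this file is saved as app.py, the imports above are
# installed, and OPENAI_API_KEY, SERPHOUSE_API_KEY and KDATA_API are set in the
# environment):
#   streamlit run app.py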