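# NOTE: page-banner residue captured together with this file (apparently the
# Hugging Face Spaces status line): "Spaces: Runtime error". Not program code.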
import html
import json
import logging
import os
import re
import uuid
from datetime import datetime
from logging.handlers import RotatingFileHandler  # Add this import statement
from pathlib import Path

import dotenv
import gtts
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.io as pio
import requests
from flask import Flask, render_template, request, jsonify, send_file
# Load settings from a local .env file before anything reads the environment.
dotenv.load_dotenv()

app = Flask(__name__)

# Absolute location of the static assets, resolved next to this file.
_STATIC_DIR = Path(__file__).parent.absolute() / "static"

# Generated speech files live under static/audio; create the folder on startup.
AUDIO_DIR = _STATIC_DIR / "audio"
AUDIO_DIR.mkdir(parents=True, exist_ok=True)

# Point Flask at the static folder explicitly.
app.static_folder = str(_STATIC_DIR)
# Column names used by the CSV snapshot (and by older API payloads), mapped to
# the lowercase names the rest of the application works with.
_PRICE_COLUMN_RENAMES = {
    'State': 'state',
    'District': 'district',
    'Market': 'market',
    'Commodity': 'commodity',
    'Variety': 'variety',
    'Grade': 'grade',
    'Arrival_Date': 'arrival_date',
    'Min_x0020_Price': 'min_price',
    'Max_x0020_Price': 'max_price',
    'Modal_x0020_Price': 'modal_price'
}


def _normalise_price_columns(df):
    """Rename legacy column headers to lowercase names when 'min_price' is absent."""
    if 'min_price' not in df.columns:
        df = df.rename(columns=_PRICE_COLUMN_RENAMES)
    return df


def _load_csv_fallback(csv_path="final_price_data.csv"):
    """Read the local CSV snapshot (path relative to the working directory)."""
    return _normalise_price_columns(pd.read_csv(csv_path))


def fetch_market_data(state=None, district=None, market=None, commodity=None):
    """Fetch price records from the data.gov.in market API, with a CSV fallback.

    The API is queried with whichever filters are provided. If the request
    fails, returns a non-200 status, or yields no records, the local file
    ``final_price_data.csv`` is loaded instead. The filters are then applied
    to the resulting frame with exact equality so CSV data is narrowed the
    same way as API data.

    Args:
        state, district, market, commodity: Optional exact values to filter
            on; falsy values are ignored.

    Returns:
        pandas.DataFrame with the matching rows (possibly empty).

    Raises:
        Whatever ``pandas.read_csv`` raises if the fallback CSV is needed but
        cannot be read, and ``KeyError`` if a requested filter column is
        missing from the loaded data.
    """
    # The key can be overridden through the environment; the literal default
    # keeps existing deployments working unchanged.
    api_key = os.getenv(
        "DATA_GOV_API_KEY",
        "579b464db66ec23bdd000001189bbb99e979428764bdbe8fdd44ebb7",
    )
    # The resource id is a UUID; the previous value had lost its final digit,
    # so every API call failed and the CSV fallback was always used.
    base_url = "https://api.data.gov.in/resource/9ef84268-d588-465a-a308-a864a43d0070"

    filters = {
        "state": state,
        "district": district,
        "market": market,
        "commodity": commodity,
    }
    params = {
        "api-key": api_key,
        "format": "json",
        "limit": 1000,
    }
    params.update(
        {f"filters[{column}]": wanted for column, wanted in filters.items() if wanted}
    )

    loaded_from_csv = False
    try:
        # A timeout keeps a stalled API from hanging the request forever.
        response = requests.get(base_url, params=params, timeout=15)
        if response.status_code != 200:
            print(f"API Error: {response.status_code}")
            raise Exception(f"API Error: {response.status_code}")
        df = pd.DataFrame(response.json().get("records", []))
    except Exception as e:
        # Deliberately broad: any API problem should degrade to the CSV data.
        print(f"Error fetching data from API: {str(e)}. Falling back to CSV file.")
        df = _load_csv_fallback()
        loaded_from_csv = True

    if not loaded_from_csv:
        if df.empty:
            print("API returned empty data. Falling back to CSV file.")
            df = _load_csv_fallback()
        else:
            df = _normalise_price_columns(df)

    for column, wanted in filters.items():
        if wanted:
            df = df[df[column] == wanted]
    return df
def get_ai_insights(market_data, state, district, market=None, commodity=None, language="English"):
    """Ask the Gemini API for farmer-oriented insights about one district.

    The district's rows are optionally narrowed to ``market`` and
    ``commodity`` (each only when that value actually occurs in the
    district's data), summarised, and sent to Gemini in a prompt that
    requests the answer in ``language``. The model's text is passed through
    ``format_ai_insights``.

    Args:
        market_data: DataFrame with at least 'district', 'market',
            'commodity', 'modal_price' and 'arrival_date' columns.
        state: State name, used in the prompt.
        district: District whose rows are analysed.
        market: Optional market to focus on.
        commodity: Optional commodity to focus on.
        language: Language name the model is asked to answer in.

    Returns:
        The formatted insights, or an empty string when inputs are missing,
        the ``GEMINI_API`` environment variable is unset, there are no usable
        prices, or anything goes wrong (errors are printed, never raised).
    """
    if not state or not district or market_data.empty:
        return ""
    try:
        district_data = market_data[market_data['district'] == district]
        if district_data.empty:
            return ""

        # Using environment variable for Gemini API key
        gemini_api_key = os.getenv("GEMINI_API")
        if not gemini_api_key:
            print("Warning: Gemini API key not set")
            return ""

        # Work on a private copy so the derived columns below never touch the
        # caller's frame (and never assign on a view).
        district_data = district_data.copy()
        # Prices can arrive as strings; coerce so the aggregations work.
        district_data['modal_price'] = pd.to_numeric(district_data['modal_price'], errors='coerce')
        # NOTE(review): the date format of the feed is not visible here;
        # unparseable dates become NaT and are left out of the monthly count.
        district_data['arrival_date'] = pd.to_datetime(district_data['arrival_date'], errors='coerce')
        district_data['month'] = district_data['arrival_date'].dt.month

        # Only narrow when the value exists inside this district; otherwise
        # the filter would empty the frame and abort the whole analysis.
        market_specific = bool(market) and bool((district_data['market'] == market).any())
        if market_specific:
            district_data = district_data[district_data['market'] == market]

        commodity_specific = bool(commodity) and bool((district_data['commodity'] == commodity).any())
        if commodity_specific:
            district_data = district_data[district_data['commodity'] == commodity]

        mean_by_crop = district_data.groupby('commodity')['modal_price'].mean().dropna()
        if mean_by_crop.empty:
            # No numeric prices at all: nothing meaningful to send to the model.
            return ""

        crop_count = len(mean_by_crop)
        monthly_entries = len(
            district_data.groupby(['commodity', 'month'])['modal_price'].mean()
        )
        market_competition = district_data['market'].nunique()
        top_commodities = [str(crop) for crop in mean_by_crop.nlargest(5).index]

        # Get min and max prices for key commodities. Values are converted to
        # plain floats because json.dumps cannot serialise NumPy integers.
        price_range_info = {}
        for crop in top_commodities[:3]:
            crop_prices = district_data.loc[district_data['commodity'] == crop, 'modal_price']
            price_range_info[crop] = {
                'min': float(crop_prices.min()),
                'max': float(crop_prices.max()),
                'avg': float(crop_prices.mean()),
            }

        # Calculate market-specific metrics if market is selected
        market_details = ""
        if market_specific:
            market_details = "\n".join([
                f"Market-specific information for {market}:",
                f"- Number of commodities: {crop_count}",
                f"- Most expensive commodity: {mean_by_crop.idxmax()}",
                f"- Cheapest commodity: {mean_by_crop.idxmin()}",
            ])

        # Commodity-specific details if commodity is selected
        commodity_details = ""
        if commodity_specific:
            priced_rows = district_data.dropna(subset=['modal_price'])
            best_market = priced_rows.loc[priced_rows['modal_price'].idxmin(), 'market']
            worst_market = priced_rows.loc[priced_rows['modal_price'].idxmax(), 'market']
            # std() is NaN for a single row; round() on a float handles that.
            price_spread = round(float(priced_rows['modal_price'].std()), 2)
            commodity_details = "\n".join([
                f"Commodity-specific information for {commodity}:",
                f"- Best market to buy (lowest price): {best_market}",
                f"- Highest priced market: {worst_market}",
                f"- Price variance across markets: {price_spread}",
            ])

        # Improved prompt for better structured output with language support
        prompt = "\n".join([
            f"Analyze the following agricultural market data for {district}, {state} and provide insights in {language} language.",
            "Market data:",
            f"- Active markets: {market_competition}",
            f"- Top crops: {', '.join(top_commodities[:5])}",
            f"- Data from {crop_count} crops and {monthly_entries} monthly entries.",
            "Price information:",
            json.dumps(price_range_info, indent=2),
            market_details,
            commodity_details,
            "Analyze this data and provide insights about crop market trends and profitability.",
            "Include specific numbers from the data about prices.",
            "Provide structured insights with clear sections. Use this exact format with bullet points:",
            "Crop Profitability Analysis:",
            "* [First insight about profitable crops with specific prices mentioned]",
            "* [Second insight]",
            "Market Price Analysis:",
            "* [First insight about markets with specific price ranges]",
            "* [Second insight]",
            "Recommendations for Farmers:",
            "* [Action item 1]",
            "* [Action item 2]",
        ])

        api_url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-pro:generateContent"
        headers = {"Content-Type": "application/json"}
        payload = {
            "contents": [
                {
                    "parts": [
                        {"text": prompt}
                    ]
                }
            ],
            "generationConfig": {
                "temperature": 0.4,
                "maxOutputTokens": 1024
            }
        }
        # The key was previously read from an undefined name, which raised a
        # NameError that the handler below swallowed, so no insight was ever
        # produced.
        response = requests.post(
            api_url,
            params={"key": gemini_api_key},
            headers=headers,
            json=payload,
            timeout=20
        )
        if response.status_code != 200:
            print(f"Gemini API Error: {response.status_code} - {response.text[:100]}")
            return ""

        candidates = response.json().get('candidates') or []
        if candidates:
            parts = (candidates[0].get('content') or {}).get('parts') or []
            if parts:
                return format_ai_insights(parts[0].get('text', ''), language)
        print(f"API Response issue: {response.text[:100]}")
        return ""
    except Exception as e:
        # Insights are optional: report the problem and let the page render.
        print(f"Error generating insights: {str(e)}")
        return ""
def extract_text_from_insights(insights_html):
    """Reduce an HTML insights fragment to plain text for text-to-speech.

    Every tag is replaced by a space, runs of whitespace are collapsed to a
    single space, and the result is trimmed. This is a simple regex pass, not
    a real HTML parser.
    """
    import re

    cleaned = insights_html
    # First pass drops the tags, second pass squeezes the whitespace they leave.
    for pattern in (r'<.*?>', r'\s+'):
        cleaned = re.compile(pattern).sub(' ', cleaned)
    return cleaned.strip()
def create_audio_from_text(text, language_code="en"):
    """Generate an MP3 from text with gTTS and return its URL path.

    Args:
        text: Text to speak. Empty or whitespace-only text produces no audio.
        language_code: Either a UI language name ("Hindi") or a gTTS language
            code ("hi"). Unknown values fall back to English.

    Returns:
        "/static/audio/<uuid>.mp3" on success, otherwise None (errors are
        printed, never raised).
    """
    if not text or not str(text).strip():
        return None

    # Map UI language selection to gTTS language codes
    language_map = {
        "English": "en",
        "Hindi": "hi",
        "Tamil": "ta",
        "Telugu": "te",
        "Marathi": "mr",
        "Bengali": "bn",
        "Gujarati": "gu",
        "Kannada": "kn",
        "Malayalam": "ml",
        "Punjabi": "pa"
    }
    if language_code in language_map:
        tts_lang = language_map[language_code]
    elif language_code in language_map.values():
        # The parameter is named and defaulted like a code, so accept real
        # codes too instead of silently switching them to English.
        tts_lang = language_code
    else:
        tts_lang = "en"

    # Generate unique filename
    filename = f"{uuid.uuid4()}.mp3"
    filepath = AUDIO_DIR / filename
    try:
        tts = gtts.gTTS(text, lang=tts_lang, slow=False)
        tts.save(str(filepath))
        return f"/static/audio/{filename}"
    except Exception as e:
        print(f"Error creating audio: {str(e)}")
        # Do not leave a truncated file behind when the save failed midway.
        try:
            if filepath.exists():
                filepath.unlink()
        except OSError:
            pass
        return None
def create_audio_local_fallback(text, language_code="en"):
    """Local fallback for TTS when network is unavailable.

    Uses pyttsx3 (imported lazily, so it stays an optional dependency) to
    write the speech to a uniquely named file in AUDIO_DIR.

    Args:
        text: Text to speak. Empty or whitespace-only text produces no audio,
            matching create_audio_from_text.
        language_code: Accepted for signature parity; not used by this
            fallback.

    Returns:
        "/static/audio/<uuid>.mp3" on success, otherwise None (errors are
        printed, never raised).
    """
    if not text or not str(text).strip():
        return None
    try:
        # This requires pyttsx3 to be installed
        import pyttsx3

        engine = pyttsx3.init()
        # Generate unique filename
        # NOTE(review): the file is named .mp3 and later served as audio/mpeg;
        # confirm the platform's pyttsx3 driver really writes MP3 data.
        filename = f"{uuid.uuid4()}.mp3"
        filepath = AUDIO_DIR / filename
        engine.save_to_file(text, str(filepath))
        engine.runAndWait()
        return f"/static/audio/{filename}"
    except Exception as e:
        print(f"Local TTS fallback failed: {str(e)}")
        return None
# A list item: "*", "-", "•" or "1." / "1)" followed by whitespace and text.
_INSIGHT_BULLET = re.compile(r'^(?:[*\-•]|\d+[.)])\s+(.*)$')
# A rupee amount such as "₹2500", "₹ 2,500" or "₹1,00,000.50".
_INSIGHT_PRICE = re.compile(r'₹\s*(\d+(?:,\d+)*(?:\.\d+)?)')


def _parse_insight_sections(insights_text, default_section="Recommendations"):
    """Split model output into {section title: [bullet text, ...]} line by line."""
    sections = {}
    current_section = default_section
    for raw_line in insights_text.splitlines():
        line = raw_line.strip()
        if not line:
            continue
        bullet = _INSIGHT_BULLET.match(line)
        is_markdown_heading = line.startswith('#')
        text = bullet.group(1) if bullet else line
        # Drop markdown emphasis/heading markers such as "**Title:**" or "## Title".
        text = text.replace('*', '').lstrip('#').strip()
        if not text:
            continue
        if bullet is None and (is_markdown_heading or text.endswith(':')):
            title = text.rstrip(':').strip()
            if title:
                current_section = title
                sections.setdefault(current_section, [])
        elif bullet is None and sections.get(current_section):
            # A plain line after a bullet is treated as its continuation.
            sections[current_section][-1] += ' ' + text
        else:
            sections.setdefault(current_section, []).append(text)
    return sections


def _render_insight_point(point):
    """Escape one bullet for HTML and wrap its rupee amounts in highlight spans."""
    safe_point = html.escape(point, quote=False)
    return _INSIGHT_PRICE.sub(
        lambda match: f'<span class="price-highlight">₹{match.group(1)}</span>',
        safe_point,
    )


def format_ai_insights(insights_data, language="English"):
    """Format AI insights into structured HTML, with an optional audio player.

    The text is read line by line: a non-bullet line ending in ":" (or a
    markdown heading) starts a section, bullet lines become list items of the
    current section, and bullets seen before any header go under
    "Recommendations". Sections without bullets are not rendered. All model
    text is HTML-escaped and rupee amounts are wrapped in
    ``<span class="price-highlight">``.

    A spoken version is generated with create_audio_from_text, falling back
    to create_audio_local_fallback; audio is best-effort and its failure
    never discards the text insights.

    Args:
        insights_data: Raw text returned by the model.
        language: UI language name, used for the spoken intro and TTS voice.

    Returns:
        The HTML fragment, or an empty string if no valid insights are
        provided.
    """
    if not insights_data or not insights_data.strip():
        return ""

    sections = {
        title: points
        for title, points in _parse_insight_sections(insights_data).items()
        if points
    }

    # Build one card per section.
    cards = []
    for section, points in sections.items():
        items = "".join(f'<li>{_render_insight_point(point)}</li>' for point in points)
        cards.append(
            f'<div class="insight-card"><h5>{html.escape(section, quote=False)}</h5>'
            f'<ul class="insight-list">{items}</ul></div>'
        )
    formatted_content = "".join(cards)

    # Create the plain text version for audio generation
    spoken_lines = [f"Market Insights for {language}.\n\n"]
    for section, points in sections.items():
        spoken_lines.append(f"{section}:\n")
        for point in points:
            # Clean up for speech
            clean_point = point.replace("₹", " rupees ")
            spoken_lines.append(f"• {clean_point}\n")
        spoken_lines.append("\n")
    plain_text = "".join(spoken_lines)

    # Generate audio file; any failure only costs the player, not the text.
    try:
        audio_path = create_audio_from_text(plain_text, language)
        if audio_path is None:
            audio_path = create_audio_local_fallback(plain_text)
    except Exception as e:
        print(f"Audio generation skipped: {str(e)}")
        audio_path = None

    # Add a wrapper for the insights with audio player
    audio_player = ""
    if audio_path:
        audio_player = f"""
<div class="audio-player-container">
<h4>Listen to Insights</h4>
<audio id="insightsAudio" controls>
<source src="{html.escape(audio_path, quote=True)}" type="audio/mpeg">
Your browser does not support the audio element.
</audio>
<button class="btn btn-sm btn-custom mt-2" id="playAudioBtn">
<i class="fa fa-play"></i> Play Audio
</button>
</div>
"""
    return f"""
<div class="insights-header">
<h3>AI Market Insights</h3>
{audio_player}
</div>
<div class="insight-section">
{formatted_content}
</div>
"""
def generate_plots(df):
    """Build the Plotly charts (English labels) for a frame of price records.

    The three price columns of ``df`` are converted to numeric in place
    (unparseable values become NaN). A line chart is added only when the
    frame holds exactly one commodity; in that case 'arrival_date' is also
    converted to datetime in place.

    Returns:
        For an empty frame, the tuple ``({}, "No data available")``.
        Otherwise a dict of HTML fragments keyed 'bar', 'box' and, when
        applicable, 'line'.
    """
    if df.empty:
        return {}, "No data available"

    palette = ["#4CAF50", "#8BC34A", "#CDDC39", "#FFC107", "#FF5722"]
    for price_column in ('min_price', 'max_price', 'modal_price'):
        df[price_column] = pd.to_numeric(df[price_column], errors='coerce')

    # Average modal price per commodity.
    mean_by_commodity = df.groupby('commodity')['modal_price'].mean().reset_index()
    bar_figure = px.bar(
        mean_by_commodity,
        x='commodity',
        y='modal_price',
        title="Average Price by Commodity",
        color_discrete_sequence=palette,
    )

    # A time trend only makes sense for a single commodity.
    trend_figure = None
    single_commodity = 'commodity' in df.columns and len(df['commodity'].unique()) == 1
    if single_commodity:
        df['arrival_date'] = pd.to_datetime(df['arrival_date'])
        chronological = df.sort_values('arrival_date')
        trend_figure = px.line(
            chronological,
            x='arrival_date',
            y='modal_price',
            title="Price Trend",
            color_discrete_sequence=palette,
        )

    box_figure = px.box(
        df,
        x='commodity',
        y='modal_price',
        title="Price Distribution",
        color='commodity',
        color_discrete_sequence=palette,
    )

    plots = {}
    for key, figure in (('bar', bar_figure), ('box', box_figure)):
        plots[key] = pio.to_html(figure, full_html=False)
    if trend_figure:
        plots['line'] = pio.to_html(trend_figure, full_html=False)
    return plots
# Logging setup: INFO-level root configuration plus a small rotating log file
# ('app.log', one backup) attached to the Flask application logger.
logging.basicConfig(level=logging.INFO)
handler = RotatingFileHandler(filename='app.log', maxBytes=10_000, backupCount=1)
handler.setLevel(logging.INFO)
app.logger.addHandler(handler)
# Without a route registration this view was never reachable.
@app.route('/')
def index():
    """Render the landing page with the list of states and today's date.

    A failure while loading the market data is logged and results in an empty
    state list; a failure while rendering the template is logged and answered
    with a plain-text 500 response.
    """
    try:
        app.logger.info("Fetching initial market data")
        initial_data = fetch_market_data()
        if initial_data.empty:
            states = []
        else:
            states = sorted(initial_data['state'].dropna().unique())
    except Exception as e:
        # The page is still useful without the dropdown data.
        app.logger.error(f"Error fetching initial data: {str(e)}")
        states = []

    try:
        app.logger.info("Rendering index template")
        return render_template(
            'index.html',
            states=states,
            today=datetime.today().strftime('%Y-%m-%d'),
        )
    except Exception as e:
        app.logger.error(f"Template rendering error: {str(e)}")
        return f"Error loading application: {str(e)}", 500
def _json_number(value):
    """Return a rounded plain float, or None for missing values (valid JSON)."""
    if value is None or pd.isna(value):
        return None
    return round(float(value), 2)


def _build_market_summary(df, top_n=5):
    """Build the table fragments and headline statistics for filter_data.

    Returns a tuple (market_html, cheapest_html, costliest_html, market_stats).
    An empty frame yields empty strings and an empty dict.
    """
    if df.empty:
        return "", "", "", {}

    table = df.copy()
    table['modal_price'] = pd.to_numeric(table['modal_price'], errors='coerce')
    table_options = {"classes": "table table-striped", "index": False, "border": 0, "na_rep": ""}

    priced = table.dropna(subset=['modal_price'])
    market_html = table.to_html(**table_options)
    cheapest_html = priced.nsmallest(top_n, 'modal_price').to_html(**table_options)
    costliest_html = priced.nlargest(top_n, 'modal_price').to_html(**table_options)

    # NOTE(review): the keys the front end expects in market_stats are not
    # visible from this file; confirm these names against the template/JS.
    market_stats = {
        'total_records': int(len(table)),
        'total_commodities': int(table['commodity'].nunique()),
        'total_markets': int(table['market'].nunique()),
        'average_price': _json_number(priced['modal_price'].mean()),
        'min_price': _json_number(priced['modal_price'].min()),
        'max_price': _json_number(priced['modal_price'].max()),
    }
    return market_html, cheapest_html, costliest_html, market_stats


# NOTE(review): URL inferred from the function name; confirm against the
# front end. Without a route registration this view was never reachable.
@app.route('/filter_data', methods=['POST'])
def filter_data():
    """Return plots, AI insights and summary tables for the submitted filters.

    Reads 'state', 'district', 'market', 'commodity' and 'language' (default
    'English') from the form data. Responds with a JSON object containing
    'plots', 'insights', 'success', 'hasStateDistrict', 'market_html',
    'cheapest_html', 'costliest_html' and 'market_stats'. On any error the
    response is ``{'success': False, 'error': <message>}`` with status 500.
    """
    app.logger.info("Received filter_data request")
    state = request.form.get('state')
    district = request.form.get('district')
    market = request.form.get('market')
    commodity = request.form.get('commodity')
    language = request.form.get('language', 'English')  # Default to English
    try:
        df = fetch_market_data(state, district, market, commodity)
        has_data = not df.empty

        # generate_plots answers an empty frame with a (dict, message) tuple;
        # keep 'plots' a JSON object in every case.
        plots = generate_plots(df) if has_data else {}
        insights = ""
        if state and district and has_data:
            insights = get_ai_insights(df, state, district, market, commodity, language)

        # These four values were referenced but never computed, so every
        # request ended in a NameError and a 500 response.
        market_table_html, cheapest_table_html, costliest_table_html, market_stats = (
            _build_market_summary(df)
        )

        app.logger.info("Successfully processed filter_data request")
        response = {
            'plots': plots,
            'insights': insights,
            'success': has_data,
            'hasStateDistrict': bool(state and district),
            'market_html': market_table_html,
            'cheapest_html': cheapest_table_html,
            'costliest_html': costliest_table_html,
            'market_stats': market_stats
        }
        return jsonify(response)
    except Exception as e:
        app.logger.error(f"Error processing filter_data request: {str(e)}")
        return jsonify({'success': False, 'error': str(e)}), 500
# NOTE(review): URL inferred from the function name; confirm against the
# front end. Without a route registration this view was never reachable.
@app.route('/get_districts', methods=['POST'])
def get_districts():
    """Return the sorted district names for the submitted 'state' as a JSON list.

    An empty list is returned when no data matches; if loading the data
    fails, the error is logged and an empty list is returned with status 500.
    """
    state = request.form.get('state')
    try:
        df = fetch_market_data(state=state)
        if df.empty or 'district' not in df.columns:
            return jsonify([])
        districts = sorted(df['district'].dropna().unique())
        return jsonify(districts)
    except Exception as e:
        app.logger.error(f"Error loading districts: {str(e)}")
        return jsonify([]), 500
# NOTE(review): URL inferred from the function name; confirm against the
# front end. Without a route registration this view was never reachable.
@app.route('/get_markets', methods=['POST'])
def get_markets():
    """Return the sorted market names for the submitted 'district' as a JSON list.

    An empty list is returned when no data matches; if loading the data
    fails, the error is logged and an empty list is returned with status 500.
    """
    district = request.form.get('district')
    try:
        df = fetch_market_data(district=district)
        if df.empty or 'market' not in df.columns:
            return jsonify([])
        markets = sorted(df['market'].dropna().unique())
        return jsonify(markets)
    except Exception as e:
        app.logger.error(f"Error loading markets: {str(e)}")
        return jsonify([]), 500
# NOTE(review): URL inferred from the function name; confirm against the
# front end. Without a route registration this view was never reachable.
@app.route('/get_commodities', methods=['POST'])
def get_commodities():
    """Return the sorted commodity names for the submitted 'market' as a JSON list.

    An empty list is returned when no data matches; if loading the data
    fails, the error is logged and an empty list is returned with status 500.
    """
    market = request.form.get('market')
    try:
        df = fetch_market_data(market=market)
        if df.empty or 'commodity' not in df.columns:
            return jsonify([])
        commodities = sorted(df['commodity'].dropna().unique())
        return jsonify(commodities)
    except Exception as e:
        app.logger.error(f"Error loading commodities: {str(e)}")
        return jsonify([]), 500
# Matches the "/static/audio/<file>" URLs handed out by create_audio_from_text.
# Without a route registration this view was never reachable.
@app.route('/static/audio/<filename>')
def serve_audio(filename):
    """Send one generated audio file from AUDIO_DIR as audio/mpeg.

    Returns a 404 text response when the file does not exist or the
    requested name would resolve outside AUDIO_DIR, and a 500 text response
    on unexpected errors.
    """
    try:
        audio_root = AUDIO_DIR.resolve()
        audio_path = (audio_root / filename).resolve()
        # The name comes from the URL: refuse anything that escapes the
        # audio folder (e.g. via ".." or an absolute path).
        if audio_path.parent != audio_root or not audio_path.is_file():
            return "Audio file not found", 404
        return send_file(str(audio_path), mimetype="audio/mpeg")
    except Exception as e:
        print(f"Error serving audio file: {str(e)}")
        return "Error serving audio file", 500
if __name__ == '__main__':
    # pio.templates.default = "plotly_white"
    # The Werkzeug debugger lets a client run arbitrary code, so it must not
    # be enabled by default while listening on every interface. Opt in
    # explicitly with FLASK_DEBUG=1 during local development.
    debug_enabled = os.getenv("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    app.run(debug=debug_enabled, host='0.0.0.0', port=7860)