# BetterCrops / app.py
# Author: Shahzad8515
# Last commit: "Update app.py" (5ef27d0, verified)
import logging
import os

import faiss
import fitz # PyMuPDF
import gradio as gr
import numpy as np
import speech_recognition as sr
import torch
from dotenv import load_dotenv
from groq import Groq
from gtts import gTTS
from pydub import AudioSegment
from transformers import AutoTokenizer, AutoModel
# Read settings such as GROQ_API_KEY into the process environment
load_dotenv()

# Groq chat-completion client, keyed from the environment
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

# Tokenizer and encoder that turn text into embedding vectors; both are
# loaded from the same pretrained checkpoint
_EMBEDDING_CHECKPOINT = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(_EMBEDDING_CHECKPOINT)
model = AutoModel.from_pretrained(_EMBEDDING_CHECKPOINT)

# Flat L2-distance FAISS index that stores the document embeddings
dimension = 768  # Size of BERT embeddings
index = faiss.IndexFlatL2(dimension)

# Directory that is scanned for source PDFs
pdf_folder_path = "agriculture_pdfs"
# Function to convert audio file to text
def audio_to_text(audio_file_path):
    """Transcribe an audio file using ``recognize_google``.

    Args:
        audio_file_path: Path handed to ``sr.AudioFile``.

    Returns:
        The recognized text, or ``None`` when the speech could not be
        understood or the recognition request failed.
    """
    speech_recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(audio_file_path) as audio_source:
            recorded = speech_recognizer.record(audio_source)
        return speech_recognizer.recognize_google(recorded)
    except (sr.UnknownValueError, sr.RequestError):
        # Unintelligible audio and a failed request are reported to the
        # caller in the same way.
        return None
# Function to convert audio to WAV format
def convert_to_wav(audio_file_path):
    """Re-encode an audio file as WAV and return the path of the result.

    The output is always written to ``temp_audio.wav`` in the current
    working directory.

    Args:
        audio_file_path: Path of the audio file to convert.

    Returns:
        The path of the WAV file, or ``None`` if loading or exporting failed.

    Raises:
        ValueError: If ``audio_file_path`` is empty or ``None``.
    """
    if audio_file_path:
        target_path = "temp_audio.wav"
        try:
            AudioSegment.from_file(audio_file_path).export(target_path, format="wav")
        except Exception:
            return None
        return target_path
    raise ValueError("Invalid audio file path")
# Function to extract text from a PDF file
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page in a PDF.

    Extraction is best-effort: if the file cannot be opened or a page fails
    to parse, the text gathered so far is returned (possibly ``""``) and the
    problem is logged rather than raised.

    Args:
        pdf_file: Path of the PDF, passed to ``fitz.open``.

    Returns:
        The page texts joined in page order.
    """
    page_texts = []
    try:
        # The context manager closes the document even if a page fails.
        with fitz.open(pdf_file) as pdf_document:
            for page in pdf_document:
                page_texts.append(page.get_text())
    except Exception as exc:
        logging.getLogger(__name__).warning(
            "Could not fully read PDF %r: %s", pdf_file, exc
        )
    return "".join(page_texts)
# Function to embed text using a transformer model
def embed_text(texts, model, tokenizer):
    """Embed texts as the mean of their token hidden states.

    Padding positions are excluded from the mean by weighting with the
    tokenizer's attention mask. Without that, a text's vector would change
    depending on how much padding the rest of its batch forced onto it, so
    documents embedded in a batch and a query embedded alone would not be
    comparable.

    Args:
        texts: A string or a list of strings to embed.
        model: Callable that accepts the tokenizer output as keyword
            arguments and returns an object exposing ``last_hidden_state``
            (assumed to be laid out as batch x tokens x hidden size).
        tokenizer: Callable that returns a mapping of tensors containing
            ``attention_mask``.

    Returns:
        A 2-D numpy array with one row per text, or an empty array when
        ``texts`` is an empty list or embedding failed.
    """
    # Nothing to embed: skip the tokenizer call entirely.
    if isinstance(texts, (list, tuple)) and not texts:
        return np.array([])
    try:
        inputs = tokenizer(texts, return_tensors='pt', truncation=True, padding=True)
        with torch.no_grad():
            hidden = model(**inputs).last_hidden_state
            # 1.0 for real tokens, 0.0 for padding; broadcast over the hidden axis.
            mask = inputs["attention_mask"].unsqueeze(-1).to(hidden.dtype)
            # clamp guards against division by zero for an all-padding row.
            token_counts = mask.sum(dim=1).clamp(min=1)
            embeddings = ((hidden * mask).sum(dim=1) / token_counts).numpy()
        return embeddings
    except Exception:
        logging.getLogger(__name__).exception("Failed to embed texts")
        return np.array([])  # Return empty array on error
# Function to convert text to speech
def text_to_speech(text, output_file):
    """Synthesize English speech for ``text`` and save it to ``output_file``.

    Args:
        text: The text to speak.
        output_file: Destination path passed to ``gTTS.save``.

    Returns:
        ``output_file`` on success, or ``None`` if synthesis or saving failed.
    """
    try:
        gTTS(text=text, lang='en').save(output_file)
    except Exception:
        return None
    return output_file
# Read all PDF files from the specified folder.
# A missing folder is treated as "no documents" so the app can still start;
# the extension check is case-insensitive so files named *.PDF are included.
if os.path.isdir(pdf_folder_path):
    pdf_paths = [
        os.path.join(pdf_folder_path, f)
        for f in os.listdir(pdf_folder_path)
        if f.lower().endswith('.pdf')
    ]
else:
    logging.getLogger(__name__).warning(
        "PDF folder %r not found; starting with an empty index", pdf_folder_path
    )
    pdf_paths = []

# Keep only PDFs that yielded text; positions in `texts` correspond to the
# row order of the vectors added to the index below.
texts = []
for path in pdf_paths:
    pdf_text = extract_text_from_pdf(path)
    if pdf_text:
        texts.append(pdf_text)

# Embed PDF texts and add to vector database
embeddings = embed_text(texts, model, tokenizer)
if embeddings.size > 0:
    index.add(embeddings)
def process_audio(audio_file):
    """Answer a spoken agricultural query with text and synthesized audio.

    Pipeline: convert the upload to WAV, transcribe it, embed the
    transcript, retrieve the nearest PDF texts from the FAISS index, ask the
    Groq chat model for advice, and convert the answer to speech.

    Args:
        audio_file: A file path string, an object with a ``name`` attribute
            holding the path, or ``None``.

    Returns:
        A ``(message, audio_path)`` tuple. On success ``message`` is the
        model's advice and ``audio_path`` the generated MP3; on any failure
        ``message`` describes the problem and ``audio_path`` is ``None``.
    """
    if audio_file is None:
        return "No audio file provided", None
    audio_file_path = audio_file if isinstance(audio_file, str) else audio_file.name
    wav_path = convert_to_wav(audio_file_path)
    if wav_path is None:
        return "Error converting audio file to WAV format", None
    text = audio_to_text(wav_path)
    if not text:
        return "No valid text extracted from audio", None
    try:
        query_embeddings = embed_text([text], model, tokenizer)
        # embed_text signals failure with an empty array, so check before
        # indexing into it (indexing first would raise IndexError instead).
        if query_embeddings.size == 0:
            return "Error generating embedding for the audio text", None
        combined_text = _retrieve_context(query_embeddings[0])
        if not combined_text.strip():
            return "No relevant information found in the PDFs", None
        prompt = (
            f"The user has asked a query related to agricultural practices: {text}. "
            f"Here are relevant excerpts from the Better Crops South Asia document: {combined_text}. "
            "Based on this information, please provide accurate advice related to sustainable crop management, pest control, irrigation practices, and any recommendations for improving crop yield in the South Asian region."
        )
        # NOTE(review): confirm this model id is still served by Groq.
        chat_completion = client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": prompt,
                }
            ],
            model="llama-3.1-70b-versatile",
        )
        response = chat_completion.choices[0].message.content
        output_file = "advice.mp3"
        output_path = text_to_speech(response, output_file)
        if output_path is None:
            return "Error generating speech output", None
        return response, output_path
    except Exception:
        # Keep the user-facing message generic but record the real cause.
        logging.getLogger(__name__).exception("Failed to process audio query")
        return "An error occurred while processing the audio", None


def _retrieve_context(query_embedding, k=5, max_chars=1000):
    """Join the PDF texts nearest to ``query_embedding``, capped at ``max_chars``."""
    # Never ask for more neighbours than the index holds: FAISS fills the
    # missing slots with -1, which would otherwise select texts[-1].
    k = min(k, index.ntotal)
    if k <= 0:
        return ""
    _, indices = index.search(np.array([query_embedding]), k=k)
    relevant_texts = [texts[idx] for idx in indices[0] if 0 <= idx < len(texts)]
    return " ".join(relevant_texts)[:max_chars]
# Gradio interface: one audio input (delivered to process_audio as a file
# path) and two outputs (the advice text and its spoken version).
# The emoji in the title and description are written as \U escapes so they
# cannot be corrupted by a wrong file encoding again.
iface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(type="filepath"),
    outputs=[gr.Textbox(label="Advice", lines=10), gr.Audio(label="Advice Audio")],
    # U+1F33E: sheaf of rice
    title="\U0001F33E BetterCrops: Agriculture Support for Farmers",
    description=(
        # U+1F4A1: light bulb
        "\U0001F4A1 **BetterCrops** is designed to assist farmers with their crops by analyzing agricultural PDFs "
        "and generating personalized audio advice based on your voice queries."
    ),
    article=(
        "<div style='text-align: center; color: #003f6e;'>"
        "<h1 style='font-size: 36px; font-weight: bold;'>BetterCrops</h1>"
        "<h3 style='font-size: 24px; font-weight: normal;'>Empowering Farmers with AI-driven Insights</h3>"
        "</div>"
    ),
    # NOTE(review): confirm the installed Gradio version still recognizes
    # the "grass" theme name.
    theme="grass",
    css=(
        """
        body {
            background-color: #f0f5e9;
            color: #2f4f2f;
            font-family: 'Helvetica Neue', sans-serif;
        }
        h1, h3 {
            color: #003f6e;
        }
        .gradio-container {
            padding: 20px;
            background: linear-gradient(135deg, #a3cfba 0%, #e8f5e9 100%);
            border-radius: 15px;
        }
        .gradio-inputs, .gradio-outputs {
            margin: 20px;
            padding: 20px;
            background-color: #ffffff;
            border-radius: 10px;
            box-shadow: 0px 4px 8px rgba(0, 0, 0, 0.1);
        }
        """
    )
)
# Launch the interface only when this file is run as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    iface.launch()