import os

import faiss
import fitz  # PyMuPDF
import gradio as gr
import numpy as np
import speech_recognition as sr
import torch
from dotenv import load_dotenv
from groq import Groq
from gtts import gTTS
from pydub import AudioSegment
from transformers import AutoTokenizer, AutoModel

# Load environment variables
load_dotenv()

# Initialize Groq API client
client = Groq(api_key=os.getenv("GROQ_API_KEY"))

# Initialize model and tokenizer for embedding
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModel.from_pretrained("bert-base-uncased")

# Initialize vector database
dimension = 768  # Size of BERT embeddings
index = faiss.IndexFlatL2(dimension)

# Folder path containing PDFs
pdf_folder_path = "agriculture_pdfs"


# Convert an audio file to text via Google's speech recognition API
def audio_to_text(audio_file_path):
    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(audio_file_path) as source:
            audio = recognizer.record(source)
        return recognizer.recognize_google(audio)
    except (sr.UnknownValueError, sr.RequestError):
        return None


# Convert any audio format supported by pydub/ffmpeg to WAV
def convert_to_wav(audio_file_path):
    if not audio_file_path:
        return None  # Invalid path; callers treat None as an error
    try:
        audio = AudioSegment.from_file(audio_file_path)
        wav_path = "temp_audio.wav"
        audio.export(wav_path, format="wav")
        return wav_path
    except Exception:
        return None


# Extract the full text of a PDF file, page by page
def extract_text_from_pdf(pdf_file):
    text = ""
    try:
        pdf_document = fitz.open(pdf_file)
        for page_num in range(len(pdf_document)):
            page = pdf_document.load_page(page_num)
            text += page.get_text()
        pdf_document.close()
    except Exception:
        pass
    return text


# Embed a list of texts by mean-pooling BERT's last hidden state
def embed_text(texts, model, tokenizer):
    try:
        inputs = tokenizer(texts, return_tensors="pt", truncation=True, padding=True)
        with torch.no_grad():
            embeddings = model(**inputs).last_hidden_state.mean(dim=1).numpy()
        return embeddings
    except Exception:
        return np.array([])  # Return an empty array on error


# Convert text to speech and save it as an MP3 file
def text_to_speech(text, output_file):
    try:
        tts = gTTS(text=text, lang="en")
        tts.save(output_file)
        return output_file
    except Exception:
        return None


# Read all PDF files from the specified folder
pdf_paths = [
    os.path.join(pdf_folder_path, f)
    for f in os.listdir(pdf_folder_path)
    if f.endswith(".pdf")
]
texts = []
for path in pdf_paths:
    pdf_text = extract_text_from_pdf(path)
    if pdf_text:
        texts.append(pdf_text)

# Embed the PDF texts and add them to the vector database
embeddings = embed_text(texts, model, tokenizer)
if embeddings.size > 0:
    index.add(embeddings)


def process_audio(audio_file):
    if audio_file is None:
        return "No audio file provided", None

    audio_file_path = audio_file if isinstance(audio_file, str) else audio_file.name
    wav_path = convert_to_wav(audio_file_path)
    if wav_path is None:
        return "Error converting audio file to WAV format", None

    text = audio_to_text(wav_path)
    if not text:
        return "No valid text extracted from audio", None

    try:
        # Keep the 2D shape FAISS expects; indexing [0] here would raise
        # IndexError before the size check when embed_text returns an empty array
        audio_embedding = embed_text([text], model, tokenizer)
        if audio_embedding.size == 0:
            return "Error generating embedding for the audio text", None

        # Retrieve the five nearest PDF texts; FAISS pads missing hits with -1,
        # so guard the indices before using them
        distances, indices = index.search(audio_embedding, k=5)
        relevant_texts = [texts[idx] for idx in indices[0] if 0 <= idx < len(texts)]
        combined_text = " ".join(relevant_texts)
        if len(combined_text) > 1000:
            combined_text = combined_text[:1000]
        if not combined_text.strip():
            return "No relevant information found in the PDFs", None

        prompt = (
            f"The user has asked a query related to agricultural practices: {text}. "
" f"Here are relevant excerpts from the Better Crops South Asia document: {combined_text}. " "Based on this information, please provide accurate advice related to sustainable crop management, pest control, irrigation practices, and any recommendations for improving crop yield in the South Asian region." ) chat_completion = client.chat.completions.create( messages=[ { "role": "user", "content": prompt, } ], model="llama-3.1-70b-versatile", ) response = chat_completion.choices[0].message.content output_file = "advice.mp3" output_path = text_to_speech(response, output_file) if output_path is None: return "Error generating speech output", None return response, output_path except Exception: return "An error occurred while processing the audio", None # Enhanced Gradio interface customization iface = gr.Interface( fn=process_audio, inputs=gr.Audio(type="filepath"), outputs=[gr.Textbox(label="Advice", lines=10), gr.Audio(label="Advice Audio")], title="🌾 BetterCrops: Agriculture Support for Farmers", description=( "💡 **BetterCrops** is designed to assist farmers with their crops by analyzing agricultural PDFs " "and generating personalized audio advice based on your voice queries." ), article=( "
" "

BetterCrops

" "

Empowering Farmers with AI-driven Insights

" "
" ), theme="grass", css=( """ body { background-color: #f0f5e9; color: #2f4f2f; font-family: 'Helvetica Neue', sans-serif; } h1, h3 { color: #003f6e; } .gradio-container { padding: 20px; background: linear-gradient(135deg, #a3cfba 0%, #e8f5e9 100%); border-radius: 15px; } .gradio-inputs, .gradio-outputs { margin: 20px; padding: 20px; background-color: #ffffff; border-radius: 10px; box-shadow: 0px 4px 8px rgba(0, 0, 0, 0.1); } """ ) ) if __name__ == "__main__": iface.launch()