Create app.py
app.py ADDED
@@ -0,0 +1,167 @@
import os
import speech_recognition as sr
import fitz  # PyMuPDF
from transformers import AutoTokenizer, AutoModel
import torch
import faiss
import numpy as np
from gtts import gTTS
from pydub import AudioSegment
from groq import Groq
from dotenv import load_dotenv
import gradio as gr

# Load environment variables
load_dotenv()

# Initialize Groq API client
client = Groq(api_key=os.getenv("GROQ_API_KEY"))

# Initialize model and tokenizer for embedding
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModel.from_pretrained("bert-base-uncased")

# Initialize vector database
dimension = 768  # Size of BERT embeddings
index = faiss.IndexFlatL2(dimension)

# Folder path containing agriculture-related PDFs
pdf_folder_path = "agriculture_pdfs"

# Function to convert audio file to text
def audio_to_text(audio_file_path):
    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(audio_file_path) as source:
            audio = recognizer.record(source)
        text = recognizer.recognize_google(audio)
        print(f"Extracted Text: {text}")  # Debugging line
        return text
    except sr.UnknownValueError:
        print("Audio could not be understood")  # Debugging line
        return None
    except sr.RequestError:
        print("Request error")  # Debugging line
        return None

# Function to convert audio to WAV format
def convert_to_wav(audio_file_path):
    if not audio_file_path:
        raise ValueError("Invalid audio file path")
    try:
        audio = AudioSegment.from_file(audio_file_path)
        wav_path = "temp_audio.wav"
        audio.export(wav_path, format="wav")
        return wav_path
    except Exception as e:
        print(f"Error converting audio to WAV: {e}")
        return None

# Function to extract text from a PDF file
def extract_text_from_pdf(pdf_file):
    text = ""
    try:
        pdf_document = fitz.open(pdf_file)
        for page_num in range(len(pdf_document)):
            page = pdf_document.load_page(page_num)
            text += page.get_text()
    except Exception as e:
        print(f"Error extracting text from PDF: {e}")
    return text

# Function to embed text using a transformer model
# Note: the tokenizer truncates to BERT's 512-token limit, so only the
# beginning of each document contributes to its embedding
def embed_text(texts, model, tokenizer):
    try:
        inputs = tokenizer(texts, return_tensors='pt', truncation=True, padding=True)
        with torch.no_grad():
            embeddings = model(**inputs).last_hidden_state.mean(dim=1).numpy()
        return embeddings
    except Exception as e:
        print(f"Error embedding text: {e}")
        return np.array([])  # Return empty array on error

# Function to convert text to speech
def text_to_speech(text, output_file):
    try:
        tts = gTTS(text=text, lang='en')
        tts.save(output_file)
        return output_file
    except Exception as e:
        print(f"Error converting text to speech: {e}")
        return None

# Read all PDF files from the specified folder
pdf_paths = [os.path.join(pdf_folder_path, f) for f in os.listdir(pdf_folder_path) if f.endswith('.pdf')]

texts = []
for path in pdf_paths:
    pdf_text = extract_text_from_pdf(path)
    if pdf_text:
        texts.append(pdf_text)
    else:
        print(f"Failed to extract text from {path}")

# Embed PDF texts and add to vector database (one vector per PDF)
embeddings = embed_text(texts, model, tokenizer)
if embeddings.size > 0:
    index.add(embeddings)
else:
    print("No embeddings to add to the vector database")

def process_audio(audio_file):
    if audio_file is None:
        return "No audio file provided", None  # Handle case where no file is uploaded

    # Gradio may pass either a plain file path or a file-like object
    if isinstance(audio_file, str):
        audio_file_path = audio_file
    else:
        audio_file_path = audio_file.name

    wav_path = convert_to_wav(audio_file_path)
    if wav_path is None:
        return "Error converting audio file to WAV format", None

    text = audio_to_text(wav_path)
    if not text:
        return "No valid text extracted from audio", None

    try:
        audio_embedding = embed_text([text], model, tokenizer)[0]
        if audio_embedding.size == 0:
            return "Error generating embedding for the audio text", None

        # FAISS pads results with -1 when fewer than k vectors are indexed,
        # so filter those out before looking up texts
        distances, indices = index.search(np.array([audio_embedding]), k=2)
        relevant_texts = [texts[idx] for idx in indices[0] if idx != -1]
        combined_text = " ".join(relevant_texts)
        if len(combined_text) > 1000:
            combined_text = combined_text[:1000]

        if not combined_text.strip():
            return "No relevant information found in the PDFs", None

        # Include both the transcribed question and the retrieved PDF context
        # (the original prompt sent only the retrieved text, dropping the
        # user's actual query)
        prompt = (
            f"The user has asked the following agriculture-related question: {text}. "
            f"Relevant reference material: {combined_text}. "
            "Based on this, provide 4 clear and actionable steps to address the agricultural issue. "
            "The response should be concise and no longer than 5 lines."
        )

        print(f"Prompt: {prompt}")  # Debugging line

        chat_completion = client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": prompt,
                }
            ],
            model="llama-3.1-70b-versatile",
        )
        response = chat_completion.choices[0].message.content
        output_file = "agriculture_advice.mp3"
        output_path = text_to_speech(response, output_file)

        if output_path is None:
            return "Error generating speech output", None

        # Return text and audio path together so the success case matches the
        # two-value error returns above (the original returned only the text)
        return response, output_path
    except Exception as e:
        # The original try block had no matching except clause (a syntax error);
        # surface unexpected failures instead of crashing
        print(f"Error processing audio query: {e}")
        return f"Error processing request: {e}", None
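
The file imports gradio but the diff ends before any interface is defined, and the two-value returns from process_audio suggest a text output plus an audio output. A minimal sketch of the missing wiring, assuming Gradio 4.x; the component choices, labels, and title here are illustrative, not part of the commit:

# Hypothetical UI wiring (not in the committed file). With type="filepath",
# Gradio passes process_audio a plain path string, which matches its
# isinstance(audio_file, str) branch.
demo = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(sources=["microphone", "upload"], type="filepath"),
    outputs=[gr.Textbox(label="Advice"), gr.Audio(label="Spoken advice")],
    title="Agriculture Voice Assistant",
)

if __name__ == "__main__":
    demo.launch()

For the Space to build, a requirements.txt matching the imports would also be needed, presumably something like: SpeechRecognition, PyMuPDF, transformers, torch, faiss-cpu, numpy, gTTS, pydub, groq, python-dotenv, gradio. pydub additionally needs ffmpeg on the host (packages.txt on Spaces), and GROQ_API_KEY would be set as a Space secret.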