Shahzad8515 committed
Commit adb1b3e · verified · 1 Parent(s): a8ca7e6

Create app.py

Files changed (1): app.py (+167, -0)
app.py ADDED
@@ -0,0 +1,167 @@
import os
import speech_recognition as sr
import fitz  # PyMuPDF
from transformers import AutoTokenizer, AutoModel
import torch
import faiss
import numpy as np
from gtts import gTTS
from pydub import AudioSegment
from groq import Groq
from dotenv import load_dotenv
import gradio as gr

# Load environment variables
load_dotenv()

# Initialize Groq API client
client = Groq(api_key=os.getenv("GROQ_API_KEY"))

# Initialize model and tokenizer for embedding
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModel.from_pretrained("bert-base-uncased")

# Initialize vector database
dimension = 768  # Size of BERT embeddings
index = faiss.IndexFlatL2(dimension)

# Folder path containing agriculture-related PDFs
pdf_folder_path = "agriculture_pdfs"
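For reference, load_dotenv() reads a .env file from the working directory; a minimal example matching the os.getenv("GROQ_API_KEY") call above (the value is a placeholder, not from the commit):

GROQ_API_KEY=your_groq_api_key_here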
# Function to convert an audio file to text using Google's speech recognition API
def audio_to_text(audio_file_path):
    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(audio_file_path) as source:
            audio = recognizer.record(source)
            text = recognizer.recognize_google(audio)
            print(f"Extracted Text: {text}")  # Debugging line
            return text
    except sr.UnknownValueError:
        print("Audio could not be understood")  # Debugging line
        return None
    except sr.RequestError:
        print("Request error")  # Debugging line
        return None
# Function to convert audio to WAV format
# (pydub relies on ffmpeg being installed for non-WAV input formats)
def convert_to_wav(audio_file_path):
    if not audio_file_path:
        raise ValueError("Invalid audio file path")
    try:
        audio = AudioSegment.from_file(audio_file_path)
        wav_path = "temp_audio.wav"
        audio.export(wav_path, format="wav")
        return wav_path
    except Exception as e:
        print(f"Error converting audio to WAV: {e}")
        return None
# Function to extract text from a PDF file
def extract_text_from_pdf(pdf_file):
    text = ""
    try:
        pdf_document = fitz.open(pdf_file)
        for page_num in range(len(pdf_document)):
            page = pdf_document.load_page(page_num)
            text += page.get_text()
        pdf_document.close()
    except Exception as e:
        print(f"Error extracting text from PDF: {e}")
    return text
# Function to embed text using a transformer model.
# Note: the tokenizer truncates each input to BERT's 512-token limit,
# so only the start of a long document contributes to its embedding.
def embed_text(texts, model, tokenizer):
    try:
        inputs = tokenizer(texts, return_tensors='pt', truncation=True, padding=True)
        with torch.no_grad():
            embeddings = model(**inputs).last_hidden_state.mean(dim=1).numpy()
        return embeddings
    except Exception as e:
        print(f"Error embedding text: {e}")
        return np.array([])  # Return empty array on error
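Because of that truncation, embedding each whole PDF means retrieval effectively sees only the first few hundred words of every document. A possible refinement, not part of this commit: split each text into fixed-size chunks and index those instead (chunk_words is an arbitrary illustrative choice):

def split_into_chunks(text, chunk_words=200):
    # Break a long document into pieces small enough to fit
    # under BERT's 512-token limit before embedding.
    words = text.split()
    return [" ".join(words[i:i + chunk_words])
            for i in range(0, len(words), chunk_words)]

Indexing chunks rather than whole PDFs would also let the k=2 nearest-neighbour search in process_audio return focused passages instead of entire documents.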
# Function to convert text to speech
def text_to_speech(text, output_file):
    try:
        tts = gTTS(text=text, lang='en')
        tts.save(output_file)
        return output_file
    except Exception as e:
        print(f"Error converting text to speech: {e}")
        return None
# Read all PDF files from the specified folder
pdf_paths = [os.path.join(pdf_folder_path, f) for f in os.listdir(pdf_folder_path) if f.endswith('.pdf')]

texts = []
for path in pdf_paths:
    pdf_text = extract_text_from_pdf(path)
    if pdf_text:
        texts.append(pdf_text)
    else:
        print(f"Failed to extract text from {path}")

# Embed PDF texts and add them to the vector database
embeddings = embed_text(texts, model, tokenizer)
if embeddings.size > 0:
    index.add(embeddings)
else:
    print("No embeddings to add to the vector database")
def process_audio(audio_file):
    if audio_file is None:
        return "No audio file provided", None  # Handle case where no file is uploaded

    # Gradio may hand over either a plain file path or a file-like object
    if isinstance(audio_file, str):
        audio_file_path = audio_file
    else:
        audio_file_path = audio_file.name

    wav_path = convert_to_wav(audio_file_path)
    if wav_path is None:
        return "Error converting audio file to WAV format", None

    text = audio_to_text(wav_path)
    if not text:
        return "No valid text extracted from audio", None
    try:
        query_embeddings = embed_text([text], model, tokenizer)
        if query_embeddings.size == 0:  # Check before indexing to avoid an IndexError on failure
            return "Error generating embedding for the audio text", None
        audio_embedding = query_embeddings[0]

        # Find the two most similar PDF texts; FAISS pads with -1 when the index holds fewer entries
        distances, indices = index.search(np.array([audio_embedding]), k=2)
        relevant_texts = [texts[idx] for idx in indices[0] if 0 <= idx < len(texts)]
        combined_text = " ".join(relevant_texts)
        if len(combined_text) > 1000:
            combined_text = combined_text[:1000]
        if not combined_text.strip():
            return "No relevant information found in the PDFs", None

        # Build an agriculture-specific prompt from the user's question and the retrieved context
        prompt = (
            f"The user has asked the following agriculture-related question: {text}. "
            f"Relevant reference material from the knowledge base: {combined_text}. "
            "Based on this, provide 4 clear and actionable steps to address the agricultural issue. "
            "The response should be concise and no longer than 5 lines."
        )
        print(f"Prompt: {prompt}")  # Debugging line

        chat_completion = client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": prompt,
                }
            ],
            model="llama-3.1-70b-versatile",
        )
        response = chat_completion.choices[0].message.content
        output_file = "agriculture_advice.mp3"
        output_path = text_to_speech(response, output_file)

        if output_path is None:
            return "Error generating speech output", None

        return response, output_path
    except Exception as e:
        # Catch embedding, search, or API failures so the caller gets a readable error
        print(f"Error processing audio: {e}")
        return f"Error processing audio: {e}", None
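The file imports gradio but the commit ends here without wiring up a UI. A minimal sketch of what the missing launcher could look like, assuming Gradio 4.x and that process_audio is meant to drive an audio input plus text and audio outputs (the labels and title are illustrative, not from the commit):

demo = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(sources=["microphone", "upload"], type="filepath"),
    outputs=[gr.Textbox(label="Advice"), gr.Audio(label="Spoken advice")],
    title="Agriculture Voice Assistant",
)

if __name__ == "__main__":
    demo.launch()

With type="filepath", Gradio passes a plain path string, which matches the isinstance(audio_file, str) branch at the top of process_audio.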