Amarsaish committed on
Commit
36d2dcd
·
verified ·
1 Parent(s): 6caa9e4

Upload streamlit_merged.py

Browse files
Files changed (1) hide show
  1. streamlit_merged.py +273 -0
streamlit_merged.py ADDED
@@ -0,0 +1,273 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ import openai
4
+ from sklearn.metrics.pairwise import cosine_similarity
5
+ import numpy as np
6
+ from PyPDF2 import PdfReader
7
+ import json
8
+ import pickle
9
+ from pathlib import Path
10
+ from pydub import AudioSegment
11
+ from groq import Groq
12
+ from typing import List, Dict
13
+ import tempfile
14
+
15
# pydub needs an ffmpeg binary for audio conversion.  The original hard-coded
# Windows path is kept as a fallback, but an FFMPEG_PATH environment variable
# takes precedence so the app also runs on other machines.  PATH and the pydub
# converter are only touched when the binary actually exists; otherwise pydub
# falls back to discovering ffmpeg on PATH by itself.
ffmpeg_path = os.environ.get(
    "FFMPEG_PATH",
    r"C:\Users\AMAR\Downloads\ffmpeg-7.0.2-essentials_build\ffmpeg-7.0.2-essentials_build\bin\ffmpeg.exe",
)
if os.path.isfile(ffmpeg_path):
    os.environ["PATH"] += os.pathsep + os.path.dirname(ffmpeg_path)
    AudioSegment.converter = ffmpeg_path
18
class VoiceStockMarketEvaluator:
    """Evaluate spoken answers to stock-market questions.

    Audio is transcribed via Groq's Whisper translation endpoint; the
    transcript is then graded against reference material extracted from a
    PDF, using OpenAI embeddings for retrieval and a chat-completion prompt
    for the analysis.
    """

    def __init__(self, openai_api_key, groq_api_key):
        """Configure the API clients and load (or build) the embedding cache.

        Args:
            openai_api_key: Key used for embeddings and chat completions.
            groq_api_key: Key used for Whisper audio transcription.
        """
        # OpenAI configuration
        self.openai_api_key = openai_api_key
        self.pdf_path = "STOCK1.pdf"  # Update this to your PDF path in Streamlit
        openai.api_key = self.openai_api_key  # NOTE: mutates global openai module state
        self.llm_model = "gpt-3.5-turbo"

        # Groq configuration for audio transcription
        self.groq_api_key = groq_api_key
        self.groq_client = Groq(api_key=self.groq_api_key)
        self.whisper_model = 'whisper-large-v3'

        # Fixed question bank, keyed by question number.
        self.questions = {
            1: "What are different major investors?",
            2: "Who are major traders?",
            3: "who are participants of stock markets?"  # fixed typo: "makets"
        }

        # Load embeddings (creates and caches them on first run).
        self.load_embeddings()

    def load_embeddings(self):
        """Load embeddings from the pickle cache, or create it if missing."""
        pickle_path = Path('stock_market_embeddings.pkl')

        if pickle_path.exists():
            # NOTE: pickle is acceptable here only because the cache is
            # produced locally by create_and_save_embeddings(); never load
            # a pickle file from an untrusted source.
            with open(pickle_path, 'rb') as f:
                data = pickle.load(f)
            self.pdf_content = data['content']
            self.pdf_chunks = data['chunks']
            self.pdf_embeddings = data['embeddings']
        else:
            self.create_and_save_embeddings(pickle_path)

    def create_and_save_embeddings(self, pickle_path):
        """Extract the PDF text, embed its chunks, and cache everything.

        Args:
            pickle_path: destination Path for the pickle cache.
        """
        reader = PdfReader(self.pdf_path)
        self.pdf_content = ""
        for page in reader.pages:
            # extract_text() can return None for image-only pages; the
            # original code would crash on "str += None" — guard it.
            self.pdf_content += page.extract_text() or ""

        self.pdf_chunks = self._chunk_text(self.pdf_content)
        self.pdf_embeddings = self.get_openai_embeddings(self.pdf_chunks)

        data = {
            'content': self.pdf_content,
            'chunks': self.pdf_chunks,
            'embeddings': self.pdf_embeddings
        }
        with open(pickle_path, 'wb') as f:
            pickle.dump(data, f)

    def get_openai_embeddings(self, texts):
        """Generate embeddings using the OpenAI API (legacy openai<1.0 interface).

        Args:
            texts: list of strings to embed.

        Returns:
            A list of embedding vectors, one per input text, in input order.
        """
        response = openai.Embedding.create(
            model="text-embedding-ada-002",
            input=texts
        )
        return [embedding['embedding'] for embedding in response['data']]

    def _chunk_text(self, text, chunk_size=500, overlap=50):
        """Split *text* into overlapping word-based chunks.

        Args:
            text: source text to split.
            chunk_size: maximum words per chunk; must exceed *overlap*.
            overlap: number of words shared between consecutive chunks.

        Returns:
            List of space-joined chunk strings (empty list for empty text).

        Raises:
            ValueError: if chunk_size <= overlap (the original silently
                produced an empty or bogus result in that case).
        """
        if chunk_size <= overlap:
            raise ValueError("chunk_size must be greater than overlap")
        words = text.split()
        chunks = []
        for i in range(0, len(words), chunk_size - overlap):
            chunks.append(' '.join(words[i:i + chunk_size]))
        return chunks

    def process_audio_file(self, uploaded_file):
        """Transcribe an uploaded Streamlit audio file, converting if needed.

        Args:
            uploaded_file: Streamlit UploadedFile with a ``.name`` and
                ``.getbuffer()``.

        Returns:
            The transcribed text.
        """
        allowed_formats = ["flac", "mp3", "mp4", "mpeg", "mpga", "m4a", "ogg", "opus", "wav", "webm"]

        # Work inside a temporary directory so intermediate files are cleaned up.
        with tempfile.TemporaryDirectory() as temp_dir:
            original_extension = uploaded_file.name.split('.')[-1].lower()
            temp_file_path = os.path.join(temp_dir, f"audio_file.{original_extension}")

            # Persist the upload to disk so pydub / Groq can read it.
            with open(temp_file_path, 'wb') as f:
                f.write(uploaded_file.getbuffer())

            if original_extension not in allowed_formats:
                # Convert unsupported containers to WAV (requires ffmpeg).
                output_path = os.path.join(temp_dir, "converted_audio.wav")
                audio = AudioSegment.from_file(temp_file_path)
                audio.export(output_path, format="wav")
                final_path = output_path
            else:
                final_path = temp_file_path

            # Fix: the original opened final_path here only to ignore the
            # handle and re-open the same file inside audio_to_text();
            # open it exactly once, in audio_to_text().
            return self.audio_to_text(final_path)

    def audio_to_text(self, filepath):
        """Transcribe audio to English text via Groq's Whisper translation endpoint."""
        with open(filepath, "rb") as file:
            translation = self.groq_client.audio.translations.create(
                file=(filepath, file.read()),
                model=self.whisper_model,
            )
        return translation.text

    def _find_relevant_context(self, question, answer, top_k=3):
        """Return the *top_k* PDF chunks most similar to the Q/A pair.

        Args:
            question: question text.
            answer: user's answer text.
            top_k: number of chunks to return.

        Returns:
            The selected chunks joined into one space-separated string.
        """
        search_text = f"{question} {answer}"
        search_embedding = self.get_openai_embeddings([search_text])[0]
        similarities = cosine_similarity([search_embedding], self.pdf_embeddings)[0]
        top_indices = similarities.argsort()[-top_k:][::-1]  # most similar first
        return ' '.join(self.pdf_chunks[i] for i in top_indices)

    def evaluate_answer(self, question_num, user_answer):
        """Grade *user_answer* for question *question_num* against the PDF.

        Args:
            question_num: key into self.questions.
            user_answer: the (transcribed) answer text.

        Returns:
            Parsed JSON dict with correct/incorrect/missing points and an
            explanation, or {"error": ...} when the model reply is not
            valid JSON.

        Raises:
            ValueError: if *question_num* is not a known question.
        """
        if question_num not in self.questions:
            raise ValueError("Invalid question number")

        question = self.questions[question_num]
        relevant_context = self._find_relevant_context(question, user_answer)

        prompt = f"""
You are an expert evaluating answers about the stock market. Compare the answer with the reference material and provide detailed analysis.

Question: {question}

Reference Material:
{relevant_context}

Student Answer:
{user_answer}

Analyze this answer carefully and provide:
1. Points that are correct according to the reference material
2. Points that are incorrect or need clarification
3. Important points from the reference material that were missing

Provide your analysis in JSON format:
{{
    "correct_points": ["point1", "point2"],
    "incorrect_points": {{"incorrect_statement": "correction_based_on_reference"}},
    "missing_points": ["point1", "point2"],
    "explanation": "Brief explanation of the score"
}}
"""

        response = openai.ChatCompletion.create(
            model=self.llm_model,
            messages=[{"role": "user", "content": prompt}],
            temperature=0.3  # low temperature for consistent grading
        )

        try:
            return json.loads(response.choices[0].message.content)
        except json.JSONDecodeError:
            return {"error": "Could not parse response"}
176
+
177
def main():
    """Streamlit entry point: collect API keys, let the user pick a question,
    transcribe an uploaded audio answer, and display its evaluation."""
    st.set_page_config(page_title="Voice-based Stock Market Evaluator", layout="wide")

    st.title("Voice-based Stock Market Evaluator")

    # The sidebar holds the API credentials.
    st.sidebar.header("API Configuration")

    # Seed session-state slots so the key inputs persist across reruns.
    for slot in ('openai_key', 'groq_key'):
        if slot not in st.session_state:
            st.session_state[slot] = ''

    openai_key = st.sidebar.text_input(
        "OpenAI API Key",
        type="password",
        value=st.session_state['openai_key'],
        help="Enter your OpenAI API key to use the evaluation features"
    )
    groq_key = st.sidebar.text_input(
        "Groq API Key",
        type="password",
        value=st.session_state['groq_key'],
        help="Enter your Groq API key for audio transcription"
    )

    # Persist whatever the user typed.
    st.session_state['openai_key'] = openai_key
    st.session_state['groq_key'] = groq_key

    # Both keys are mandatory; bail out early otherwise.
    if not (openai_key and groq_key):
        st.warning("Please enter both API keys in the sidebar to use the application.")
        return

    try:
        # Build the evaluator from the supplied keys.
        grader = VoiceStockMarketEvaluator(openai_key, groq_key)

        st.write("Upload an audio file with your answer to get evaluated!")

        # Question picker.
        st.header("Available Questions")
        chosen_q = st.radio(
            "Select a question:",
            options=list(grader.questions.keys()),
            format_func=lambda q: f"Question {q}: {grader.questions[q]}"
        )

        # Accept any extension; unsupported formats are converted downstream.
        audio_file = st.file_uploader("Upload your audio response", type=None)

        if audio_file is not None:
            with st.spinner("Processing audio file..."):
                try:
                    # Transcribe the recording.
                    transcript = grader.process_audio_file(audio_file)

                    st.subheader("Transcribed Text")
                    st.write(transcript)

                    # Grade the transcript against the reference material.
                    with st.spinner("Analyzing your answer..."):
                        analysis = grader.evaluate_answer(chosen_q, transcript)

                    st.subheader("Analysis Results")

                    st.success("Correct Points:")
                    for item in analysis.get("correct_points", []):
                        st.write(f"✓ {item}")

                    st.error("Points Needing Correction:")
                    for wrong, fix in analysis.get("incorrect_points", {}).items():
                        st.write(f"✗ {wrong}")
                        st.write(f"Correction: {fix}")

                    st.warning("Missing Points:")
                    for item in analysis.get("missing_points", []):
                        st.write(f"• {item}")

                    st.info(f"Explanation: {analysis.get('explanation', 'No explanation provided')}")

                except Exception as e:
                    st.error(f"Error processing file: {str(e)}")

    except Exception as e:
        st.error(f"Error initializing the evaluator: {str(e)}")
271
+
272
# Script entry point: run the Streamlit app UI.
if __name__ == "__main__":
    main()