import streamlit as st
import os
import openai
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
from PyPDF2 import PdfReader
import json
import pickle
from pathlib import Path
from pydub import AudioSegment
from groq import Groq
from typing import List, Dict
import tempfile
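
# Assumed third-party dependencies, inferred from the imports above: streamlit,
# openai (<1.0, i.e. the legacy Embedding/ChatCompletion API), scikit-learn,
# numpy, PyPDF2, pydub, and groq. Assuming this file is saved as app.py, the
# app is launched with `streamlit run app.py`.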

# ffmpeg_path = r"C:\Users\AMAR\Downloads\ffmpeg-7.0.2-essentials_build\ffmpeg-7.0.2-essentials_build\bin\ffmpeg.exe"
# os.environ["PATH"] += os.pathsep + os.path.dirname(ffmpeg_path)
# AudioSegment.converter = ffmpeg_path
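# Note: pydub needs an ffmpeg binary for the audio conversion done below. The
# commented-out lines above show how to point it at a local Windows build; on a
# hosted deployment ffmpeg is assumed to be installed system-wide instead.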


class VoiceStockMarketEvaluator:
    def __init__(self, openai_api_key, groq_api_key):
        # OpenAI configuration
        self.openai_api_key = openai_api_key
        self.pdf_path = "STOCK1.pdf"  # Update this to your PDF path in Streamlit
        openai.api_key = self.openai_api_key
        self.llm_model = "gpt-3.5-turbo"

        # Groq configuration for audio transcription
        self.groq_api_key = groq_api_key
        self.groq_client = Groq(api_key=self.groq_api_key)
        self.whisper_model = 'whisper-large-v3'

        # Questions
        self.questions = {
            1: "What are the different major investors?",
            2: "Who are the major traders?",
            3: "Who are the participants of stock markets?"
        }

        # Load embeddings
        self.load_embeddings()

    def load_embeddings(self):
        """Load embeddings from pickle file or create if not exists"""
        pickle_path = Path('stock_market_embeddings.pkl')
        if pickle_path.exists():
            with open(pickle_path, 'rb') as f:
                data = pickle.load(f)
                self.pdf_content = data['content']
                self.pdf_chunks = data['chunks']
                self.pdf_embeddings = data['embeddings']
        else:
            self.create_and_save_embeddings(pickle_path)
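    # Note: because of the pickle cache above, a changed or replaced PDF will not
    # be re-embedded until stock_market_embeddings.pkl is deleted.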

    def create_and_save_embeddings(self, pickle_path):
        """Create embeddings and save to pickle file"""
        reader = PdfReader(self.pdf_path)
        self.pdf_content = ""
        for page in reader.pages:
            self.pdf_content += page.extract_text()

        self.pdf_chunks = self._chunk_text(self.pdf_content)
        self.pdf_embeddings = self.get_openai_embeddings(self.pdf_chunks)

        data = {
            'content': self.pdf_content,
            'chunks': self.pdf_chunks,
            'embeddings': self.pdf_embeddings
        }
        with open(pickle_path, 'wb') as f:
            pickle.dump(data, f)

    def get_openai_embeddings(self, texts):
        """Generate embeddings using OpenAI API"""
        response = openai.Embedding.create(
            model="text-embedding-ada-002",
            input=texts
        )
        return [embedding['embedding'] for embedding in response['data']]
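    # Note on get_openai_embeddings above: openai.Embedding.create and
    # openai.ChatCompletion.create belong to the legacy pre-1.0 openai SDK (both
    # were removed in openai>=1.0), so this module assumes an older pinned
    # release. Very large chunk lists may also need to be sent in batches, since
    # the embeddings endpoint caps the number of inputs per request.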

    def _chunk_text(self, text, chunk_size=500, overlap=50):
        """Split text into overlapping chunks"""
        words = text.split()
        chunks = []
        for i in range(0, len(words), chunk_size - overlap):
            chunk = ' '.join(words[i:i + chunk_size])
            chunks.append(chunk)
        return chunks
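    # Note on _chunk_text above: with the default chunk_size=500 and overlap=50,
    # the window advances 450 words per step, so consecutive chunks share about
    # 50 words of context.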

    def process_audio_file(self, uploaded_file):
        """Process uploaded audio file and convert if necessary"""
        allowed_formats = ["flac", "mp3", "mp4", "mpeg", "mpga", "m4a", "ogg", "opus", "wav", "webm"]

        # Create a temporary directory to store the processed file
        with tempfile.TemporaryDirectory() as temp_dir:
            # Get the original file extension
            original_extension = uploaded_file.name.split('.')[-1].lower()
            temp_file_path = os.path.join(temp_dir, f"audio_file.{original_extension}")

            # Save the uploaded file
            with open(temp_file_path, 'wb') as f:
                f.write(uploaded_file.getbuffer())

            # If the format is not supported, convert to WAV
            if original_extension not in allowed_formats:
                output_path = os.path.join(temp_dir, "converted_audio.wav")
                audio = AudioSegment.from_file(temp_file_path)
                audio.export(output_path, format="wav")
                final_path = output_path
            else:
                final_path = temp_file_path

            # Transcribe while the temporary directory (and file) still exists
            return self.audio_to_text(final_path)

    def audio_to_text(self, filepath):
        """Transcribe audio to text using Groq"""
        with open(filepath, "rb") as file:
            translation = self.groq_client.audio.translations.create(
                file=(filepath, file.read()),
                model=self.whisper_model,
            )
        return translation.text
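    # Note: audio.translations returns English text regardless of the spoken
    # language. If a transcript in the original language is wanted instead, the
    # Groq client also exposes audio.transcriptions.create with the same file
    # and model arguments (assumption based on the Groq SDK documentation).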

    def _find_relevant_context(self, question, answer, top_k=3):
        """Find relevant PDF chunks for the question-answer pair"""
        search_text = f"{question} {answer}"
        search_embedding = self.get_openai_embeddings([search_text])[0]
        similarities = cosine_similarity([search_embedding], self.pdf_embeddings)[0]
        top_indices = similarities.argsort()[-top_k:][::-1]
        return ' '.join(self.pdf_chunks[i] for i in top_indices)
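    # Note on the retrieval above: argsort orders similarities ascending, so the
    # last top_k indices (reversed) correspond to the chunks most similar to the
    # combined question + answer text.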

    def evaluate_answer(self, question_num, user_answer):
        """Evaluate user answer for a given question number"""
        if question_num not in self.questions:
            raise ValueError("Invalid question number")

        question = self.questions[question_num]
        relevant_context = self._find_relevant_context(question, user_answer)

        prompt = f"""
        You are an expert evaluating answers about the stock market. Compare the answer with the reference material and provide a detailed analysis.

        Question: {question}

        Reference Material:
        {relevant_context}

        Student Answer:
        {user_answer}

        Analyze this answer carefully and provide:
        1. Points that are correct according to the reference material
        2. Points that are incorrect or need clarification
        3. Important points from the reference material that were missing

        Provide your analysis in JSON format:
        {{
            "correct_points": ["point1", "point2"],
            "incorrect_points": {{"incorrect_statement": "correction_based_on_reference"}},
            "missing_points": ["point1", "point2"],
            "explanation": "Brief explanation of the score"
        }}
        """

        response = openai.ChatCompletion.create(
            model=self.llm_model,
            messages=[{"role": "user", "content": prompt}],
            temperature=0.3
        )

        try:
            return json.loads(response.choices[0].message.content)
        except json.JSONDecodeError:
            return {"error": "Could not parse response"}


def main():
    st.set_page_config(page_title="Voice-based Stock Market Evaluator", layout="wide")
    st.title("Voice-based Stock Market Evaluator")

    # Create sidebar for API keys
    st.sidebar.header("API Configuration")

    # Use session state to persist API keys
    if 'openai_key' not in st.session_state:
        st.session_state['openai_key'] = ''
    if 'groq_key' not in st.session_state:
        st.session_state['groq_key'] = ''

    # API key inputs in sidebar
    openai_key = st.sidebar.text_input(
        "OpenAI API Key",
        type="password",
        value=st.session_state['openai_key'],
        help="Enter your OpenAI API key to use the evaluation features"
    )
    groq_key = st.sidebar.text_input(
        "Groq API Key",
        type="password",
        value=st.session_state['groq_key'],
        help="Enter your Groq API key for audio transcription"
    )

    # Update session state
    st.session_state['openai_key'] = openai_key
    st.session_state['groq_key'] = groq_key

    # Check if API keys are provided
    if not openai_key or not groq_key:
        st.warning("Please enter both API keys in the sidebar to use the application.")
        return

    try:
        # Initialize the evaluator with API keys
        evaluator = VoiceStockMarketEvaluator(openai_key, groq_key)

        st.write("Upload an audio file with your answer to get evaluated!")

        # Display questions
        st.header("Available Questions")
        question_num = st.radio(
            "Select a question:",
            options=list(evaluator.questions.keys()),
            format_func=lambda x: f"Question {x}: {evaluator.questions[x]}"
        )

        # File uploader (accept all file types; unsupported formats are converted)
        uploaded_file = st.file_uploader("Upload your audio response", type=None)

        if uploaded_file is not None:
            with st.spinner("Processing audio file..."):
                try:
                    # Process audio and get transcription
                    transcribed_text = evaluator.process_audio_file(uploaded_file)
                    st.subheader("Transcribed Text")
                    st.write(transcribed_text)

                    # Evaluate answer
                    with st.spinner("Analyzing your answer..."):
                        result = evaluator.evaluate_answer(question_num, transcribed_text)

                    # Display results
                    st.subheader("Analysis Results")

                    # Correct points
                    st.success("Correct Points:")
                    for point in result.get("correct_points", []):
                        st.write(f"✓ {point}")

                    # Incorrect points
                    st.error("Points Needing Correction:")
                    for statement, correction in result.get("incorrect_points", {}).items():
                        st.write(f"✗ {statement}")
                        st.write(f"Correction: {correction}")

                    # Missing points
                    st.warning("Missing Points:")
                    for point in result.get("missing_points", []):
                        st.write(f"• {point}")

                    # Explanation
                    st.info(f"Explanation: {result.get('explanation', 'No explanation provided')}")
                except Exception as e:
                    st.error(f"Error processing file: {str(e)}")
    except Exception as e:
        st.error(f"Error initializing the evaluator: {str(e)}")


if __name__ == "__main__":
    main()