# speech-to-text / app.py
# Amarsaish's picture
# Rename streamlit_merged.py to app.py
# f9e7655 verified
import streamlit as st
import os
import openai
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
from PyPDF2 import PdfReader
import json
import pickle
from pathlib import Path
from pydub import AudioSegment
from groq import Groq
from typing import List, Dict
import tempfile
# Optional (Windows): uncomment and adjust the path below to point pydub at a local ffmpeg binary.
# ffmpeg_path = r"C:\Users\AMAR\Downloads\ffmpeg-7.0.2-essentials_build\ffmpeg-7.0.2-essentials_build\bin\ffmpeg.exe"
# os.environ["PATH"] += os.pathsep + os.path.dirname(ffmpeg_path)
# AudioSegment.converter = ffmpeg_path
class VoiceStockMarketEvaluator:
    """Evaluate spoken answers to stock-market questions against a reference PDF.

    The evaluator turns an uploaded audio file into text through Groq, looks up
    the most similar chunks of the reference PDF using OpenAI embeddings, and
    asks an OpenAI chat model to compare the answer with that reference text.

    Attributes set by ``__init__``:
        openai_api_key / groq_api_key: the credentials passed in.
        pdf_path: path of the reference PDF.
        llm_model / whisper_model: model names used for evaluation and audio.
        groq_client: ``Groq`` client used for the audio request.
        questions: mapping of question number to question text.
        pdf_content / pdf_chunks / pdf_embeddings: reference text, its word
            chunks and one embedding per chunk (filled by ``load_embeddings``).
    """

    def __init__(self, openai_api_key, groq_api_key):
        """Store the API keys, configure both clients and load the embeddings.

        Note that this sets the module-level ``openai.api_key`` and, through
        ``load_embeddings``, may read the PDF and call the embeddings API.
        """
        # OpenAI configuration
        self.openai_api_key = openai_api_key
        self.pdf_path = "STOCK1.pdf"  # Update this to your PDF path in Streamlit
        openai.api_key = self.openai_api_key
        self.llm_model = "gpt-3.5-turbo"

        # Groq configuration for audio transcription
        self.groq_api_key = groq_api_key
        self.groq_client = Groq(api_key=self.groq_api_key)
        self.whisper_model = 'whisper-large-v3'

        # Questions
        self.questions = {
            1: "What are different major investors?",
            2: "Who are major traders?",
            3: "who are participants of stock markets?"
        }

        # Load embeddings
        self.load_embeddings()

    def load_embeddings(self):
        """Load cached embeddings from the pickle file, or build them.

        The cache is looked up only by its fixed file name, so a changed PDF
        is not detected; delete the pickle to force a rebuild. A cache file
        that cannot be unpickled or lacks the expected keys is rebuilt
        instead of failing on every start.
        """
        pickle_path = Path('stock_market_embeddings.pkl')
        if pickle_path.exists():
            try:
                with open(pickle_path, 'rb') as f:
                    # SECURITY: pickle.load can execute arbitrary code. This is
                    # only acceptable because the file is a cache written by
                    # create_and_save_embeddings; never point it at untrusted data.
                    data = pickle.load(f)
                content = data['content']
                chunks = data['chunks']
                embeddings = data['embeddings']
            except (pickle.UnpicklingError, EOFError, KeyError, TypeError):
                # Truncated or malformed cache: fall through and regenerate it.
                pass
            else:
                self.pdf_content = content
                self.pdf_chunks = chunks
                self.pdf_embeddings = embeddings
                return
        self.create_and_save_embeddings(pickle_path)

    def create_and_save_embeddings(self, pickle_path: Path):
        """Extract the PDF text, embed its chunks and cache everything.

        Args:
            pickle_path: destination of the pickle cache.

        Raises:
            ValueError: if no text could be extracted from the PDF, so that an
                empty (useless) index is never cached.
        """
        reader = PdfReader(self.pdf_path)
        # `or ""` guards against pages for which no text is returned.
        self.pdf_content = "".join(
            page.extract_text() or "" for page in reader.pages
        )
        self.pdf_chunks = self._chunk_text(self.pdf_content)
        if not self.pdf_chunks:
            raise ValueError(f"No extractable text found in {self.pdf_path}")
        self.pdf_embeddings = self.get_openai_embeddings(self.pdf_chunks)

        data = {
            'content': self.pdf_content,
            'chunks': self.pdf_chunks,
            'embeddings': self.pdf_embeddings
        }
        with open(pickle_path, 'wb') as f:
            pickle.dump(data, f)

    def get_openai_embeddings(self, texts: List[str]):
        """Return one embedding per entry of ``texts`` using the OpenAI API.

        All texts are sent in a single ``openai.Embedding.create`` request.
        """
        response = openai.Embedding.create(
            model="text-embedding-ada-002",
            input=texts
        )
        return [embedding['embedding'] for embedding in response['data']]

    def _chunk_text(self, text: str, chunk_size: int = 500, overlap: int = 50) -> List[str]:
        """Split ``text`` into chunks of whitespace-separated words.

        Consecutive chunks share ``overlap`` words. Splitting stops as soon as
        a chunk reaches the last word, so no trailing chunk is a mere suffix
        of the previous one.

        Args:
            text: text to split.
            chunk_size: maximum number of words per chunk (must be positive).
            overlap: words shared by neighbouring chunks
                (``0 <= overlap < chunk_size``).

        Returns:
            The list of chunks; empty when ``text`` contains no words.

        Raises:
            ValueError: if ``chunk_size`` or ``overlap`` is out of range.
        """
        if chunk_size <= 0:
            raise ValueError("chunk_size must be positive")
        if not 0 <= overlap < chunk_size:
            raise ValueError("overlap must be non-negative and smaller than chunk_size")

        words = text.split()
        step = chunk_size - overlap
        chunks = []
        for start in range(0, len(words), step):
            chunks.append(' '.join(words[start:start + chunk_size]))
            if start + chunk_size >= len(words):
                # This chunk already contains the final word.
                break
        return chunks

    def process_audio_file(self, uploaded_file):
        """Save an uploaded audio file to a temp directory and return its text.

        Files whose extension is not in the accepted list are first converted
        to WAV with pydub.

        Args:
            uploaded_file: object exposing ``name`` and ``getbuffer()`` (as
                returned by Streamlit's file uploader).

        Returns:
            The text produced by ``audio_to_text``.
        """
        allowed_formats = {"flac", "mp3", "mp4", "mpeg", "mpga", "m4a", "ogg", "opus", "wav", "webm"}

        # Only the suffix of the final path component is used, so a name
        # without a dot yields an empty extension instead of the whole name.
        extension = Path(uploaded_file.name).suffix.lower().lstrip('.')
        temp_name = f"audio_file.{extension}" if extension else "audio_file"

        with tempfile.TemporaryDirectory() as temp_dir:
            temp_file_path = os.path.join(temp_dir, temp_name)

            # Save the uploaded file
            with open(temp_file_path, 'wb') as f:
                f.write(uploaded_file.getbuffer())

            if extension in allowed_formats:
                final_path = temp_file_path
            else:
                # Unsupported format: convert to WAV
                final_path = os.path.join(temp_dir, "converted_audio.wav")
                audio = AudioSegment.from_file(temp_file_path)
                # export() hands back the open output file; close it so the
                # temporary directory can be removed cleanly.
                exported = audio.export(final_path, format="wav")
                exported.close()

            # Must run inside the with-block: the directory (and the audio
            # file) is deleted as soon as the block exits.
            return self.audio_to_text(final_path)

    def audio_to_text(self, filepath):
        """Send the audio file at ``filepath`` to Groq and return the text.

        NOTE(review): this calls the *translations* endpoint rather than the
        transcriptions one, so the result is presumably English text whatever
        the spoken language — confirm that this is intended.
        """
        with open(filepath, "rb") as file:
            translation = self.groq_client.audio.translations.create(
                file=(filepath, file.read()),
                model=self.whisper_model,
            )
        return translation.text

    def _find_relevant_context(self, question: str, answer: str, top_k: int = 3) -> str:
        """Return the ``top_k`` PDF chunks most similar to the question and answer.

        The question and answer are embedded together and compared to every
        chunk embedding by cosine similarity; the best chunks are joined with
        spaces, most similar first. A non-positive ``top_k`` yields ``''``.
        """
        if top_k <= 0:
            # A slice of [-0:] would otherwise select every chunk.
            return ''
        search_text = f"{question} {answer}"
        search_embedding = self.get_openai_embeddings([search_text])[0]
        similarities = cosine_similarity([search_embedding], self.pdf_embeddings)[0]
        top_indices = similarities.argsort()[-top_k:][::-1]
        return ' '.join(self.pdf_chunks[i] for i in top_indices)

    @staticmethod
    def _parse_json_response(raw_text) -> Dict:
        """Parse the model reply into a dict, tolerating text around the JSON.

        Chat models often wrap JSON in Markdown fences or add a sentence
        before it, so when the reply is not valid JSON as a whole, the span
        from the first ``{`` to the last ``}`` is tried as well.

        Returns:
            The parsed object, or ``{"error": "Could not parse response"}``
            when no JSON object can be recovered.
        """
        failure = {"error": "Could not parse response"}
        text = (raw_text or "").strip()
        try:
            parsed = json.loads(text)
        except json.JSONDecodeError:
            start = text.find("{")
            end = text.rfind("}")
            if start == -1 or end <= start:
                return failure
            try:
                parsed = json.loads(text[start:end + 1])
            except json.JSONDecodeError:
                return failure
        # Callers read the result with .get(), so only a JSON object is usable.
        if not isinstance(parsed, dict):
            return failure
        return parsed

    def evaluate_answer(self, question_num, user_answer) -> Dict:
        """Evaluate ``user_answer`` for the question numbered ``question_num``.

        Args:
            question_num: key into ``self.questions``.
            user_answer: the (transcribed) answer text.

        Returns:
            A dict with ``correct_points``, ``incorrect_points``,
            ``missing_points`` and ``explanation`` as produced by the model,
            or ``{"error": "Could not parse response"}`` if the reply is not
            a JSON object.

        Raises:
            ValueError: if ``question_num`` is not a known question.
        """
        if question_num not in self.questions:
            raise ValueError("Invalid question number")

        question = self.questions[question_num]
        relevant_context = self._find_relevant_context(question, user_answer)

        prompt = (
            "\n"
            "You are an expert evaluating answers about the stock market. Compare the answer with the reference material and provide detailed analysis.\n"
            f"Question: {question}\n"
            "Reference Material:\n"
            f"{relevant_context}\n"
            "Student Answer:\n"
            f"{user_answer}\n"
            "Analyze this answer carefully and provide:\n"
            "1. Points that are correct according to the reference material\n"
            "2. Points that are incorrect or need clarification\n"
            "3. Important points from the reference material that were missing\n"
            "Provide your analysis in JSON format:\n"
            "{\n"
            '"correct_points": ["point1", "point2"],\n'
            '"incorrect_points": {"incorrect_statement": "correction_based_on_reference"},\n'
            '"missing_points": ["point1", "point2"],\n'
            '"explanation": "Brief explanation of the score"\n'
            "}\n"
        )

        response = openai.ChatCompletion.create(
            model=self.llm_model,
            messages=[{"role": "user", "content": prompt}],
            temperature=0.3
        )
        return self._parse_json_response(response.choices[0].message.content)
def _render_results(result):
    """Render the dictionary returned by ``evaluate_answer`` on the page."""
    st.subheader("Analysis Results")

    # A failed parse is reported as {"error": ...}; show it instead of a set
    # of empty result sections. Anything that is not a dict cannot be read.
    if not isinstance(result, dict):
        st.error("Could not analyze the answer: unexpected response format")
        return
    if "error" in result:
        st.error(f"Could not analyze the answer: {result['error']}")
        return

    # Correct points
    st.success("Correct Points:")
    for point in result.get("correct_points") or []:
        st.write(f"✓ {point}")

    # Incorrect points: normally {statement: correction}, but the model may
    # also return a plain list of statements.
    st.error("Points Needing Correction:")
    incorrect_points = result.get("incorrect_points") or {}
    if isinstance(incorrect_points, dict):
        for statement, correction in incorrect_points.items():
            st.write(f"✗ {statement}")
            st.write(f"Correction: {correction}")
    elif isinstance(incorrect_points, (list, tuple)):
        for statement in incorrect_points:
            st.write(f"✗ {statement}")
    else:
        st.write(f"✗ {incorrect_points}")

    # Missing points
    st.warning("Missing Points:")
    for point in result.get("missing_points") or []:
        st.write(f"• {point}")

    # Score and explanation
    st.info(f"Explanation: {result.get('explanation', 'No explanation provided')}")


def main():
    """Run the Streamlit page: collect API keys, take an audio answer, show the analysis."""
    st.set_page_config(page_title="Voice-based Stock Market Evaluator", layout="wide")
    st.title("Voice-based Stock Market Evaluator")

    # Create sidebar for API keys
    st.sidebar.header("API Configuration")

    # Use session state to persist API keys across reruns
    if 'openai_key' not in st.session_state:
        st.session_state['openai_key'] = ''
    if 'groq_key' not in st.session_state:
        st.session_state['groq_key'] = ''

    # API key inputs in sidebar
    openai_key = st.sidebar.text_input(
        "OpenAI API Key",
        type="password",
        value=st.session_state['openai_key'],
        help="Enter your OpenAI API key to use the evaluation features"
    )
    groq_key = st.sidebar.text_input(
        "Groq API Key",
        type="password",
        value=st.session_state['groq_key'],
        help="Enter your Groq API key for audio transcription"
    )

    # Update session state
    st.session_state['openai_key'] = openai_key
    st.session_state['groq_key'] = groq_key

    # Check if API keys are provided
    if not openai_key or not groq_key:
        st.warning("Please enter both API keys in the sidebar to use the application.")
        return

    # Only the construction is guarded here, so that the "initializing"
    # message is shown for initialization failures and nothing else.
    try:
        evaluator = VoiceStockMarketEvaluator(openai_key, groq_key)
    except Exception as e:
        st.error(f"Error initializing the evaluator: {str(e)}")
        return

    st.write("Upload an audio file with your answer to get evaluated!")

    # Display questions
    st.header("Available Questions")
    question_num = st.radio(
        "Select a question:",
        options=list(evaluator.questions.keys()),
        format_func=lambda x: f"Question {x}: {evaluator.questions[x]}"
    )

    # File uploader
    uploaded_file = st.file_uploader("Upload your audio response", type=None)  # Accept all file types
    if uploaded_file is None:
        return

    try:
        # Process audio and get transcription
        with st.spinner("Processing audio file..."):
            transcribed_text = evaluator.process_audio_file(uploaded_file)

        st.subheader("Transcribed Text")
        st.write(transcribed_text)

        # Nothing to evaluate: avoid sending an empty answer to the model.
        if not transcribed_text or not transcribed_text.strip():
            st.warning("No speech could be transcribed from the uploaded file.")
            return

        # Evaluate answer
        with st.spinner("Analyzing your answer..."):
            result = evaluator.evaluate_answer(question_num, transcribed_text)

        _render_results(result)
    except Exception as e:
        st.error(f"Error processing file: {str(e)}")
# Start the Streamlit page only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()