Spaces:
Runtime error
Runtime error
import io
import json
import os
import queue
import time
import wave

import numpy as np
import PyPDF2
import requests
import sounddevice as sd
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
# Hugging Face Inference API endpoints
HF_API_URL_STT = "https://api-inference.huggingface.co/models/openai/whisper-base"
HF_API_URL_CONVERSATION = "https://api-inference.huggingface.co/models/facebook/blenderbot-400M-distill"
# The API token is read from the HF_API_KEY environment variable; it is None
# when that variable is not set.
HF_API_KEY = os.getenv('HF_API_KEY')

# Recording parameters
silence_threshold = 0.01  # RMS level below which a chunk counts as silence
silence_duration = 2.0  # Seconds of continuous silence that end a recording
sample_rate = 16000  # Sample rate requested from the input stream

# Audio chunks pushed by audio_callback() and consumed by record_audio()
audio_queue = queue.Queue()

# Pre-trained sentence-embedding model used to vectorize resume and user text
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
# Parse PDF and create resume content | |
def parse_resume(pdf_path):
    """Extract the text of a PDF resume.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        A dict with the single key "Resume Content" mapped to the text of
        every page that yielded text, joined by newlines. An empty dict is
        returned when the file cannot be opened or parsed.
    """
    try:
        with open(pdf_path, 'rb') as f:
            reader = PyPDF2.PdfReader(f)
            # Extract each page exactly once instead of once for the filter
            # and a second time for the join.
            page_texts = [page.extract_text() for page in reader.pages]
    except Exception as e:
        # Deliberate best-effort: any read/parse failure is reported and the
        # caller continues without resume content.
        print(f"Error reading PDF: {e}")
        return {}
    text = "\n".join(page_text for page_text in page_texts if page_text)
    return {"Resume Content": text}
# Load vector database (resume content) | |
def load_resume(pdf_path):
    """Build the in-memory vector store for a PDF resume.

    Args:
        pdf_path: Path of the PDF resume, passed on to parse_resume().

    Returns:
        A dict mapping each section name to the value returned by
        ``embedding_model.encode`` for that section's text. Sections whose
        text is empty or whitespace-only are left out, so the result is an
        empty dict when no usable text was extracted.
    """
    resume_content = parse_resume(pdf_path)
    return {
        section: embedding_model.encode(content)
        for section, content in resume_content.items()
        # A blank section carries no information to match answers against;
        # embedding it would make every answer "match" an empty resume.
        if content and content.strip()
    }
def get_relevant_question(user_input, resume_embeddings):
    """Build a follow-up question about the resume section closest to the answer.

    Args:
        user_input: Text of the candidate's latest answer.
        resume_embeddings: Mapping of section name to that section's
            embedding, as produced by load_resume().

    Returns:
        A question naming the section whose embedding has the highest cosine
        similarity to the embedding of ``user_input``. When
        ``resume_embeddings`` is empty or None, a generic follow-up question
        is returned instead.
    """
    if not resume_embeddings:
        # max() over an empty mapping raises ValueError, so answer with a
        # generic prompt when there is nothing to compare against.
        return "Could you elaborate on that?"
    user_embedding = embedding_model.encode(user_input)
    similarities = {
        section: cosine_similarity([user_embedding], [embedding])[0][0]
        for section, embedding in resume_embeddings.items()
    }
    most_relevant_section = max(similarities, key=similarities.get)
    return f"Based on your experience in {most_relevant_section}, can you elaborate more?"
def audio_callback(indata, frames, time, status):
    """Push each incoming audio chunk onto ``audio_queue``.

    Used as the ``callback`` of the ``sd.InputStream`` opened in
    record_audio(). ``frames`` and ``time`` are accepted to satisfy the
    callback signature but are not used.
    """
    if status:
        print(f"Audio error: {status}")
    # Queue a copy rather than the array handed to the callback.
    chunk = indata.copy()
    audio_queue.put(chunk)
def record_audio():
    """Record audio until a pause in speech or a keyboard interrupt.

    Opens an ``sd.InputStream`` and collects the chunks that
    audio_callback() queues. Recording stops once the RMS level of the
    incoming chunks has stayed below ``silence_threshold`` for more than
    ``silence_duration`` seconds, or when the user presses Ctrl+C.

    Returns:
        A NumPy array holding all recorded chunks concatenated along axis 0,
        or an empty array when no chunk was captured.
    """
    print("Recording. Speak now!")
    # Drop chunks left in the queue by a previous recording (queued between
    # its loop ending and its stream closing) so they do not leak into this
    # one.
    while True:
        try:
            audio_queue.get_nowait()
        except queue.Empty:
            break
    buffer = []
    silence_start = None
    with sd.InputStream(samplerate=sample_rate, channels=1, callback=audio_callback):
        while True:
            try:
                # Fetch the next chunk (blocks until the callback delivers one)
                data = audio_queue.get()
                buffer.append(data)
                # Check for silence
                rms = np.sqrt(np.mean(data**2))
                if rms < silence_threshold:
                    if silence_start is None:
                        # Monotonic clock: the measured pause is unaffected
                        # by system clock adjustments.
                        silence_start = time.monotonic()
                    elif time.monotonic() - silence_start > silence_duration:
                        print("Silence detected. Stopping recording.")
                        break
                else:
                    silence_start = None
            except KeyboardInterrupt:
                print("Recording stopped by user.")
                break
    if not buffer:
        # np.concatenate raises ValueError on an empty list; this happens
        # when recording is interrupted before the first chunk arrives.
        # NOTE(review): float32 with one channel is assumed to match the
        # stream's chunk layout - confirm against the stream settings.
        return np.empty((0, 1), dtype=np.float32)
    return np.concatenate(buffer, axis=0)
def _encode_wav(audio_data, rate):
    """Serialize audio samples as the bytes of a 16-bit PCM WAV file."""
    samples = np.asarray(audio_data)
    channels = 1 if samples.ndim < 2 else samples.shape[1]
    if np.issubdtype(samples.dtype, np.floating):
        # NOTE(review): float samples are assumed to lie in [-1.0, 1.0];
        # confirm against the recording stream's dtype.
        samples = np.clip(samples, -1.0, 1.0) * 32767.0
    pcm = samples.astype('<i2')
    with io.BytesIO() as wav_buffer:
        with wave.open(wav_buffer, 'wb') as wav_file:
            wav_file.setnchannels(channels)
            wav_file.setsampwidth(2)  # 2 bytes per sample = 16-bit PCM
            wav_file.setframerate(rate)
            wav_file.writeframes(pcm.tobytes())
        # Read the bytes after the WAV writer has closed and finalized the
        # header, but before the buffer itself is closed.
        return wav_buffer.getvalue()


def transcribe_audio(audio_data):
    """Transcribe recorded audio to text using the Hugging Face Whisper API.

    The samples are wrapped in a WAV container before upload: the endpoint
    receives an audio file, and headerless sample bytes carry no sample
    rate, width or channel information for it to decode.

    Args:
        audio_data: NumPy array of recorded samples, as returned by
            record_audio().

    Returns:
        The "text" field of the JSON response, or an empty string when there
        is no audio to send or the API answers with a non-200 status.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    print("Transcribing audio...")
    if audio_data is None or np.size(audio_data) == 0:
        # Nothing was recorded, so there is nothing to upload.
        return ""
    headers = {
        "Authorization": f"Bearer {HF_API_KEY}",
        "Content-Type": "audio/wav",
    }
    response = requests.post(
        HF_API_URL_STT,
        headers=headers,
        data=_encode_wav(audio_data, sample_rate),
        timeout=60,  # Do not hang forever on an unresponsive endpoint
    )
    if response.status_code == 200:
        return response.json().get("text", "")
    print(f"Error: {response.status_code} {response.text}")
    return ""
def generate_question(response, resume_embeddings):
    """Produce the interviewer's next question from the candidate's answer.

    Args:
        response: Text of the candidate's latest answer.
        resume_embeddings: Mapping of resume section name to embedding, or a
            falsy value (None / empty dict) when no resume is loaded.

    Returns:
        The question from get_relevant_question() when resume embeddings are
        available. Otherwise the "generated_text" of the conversation API
        reply, a generic follow-up when that field is missing, or an apology
        string when the API answers with a non-200 status.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    if resume_embeddings:
        return get_relevant_question(response, resume_embeddings)
    print("Generating a question...")
    headers = {"Authorization": f"Bearer {HF_API_KEY}"}
    payload = {"inputs": {"past_user_inputs": [""], "generated_responses": [""], "text": response}}
    # Keep the HTTP reply under its own name instead of rebinding the
    # ``response`` parameter, which holds the candidate's answer.
    api_response = requests.post(
        HF_API_URL_CONVERSATION,
        headers=headers,
        json=payload,
        timeout=60,  # Do not hang forever on an unresponsive endpoint
    )
    if api_response.status_code != 200:
        print(f"Error: {api_response.status_code} {api_response.text}")
        return "Sorry, I couldn't generate a question."
    result = api_response.json()
    # NOTE(review): the reply may be a list of result objects rather than a
    # single object; unwrap the first entry so .get() is always called on a
    # dict - confirm the exact shape against the API documentation.
    if isinstance(result, list):
        result = result[0] if result else {}
    if not isinstance(result, dict):
        return "Could you elaborate on that?"
    return result.get("generated_text", "Could you elaborate on that?")
def main():
    """Run the interactive mock-interview loop.

    Loads embeddings for ``resume.pdf`` when that file exists, then
    repeatedly records an answer, transcribes it and prints the next
    question. The loop ends on Ctrl+C or on the first unexpected error.
    """
    print("Mock Interview System Initialized")
    # Load the resume embeddings
    pdf_path = "resume.pdf"  # Replace with the path to your PDF resume file
    if os.path.exists(pdf_path):
        print("Loading resume...")
        resume_embeddings = load_resume(pdf_path)
    else:
        print("Resume file not found. Proceeding without it.")
        resume_embeddings = None
    while True:
        try:
            # Record audio
            audio_data = record_audio()
            # Transcribe to text
            response = transcribe_audio(audio_data)
            print(f"You said: {response}")
            # Generate and ask the next question
            question = generate_question(response, resume_embeddings)
            print(f"Interview AI: {question}")
        except KeyboardInterrupt:
            # KeyboardInterrupt is not an Exception subclass; without this
            # handler Ctrl+C outside the recording loop ends in a traceback.
            print("Interview ended by user.")
            break
        except Exception as e:
            # Top-level boundary: report the failure and stop the session.
            print(f"Error: {e}")
            break
# Start the interview loop only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()