Spaces:
Runtime error
Runtime error
File size: 5,531 Bytes
65fceff a9d5b3e 65fceff |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 |
import io
import json
import os
import queue
import time
import wave

import numpy as np
import PyPDF2
import requests
import sounddevice as sd
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
# Hugging Face Inference API endpoints: speech-to-text (Whisper) and
# conversational text generation (BlenderBot).
HF_API_URL_STT = "https://api-inference.huggingface.co/models/openai/whisper-base"
HF_API_URL_CONVERSATION = "https://api-inference.huggingface.co/models/facebook/blenderbot-400M-distill"
# API token, read from the HF_API_KEY environment variable (None when unset).
# It is sent as a Bearer token with every API request.
HF_API_KEY = os.getenv('HF_API_KEY')

# Recording parameters
# RMS level below which an audio chunk is treated as silence.
silence_threshold = 0.01
# Seconds of continuous silence after which recording stops.
silence_duration = 2.0
# Sample rate (Hz) requested from the input stream.
sample_rate = 16000

# Chunks captured by audio_callback, consumed by record_audio.
audio_queue = queue.Queue()

# Sentence-embedding model used to compare user answers with resume sections.
# NOTE: constructed at import time.
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
# Parse PDF and create resume content
def parse_resume(pdf_path):
    """Extract the text of a PDF resume.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        A dict with the single key ``"Resume Content"`` mapped to the text of
        all pages joined by newlines. An empty dict is returned when the file
        cannot be read or yields no extractable text, so callers can treat
        "no usable resume" uniformly with a falsy check.
    """
    try:
        with open(pdf_path, 'rb') as f:
            reader = PyPDF2.PdfReader(f)
            # Extract every page exactly once; extraction is the expensive
            # step, so the result is filtered afterwards instead of calling
            # extract_text() a second time per page.
            page_texts = [page.extract_text() for page in reader.pages]
    except Exception as e:
        # Best-effort boundary: any read/parse failure means "no resume".
        print(f"Error reading PDF: {e}")
        return {}

    text = "\n".join(page_text for page_text in page_texts if page_text)
    if not text.strip():
        # E.g. a scanned, image-only PDF: embedding an empty string would
        # produce a meaningless resume section.
        print("Error reading PDF: no extractable text found")
        return {}
    return {"Resume Content": text}
# Load vector database (resume content)
def load_resume(pdf_path):
    """Build the embedding lookup for a PDF resume.

    Parses the PDF with ``parse_resume`` and encodes each returned section
    with ``embedding_model``.

    Args:
        pdf_path: Path of the PDF resume.

    Returns:
        A dict mapping section name to its embedding; empty when
        ``parse_resume`` returns no sections.
    """
    embeddings_by_section = {}
    for section_name, section_text in parse_resume(pdf_path).items():
        embeddings_by_section[section_name] = embedding_model.encode(section_text)
    return embeddings_by_section
def get_relevant_question(user_input, resume_embeddings):
    """Return a follow-up question about the best-matching resume section.

    The user's text is embedded with ``embedding_model`` and compared, by
    cosine similarity, with every section embedding; the section with the
    highest score is named in the returned question.

    Args:
        user_input: Text of the user's answer.
        resume_embeddings: Dict mapping section name to its embedding.

    Returns:
        The follow-up question string.

    Raises:
        ValueError: If ``resume_embeddings`` is empty (``max`` of nothing).
    """
    query_vector = embedding_model.encode(user_input)

    def similarity_to(section_name):
        # cosine_similarity takes 2-D inputs and returns a 1x1 matrix here.
        section_vector = resume_embeddings[section_name]
        return cosine_similarity([query_vector], [section_vector])[0][0]

    best_section = max(resume_embeddings, key=similarity_to)
    return f"Based on your experience in {best_section}, can you elaborate more?"
def audio_callback(indata, frames, time, status):
    """Stream callback: enqueue each captured chunk on ``audio_queue``.

    Passed as ``callback`` to the input stream opened in ``record_audio``.

    Args:
        indata: Captured audio chunk; a copy of it is enqueued.
        frames: Unused.
        time: Unused (shadows the ``time`` module inside this function only).
        status: Stream status; printed when truthy.
    """
    if status:
        print(f"Audio error: {status}")
    # Enqueue a copy rather than the buffer itself -- presumably because the
    # stream may reuse ``indata`` after the callback returns.
    chunk = indata.copy()
    audio_queue.put(chunk)
def _drain_audio_queue():
    """Discard chunks left on ``audio_queue`` by a previous recording."""
    while True:
        try:
            audio_queue.get_nowait()
        except queue.Empty:
            return


def record_audio():
    """Record from the input device until sustained silence or Ctrl-C.

    Chunks delivered by ``audio_callback`` are collected until the RMS level
    stays below ``silence_threshold`` for more than ``silence_duration``
    seconds, or until the user interrupts with Ctrl-C.

    Returns:
        The recorded chunks concatenated along axis 0. When nothing was
        captured (e.g. Ctrl-C before the first chunk arrived) an empty
        ``(0, 1)`` float32 array is returned instead of raising.
        NOTE(review): float32 assumes the stream's default dtype -- confirm.
    """
    print("Recording. Speak now!")
    # The callback keeps enqueueing until the stream is closed, so chunks
    # from the tail of the previous recording may still be queued; drop them
    # so they are not prepended to this recording.
    _drain_audio_queue()

    chunks = []
    silence_start = None
    with sd.InputStream(samplerate=sample_rate, channels=1, callback=audio_callback):
        try:
            while True:
                data = audio_queue.get()
                chunks.append(data)

                rms = np.sqrt(np.mean(data**2))
                if rms < silence_threshold:
                    # Monotonic clock: immune to wall-clock adjustments.
                    now = time.monotonic()
                    if silence_start is None:
                        silence_start = now
                    elif now - silence_start > silence_duration:
                        print("Silence detected. Stopping recording.")
                        break
                else:
                    # Any loud chunk restarts the silence timer.
                    silence_start = None
        except KeyboardInterrupt:
            print("Recording stopped by user.")

    if not chunks:
        # np.concatenate raises ValueError on an empty list.
        return np.zeros((0, 1), dtype=np.float32)
    return np.concatenate(chunks, axis=0)
def _encode_wav(audio_data):
    """Encode float samples in [-1, 1] as 16-bit PCM WAV bytes at ``sample_rate``."""
    samples = np.asarray(audio_data)
    channels = samples.shape[1] if samples.ndim > 1 else 1
    # Clip before scaling so out-of-range samples saturate instead of wrapping.
    pcm = (np.clip(samples, -1.0, 1.0) * 32767.0).astype("<i2")
    buffer = io.BytesIO()
    with wave.open(buffer, "wb") as wav_file:
        wav_file.setnchannels(channels)
        wav_file.setsampwidth(2)  # bytes per sample (16-bit)
        wav_file.setframerate(sample_rate)
        wav_file.writeframes(pcm.tobytes())
    return buffer.getvalue()


def transcribe_audio(audio_data):
    """Transcribe recorded audio with the Hugging Face Whisper endpoint.

    The samples are wrapped in a WAV container before upload: the endpoint
    decodes an audio *file*, and raw sample bytes carry no format, rate or
    channel information.

    Args:
        audio_data: Recorded samples as a numpy array. Assumed to be floats
            in [-1, 1] shaped ``(frames, channels)`` or ``(frames,)``, as
            produced by ``record_audio``.

    Returns:
        The transcribed text, or ``""`` when there is no audio, the request
        fails, or the API answers with a non-200 status.
    """
    print("Transcribing audio...")
    if audio_data is None or np.size(audio_data) == 0:
        # Nothing was recorded; skip the network round trip.
        return ""

    headers = {
        "Authorization": f"Bearer {HF_API_KEY}",
        "Content-Type": "audio/wav",
    }
    try:
        response = requests.post(
            HF_API_URL_STT,
            headers=headers,
            data=_encode_wav(audio_data),
            timeout=60,  # never hang forever on a stalled connection
        )
    except requests.RequestException as e:
        print(f"Error: {e}")
        return ""

    if response.status_code == 200:
        return response.json().get("text", "")
    print(f"Error: {response.status_code} {response.text}")
    return ""
def generate_question(response, resume_embeddings):
    """Produce the interviewer's next question from the user's answer.

    When resume embeddings are available the question comes from
    ``get_relevant_question``; otherwise the Hugging Face conversational
    endpoint is asked to reply to the answer.

    Args:
        response: Text of the user's answer.
        resume_embeddings: Dict of section embeddings, or a falsy value
            (``None`` / empty dict) when no resume was loaded.

    Returns:
        The next question. On API failure a fixed apology string is
        returned; when the API reply carries no generated text a generic
        follow-up question is returned.
    """
    if resume_embeddings:
        return get_relevant_question(response, resume_embeddings)

    print("Generating a question...")
    default_question = "Could you elaborate on that?"
    failure_message = "Sorry, I couldn't generate a question."
    headers = {"Authorization": f"Bearer {HF_API_KEY}"}
    payload = {"inputs": {"past_user_inputs": [""], "generated_responses": [""], "text": response}}

    # The HTTP reply gets its own name so it does not rebind the ``response``
    # parameter (the user's answer).
    try:
        api_response = requests.post(
            HF_API_URL_CONVERSATION,
            headers=headers,
            json=payload,
            timeout=60,  # never hang forever on a stalled connection
        )
    except requests.RequestException as e:
        print(f"Error: {e}")
        return failure_message

    if api_response.status_code != 200:
        print(f"Error: {api_response.status_code} {api_response.text}")
        return failure_message

    try:
        result = api_response.json()
    except ValueError as e:
        print(f"Error: {e}")
        return failure_message

    # Depending on the task type the API may answer with a dict or with a
    # list of dicts; accept both instead of crashing on ``list.get``.
    if isinstance(result, list) and result:
        result = result[0]
    if isinstance(result, dict):
        return result.get("generated_text", default_question)
    return default_question
def main(pdf_path="resume.pdf"):
    """Run the interactive mock-interview loop.

    Loads resume embeddings when the PDF exists, then repeatedly records the
    user's answer, transcribes it and prints the next question. The loop
    ends on Ctrl-C (outside of recording) or on the first unexpected error.

    Args:
        pdf_path: Path of the PDF resume. Defaults to ``"resume.pdf"`` in
            the current working directory.
    """
    print("Mock Interview System Initialized")

    if os.path.exists(pdf_path):
        print("Loading resume...")
        resume_embeddings = load_resume(pdf_path)
    else:
        print("Resume file not found. Proceeding without it.")
        resume_embeddings = None

    try:
        while True:
            audio_data = record_audio()

            response = transcribe_audio(audio_data)
            if not response.strip():
                # Nothing was understood (silence or a failed transcription);
                # asking a follow-up about an empty answer is meaningless.
                print("No speech recognized. Please try again.")
                continue
            print(f"You said: {response}")

            question = generate_question(response, resume_embeddings)
            print(f"Interview AI: {question}")
    except KeyboardInterrupt:
        # Ctrl-C while transcribing or generating: exit without a traceback.
        print("\nInterview ended by user.")
    except Exception as e:
        # Top-level boundary: report the error and stop the session.
        print(f"Error: {e}")
# Start the interview loop only when run as a script, not when imported.
if __name__ == "__main__":
    main()
|