garyd1 committed on
Commit
65fceff
·
verified ·
1 Parent(s): 4b6642f

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +160 -0
  2. requirements.txt +7 -0
app.py ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import os
3
+ import queue
4
+ import sounddevice as sd
5
+ import numpy as np
6
+ import requests
7
+ from sentence_transformers import SentenceTransformer
8
+ from sklearn.metrics.pairwise import cosine_similarity
9
+ import json
10
+ import PyPDF2
11
+
12
# Hugging Face API endpoints and credentials.
HF_API_URL_STT = "https://api-inference.huggingface.co/models/openai/whisper-base"
HF_API_URL_CONVERSATION = "https://api-inference.huggingface.co/models/facebook/blenderbot-400M-distill"
# SECURITY: never hard-code secrets in source. Read the key from the
# environment; the original placeholder is kept only as a backward-compatible
# fallback so existing setups keep their previous behavior.
HF_API_KEY = os.getenv("HF_API_KEY", "your_huggingface_api_key")

# Parameters
silence_threshold = 0.01  # RMS level below which an audio chunk counts as silence
silence_duration = 2.0    # seconds of continuous silence that end a recording
sample_rate = 16000       # audio sample rate in Hz

# Thread-safe buffer filled by the sounddevice stream callback.
audio_queue = queue.Queue()

# Pre-trained sentence-embedding model used for resume similarity search.
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
27
+
28
+ # Parse PDF and create resume content
29
def parse_resume(pdf_path):
    """Extract text from a PDF file.

    Returns a dict mapping a section name ("Resume Content") to the
    extracted text, or an empty dict when the file cannot be read/parsed.
    """
    try:
        with open(pdf_path, 'rb') as f:
            reader = PyPDF2.PdfReader(f)
            # Call extract_text() once per page — the original called it
            # twice (once to filter, once to join), doubling the parse work.
            page_texts = (page.extract_text() for page in reader.pages)
            text = "\n".join(t for t in page_texts if t)
        return {"Resume Content": text}
    except Exception as e:
        # Best-effort: a broken or missing PDF degrades to "no resume"
        # rather than crashing the interview loop.
        print(f"Error reading PDF: {e}")
        return {}
40
+
41
+ # Load vector database (resume content)
42
def load_resume(pdf_path):
    """Parse the resume PDF and embed each section for similarity search."""
    sections = parse_resume(pdf_path)
    embeddings = {}
    for name, body in sections.items():
        embeddings[name] = embedding_model.encode(body)
    return embeddings
48
+
49
def get_relevant_question(user_input, resume_embeddings):
    """Find the resume section most similar to the user's answer and ask about it.

    Returns a generic prompt when no embeddings are available — previously an
    empty dict made ``max()`` raise ``ValueError: max() arg is an empty sequence``.
    """
    if not resume_embeddings:
        return "Could you elaborate on that?"
    user_embedding = embedding_model.encode(user_input)
    # Cosine similarity between the user's answer and every resume section.
    similarities = {
        section: cosine_similarity([user_embedding], [embedding])[0][0]
        for section, embedding in resume_embeddings.items()
    }
    most_relevant_section = max(similarities, key=similarities.get)
    return f"Based on your experience in {most_relevant_section}, can you elaborate more?"
58
+
59
def audio_callback(indata, frames, time, status):
    """Stream callback invoked by sounddevice for every captured audio block."""
    if status:  # PortAudio reported an over-/under-run or similar condition
        print(f"Audio error: {status}")
    # Copy the block: the backend reuses `indata`'s buffer after we return.
    audio_queue.put(indata.copy())
64
+
65
def record_audio():
    """Record microphone audio until sustained silence is detected.

    Returns the captured audio as a single numpy array (concatenated chunks).
    """
    # Drain chunks left over from a previous recording: when the silence
    # break fires, the callback may already have queued more audio before the
    # stream closes, and those stale chunks would leak into the next utterance.
    while True:
        try:
            audio_queue.get_nowait()
        except queue.Empty:
            break

    print("Recording. Speak now!")
    buffer = []
    silence_start = None

    with sd.InputStream(samplerate=sample_rate, channels=1, callback=audio_callback):
        while True:
            try:
                # Fetch audio data (blocks until the callback supplies a chunk).
                data = audio_queue.get()
                buffer.append(data)

                # RMS energy of the chunk decides silence vs. speech.
                rms = np.sqrt(np.mean(data**2))
                if rms < silence_threshold:
                    if silence_start is None:
                        silence_start = time.time()
                    elif time.time() - silence_start > silence_duration:
                        print("Silence detected. Stopping recording.")
                        break
                else:
                    silence_start = None

            except KeyboardInterrupt:
                print("Recording stopped by user.")
                break

    audio_data = np.concatenate(buffer, axis=0)
    return audio_data
95
+
96
def transcribe_audio(audio_data):
    """Transcribe recorded audio to text via the Hugging Face Whisper API.

    Returns the transcription string, or "" on any API or network failure.

    NOTE(review): the payload is raw float32 PCM bytes with no container or
    content-type header; confirm the inference endpoint accepts this — a WAV
    (or flac/ogg) payload may be required.
    """
    print("Transcribing audio...")
    headers = {"Authorization": f"Bearer {HF_API_KEY}"}
    try:
        response = requests.post(
            HF_API_URL_STT,
            headers=headers,
            data=audio_data.tobytes(),
            # requests has no default timeout; without one a dead connection
            # hangs the interview loop forever.
            timeout=60,
        )
    except requests.RequestException as e:
        print(f"Error: {e}")
        return ""
    if response.status_code == 200:
        return response.json().get("text", "")
    print(f"Error: {response.status_code} {response.text}")
    return ""
110
+
111
def generate_question(response, resume_embeddings):
    """Generate the next interview question from the user's last answer.

    Prefers a resume-grounded question when embeddings are available;
    otherwise queries the conversational model via the Hugging Face API.
    Returns a fallback string on any API or network failure.
    """
    if resume_embeddings:
        return get_relevant_question(response, resume_embeddings)

    print("Generating a question...")
    headers = {"Authorization": f"Bearer {HF_API_KEY}"}
    payload = {"inputs": {"past_user_inputs": [""], "generated_responses": [""], "text": response}}
    # Distinct name: the original rebound `response`, shadowing the user's
    # answer with the HTTP response object.
    try:
        api_response = requests.post(
            HF_API_URL_CONVERSATION,
            headers=headers,
            json=payload,
            timeout=60,  # requests has no default timeout
        )
    except requests.RequestException as e:
        print(f"Error: {e}")
        return "Sorry, I couldn't generate a question."
    if api_response.status_code == 200:
        return api_response.json().get("generated_text", "Could you elaborate on that?")
    print(f"Error: {api_response.status_code} {api_response.text}")
    return "Sorry, I couldn't generate a question."
129
+
130
def main():
    """Run the voice-driven mock-interview loop until an error or Ctrl-C."""
    print("Mock Interview System Initialized")

    # Load the resume embeddings when a resume file is present.
    pdf_path = "resume.pdf"  # Replace with the path to your PDF resume file
    resume_embeddings = None
    if os.path.exists(pdf_path):
        print("Loading resume...")
        resume_embeddings = load_resume(pdf_path)
    else:
        print("Resume file not found. Proceeding without it.")

    while True:
        try:
            # One turn: record -> transcribe -> ask a follow-up question.
            audio_data = record_audio()

            response = transcribe_audio(audio_data)
            print(f"You said: {response}")

            question = generate_question(response, resume_embeddings)
            print(f"Interview AI: {question}")

        except Exception as e:
            print(f"Error: {e}")
            break

if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ gradio
2
+ sounddevice
3
+ numpy
4
+ requests
5
+ sentence-transformers
6
+ scikit-learn
7
+ PyPDF2