garyd1 committed on
Commit
65fceff
·
verified ·
1 Parent(s): 4b6642f

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +160 -0
  2. requirements.txt +7 -0
app.py ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import os
3
+ import queue
4
+ import sounddevice as sd
5
+ import numpy as np
6
+ import requests
7
+ from sentence_transformers import SentenceTransformer
8
+ from sklearn.metrics.pairwise import cosine_similarity
9
+ import json
10
+ import PyPDF2
11
+
12
# Hugging Face API endpoints and credentials.
HF_API_URL_STT = "https://api-inference.huggingface.co/models/openai/whisper-base"
HF_API_URL_CONVERSATION = "https://api-inference.huggingface.co/models/facebook/blenderbot-400M-distill"
# SECURITY: never hard-code secrets in source. Read the key from the
# environment; the original placeholder is kept only as a backward-compatible
# fallback so existing setups keep their previous behavior.
HF_API_KEY = os.getenv("HF_API_KEY", "your_huggingface_api_key")

# Parameters
silence_threshold = 0.01  # RMS level below which an audio chunk counts as silence
silence_duration = 2.0    # seconds of continuous silence that end a recording
sample_rate = 16000       # audio sample rate in Hz

# Thread-safe buffer filled by the sounddevice stream callback.
audio_queue = queue.Queue()

# Pre-trained sentence-embedding model used for resume similarity search.
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
27
+
28
+ # Parse PDF and create resume content
29
def parse_resume(pdf_path):
    """Extract text from a PDF file.

    Returns a dict mapping a section name ("Resume Content") to the
    extracted text, or an empty dict when the file cannot be read/parsed.
    """
    try:
        with open(pdf_path, 'rb') as f:
            reader = PyPDF2.PdfReader(f)
            # Call extract_text() once per page — the original called it
            # twice (once to filter, once to join), doubling the parse work.
            page_texts = (page.extract_text() for page in reader.pages)
            text = "\n".join(t for t in page_texts if t)
        return {"Resume Content": text}
    except Exception as e:
        # Best-effort: a broken or missing PDF degrades to "no resume"
        # rather than crashing the interview loop.
        print(f"Error reading PDF: {e}")
        return {}
40
+
41
+ # Load vector database (resume content)
42
def load_resume(pdf_path):
    """Parse the resume PDF and embed each section for similarity search."""
    sections = parse_resume(pdf_path)
    embeddings = {}
    for name, body in sections.items():
        embeddings[name] = embedding_model.encode(body)
    return embeddings
48
+
49
def get_relevant_question(user_input, resume_embeddings):
    """Find the resume section most similar to the user's answer and ask about it.

    Returns a generic prompt when no embeddings are available — previously an
    empty dict made ``max()`` raise ``ValueError: max() arg is an empty sequence``.
    """
    if not resume_embeddings:
        return "Could you elaborate on that?"
    user_embedding = embedding_model.encode(user_input)
    # Cosine similarity between the user's answer and every resume section.
    similarities = {
        section: cosine_similarity([user_embedding], [embedding])[0][0]
        for section, embedding in resume_embeddings.items()
    }
    most_relevant_section = max(similarities, key=similarities.get)
    return f"Based on your experience in {most_relevant_section}, can you elaborate more?"
58
+
59
def audio_callback(indata, frames, time, status):
    """Stream callback invoked by sounddevice for every captured audio block."""
    if status:  # PortAudio reported an over-/under-run or similar condition
        print(f"Audio error: {status}")
    # Copy the block: the backend reuses `indata`'s buffer after we return.
    audio_queue.put(indata.copy())
64
+
65
def record_audio():
    """Record microphone audio until sustained silence is detected.

    Returns the captured audio as a single numpy array (concatenated chunks).
    """
    # Drain chunks left over from a previous recording: when the silence
    # break fires, the callback may already have queued more audio before the
    # stream closes, and those stale chunks would leak into the next utterance.
    while True:
        try:
            audio_queue.get_nowait()
        except queue.Empty:
            break

    print("Recording. Speak now!")
    buffer = []
    silence_start = None

    with sd.InputStream(samplerate=sample_rate, channels=1, callback=audio_callback):
        while True:
            try:
                # Fetch audio data (blocks until the callback supplies a chunk).
                data = audio_queue.get()
                buffer.append(data)

                # RMS energy of the chunk decides silence vs. speech.
                rms = np.sqrt(np.mean(data**2))
                if rms < silence_threshold:
                    if silence_start is None:
                        silence_start = time.time()
                    elif time.time() - silence_start > silence_duration:
                        print("Silence detected. Stopping recording.")
                        break
                else:
                    silence_start = None

            except KeyboardInterrupt:
                print("Recording stopped by user.")
                break

    audio_data = np.concatenate(buffer, axis=0)
    return audio_data
95
+
96
def transcribe_audio(audio_data):
    """Transcribe recorded audio to text via the Hugging Face Whisper API.

    Returns the transcription string, or "" on any API or network failure.

    NOTE(review): the payload is raw float32 PCM bytes with no container or
    content-type header; confirm the inference endpoint accepts this — a WAV
    (or flac/ogg) payload may be required.
    """
    print("Transcribing audio...")
    headers = {"Authorization": f"Bearer {HF_API_KEY}"}
    try:
        response = requests.post(
            HF_API_URL_STT,
            headers=headers,
            data=audio_data.tobytes(),
            # requests has no default timeout; without one a dead connection
            # hangs the interview loop forever.
            timeout=60,
        )
    except requests.RequestException as e:
        print(f"Error: {e}")
        return ""
    if response.status_code == 200:
        return response.json().get("text", "")
    print(f"Error: {response.status_code} {response.text}")
    return ""
110
+
111
def generate_question(response, resume_embeddings):
    """Generate the next interview question from the user's last answer.

    Prefers a resume-grounded question when embeddings are available;
    otherwise queries the conversational model via the Hugging Face API.
    Returns a fallback string on any API or network failure.
    """
    if resume_embeddings:
        return get_relevant_question(response, resume_embeddings)

    print("Generating a question...")
    headers = {"Authorization": f"Bearer {HF_API_KEY}"}
    payload = {"inputs": {"past_user_inputs": [""], "generated_responses": [""], "text": response}}
    # Distinct name: the original rebound `response`, shadowing the user's
    # answer with the HTTP response object.
    try:
        api_response = requests.post(
            HF_API_URL_CONVERSATION,
            headers=headers,
            json=payload,
            timeout=60,  # requests has no default timeout
        )
    except requests.RequestException as e:
        print(f"Error: {e}")
        return "Sorry, I couldn't generate a question."
    if api_response.status_code == 200:
        return api_response.json().get("generated_text", "Could you elaborate on that?")
    print(f"Error: {api_response.status_code} {api_response.text}")
    return "Sorry, I couldn't generate a question."
129
+
130
def main():
    """Run the voice-driven mock-interview loop until an error or Ctrl-C."""
    print("Mock Interview System Initialized")

    # Load the resume embeddings when a resume file is present.
    pdf_path = "resume.pdf"  # Replace with the path to your PDF resume file
    resume_embeddings = None
    if os.path.exists(pdf_path):
        print("Loading resume...")
        resume_embeddings = load_resume(pdf_path)
    else:
        print("Resume file not found. Proceeding without it.")

    while True:
        try:
            # One turn: record -> transcribe -> ask a follow-up question.
            audio_data = record_audio()

            response = transcribe_audio(audio_data)
            print(f"You said: {response}")

            question = generate_question(response, resume_embeddings)
            print(f"Interview AI: {question}")

        except Exception as e:
            print(f"Error: {e}")
            break

if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ gradio
2
+ sounddevice
3
+ numpy
4
+ requests
5
+ sentence-transformers
6
+ scikit-learn
7
+ PyPDF2