"""Audio-based mock interview demo built on Gradio.

The script transcribes the candidate's spoken answer, picks the resume section
whose embedding is most similar to that answer, asks Flan-T5 for a follow-up
question, and synthesizes the question to a WAV file with Coqui TTS.
"""

import gradio as gr
import time
from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from TTS.api import TTS  # Coqui TTS library
import PyPDF2

# Initialize Models
# NOTE: all models are loaded eagerly at import time.
stt_model = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
gpt_model_name = "google/flan-t5-base"
gpt_tokenizer = AutoTokenizer.from_pretrained(gpt_model_name)
gpt_model = AutoModelForSeq2SeqLM.from_pretrained(gpt_model_name)
tts_model = TTS(
    model_name="tts_models/en/ljspeech/tacotron2-DDC",
    progress_bar=False,
    gpu=False,
)


# Parse PDF and create resume content
def parse_resume(pdf):
    """Extract the text of a PDF resume.

    Args:
        pdf: Whatever ``PyPDF2.PdfReader`` accepts as its source argument.

    Returns:
        A dict with the single key ``"Resume Content"`` mapped to the text of
        all pages that yielded any text, joined with newlines.
    """
    reader = PyPDF2.PdfReader(pdf)
    # Extract each page once; pages with no extractable text are skipped.
    page_texts = (page.extract_text() for page in reader.pages)
    text = "\n".join(page_text for page_text in page_texts if page_text)
    return {"Resume Content": text}


# Process inputs
def process_inputs(resume, job_desc):
    """Embed the resume sections and the job description.

    Args:
        resume: PDF source forwarded to ``parse_resume``.
        job_desc: Job description text.

    Returns:
        A ``(resume_embeddings, job_desc_embedding)`` tuple, where
        ``resume_embeddings`` maps each section name returned by
        ``parse_resume`` to its embedding.
    """
    resume_embeddings = {
        section: embedding_model.encode(content)
        for section, content in parse_resume(resume).items()
    }
    job_desc_embedding = embedding_model.encode(job_desc)
    return resume_embeddings, job_desc_embedding


# Generate a follow-up question using Flan-T5
def generate_question(response, resume_embeddings, job_desc):
    """Generate a follow-up interview question for the candidate's answer.

    Args:
        response: The candidate's transcribed answer.
        resume_embeddings: Mapping of resume section name to embedding, as
            produced by ``process_inputs``.
        job_desc: Accepted for interface compatibility; this function does not
            read it. NOTE(review): ``MockInterview.next_interaction`` passes
            the job-description *embedding* here, not the text.

    Returns:
        The decoded question string produced by the model.
    """
    user_embedding = embedding_model.encode(response)
    similarities = {
        section: cosine_similarity([user_embedding], [embedding])[0][0]
        for section, embedding in resume_embeddings.items()
    }
    # Only the section *name* is placed in the prompt, not its content.
    most_relevant_section = max(similarities, key=similarities.get)
    prompt = f"You are a hiring manager. Based on the candidate's experience in {most_relevant_section} and the job description, ask a follow-up question."
    inputs = gpt_tokenizer(prompt, return_tensors="pt", truncation=True)
    outputs = gpt_model.generate(
        **inputs, max_length=50, num_beams=3, early_stopping=True
    )
    question = gpt_tokenizer.decode(outputs[0], skip_special_tokens=True)
    return question


# Generate TTS audio for a question
def generate_audio(question):
    """Synthesize ``question`` to speech and return the audio file path.

    The output path is fixed, so every call overwrites ``output.wav``.
    """
    audio_path = "output.wav"
    tts_model.tts_to_file(text=question, file_path=audio_path)
    return audio_path


# Conduct a mock interview
class MockInterview:
    """State holder for one mock-interview session."""

    def __init__(self):
        self.resume_embeddings = None
        self.job_desc_embedding = None
        self.interview_active = False
        self.current_question = None

    def start_interview(self, resume, job_desc):
        """Embed the inputs and open the interview with a fixed first question.

        Returns:
            ``(text, audio_path)``. When ``resume`` is ``None`` the session
            state is left untouched and ``audio_path`` is ``None``.
        """
        # This method is wired to the file input's change event; an empty
        # value cannot be parsed as a PDF, so bail out before touching state.
        if resume is None:
            return "Please upload a resume to start the interview.", None

        self.resume_embeddings, self.job_desc_embedding = process_inputs(
            resume, job_desc
        )
        self.interview_active = True
        self.current_question = "Tell me about yourself."
        return self.current_question, generate_audio(self.current_question)

    def next_interaction(self, user_audio):
        """Transcribe the candidate's answer and produce the next question.

        Returns:
            ``(transcription, question_audio_path)`` on success, or
            ``(message, None)`` when the interview has not started or no
            speech was provided.
        """
        if not self.interview_active:
            return "Interview not started.", None

        # No recording supplied: nothing to hand to the STT pipeline.
        if user_audio is None:
            return "No response detected. Please try again.", None

        # Transcribe user's response
        transcription = stt_model(user_audio)["text"]
        if not transcription.strip():
            return "No response detected. Please try again.", None

        # Generate the next question
        self.current_question = generate_question(
            transcription, self.resume_embeddings, self.job_desc_embedding
        )
        return transcription, generate_audio(self.current_question)

    def end_interview(self):
        """Mark the interview inactive and return a closing message and audio."""
        self.interview_active = False
        return (
            "Thank you for participating in the interview.",
            generate_audio("Thank you for participating in the interview. Goodbye!"),
        )


# A single module-level session object backs every Gradio callback.
mock_interview = MockInterview()


# Gradio Interface
def start_interview(resume, job_desc):
    """Gradio callback: delegate to ``mock_interview.start_interview``."""
    return mock_interview.start_interview(resume, job_desc)


def next_interaction(user_audio):
    """Gradio callback: delegate to ``mock_interview.next_interaction``."""
    return mock_interview.next_interaction(user_audio)


def end_interview():
    """Gradio callback: delegate to ``mock_interview.end_interview``."""
    return mock_interview.end_interview()


interface = gr.Blocks()

with interface:
    gr.Markdown("### Mock Interview AI\nUpload your resume and job description, and engage in a realistic audio-based mock interview simulation.")

    # NOTE(review): row membership was reconstructed from a whitespace-mangled
    # source; confirm which inputs belong inside the row.
    with gr.Row():
        resume_input = gr.File(label="Upload Resume (PDF)")
        job_desc_input = gr.Textbox(label="Paste Job Description")

    audio_input = gr.Audio(type="filepath", label="Your Response")
    question_audio_output = gr.Audio(label="Question Audio")
    transcription_output = gr.Textbox(label="Transcription")
    interaction_button = gr.Button("Next Interaction")
    end_button = gr.Button("End Interview")

    # The interview starts when the resume input changes.
    resume_uploaded = resume_input.change(
        start_interview,
        inputs=[resume_input, job_desc_input],
        outputs=[transcription_output, question_audio_output],
    )
    interaction_button.click(
        next_interaction,
        inputs=[audio_input],
        outputs=[transcription_output, question_audio_output],
    )
    end_button.click(
        end_interview,
        outputs=[transcription_output, question_audio_output],
    )

if __name__ == "__main__":
    interface.launch()