File size: 3,869 Bytes
66e260e
 
 
 
 
 
 
 
 
1744fe5
66e260e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1744fe5
 
 
 
 
f5331aa
 
1744fe5
66e260e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4b52d41
 
66e260e
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# functions.py
import os
import pyperclip
import streamlit as st
import speech_recognition as sr
import re
import numpy as np
import numpy as np
import torch
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from transformers import BertTokenizer, BertModel
# from convert import ExtractPDFText
import streamlit as st


class Functions:
    """Stateless helper methods for the Streamlit resume-assistant app."""

    # Lazily-initialised (tokenizer, model) pair, shared across calls so the
    # pretrained BERT weights are loaded from disk only once per process.
    _bert = None

    @staticmethod
    def get_gemini_response(llm, input_text, doc, template, info=''):
        """Fill `template` with the given fields and return the LLM's reply text.

        Args:
            llm: chat model exposing ``invoke(prompt)`` that returns an object
                with a ``.content`` attribute (e.g. a LangChain chat model).
            input_text: the user's query.
            doc: document text (e.g. resume) interpolated into the prompt.
            template: format string with ``{doc}``, ``{input_text}``, ``{info}``.
            info: optional extra context, defaults to ''.

        Returns:
            str: the model response's ``content``.
        """
        formated_prompt = template.format(doc=doc, input_text=input_text, info=info)
        response = llm.invoke(formated_prompt)
        return response.content

    @staticmethod
    def copy_text(answer, copy_button=False):
        """Copy `answer` to the system clipboard; show a toast when triggered
        from the copy button."""
        pyperclip.copy(answer)
        if copy_button:
            st.toast("Text copied to clipboard!", icon="📋")

    @staticmethod
    def record_audio():
        """Record from the default microphone and transcribe it.

        Uses Google's speech recognition service; on success the transcript is
        stored in ``st.session_state['input_text']`` and returned. Returns ""
        when speech is unintelligible or the service cannot be reached.
        """
        recognizer = sr.Recognizer()
        with st.spinner("Recording..."):
            with sr.Microphone() as source:
                recognizer.adjust_for_ambient_noise(source)
                with st.spinner("Say Something..."):
                    audio = recognizer.listen(source, timeout=5)
            with st.spinner("Processing..."):
                try:
                    text = recognizer.recognize_google(audio)
                    st.session_state['input_text'] = text
                    return text
                except sr.UnknownValueError:
                    st.write("Sorry, I could not understand what you said. Please try again or write in text box.")
                    return ""
                except sr.RequestError as e:
                    st.write(f"Could not request results; {e}")
                    return ""

    @staticmethod
    def input_state(input_text):
        """Persist free-text input into Streamlit session state (strings only)."""
        if isinstance(input_text, str):
            st.session_state['input_text'] = input_text

    @classmethod
    def _get_bert(cls):
        """Return the shared (tokenizer, model) pair, loading it on first use.

        The original code called ``from_pretrained`` inside every embedding
        request, reloading the full model twice per ATS score computation.
        """
        if cls._bert is None:
            tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
            model = BertModel.from_pretrained('bert-base-uncased')
            model.eval()  # inference only: disable dropout for stable embeddings
            cls._bert = (tokenizer, model)
        return cls._bert

    @staticmethod
    def calculate_ats_score(resume_data, job_description):
        """Score resume / job-description similarity with BERT embeddings.

        Args:
            resume_data: plain-text resume content.
            job_description: plain-text job description.

        Returns:
            tuple[str, list[str]]: similarity percentage rounded to 2 decimals
            (as a string), and the job-description tokens missing from the
            resume (or a congratulation message when none are missing).
        """
        # Ensure BOTH NLTK resources are present. The previous check probed
        # only 'stopwords', so a missing 'punkt' tokenizer still crashed
        # word_tokenize below.
        for resource, path in (('stopwords', 'corpora/stopwords'),
                               ('punkt', 'tokenizers/punkt')):
            try:
                nltk.data.find(path)
            except LookupError:
                nltk.download(resource)

        def preprocess_text(text):
            # Lowercase, drop English stopwords, then strip non-letter chars.
            text = text.lower()
            stop_words = set(stopwords.words('english'))
            word_tokens = word_tokenize(text)
            filtered_text = [word for word in word_tokens if word not in stop_words]
            string_text = ' '.join(filtered_text)
            return re.sub(r'[^a-zA-Z\s]', '', string_text)

        def get_bert_embeddings(text):
            tokenizer, model = Functions._get_bert()
            tokens = tokenizer(text, return_tensors='pt', padding=True, truncation=True)
            with torch.no_grad():
                outputs = model(**tokens)
                # Mean-pool token embeddings into one sentence vector.
                embeddings = outputs.last_hidden_state.mean(dim=1)
            return embeddings

        def calculate_cosine_similarity(embedding1, embedding2):
            v1 = embedding1[0].numpy()
            v2 = embedding2[0].numpy()
            return np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2))

        resume = preprocess_text(resume_data)
        job_desc = preprocess_text(job_description)
        resume_embeddings = get_bert_embeddings(resume)
        job_desc_embeddings = get_bert_embeddings(job_desc)
        similarity_score = calculate_cosine_similarity(resume_embeddings, job_desc_embeddings)
        # Tokenize the resume ONCE into a set; the original re-tokenized it
        # for every job-description word (O(n*m) with repeated tokenization).
        resume_tokens = set(word_tokenize(resume))
        missing_keywords = [word for word in word_tokenize(job_desc) if word not in resume_tokens]
        if not missing_keywords:
            # Fixed typo in the user-facing message ('Congratualitions').
            missing_keywords = ['Congratulations, all the keywords match with your resume!!']
        return str(round(similarity_score * 100, 2)), missing_keywords