from cleantext import clean
import cohere
import string
import numpy as np
from numpy.linalg import norm
from nltk.tokenize import SpaceTokenizer
import nltk  # requires the 'stopwords' corpus: nltk.download('stopwords')
import os
from dotenv import load_dotenv

# Load COHERE_TOKEN (and any other secrets) from a local .env file.
load_dotenv()

def coSkillEmbed(text):
    """Embed a single text with Cohere's 'large' embedding model."""
    co = cohere.Client(os.getenv("COHERE_TOKEN"))
    response = co.embed(
        model='large',
        texts=[text])
    # response.embeddings is a list containing one vector per input text.
    return response.embeddings
    
def cosine(A, B):
    # Cosine similarity between two 1-D vectors.
    return np.dot(A, B) / (norm(A) * norm(B))

def clean_my_text(resume):
    # Flatten to a single line, split hyphenated/slashed terms, strip punctuation,
    # then normalise (lowercase, etc.) via cleantext.
    clean_text = ' '.join(resume.splitlines())
    clean_text = clean_text.replace('-', ' ').replace('/', ' ')
    clean_text = clean(clean_text.translate(str.maketrans('', '', string.punctuation)))
    # Standard English stop words plus resume-specific filler terms.
    stops = set(nltk.corpus.stopwords.words('english'))
    stops = stops.union({'eg', 'ie', 'etc', 'experience', 'experiences', 'experienced', 'experiencing', 'knowledge',
                         'ability', 'abilities', 'skill', 'skills', 'skilled', 'including', 'includes', 'included', 'include',
                         'education', 'follow', 'following', 'follows', 'followed', 'make', 'made', 'makes', 'making', 'maker',
                         'available', 'large', 'larger', 'largescale', 'client', 'clients', 'responsible', 'x', 'many', 'team', 'teams'})
    # Tokenize the cleaned text, then drop stop words and parenthesised tokens.
    tokens = [word for word in SpaceTokenizer().tokenize(clean_text) if word not in stops]
    tokens = [word for word in tokens if '(' not in word and ')' not in word]
    return tokens
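

# A minimal usage sketch, assuming a resume string and a job-description string:
# clean both texts, embed them with coSkillEmbed, and score them with cosine.
# The example strings and the [0] indexing into the returned embeddings list are
# illustrative assumptions, not part of any particular calling code.
if __name__ == "__main__":
    resume_text = "Built data pipelines in Python and SQL; deployed models to AWS."
    job_text = "Looking for a data engineer with Python, SQL and cloud experience."

    resume_tokens = clean_my_text(resume_text)
    job_tokens = clean_my_text(job_text)

    # Re-join the cleaned tokens, embed each text, and compare the single vectors.
    resume_vec = coSkillEmbed(' '.join(resume_tokens))[0]
    job_vec = coSkillEmbed(' '.join(job_tokens))[0]
    print(f"cosine similarity: {cosine(resume_vec, job_vec):.3f}")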