Spaces:

cdcvd
/

resume_api_2

Sleeping

App Files Files Community

cdcvd committed on Jul 31, 2024

Commit

839ab56

verified ·

1 Parent(s): 727e326

Upload 7 files

Browse files

Files changed (7) hide show

app.py +79 -0
comparison_utils.py +104 -0
job_description_extractor.py +105 -0
model_trainer.py +37 -0
requirements.txt +8 -0
resume_extractor.py +147 -0
synthetic_data.py +29 -0

app.py ADDED Viewed

	@@ -0,0 +1,79 @@

+from fastapi import FastAPI, HTTPException, UploadFile, File
+import pandas as pd
+from resume_extractor import ResumeExtractor
+from job_description_extractor import JobDescriptionExtractor
+from model_trainer import ModelTrainer
+from comparison_utils import (
+    compare_with_chatgpt_job_title,
+    compare_with_chatgpt_education,
+    compare_with_chatgpt_location,
+    compare_age_range_with_description
+)
+from synthetic_data import create_synthetic_data
+# FastAPI application object; the /extract route below is registered on it
+# and it is the object handed to uvicorn when this module runs as a script.
+app = FastAPI()
+def main(resume_text, job_description):
+    openai_api_key = 'sk-proj-bC6H6QrP6DUqHkn5vOkYT3BlbkFJsSyvL4Bc9c3UEbHrsPMj'
+    ner_model_name_or_path = "NLPclass/Named-entity-recognition"
+    skill_model_name_or_path = "GalalEwida/lm-ner-skills-recognition"
+    resume_extractor = ResumeExtractor(ner_model_name_or_path, openai_api_key)
+    job_description_extractor = JobDescriptionExtractor(openai_api_key)
+    full_name, loc, age, skills, education_resume, title_job_resume = resume_extractor.extract_resume_info(resume_text, skill_model_name_or_path)
+    job_skills, education_job, title_job, location, age_DS = job_description_extractor.extract_job_info(job_description, skill_model_name_or_path)
+    education_match = compare_with_chatgpt_education(education_resume, education_job, openai_api_key)
+    title_job_match = compare_with_chatgpt_job_title(title_job_resume, title_job, openai_api_key)
+    title_loc_match = compare_with_chatgpt_location(loc, location, openai_api_key)
+    title_age_match = compare_age_range_with_description(age, age_DS, openai_api_key)
+    synthetic_data = create_synthetic_data(job_skills, education_job, title_job, location, age_DS)
+    synthetic_data.to_csv('synthetic_data.csv')
+    model_trainer = ModelTrainer(synthetic_data)
+    best_model = model_trainer.train_models()
+    input_data = {skill: 1 if skill in skills else 0 for skill in job_skills}
+    input_data[education_job] = education_match
+    input_data[title_job] = title_job_match
+    input_data[location] = title_loc_match
+    input_data[age_DS] = title_age_match
+    input_df = pd.DataFrame([input_data])
+    input_df.to_csv('input_df.csv')
+    predicted_target = best_model.predict(input_df)
+    return {
+        "full_name": full_name,
+        "location": loc,
+        "age": age,
+        "age_DS": age_DS,
+        "skills": skills,
+        "education_resume": education_resume,
+        "title_job_resume": title_job_resume,
+        "job_skills": job_skills,
+        "education_job": education_job,
+        "title_job": title_job,
+        "location_job": location,
+        "predicted_target": predicted_target[0]
+    }
+@app.post("/extract")
+async def extract(resume_file: UploadFile = File(...), job_description_file: UploadFile = File(...)):
+    try:
+        resume_text = await resume_file.read()
+        job_description = await job_description_file.read()
+        # Convert bytes to string
+        resume_text = resume_text.decode('utf-8')
+        job_description = job_description.decode('utf-8')
+        output = main(resume_text, job_description)
+        return output
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+# Start a uvicorn server for `app` when this module is executed as a script.
+if __name__ == "__main__":
+    import uvicorn
+    # Listen on every interface, port 8000.
+    uvicorn.run(app, host="0.0.0.0", port=8000)

comparison_utils.py ADDED Viewed

	@@ -0,0 +1,104 @@

+import openai
+import re
+def compare_with_chatgpt_job_title(text1, text2, openai_api_key):
+    openai.api_key = openai_api_key
+    prompt = f"Compare the following two texts and determine if they match in job title . Return 1 for match and 0 for no match.\n\nText 1: {text1}\n\nText 2: {text2}"
+    response = openai.ChatCompletion.create(
+        model="gpt-3.5-turbo",
+        messages=[
+            {"role": "system", "content": "You are an assistant that helps compare texts for matching job titles "},
+            {"role": "user", "content": prompt}
+        ],
+        max_tokens=100
+    )
+    # Extract the response content
+    result = response.choices[0].message['content'].strip()
+    # Check if the response contains '1' or '0' and return the corresponding integer
+    if '1' in result:
+        return 1
+    elif '0' in result:
+        return 0
+    else:
+        raise ValueError(f"Unexpected response: {result}")
+def compare_with_chatgpt_education(text1, text2, openai_api_key):
+    openai.api_key = openai_api_key
+    prompt = f"Compare the following two texts and determine if they match in education . Return 1 for match and 0 for no match.\n\nText 1: {text1}\n\nText 2: {text2}"
+    response = openai.ChatCompletion.create(
+        model="gpt-3.5-turbo",
+        messages=[
+            {"role": "system", "content": "You are an assistant that helps compare texts for matching education "},
+            {"role": "user", "content": prompt}
+        ],
+        max_tokens=100
+    )
+    # Extract the response content
+    result = response.choices[0].message['content'].strip()
+    # Check if the response contains '1' or '0' and return the corresponding integer
+    if '1' in result:
+        return 1
+    elif '0' in result:
+        return 0
+    else:
+        raise ValueError(f"Unexpected response: {result}")
+def compare_with_chatgpt_location(text1, text2, openai_api_key):
+    openai.api_key = openai_api_key
+    prompt = f"Compare the following two texts and determine if they match in location . Return 1 for match and 0 for no match.\n\nText 1: {text1}\n\nText 2: {text2}"
+    response = openai.ChatCompletion.create(
+        model="gpt-3.5-turbo",
+        messages=[
+            {"role": "system", "content": "You are an assistant that helps compare texts for matching location "},
+            {"role": "user", "content": prompt}
+        ],
+        max_tokens=100
+    )
+    # Extract the response content
+    result = response.choices[0].message['content'].strip()
+    # Check if the response contains '1' or '0' and return the corresponding integer
+    if '1' in result:
+        return 1
+    elif '0' in result:
+        return 0
+    else:
+        raise ValueError(f"Unexpected response: {result}")
+def compare_age_range_with_description(age, age_DS, openai_api_key):
+    openai.api_key = openai_api_key
+    prompt = (f"Check if the age {age} falls within the age range '{age_DS}' "
+              f"Return '1' if it falls within the range, otherwise return '0'.\n\n"
+              f"Age: {age}\n\n"
+              f"Age Range: {age_DS}")
+    response = openai.ChatCompletion.create(
+        model="gpt-3.5-turbo",
+        messages=[
+            {"role": "system", "content": "You are an assistant that helps compare age ranges with a given age."},
+            {"role": "user", "content": prompt}
+        ],
+        max_tokens=100
+    )
+    result = response.choices[0].message['content'].strip()
+    # استفاده از regex برای پیدا کردن '1' یا '0'
+    match = re.search(r"\b[01]\b", result)
+    if match:
+        return int(match.group())
+    else:
+        raise ValueError(f"Unexpected response: {result}")

job_description_extractor.py ADDED Viewed

	@@ -0,0 +1,105 @@

+import openai
+import openai
+import re
+from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
+import torch
+class JobDescriptionExtractor:
+    def __init__(self, openai_api_key):
+        openai.api_key = openai_api_key
+    def extract_skills(self, text, skill_model_name_or_path):
+        skill_tokenizer = AutoTokenizer.from_pretrained(skill_model_name_or_path)
+        skill_model = AutoModelForTokenClassification.from_pretrained(skill_model_name_or_path)
+        inputs = skill_tokenizer(text, return_tensors="pt")
+        with torch.no_grad():
+            outputs = skill_model(**inputs)
+        logits = outputs.logits
+        predictions = torch.argmax(logits, dim=2)
+        tokens = skill_tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
+        tags = [skill_model.config.id2label[p.item()] for p in predictions[0]]
+        skills = []
+        temp_skill = ""
+        for token, tag in zip(tokens, tags):
+            if tag == "B-TECHNOLOGY":
+                if temp_skill:
+                    skills.append(temp_skill.strip())
+                    temp_skill = ""
+                skills.append(token)
+            elif tag == "B-TECHNICAL":
+                if temp_skill:
+                    skills.append(temp_skill.strip())
+                    temp_skill = ""
+                temp_skill = token
+            elif tag == "I-TECHNICAL":
+                temp_skill += token.replace('##', '')
+        if temp_skill:
+            skills.append(temp_skill.strip())
+        return list(set(skills))
+    def translate_text(self, text, target_language="en"):
+        response = openai.ChatCompletion.create(
+            model="gpt-3.5-turbo",
+            messages=[
+                {"role": "system", "content": "You are a helpful assistant that translates text."},
+                {"role": "user", "content": f"Translate the following text to {target_language}:\n\n{text}"}
+            ],
+            max_tokens=1000
+        )
+        return response.choices[0].message["content"].strip()
+    def extract_location(self, job_description):
+        response = openai.ChatCompletion.create(
+            model="gpt-3.5-turbo",
+            messages=[
+                {"role": "system", "content": "You are a helpful assistant that extracts information from text."},
+                {"role": "user", "content": f"Extract location from the following job description:\n\n{job_description}"}
+            ],
+            max_tokens=1000
+        )
+        return response.choices[0].message["content"].strip()
+    def title(self, text):
+        response = openai.ChatCompletion.create(
+            model="gpt-3.5-turbo",
+            messages=[
+                {"role": "system", "content": "You are a helpful assistant that extracts information from text."},
+                {"role": "user", "content": f"Extract the [Last Job Title] from the following text:\n\n{text}"}
+            ],
+            max_tokens=1000
+        )
+        return response.choices[0].message["content"].strip()
+    def extract_education(self, text):
+        response = openai.ChatCompletion.create(
+            model="gpt-3.5-turbo",
+            messages=[
+                {"role": "system", "content": "You are a helpful assistant that extracts information from text."},
+                {"role": "user", "content": f"Extract the [Highest Education Degree] from the following text:\n\n{text}"}
+            ],
+            max_tokens=1000
+        )
+        return response.choices[0].message["content"].strip()
+    def extract_age_range(self, text):
+        response = openai.ChatCompletion.create(
+        model="gpt-3.5-turbo",
+        messages=[
+            {"role": "system", "content": "You are a helpful assistant that extracts information from text."},
+            {"role": "user", "content": f"Extract the age range from the following text:\n\n{text}"}
+        ],
+        max_tokens=1000
+    )
+        return response.choices[0].message["content"].strip()
+        pass
+    def extract_job_info(self, job_description, skill_model_name_or_path):
+        # تابع استخراج اطلاعات کلی از توصیف شغلی
+        translated_job_description = self.translate_text(job_description)
+        job_skills = self.extract_skills(translated_job_description, skill_model_name_or_path)
+        education_job = self.extract_education(translated_job_description)
+        title_job = self.title(translated_job_description)
+        location = self.extract_location(translated_job_description)
+        age_DS = self.extract_age_range(translated_job_description)
+        return job_skills, education_job, title_job, location, age_DS

model_trainer.py ADDED Viewed

	@@ -0,0 +1,37 @@

+from sklearn.model_selection import train_test_split
+from sklearn.svm import SVR
+from sklearn.ensemble import RandomForestRegressor
+from sklearn.linear_model import LinearRegression, Lasso
+from sklearn.metrics import r2_score
+class ModelTrainer:
+    def __init__(self, dataframe):
+        self.dataframe = dataframe
+    def train_models(self):
+        features = list(self.dataframe.columns[:-1])
+        X = self.dataframe[features]
+        y = self.dataframe['TARGET']
+        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+        models = {
+            "SVR": SVR(),
+            "RandomForest": RandomForestRegressor(),
+            "LinearRegression": LinearRegression(),
+            "Lasso": Lasso()
+        }
+        best_model = None
+        best_score = float('-inf')
+        for name, model in models.items():
+            model.fit(X_train, y_train)
+            y_pred = model.predict(X_test)
+            score = r2_score(y_test, y_pred)
+            print(f"{name} R2 Score: {score}")
+            if score > best_score:
+                best_score = score
+                best_model = model
+        print(f"Best Model: {best_model}")
+        return best_model

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+# Web service
+fastapi
+uvicorn[standard]
+# Needed by FastAPI to parse the multipart file uploads of /extract
+python-multipart
+# Data and modelling
+pandas
+numpy
+scikit-learn
+# NER / skill-tagging models
+transformers
+torch
+# The code calls openai.ChatCompletion, which only exists before openai 1.0
+openai<1.0
+# resume_extractor, job_description_extractor, model_trainer, comparison_utils
+# and synthetic_data are local modules in this repository, not pip packages,
+# so they must not be listed here.

resume_extractor.py ADDED Viewed

	@@ -0,0 +1,147 @@

+import openai
+import re
+from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
+import torch
+class ResumeExtractor:
+    def __init__(self, ner_model_name_or_path, openai_api_key):
+        self.ner_model_name_or_path = ner_model_name_or_path
+        self.tokenizer = AutoTokenizer.from_pretrained(ner_model_name_or_path)
+        self.model = AutoModelForTokenClassification.from_pretrained(ner_model_name_or_path)
+        self.nlp = pipeline("ner", model=self.model, tokenizer=self.tokenizer)
+        openai.api_key = openai_api_key
+    def calculate_age(self, date_string):
+        current_year = 1403
+        ymd_match = re.match(r'(\d{1,4})/(\d{1,2})/(\d{1,2})', date_string)
+        if ymd_match:
+            year = int(ymd_match.group(1))
+            if len(ymd_match.group(1)) == 4:
+                age = current_year - year
+            else:
+                year += 1300
+                age = current_year - year
+            return age
+        four_digit_match = re.match(r'(13\d{2})', date_string)
+        if four_digit_match:
+            year = int(four_digit_match.group(1))
+            age = current_year - year
+            return age
+        return None
+    def translate_text(self, text, target_language="en"):
+        response = openai.ChatCompletion.create(
+            model="gpt-3.5-turbo",
+            messages=[
+                {"role": "system", "content": "You are a helpful assistant that translates text."},
+                {"role": "user", "content": f"Translate the following text to {target_language}:\n\n{text}"}
+            ],
+            max_tokens=1000
+        )
+        return response.choices[0].message["content"].strip()
+    def extract_ner_info(self, text):
+        ner_results = self.nlp(text)
+        full_name = ''
+        loc = ''
+        age = None
+        i = 0
+        while i < len(ner_results):
+            if ner_results[i]['entity'] == 'B-pers' and ner_results[i]['score'] >= 0.80:
+                if full_name:
+                    full_name += ' '
+                full_name += ner_results[i]['word']
+                current_score = ner_results[i]['score']
+                stop_adding = False
+                for j in range(i + 1, len(ner_results)):
+                    if ner_results[j]['entity'] == 'I-pers' and ner_results[j]['score'] >= 0.80:
+                        if ner_results[j]['score'] >= current_score * 0.90:
+                            full_name += ner_results[j]['word'].replace('##', '')
+                            current_score = ner_results[j]['score']
+                            i = j
+                        else:
+                            stop_adding = True
+                            break
+                    else:
+                        stop_adding = True
+                        break
+                if stop_adding:
+                    break
+            i += 1
+        for entity in ner_results:
+            if entity['entity'] in ['B-loc', 'I-loc']:
+                if loc:
+                    loc += ' '
+                loc += entity['word']
+        age_match = re.search(r'سن\s*:\s*(\d+)', text)
+        if age_match:
+            age = int(age_match.group(1))
+        else:
+            date_match = re.search(r'(\d{1,4}/\d{1,2}/\d{1,2})', text)
+            if date_match:
+                age = self.calculate_age(date_match.group(1))
+            else:
+                four_digit_match = re.search(r'(13\d{2})', text)
+                if four_digit_match:
+                    age = self.calculate_age(four_digit_match.group(1))
+        return full_name, loc, age
+    def extract_skills(self, text, skill_model_name_or_path):
+        skill_tokenizer = AutoTokenizer.from_pretrained(skill_model_name_or_path)
+        skill_model = AutoModelForTokenClassification.from_pretrained(skill_model_name_or_path)
+        inputs = skill_tokenizer(text, return_tensors="pt")
+        with torch.no_grad():
+            outputs = skill_model(**inputs)
+        logits = outputs.logits
+        predictions = torch.argmax(logits, dim=2)
+        tokens = skill_tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
+        tags = [skill_model.config.id2label[p.item()] for p in predictions[0]]
+        skills = []
+        temp_skill = ""
+        for token, tag in zip(tokens, tags):
+            if tag == "B-TECHNOLOGY":
+                if temp_skill:
+                    skills.append(temp_skill.strip())
+                    temp_skill = ""
+                skills.append(token)
+            elif tag == "B-TECHNICAL":
+                if temp_skill:
+                    skills.append(temp_skill.strip())
+                    temp_skill = ""
+                temp_skill = token
+            elif tag == "I-TECHNICAL":
+                temp_skill += token.replace('##', '')
+        if temp_skill:
+            skills.append(temp_skill.strip())
+        return list(set(skills))
+    def extract_education_resume(self, text):
+        response = openai.ChatCompletion.create(
+            model="gpt-3.5-turbo",
+            messages=[
+                {"role": "system", "content": "You are a helpful assistant that extracts information from text."},
+                {"role": "user", "content": f"Extract only the highest education degree and field from the following text:\n\n{text}\n\nFormat the response as 'Degree in Field' and nothing else."}
+            ],
+            max_tokens=1000
+        )
+        return response.choices[0].message["content"].strip()
+    def extract_job_resume(self, text):
+        response = openai.ChatCompletion.create(
+            model="gpt-3.5-turbo",
+            messages=[
+                {"role": "system", "content": "You are a helpful assistant that extracts information from text."},
+                {"role": "user", "content": f"Extract only the last job title from the following text:\n\n{text}\n\nProvide just the job title and nothing else."}
+            ],
+            max_tokens=1000
+        )
+        return response.choices[0].message["content"].strip()
+    def extract_resume_info(self, resume_text, skill_model_name_or_path):
+        # تابع استخراج اطلاعات کلی از رزومه
+        full_name, loc, age = self.extract_ner_info(resume_text)
+        translated_resume = self.translate_text(resume_text)
+        skills = self.extract_skills(translated_resume, skill_model_name_or_path)
+        education_resume = self.extract_education_resume(translated_resume)
+        title_job_resume = self.extract_job_resume(translated_resume)
+        return full_name, loc, age, skills, education_resume, title_job_resume

synthetic_data.py ADDED Viewed

	@@ -0,0 +1,29 @@

+import numpy as np
+import pandas as pd
+def create_synthetic_data(job_skills, education, title_job, location, age_DS, num_rows=2000):
+    if isinstance(job_skills, str):
+        job_skills = [job_skills]
+    if isinstance(education, str):
+        education = [education]
+    if isinstance(title_job, str):
+        title_job = [title_job]
+    if isinstance(location, str):
+        location = [location]
+    if isinstance(age_DS, str):
+        age_DS = [age_DS]
+    features = job_skills + education + title_job + location + age_DS
+    data = np.random.randint(2, size=(num_rows, len(features)))
+    df = pd.DataFrame(data, columns=features)
+    df['initial_TARGET'] = df.sum(axis=1)
+    min_target = df['initial_TARGET'].min()
+    max_target = df['initial_TARGET'].max()
+    df['TARGET'] = (df['initial_TARGET'] - min_target) * (100 / (max_target - min_target))
+    df.drop(columns=['initial_TARGET'], inplace=True)
+    df.loc[df.sum(axis=1) == 0, 'TARGET'] = 0
+    df.loc[df.sum(axis=1) == len(features), 'TARGET'] = 100
+    return df