Spaces:

cdcvd
/

resume_API

Sleeping

App Files Files Community

cdcvd committed on Jul 14, 2024

Commit

7b059c6

verified ·

1 Parent(s): 4a2164f

Upload 5 files

Browse files

Files changed (5) hide show

N_F_skill_output.xlsx +0 -0
app.py +161 -0
education_output.xlsx +0 -0
jobs_output.xlsx +0 -0
requirements.txt +4 -0

N_F_skill_output.xlsx ADDED Viewed

Binary file (20.4 kB). View file

app.py ADDED Viewed

	@@ -0,0 +1,161 @@

+import pandas as pd
+import re
+import json
+from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
+import gradio as gr
def read_from_excel(file_path):
    """Load the ``object`` column of an Excel sheet as a list of strings.

    Args:
        file_path: Location of the workbook; handed unchanged to
            ``pandas.read_excel``.

    Returns:
        list[str]: One string per non-empty cell of the ``object`` column,
        in sheet order.

    Raises:
        KeyError: If the sheet has no column named ``object``.
    """
    df = pd.read_excel(file_path)
    # Drop blank cells before casting: ``astype(str)`` would otherwise turn
    # every NaN into the literal text "nan", which downstream substring
    # matching would then treat as a real keyword.
    items = df['object'].dropna().astype(str).tolist()
    return items
def preprocess_text(text):
    """Normalize the whitespace of *text*.

    Each zero-width non-joiner (U+200C) is turned into a regular space,
    leading and trailing whitespace is trimmed, and every remaining run of
    whitespace is collapsed into a single space.
    """
    spaced = text.replace('\u200c', ' ')
    trimmed = spaced.strip()
    return re.sub(r'\s+', ' ', trimmed)
def extract_items_in_text(text, items):
    """Return the entries of *items* that occur in *text*.

    The text and every item are normalized with ``preprocess_text`` and then
    compared case-insensitively as plain substrings.

    Args:
        text: Text to search in.
        items: Iterable of candidate strings.

    Returns:
        list[str]: The normalized form of each matching item, without
        duplicates, ordered by first appearance in *items*.
    """
    # Lower-case the haystack once instead of once per candidate.
    haystack = preprocess_text(text).lower()
    found_items = {}  # dict keys act as an insertion-ordered set
    for item in items:
        item_normalized = preprocess_text(item)
        # An empty needle is a substring of every string; skip it so blank
        # entries are never reported as matches.
        if not item_normalized:
            continue
        if item_normalized.lower() in haystack:
            found_items[item_normalized] = None
    return list(found_items)
def compare_items(items_1, items_2):
    """Score the word overlap between two collections of phrases.

    Every phrase of *items_1* is compared with every phrase of *items_2* on
    their lower-cased, whitespace-split word sets. A pair that shares at
    least one word is recorded and is worth 50 (one shared word), 75 (two)
    or 100 (three or more).

    Args:
        items_1: Iterable of phrases (strings).
        items_2: Iterable of phrases (strings).

    Returns:
        tuple[int, set[tuple[str, str]]]: The best pair score found (0 when
        no pair shares a word) and the set of ``(item1, item2)`` pairs that
        share at least one word.
    """
    common_items = set()
    score = 0  # stays 0 when nothing overlaps
    # Split the second collection once rather than once per outer iteration.
    candidates = [(item2, set(item2.lower().split())) for item2 in items_2]
    for item1 in items_1:
        words1 = set(item1.lower().split())
        for item2, words2 in candidates:
            num_common = len(words1 & words2)
            if num_common == 0:
                continue
            common_items.add((item1, item2))
            if num_common >= 3:
                pair_score = 100
            elif num_common == 2:
                pair_score = 75
            else:
                pair_score = 50
            # Keep the strongest match; a later, weaker pair must not
            # overwrite a better score found earlier.
            score = max(score, pair_score)
    return score, common_items
def compare_skills(skill_1, skill_2):
    """Score two skill collections by the number of skills they share.

    Args:
        skill_1: Iterable of skill names.
        skill_2: Iterable of skill names.

    Returns:
        tuple[int, set]: The score and the set of shared skills. The score
        is 100 for ten or more shared skills, 75 for seven to nine, 50 for
        five or six, and 25 otherwise (including no overlap at all).
    """
    common_skill = set(skill_1).intersection(skill_2)
    num_common = len(common_skill)
    # Thresholds are lower bounds: exact-equality checks would drop counts
    # such as 6, 8 or 9 into the lowest tier.
    if num_common >= 10:
        score = 100
    elif num_common >= 7:
        score = 75
    elif num_common >= 5:
        score = 50
    else:
        score = 25
    return score, common_skill
def extract_ner_info(text, nlp):
    """Extract a person name, a location and an age from *text*.

    Args:
        text: Raw input text.
        nlp: Callable that takes the text and returns a sequence of dicts,
            each with an ``'entity'`` label and a ``'word'`` string.

    Returns:
        tuple[str, str, int | None]: ``(full_name, loc, age)``.
        ``full_name`` is the first ``B-PER`` word followed by its directly
        following ``I-PER`` words, ``loc`` is the word of the first
        ``B-LOC`` entity, and ``age`` is the number after ``سن:`` in the
        text. Missing values are ``''``, ``''`` and ``None``.
    """
    ner_results = nlp(text)
    full_name = ''
    loc = ''
    for i, entity in enumerate(ner_results):
        label = entity['entity']
        # Like the location, only the first person found is kept, so names
        # mentioned later in the text do not replace it.
        if label == 'B-PER' and not full_name:
            name_parts = [entity['word']]
            for follower in ner_results[i + 1:]:
                if not follower['entity'].startswith('I-PER'):
                    break
                word = follower['word']
                piece = word.replace('##', '')
                if word.startswith('##'):
                    # '##' marks a continuation of the previous word: glue it on.
                    name_parts[-1] += piece
                else:
                    # A new word of the same name: keep it as a separate part
                    # so given and family names are not run together.
                    name_parts.append(piece)
            full_name = ' '.join(name_parts)
        elif label == 'B-LOC' and not loc:
            loc = entity['word']
    age = None
    age_match = re.search(r'سن\s*:\s*(\d+)', text)
    if age_match:
        age = int(age_match.group(1))
    return full_name, loc, age
# NER pipelines that have already been loaded, keyed by model name, so the
# model is built once per process instead of once per request.
_NER_PIPELINES = {}


def _get_ner_pipeline(model_name_or_path):
    """Return the "ner" pipeline for the given model, loading it on first use."""
    if model_name_or_path not in _NER_PIPELINES:
        tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
        model = AutoModelForTokenClassification.from_pretrained(model_name_or_path)  # Pytorch
        _NER_PIPELINES[model_name_or_path] = pipeline("ner", model=model, tokenizer=tokenizer)
    return _NER_PIPELINES[model_name_or_path]


def process_text(input_text):
    """Score *input_text* against a fixed job posting and return JSON.

    Job, education and skill keywords are read from three Excel files and
    searched for in both the fixed posting text and the input. The two
    keyword sets are compared, a name / location / age are extracted from
    the input, and five partial scores (job, education, skill, location,
    age) are averaged.

    Args:
        input_text: The text to evaluate.

    Returns:
        str: A JSON document (non-ASCII characters kept, 4-space indent)
        with the average score, the extracted fields, the partial scores and
        the matched jobs, education entries and skills.
    """
    # Paths of the Excel keyword files.
    job_excel_file_path = 'jobs_output.xlsx'
    education_excel_file_path = 'education_output.xlsx'
    skills_excel_file_path = 'N_F_skill_output.xlsx'
    # Read the job, education and skill keywords.
    jobs = read_from_excel(job_excel_file_path)
    education = read_from_excel(education_excel_file_path)
    skills = read_from_excel(skills_excel_file_path)
    # Fixed reference text the input is compared against.
    fixed_text = """استخدام کارآموز هوش مصنوعی (AI-شیراز)"""
    input_text = input_text.replace("آدرس", "")
    # Find the keywords present in each text.
    jobs_in_fixed_text = extract_items_in_text(fixed_text, jobs)
    jobs_in_input_text = extract_items_in_text(input_text, jobs)
    education_in_fixed_text = extract_items_in_text(fixed_text, education)
    education_in_input_text = extract_items_in_text(input_text, education)
    skills_in_fixed_text = extract_items_in_text(fixed_text, skills)
    skills_in_input_text = extract_items_in_text(input_text, skills)
    # Compare and score.
    job_score, common_jobs = compare_items(jobs_in_fixed_text, jobs_in_input_text)
    education_score, common_education = compare_items(education_in_fixed_text, education_in_input_text)
    skill_score, common_skills = compare_skills(skills_in_fixed_text, skills_in_input_text)
    # NER model; loading is expensive, so the pipeline is cached across calls.
    model_name_or_path = "HooshvareLab/distilbert-fa-zwnj-base-ner"
    nlp = _get_ner_pipeline(model_name_or_path)
    # Extract name, location and age.
    full_name, loc, age = extract_ner_info(input_text, nlp)
    # Location score: full marks only for an exact match with the fixed city.
    fixed_loc = "شیراز"
    loc_score = 100 if loc == fixed_loc else 0
    # Age score: full marks for ages 18 to 30 inclusive.
    age_score = 100 if age is not None and 18 <= age <= 30 else 0
    # Average of the five partial scores.
    average_score = (job_score + education_score + skill_score + loc_score + age_score) / 5
    # Build the JSON output; the match collections are sets, so they are
    # sorted to make the serialized order stable between runs.
    output = {
        "average_score": average_score,
        "full_name": full_name,
        "age": age,
        "location": loc,
        "job_score": job_score,
        "education_score": education_score,
        "skill_score": skill_score,
        "loc_score": loc_score,
        "age_score": age_score,
        "common_jobs": sorted(common_jobs),
        "common_education": sorted(common_education),
        "common_skills": sorted(common_skills),
    }
    return json.dumps(output, ensure_ascii=False, indent=4)
# Gradio UI: one multi-line text box in, the JSON produced by process_text out.
iface = gr.Interface(
    fn=process_text,
    # Use the top-level component: the legacy ``gr.inputs`` namespace is no
    # longer available in current Gradio releases, and gradio is not pinned.
    inputs=gr.Textbox(lines=10, placeholder="لطفاً متن خود را وارد کنید..."),
    outputs="json",
    title="متن پرداز",
    description="این ابزار متن شما را پردازش کرده و امتیازات مشابهت را محاسبه می‌کند."
)
# Start the web server only when the file is run as a script.
if __name__ == "__main__":
    iface.launch()

education_output.xlsx ADDED Viewed

Binary file (11 kB). View file

jobs_output.xlsx ADDED Viewed

Binary file (14.3 kB). View file

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+gradio
+transformers
+pandas
+openpyxl