Spaces:

Vaishvik1618
/

ATSscore

Sleeping

App Files Files Community

Vaishvik1618 committed on Nov 17, 2024

Commit

2053c29

verified ·

1 Parent(s): d657ff8

Upload app.py

Browse files

Files changed (1) hide show

app.py +174 -0

app.py ADDED Viewed

	@@ -0,0 +1,174 @@

+# import gradio as gr
+# from sklearn.feature_extraction.text import TfidfVectorizer
+# from sklearn.metrics.pairwise import cosine_similarity
+# import fitz
+# from docx import Document
+#
+# def read_resume_file(file):
+#     if file.name.endswith('.txt'):
+#         content = file.read().decode('utf-8')
+#     elif file.name.endswith('.pdf'):
+#         content = ''
+#         with fitz.open(stream=file.read(), filetype='pdf') as doc:
+#             for page in doc:
+#                 content+= page.get_text()
+#     elif file.name.endswith('.docx'):
+#         content =''
+#         document = Document(file)
+#         for para in document.paragraphs:
+#             content+=para.text+ '\n'
+#     else:
+#         return "Unsupported file format. Please upload a .txt, .pdf, or .docx file."
+#     return content
+#
+#
+# def calculate_similarity(job_desc, resume):
+#     vectorizer = TfidfVectorizer(stop_words = 'english')
+#     tfidf_matrix = vectorizer.fit_transform([job_desc, resume])
+#     print(tfidf_matrix)
+#
+#     similarityScore = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]
+#     return f"Similarity Score: {similarityScore * 100:.2f}%"
+#
+# def find_missing_keywords(job_desc, resume):
+#     vectorizer = TfidfVectorizer(stop_words='english')
+#     vectorizer.fit_transform([job_desc, resume])
+#
+#     job_desc_words = set(job_desc.lower().split())
+#     resume_words = set(resume.lower().split())
+#
+#     missing_words = job_desc_words - resume_words
+#
+#     return list(missing_words)
+#
+# def ats_evalution(job_desc, resume_file):
+#     resume_text = read_resume_file(resume_file)
+#     if isinstance(resume_text, str) and resume_text.startswith("Unsupported"):
+#         return resume_text, ""
+#     similarity = calculate_similarity(job_desc, resume_text)
+#     missing_keywords = find_missing_keywords(job_desc, resume_text)
+#
+#     if missing_keywords:
+#         missing_keywords_str = ", ".join(missing_keywords)
+#         missing_info = f"Missing Keywords: {missing_keywords_str}"
+#     else:
+#         missing_info = "No missing keywords. Your resume covers all keywords in the job description."
+#     return similarity, missing_info
+#
+# app = gr.Interface(
+#     fn=ats_evalution,
+#     inputs = [
+#         gr.Textbox(lines = 10, placeholder = 'Paste job description here....'),
+#         gr.File(label='Upload your resume (.txt & .pdf & .docx)')
+#     ],
+#
+#     outputs = [
+#         gr.Text(label="Similarity Score"),
+#         gr.Text(label="Missing Keywords")
+#     ],
+#
+#     title = "ATS Resume Score Generator",
+#     description="Upload your resume and paste the job description to get a similarity score and identify missing keywords."
+#
+# )
+#
+# if __name__ == "__main__":
+#     app.launch()
+#
+import gradio as gr
+import PyPDF2
+import docx
+import re
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.metrics.pairwise import cosine_similarity
+from nltk.corpus import stopwords
+from nltk.tokenize import word_tokenize
+import string
+import nltk
# Download the NLTK data packages needed at runtime, one after another.
for _nltk_resource in ('punkt_tab', 'punkt', 'stopwords'):
    nltk.download(_nltk_resource)
def extract_text_from_file(file):
    """Return the plain text of an uploaded resume.

    Args:
        file: The uploaded resume. Either a file-like object exposing
            ``name`` (and ``read``), or a plain path string. ``None`` is
            treated like an unsupported upload.

    Returns:
        The extracted text, or the "Unsupported file format..." message
        when the extension is not .txt, .pdf or .docx. The message is
        returned rather than raised; callers detect it by its text.
    """
    unsupported = "Unsupported file format. Please upload a .txt, .pdf, or .docx file."
    if file is None:
        return unsupported

    # Uploads may arrive as an open file object or as a path string, so
    # derive the path from ``name`` when present and from the value otherwise.
    path = str(getattr(file, 'name', file))
    extension = path.lower()  # accept ".PDF", ".Docx", ...
    is_stream = hasattr(file, 'read')
    # The PDF and DOCX readers are given the stream when there is one,
    # otherwise the path on disk.
    source = file if is_stream else path

    if extension.endswith('.pdf'):
        reader = PyPDF2.PdfReader(source)
        # extract_text() may yield nothing for image-only pages.
        return ''.join(page.extract_text() or '' for page in reader.pages)
    if extension.endswith('.docx'):
        document = docx.Document(source)
        return '\n'.join(para.text for para in document.paragraphs)
    if extension.endswith('.txt'):
        if is_stream:
            raw = file.read()
        else:
            with open(path, 'rb') as handle:
                raw = handle.read()
        if isinstance(raw, bytes):
            # Tolerate resumes that are not valid UTF-8 instead of crashing.
            return raw.decode('utf-8', errors='replace')
        return raw
    return unsupported
def preprocess_text(text):
    """Normalise free text into a space-separated string of content words.

    Lower-cases the text, removes digits, turns punctuation into spaces,
    tokenises with NLTK's ``word_tokenize`` and drops English stop words.

    Args:
        text: Raw text (job description or resume). ``None`` is treated as
            an empty string.

    Returns:
        The remaining tokens joined by single spaces.
    """
    text = (text or '').lower()
    text = re.sub(r'\d+', '', text)  # Remove numbers
    # Map punctuation to spaces instead of deleting it, so "python/java" or
    # "sql,aws" remain separate words rather than fusing into one token.
    punctuation_to_space = str.maketrans(string.punctuation, ' ' * len(string.punctuation))
    text = text.translate(punctuation_to_space)
    tokens = word_tokenize(text)
    stop_words = set(stopwords.words('english'))
    return ' '.join(word for word in tokens if word not in stop_words)
def extract_keywords(text, top_n=10):
    """Return up to ``top_n`` vocabulary terms of ``text`` as a set.

    A ``TfidfVectorizer`` limited to ``max_features=top_n`` is fitted on the
    single document and its learned feature names are returned.

    Args:
        text: Text to analyse (already preprocessed by the caller).
        top_n: Maximum number of terms to keep.

    Returns:
        A set of terms; empty when ``text`` is empty, blank, or yields no
        tokens the vectorizer accepts.
    """
    if not text or not text.strip():
        return set()
    vectorizer = TfidfVectorizer(max_features=top_n)
    try:
        # Only the fitted vocabulary is needed, not the transformed matrix.
        vectorizer.fit([text])
    except ValueError:
        # scikit-learn raises when the document produces an empty vocabulary.
        return set()
    return set(vectorizer.get_feature_names_out())
def ats_evaluation(job_desc, resume_file):
    """Score a resume against a job description and list missing keywords.

    Args:
        job_desc: Job description text supplied by the user.
        resume_file: Uploaded resume, handed to ``extract_text_from_file``.

    Returns:
        A ``(similarity_message, keywords_message)`` tuple of strings. When
        the input cannot be evaluated, the first element explains why and
        the second is empty (or explains the lack of keywords).
    """
    # Validate the inputs first so a missing upload or blank description
    # produces a readable message instead of an exception.
    if resume_file is None:
        return "Please upload a resume file.", ""
    if not job_desc or not job_desc.strip():
        return "Please paste a job description.", ""

    resume_text = extract_text_from_file(resume_file)
    # Match the error message by its prefix; a substring test would treat
    # any resume that merely contains the word "Unsupported" as an error.
    if isinstance(resume_text, str) and resume_text.startswith("Unsupported file format"):
        return resume_text, ""

    job_desc_processed = preprocess_text(job_desc)
    resume_processed = preprocess_text(resume_text)
    if not resume_processed:
        return "No readable text could be extracted from the resume.", ""
    if not job_desc_processed:
        return "The job description contains no usable keywords.", ""

    try:
        job_keywords = extract_keywords(job_desc_processed)
        resume_keywords = extract_keywords(resume_processed)
        # Calculate similarity score
        tfidf_matrix = TfidfVectorizer().fit_transform(
            [job_desc_processed, resume_processed]
        )
    except ValueError:
        # scikit-learn raises when the text yields an empty vocabulary.
        return "Similarity Score: 0.00%", "Not enough text to extract keywords."
    similarity_score = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]

    # Format output
    similarity_output = f"Similarity Score: {similarity_score * 100:.2f}%"
    missing_keywords = job_keywords - resume_keywords
    if missing_keywords:
        # Sort so the same inputs always produce the same message.
        missing_keywords_output = f"Missing Keywords: {', '.join(sorted(missing_keywords))}"
    else:
        missing_keywords_output = "No missing keywords. Your resume covers all key terms."
    return similarity_output, missing_keywords_output
# Assemble the Gradio interface: a job-description textbox and a resume
# upload feed ats_evaluation, whose two strings fill the output textboxes.
_inputs = [
    gr.Textbox(
        lines=10,
        placeholder='Paste job description here...',
        label="Job Description",
    ),
    gr.File(label='Upload your resume (.txt, .pdf, .docx)'),
]
_outputs = [
    gr.Textbox(label="Similarity Score"),
    gr.Textbox(label="Missing Keywords"),
]
app = gr.Interface(
    fn=ats_evaluation,
    inputs=_inputs,
    outputs=_outputs,
    title="ATS Resume Score Generator",
    description="Upload your resume and paste the job description to get a similarity score and identify missing keywords.",
)

# Launch only when this file is executed directly.
if __name__ == "__main__":
    app.launch()