# import gradio as gr
# from sklearn.feature_extraction.text import TfidfVectorizer
# from sklearn.metrics.pairwise import cosine_similarity
# import fitz
# from docx import Document
#
# def read_resume_file(file):
#     if file.name.endswith('.txt'):
#         content = file.read().decode('utf-8')
#     elif file.name.endswith('.pdf'):
#         content = ''
#         with fitz.open(stream=file.read(), filetype='pdf') as doc:
#             for page in doc:
#                 content+= page.get_text()
#     elif file.name.endswith('.docx'):
#         content =''
#         document = Document(file)
#         for para in document.paragraphs:
#             content+=para.text+ '\n'
#     else:
#         return "Unsupported file format. Please upload a .txt, .pdf, or .docx file."
#     return content
#
#
# def calculate_similarity(job_desc, resume):
#     vectorizer = TfidfVectorizer(stop_words = 'english')
#     tfidf_matrix = vectorizer.fit_transform([job_desc, resume])
#     print(tfidf_matrix)
#
#     similarityScore = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]
#     return f"Similarity Score: {similarityScore * 100:.2f}%"
#
# def find_missing_keywords(job_desc, resume):
#     vectorizer = TfidfVectorizer(stop_words='english')
#     vectorizer.fit_transform([job_desc, resume])
#
#     job_desc_words = set(job_desc.lower().split())
#     resume_words = set(resume.lower().split())
#
#     missing_words = job_desc_words - resume_words
#
#     return list(missing_words)
#
# def ats_evalution(job_desc, resume_file):
#     resume_text = read_resume_file(resume_file)
#     if isinstance(resume_text, str) and resume_text.startswith("Unsupported"):
#         return resume_text, ""
#     similarity = calculate_similarity(job_desc, resume_text)
#     missing_keywords = find_missing_keywords(job_desc, resume_text)
#
#     if missing_keywords:
#         missing_keywords_str = ", ".join(missing_keywords)
#         missing_info = f"Missing Keywords: {missing_keywords_str}"
#     else:
#         missing_info = "No missing keywords. Your resume covers all keywords in the job description."
#     return similarity, missing_info
#
# app = gr.Interface(
#     fn=ats_evalution,
#     inputs = [
#         gr.Textbox(lines = 10, placeholder = 'Paste job description here....'),
#         gr.File(label='Upload your resume (.txt & .pdf & .docx)')
#     ],
#
#     outputs = [
#         gr.Text(label="Similarity Score"),
#         gr.Text(label="Missing Keywords")
#     ],
#
#     title = "ATS Resume Score Generator",
#     description="Upload your resume and paste the job description to get a similarity score and identify missing keywords."
#
# )
#
# if __name__ == "__main__":
#     app.launch()
#

# Third-party dependencies: gradio (UI), PyPDF2/docx (file parsing),
# scikit-learn (TF-IDF + cosine similarity), nltk (tokenizing/stopwords).
import gradio as gr
import PyPDF2
import docx
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import string
import nltk
# Fetch NLTK data at import time (a no-op if already cached locally).
# NOTE(review): this adds startup latency and console noise on every launch;
# consider nltk.download(..., quiet=True) or a one-time setup step.
nltk.download('punkt_tab')
# Download necessary NLTK data
nltk.download('punkt')
nltk.download('stopwords')

# Function to extract text from uploaded files
def extract_text_from_file(file):
    """Extract plain text from an uploaded resume file.

    Supports .txt, .pdf and .docx. The extension check is case-insensitive,
    so e.g. 'Resume.PDF' is accepted (the original check was case-sensitive
    and rejected upper-case extensions).

    Args:
        file: A binary file-like object with a ``name`` attribute, as
            supplied by ``gr.File``.

    Returns:
        The extracted text, or an error-message string for unsupported
        extensions.
    """
    name = file.name.lower()  # case-insensitive extension matching
    if name.endswith('.pdf'):
        reader = PyPDF2.PdfReader(file)
        text = ''
        for page in reader.pages:
            page_text = page.extract_text()
            # extract_text() may return None (e.g. image-only pages); skip those.
            if page_text:
                text += page_text
        return text
    elif name.endswith('.docx'):
        doc = docx.Document(file)
        return '\n'.join(para.text for para in doc.paragraphs)
    elif name.endswith('.txt'):
        return file.read().decode('utf-8')
    else:
        return "Unsupported file format. Please upload a .txt, .pdf, or .docx file."

# Function to preprocess the text
def preprocess_text(text):
    """Normalize free text for keyword/similarity analysis.

    Lowercases, strips digits and punctuation, tokenizes with NLTK, and
    drops English stopwords. Returns the surviving tokens joined by
    single spaces.
    """
    lowered = text.lower()
    no_digits = re.sub(r'\d+', '', lowered)  # drop numeric runs
    # One C-level pass removes all punctuation characters.
    cleaned = no_digits.translate(str.maketrans('', '', string.punctuation))
    stop_words = set(stopwords.words('english'))
    kept = [token for token in word_tokenize(cleaned) if token not in stop_words]
    return ' '.join(kept)

# Function to extract keywords using TF-IDF
def extract_keywords(text, top_n=10):
    """Return the ``top_n`` highest-ranked TF-IDF terms of *text* as a set.

    A fresh vectorizer capped at ``max_features=top_n`` is fitted on the
    single document, so its vocabulary is exactly the selected keywords.
    """
    tfidf = TfidfVectorizer(max_features=top_n)
    tfidf.fit_transform([text])
    return set(tfidf.get_feature_names_out())

# Combined function to evaluate ATS score and find missing keywords
def ats_evaluation(job_desc, resume_file):
    """Evaluate a resume against a job description.

    Args:
        job_desc: Job description text pasted by the user.
        resume_file: Uploaded file object from ``gr.File`` (may be None if
            the user submits without uploading).

    Returns:
        A ``(similarity_output, missing_keywords_output)`` tuple of display
        strings for the two Gradio output boxes.
    """
    # Guard: submitting with no upload would otherwise crash on file.name.
    if resume_file is None:
        return "Please upload a resume file.", ""

    resume_text = extract_text_from_file(resume_file)
    if isinstance(resume_text, str) and "Unsupported" in resume_text:
        return resume_text, ""

    job_desc_processed = preprocess_text(job_desc)
    resume_processed = preprocess_text(resume_text)

    # Top TF-IDF terms of each document; resume must cover the job's terms.
    job_keywords = extract_keywords(job_desc_processed)
    resume_keywords = extract_keywords(resume_processed)

    missing_keywords = job_keywords - resume_keywords

    # Cosine similarity between the two TF-IDF vectors (0.0 - 1.0).
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform([job_desc_processed, resume_processed])
    similarity_score = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]

    # Format output
    similarity_output = f"Similarity Score: {similarity_score * 100:.2f}%"
    if missing_keywords:
        # Sort so the displayed order is deterministic (set iteration order
        # varies between runs due to hash randomization).
        missing_keywords_output = f"Missing Keywords: {', '.join(sorted(missing_keywords))}"
    else:
        missing_keywords_output = "No missing keywords. Your resume covers all key terms."

    return similarity_output, missing_keywords_output

# Create the Gradio interface
# Two inputs (job-description textbox, resume file upload) map to the two
# positional parameters of ats_evaluation; its 2-tuple return fills the two
# output textboxes in order.
app = gr.Interface(
    fn=ats_evaluation,
    inputs=[
        gr.Textbox(lines=10, placeholder='Paste job description here...', label="Job Description"),
        gr.File(label='Upload your resume (.txt, .pdf, .docx)')
    ],
    outputs=[
        gr.Textbox(label="Similarity Score"),
        gr.Textbox(label="Missing Keywords")
    ],
    title="ATS Resume Score Generator",
    description="Upload your resume and paste the job description to get a similarity score and identify missing keywords."
)

# Run the app
# Launch only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    app.launch()