import os
from typing import List, Union, Tuple, Dict
from langchain.chains.question_answering import load_qa_chain
from langchain.document_loaders import UnstructuredFileLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI as OpenAILLM
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
import gradio as gr
from openai import AzureOpenAI
import matplotlib.pyplot as plt
import pandas as pd
import logging
from PyPDF2 import PdfReader
import re
import plotly.graph_objects as go
import csv
from langchain_openai import AzureChatOpenAI
from langchain_openai import AzureOpenAIEmbeddings

# Configure logging
logging.basicConfig(
    filename='Resume_Analyzer.log',  # You can adjust the log file name here
    filemode='a',
    format='[%(asctime)s] [%(levelname)s] [%(filename)s] [%(lineno)s:%(funcName)s()] %(message)s',
    datefmt='%Y-%b-%d %H:%M:%S'
)
LOGGER = logging.getLogger(__name__)

log_level_env = 'INFO'  # You can adjust the log level here
log_level_dict = {
    'DEBUG': logging.DEBUG,
    'INFO': logging.INFO,
    'WARNING': logging.WARNING,
    'ERROR': logging.ERROR,
    'CRITICAL': logging.CRITICAL
}
# Fall back to INFO if an unknown level is configured
log_level = log_level_dict.get(log_level_env, logging.INFO)
LOGGER.setLevel(log_level)
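
# NOTE (assumption, not part of the original code): the AzureOpenAI, AzureChatOpenAI and
# AzureOpenAIEmbeddings clients used below read their credentials from the environment,
# so a typical setup would export something like:
#   AZURE_OPENAI_API_KEY=<your key>
#   AZURE_OPENAI_ENDPOINT=https://<your-resource>.openai.azure.com/
#   OPENAI_API_VERSION=<api version>
# The deployment names used here ("GPT-3", "ChatGPT", "text-embedding-3-large") must match
# deployments created in your Azure OpenAI resource.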


class JobPortal:

    def __init__(self) -> None:
        """
        Initialize the JobPortal object.
        Creates the Azure OpenAI client (credentials are read from the environment).
        """
        self.client = AzureOpenAI(azure_deployment="GPT-3")
        self.answer = ""

    def get_empty_state(self) -> dict:
        """
        Get an empty state for the knowledge base.
        Returns:
        - dict: An empty state dictionary.
        """
        LOGGER.info("Creating empty state dictionary...")
        return {"knowledge_base": None}

    def create_knowledge_base(self, docs: List[str]) -> FAISS:
        """
        Create a knowledge base from a set of documents.
        Args:
        - docs (list): List of documents to create a knowledge base from.
        Returns:
        - knowledge_base: The created knowledge base.
        """
        try:
            LOGGER.info("Creating Knowledge Base...")
            # Split the documents into chunks
            text_splitter = CharacterTextSplitter(
                separator="\n", chunk_size=500, chunk_overlap=0, length_function=len
            )
            chunks = text_splitter.split_documents(docs)
            # Create embeddings
            embeddings = AzureOpenAIEmbeddings(
                azure_deployment="text-embedding-3-large")
            # Create the knowledge base
            knowledge_base = FAISS.from_documents(chunks, embeddings)
            # Return the knowledge base
            return knowledge_base
        except Exception as e:
            LOGGER.error(f"Error creating knowledge base: {str(e)}")
            raise

    def upload_file(self, file_obj: gr.File) -> Tuple[str, Dict[str, FAISS]]:
        """
        Upload a file and create a knowledge base.
        Args:
        - file_obj: File object representing the uploaded file.
        Returns:
        - tuple: Tuple containing the file name and the knowledge base of the given document.
        """
        try:
            # Log that the process of unstructuring files is starting
            LOGGER.info("Unstructuring Files...")
            # Initialize an UnstructuredFileLoader with the uploaded file and a loading strategy
            loader = UnstructuredFileLoader(file_obj.name, strategy="fast")
            # Load the document(s) using the file loader
            docs = loader.load()
            # Create a knowledge base from the loaded documents
            knowledge_base = self.create_knowledge_base(docs)
            # Return the file name and the knowledge base as a dictionary
            return file_obj.name, {"knowledge_base": knowledge_base}
        except Exception as e:
            LOGGER.error(f"Error uploading file: {str(e)}")
            raise

    def answer_question(self, question: str, state: Dict[str, Union[None, Dict[str, FAISS]]], chat_history) -> Tuple[str, list]:
        """
        Answer a question using the knowledge base.
        Args:
        - question (str): The question to answer.
        - state (dict): The state containing the knowledge base.
        - chat_history (list): The chat history as a list of (question, answer) tuples.
        Returns:
        - tuple: An empty string (to clear the textbox) and the updated chat history.
        """
        try:
            # Log that the model is generating a response
            LOGGER.info("Generating Response From Model...")
            # Access the knowledge base from the state
            knowledge_base = state["knowledge_base"]
            # Perform similarity search on the knowledge base for the given question
            docs = knowledge_base.similarity_search(question)
            # Initialize the Azure OpenAI chat model
            llm = AzureChatOpenAI(azure_deployment="GPT-3")
            # Load a question-answering chain ("stuff" packs all retrieved chunks into one prompt)
            chain = load_qa_chain(llm, chain_type="stuff")
            # Run the question-answering chain on the retrieved documents and the question
            response = chain.run(input_documents=docs, question=question)
            # Append the question and response to the chat history
            chat_history.append((question, response))
            # Return an empty string and the updated chat history
            return "", chat_history
        except Exception as e:
            # Log an error if an exception occurs during question answering
            LOGGER.error(f"Error answering question: {str(e)}")
            raise
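
    # Illustrative note (assumption): the CSV analysed in get_graph below is expected to
    # contain at least the columns 'Domain', 'Working Time' and 'Career Gap (years)',
    # e.g. a hypothetical row such as:
    #   Domain,Working Time,Career Gap (years)
    #   Data Science,Full-Time,2
    # The exact schema of the original data file is not part of this source.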

    def get_graph(self, file_path: str) -> Tuple[go.Figure, go.Figure, go.Figure]:
        """
        Generate three types of charts based on data from a CSV file.
        Parameters:
        - file_path (str): The path to the CSV file.
        Returns:
        Tuple[go.Figure, go.Figure, go.Figure]: A tuple containing three Plotly figures
        (bar chart, pie chart, and histogram).
        """
        try:
            LOGGER.info("Creating graphs for the CSV file...")
            # Read data from the CSV file into a DataFrame
            df = pd.read_csv(file_path.name)
            # Chart 1: Bar chart - number of members by domain
            domain_counts = df['Domain'].value_counts()
            domain_fig = go.Figure(go.Bar(x=domain_counts.index, y=domain_counts, marker_color='skyblue'))
            domain_fig.update_layout(title='Number of Members by Domain', xaxis_title='Domain', yaxis_title='Number of Members')
            # Chart 2: Pie chart - distribution of working time
            working_time_counts = df['Working Time'].value_counts()
            working_time_fig = go.Figure(go.Pie(labels=working_time_counts.index, values=working_time_counts,
                                                pull=[0.1, 0], marker_colors=['lightcoral', 'lightskyblue']))
            working_time_fig.update_layout(title='Distribution of Working Time')
            # Chart 3: Histogram - distribution of career gaps
            career_gap_fig = go.Figure(go.Histogram(x=df['Career Gap (years)'], nbinsx=20, marker_color='lightgreen',
                                                    marker_line_color='black', marker_line_width=1.2))
            career_gap_fig.update_layout(title='Distribution of Career Gaps', xaxis_title='Career Gap (years)', yaxis_title='Number of Members')
            return domain_fig, working_time_fig, career_gap_fig
        except Exception as e:
            # Handle exceptions
            LOGGER.error(f"Error in get_graph: {str(e)}")
            raise

    def extract_text_from_pdf(self, pdf_path: str) -> str:
        """
        Extract text from a PDF file.
        Args:
        pdf_path (str): The path to the PDF file.
        Returns:
        str: The extracted text from the PDF.
        """
        text = ''
        try:
            LOGGER.info("Extracting text from PDF...")
            # Load the PDF document
            pdf = PdfReader(pdf_path)
            # Extract text from each page and accumulate it
            for page_number, page in enumerate(pdf.pages):
                try:
                    # extract_text() may return None for pages without extractable text
                    text += page.extract_text() or ""
                except Exception as e:
                    LOGGER.error(f"Error extracting text from page {page_number + 1}: {e}")
            # Return the extracted text
            return text
        except Exception as e:
            LOGGER.error(f"Error reading PDF file: {e}")
            raise

    def matching_percentage(self, resume_path: str, job_description_path: str) -> Tuple[str, go.Figure]:
        """
        Assess the matching percentage between a resume and a job description using the
        Azure OpenAI chat deployment.
        Parameters:
        - resume_path (str): Path to the resume file (PDF format).
        - job_description_path (str): Path to the job description file (PDF format).
        Returns:
        Tuple[str, go.Figure]: A tuple containing the matching result string and a Plotly figure.
        """
        try:
            LOGGER.info("Getting matching percentage...")
            # Extract text from the resume and job description PDFs
            resume = self.extract_text_from_pdf(resume_path.name)
            job_description = self.extract_text_from_pdf(job_description_path.name)
            # Create a conversation for the Azure OpenAI chat API
            conversation = [
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": f"""Given the job description and the resume, assess how well the resume matches the job description as a percentage out of 100. If the match is below 100 percent, explain what accounts for the remaining percentage.
                **Job Description:** {job_description}
                **Resume:** {resume}
                **Detailed Analysis:**
                The result should be in this format:
                Matched Percentage: [matching percentage].
                Reason: [reason and key points from the job description and resume behind this matched percentage].
                Skills To Improve: [skills to improve in order to better match the given job description].
                Keywords: [matched keywords from the job description and resume].
                """}
            ]
            # Call the Azure OpenAI chat deployment
            chat_completion = self.client.chat.completions.create(
                model="ChatGPT",
                messages=conversation,
                max_tokens=500,
                temperature=0
            )
            matched_result = chat_completion.choices[0].message.content
            # Generate a Plotly figure for visualization
            fig = self.get_plotly(matched_result)
            return matched_result, fig
        except Exception as e:
            # Handle exceptions
            LOGGER.error(f"Error in matching_percentage: {str(e)}")
            raise

    def get_plotly(self, result: str) -> go.Figure:
        """
        Extract the matched percentage from the input result and create a pie chart using Plotly.
        Parameters:
        - result (str): The input string containing information about the matched percentage.
        Returns:
        - go.Figure: Plotly figure object representing the pie chart.
        """
        try:
            LOGGER.info("Creating pie chart for matched percentage...")
            # Use a case-insensitive regex to extract the matched percentage
            match_percentage = re.search(r'matched percentage: (\d+)%', result, re.IGNORECASE)
            # If the expected format is found, extract the matched percentage
            if match_percentage:
                matched_percentage = int(match_percentage.group(1))
            else:
                # Otherwise, fall back to the first percentage anywhere in the text
                match_percentage = re.search(r'(\d+)%', result, re.IGNORECASE)
                matched_percentage = int(match_percentage.group(1))
            # Create a pie chart with Plotly
            labels = ['Matched', 'Not Matched']
            values = [matched_percentage, 100 - matched_percentage]
            fig = go.Figure(data=[go.Pie(labels=labels, values=values, pull=[0.1, 0])])
            fig.update_layout(title='Matched Percentage')
            return fig
        except Exception as e:
            # Log and re-raise the exception
            LOGGER.error(f"Error processing result: {str(e)}")
            raise
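
    # Example (illustrative): for a result string containing "Matched Percentage: 75%",
    # get_plotly extracts 75 and renders a pie chart with the values [75, 25].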

    def count_reviews(self) -> go.Figure:
        """
        Count and visualize the distribution of positive, negative, and neutral reviews.
        Returns:
        go.Figure: Plotly figure showing the distribution of reviews.
        """
        try:
            LOGGER.info("Counting reviews...")
            # Classified review text produced earlier by split_reviews
            data = self.answer
            # Split the data into sections based on the review categories
            sections = [section.strip() for section in data.split("\n\n")]
            # Initialize counters for positive, neutral, and negative reviews
            positive_count = 0
            neutral_count = 0
            negative_count = 0
            # Count the reviews in each section by category
            for section in sections:
                lines = section.split('\n')
                if len(lines) > 1:
                    category = lines[0].strip()
                    reviews = lines[1:]
                    count = len(reviews)
                    # Update counts based on the review category
                    if "Positive" in category:
                        positive_count += count
                    elif "Suggestion" in category:
                        neutral_count += count
                    elif "Negative" in category:
                        negative_count += count
            # Data for the bar graph
            labels = ['Positive', 'Negative', 'Neutral']
            counts = [positive_count, negative_count, neutral_count]
            # Create the bar graph using Plotly
            fig = go.Figure(data=[go.Bar(x=labels, y=counts, marker=dict(color=['green', 'red', 'gray']))])
            # Add title and axis labels
            fig.update_layout(title='Distribution of Reviews',
                              xaxis=dict(title='Sentiment'),
                              yaxis=dict(title='Number of Reviews'))
            return fig
        except Exception as e:
            # Log and raise an error in case of an exception
            LOGGER.error(f"Error in count_reviews: {e}")
            raise

    def csv_to_list(self, file_path: str) -> list:
        """
        Read a CSV file and convert it to a list.
        Args:
        file_path (str): Path to the CSV file.
        Returns:
        list: List containing data from the CSV file.
        """
        try:
            LOGGER.info("Extracting CSV...")
            # Initialize an empty list to store CSV data
            data_list = []
            # Open the CSV file and read its contents
            with open(file_path.name, 'r', newline='') as csv_file:
                csv_reader = csv.reader(csv_file)
                next(csv_reader, None)  # Skip the header row
                for row in csv_reader:
                    # Join the row's fields into a single string and append it to the list
                    data_list.append("".join(row))
            return data_list
        except Exception as e:
            # Log and raise an error in case of an exception
            LOGGER.error(f"Error in csv_to_list: {e}")
            raise
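
    # Note (assumption): since csv_to_list joins each row's fields with no separator, the
    # reviews CSV is presumably expected to hold one review per row (a single text column
    # after the header); the original data file is not part of this source.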

    def extract_top_reviews(self, file_path: str) -> tuple:
        """
        Extract the top suggestion, positive, and negative reviews from a CSV file.
        Args:
        file_path (str): Path to the CSV file.
        Returns:
        tuple: Suggestion reviews, positive reviews, and negative reviews.
        """
        try:
            LOGGER.info("Extracting top reviews...")
            # Set the number of top reviews to extract
            top_count = 5
            # Split the reviews into suggestion, positive, and negative categories
            suggestion_reviews, positive_reviews, negative_reviews = self.split_reviews(file_path)
            # Extract the top suggestion reviews (assuming each review is on a new line)
            reviews_list = suggestion_reviews.split("\n")
            suggest_reviews = "\n\n ".join(reviews_list[:top_count])
            # Extract the top positive reviews
            reviews_list = positive_reviews.split("\n")
            pos_reviews = "\n\n ".join(reviews_list[:top_count])
            # Extract the top negative reviews
            reviews_list = negative_reviews.split("\n")
            neg_reviews = "\n\n ".join(reviews_list[:top_count])
            return suggest_reviews, pos_reviews, neg_reviews
        except Exception as e:
            # Log and raise an error in case of an exception
            LOGGER.error(f"Error in extract_top_reviews: {e}")
            raise

    def split_reviews(self, file_path: str) -> tuple:
        """
        Split reviews into suggestion, positive, and negative categories using the Azure OpenAI API.
        Args:
        file_path (str): Path to the CSV file.
        Returns:
        tuple: Suggestion reviews, positive reviews, and negative reviews.
        """
        try:
            LOGGER.info("Classifying reviews...")
            # Convert the CSV file to a list of reviews
            reviews = self.csv_to_list(file_path)
            # Construct the prompt for the Azure OpenAI API
            prompt_template_ = [
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": f"""Read and analyse the following reviews, then return the suggestion reviews, positive reviews and negative reviews with labels ***{reviews}***.
                The result should be in this format:
                Suggestion Reviews:
                Positive Reviews:
                Negative Reviews:"""}
            ]
            # Call the Azure OpenAI API with the given prompt
            response = self.client.chat.completions.create(
                model="ChatGPT",  # Azure deployment name; you can use a different deployment
                messages=prompt_template_,
                max_tokens=200,
                temperature=0,
            )
            # Store the generated classification (also used later by count_reviews)
            self.answer = response.choices[0].message.content
            # Split the generated text into suggestion, positive, and negative reviews
            suggestion_reviews = self.answer.split("Suggestion Reviews:")[1].split("Positive Reviews:")[0].strip()
            positive_reviews = self.answer.split("Positive Reviews:")[1].split("Negative Reviews:")[0].strip()
            negative_reviews = self.answer.split("Negative Reviews:")[1].strip()
            return suggestion_reviews, positive_reviews, negative_reviews
        except Exception as e:
            # Log and raise an error in case of an exception
            LOGGER.error(f"Error in split_reviews: {e}")
            raise

    def file_name(self, upload_file: str) -> str:
        """
        Get the name of the uploaded file.
        Args:
        upload_file: File object.
        Returns:
        str: File name.
        """
        try:
            # Return the file path
            return upload_file.name
        except Exception as e:
            LOGGER.error(f"Error in file_name: {e}")
            raise

    def gradio_interface(self):
        """
        Create the Gradio interface for the JobPortal tool.
        """
        with gr.Blocks(css="style.css", theme='freddyaboulton/test-blue') as demo:
            gr.HTML("""<center class="darkblue" style="text-align:center;padding:30px;">
                <h1 style="color:#fff">Job Portal Tool</h1>
                </center>""")
            # QA
            state = gr.State(self.get_empty_state())
            with gr.Tab("QA and Graph"):
                with gr.Column(elem_id="col-container"):
                    gr.Markdown("**Upload your file**")
                    with gr.Row(elem_id="row-flex"):
                        with gr.Column(scale=0.90, min_width=160):
                            file_output = gr.File(elem_classes="filenameshow")
                        with gr.Column(scale=0.10, min_width=160):
                            upload_button = gr.UploadButton(
                                "Browse File", file_types=[".txt", ".pdf", ".doc", ".docx", ".csv"],
                                elem_classes="filenameshow")
                with gr.Row(elem_id="col-container"):
                    with gr.Column():
                        analyse_graph = gr.Button("Analyse Graph")
                with gr.TabItem("Chatbot"):
                    with gr.Row(elem_id="col-container"):
                        with gr.Column(scale=1, min_width=0):
                            chatbot = gr.Chatbot(label="Resume QA")
                            msg = gr.Textbox(label="Question")
                            clear = gr.ClearButton([msg, chatbot])
                # Analyse graph
                with gr.TabItem("Graph"):
                    with gr.Row(elem_id="col-container"):
                        with gr.Column(scale=1.0, min_width=150):
                            domain_graph = gr.Plot(label="Domain Graph")
                    with gr.Row(elem_id="col-container"):
                        with gr.Column(scale=1.0, min_width=150):
                            working_time_graph = gr.Plot(label="Working Time Graph")
                    with gr.Row(elem_id="col-container"):
                        with gr.Column(scale=1.0, min_width=150):
                            career_gap_graph = gr.Plot(label="Career Gap Graph")
            # Resume analyser
            with gr.Tab("Resume Analyzer"):
                with gr.Row(elem_id="col-container"):
                    with gr.Column(scale=0.55, min_width=150):
                        job_description = gr.File(label="Job Description", file_types=[".pdf", ".txt"])
                    with gr.Column(scale=0.55, min_width=150):
                        resume = gr.File(label="Resume", file_types=[".pdf", ".txt"])
                with gr.Row(elem_id="col-container"):
                    with gr.Column(scale=0.80, min_width=150):
                        analyse_btn = gr.Button("Analyse")
                    with gr.Column(scale=0.20, min_width=150):
                        clear_btn = gr.ClearButton()
                with gr.Row(elem_id="col-container"):
                    with gr.Column(scale=1.0, min_width=150):
                        matched_result = gr.Textbox(label="Matched Result", lines=10)
                with gr.Row(elem_id="col-container"):
                    with gr.Column(scale=1.0, min_width=150):
                        pychart = gr.Plot(label="Matching Percentage Chart")
            # Review analyser
            with gr.Tab("Reviews Analyzer"):
                with gr.Row(elem_id="col-container"):
                    with gr.Column(scale=0.90, min_width=160):
                        file_output_review = gr.File(elem_classes="filenameshow")
                    with gr.Column(scale=0.10, min_width=160):
                        upload_button_review = gr.UploadButton(
                            "Browse File", file_types=[".txt", ".pdf", ".doc", ".docx", ".json", ".csv"],
                            elem_classes="filenameshow")
                with gr.Row(elem_id="col-container"):
                    split_reviews_top_5_btn = gr.Button("Split Top 5 Reviews")
                with gr.Row(elem_id="col-container"):
                    suggested_reviews = gr.Textbox(label="Suggested Reviews", lines=10)
                    positive_reviews = gr.Textbox(label="Positive Reviews", lines=10)
                    negative_reviews = gr.Textbox(label="Negative Reviews", lines=10)
                with gr.Row(elem_id="col-container"):
                    sentiment_graph_btn = gr.Button("Sentiment Graph")
                with gr.Row(elem_id="col-container"):
                    sentiment_graph = gr.Plot(label="Sentiment Analysis")

            # QA
            upload_button.upload(self.upload_file, upload_button, [file_output, state])
            msg.submit(self.answer_question, [msg, state, chatbot], [msg, chatbot])
            # Analyse graph
            analyse_graph.click(self.get_graph, upload_button, [domain_graph, working_time_graph, career_gap_graph])
            # Resume analyser
            analyse_btn.click(self.matching_percentage, [resume, job_description], [matched_result, pychart])
            # Review analyser
            upload_button_review.upload(self.file_name, upload_button_review, file_output_review)
            sentiment_graph_btn.click(self.count_reviews, [], sentiment_graph)
            split_reviews_top_5_btn.click(self.extract_top_reviews, upload_button_review,
                                          [suggested_reviews, positive_reviews, negative_reviews])

        demo.launch(debug=True)


if __name__ == "__main__":
    analyze = JobPortal()
    analyze.gradio_interface()
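
# To run this app locally (a sketch, assuming this file is saved as app.py and the Azure
# OpenAI environment variables noted near the top of the file are set):
#   pip install gradio openai langchain langchain-openai langchain-community faiss-cpu unstructured PyPDF2 pandas plotly matplotlib
#   python app.py
# Gradio then serves the UI on the local URL printed by demo.launch().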