Running Extended the available models to Llama 3, Gemma, and Mistral. Gemini is still not working.
Browse files
@@ -1,10 +1,16 @@
1 |
from dotenv import load_dotenv
2 |
import io
3 |
import streamlit as st
4 |
from langchain.prompts import PromptTemplate
5 |
from langchain_core.output_parsers import PydanticOutputParser
6 |
from langchain_anthropic import ChatAnthropic
7 |
from langchain_openai import ChatOpenAI
8 |
from pydantic import ValidationError
9 |
from langchain_core.pydantic_v1 import BaseModel, Field
10 |
from resume_template import Resume
@@ -13,6 +19,8 @@ import PyPDF2
13 |
import json
14 |
import time
15 |
import os
16 |
# Set the LANGCHAIN_TRACING_V2 environment variable to 'true'
17 |
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
18 |
@@ -73,31 +81,15 @@ def extract_resume_fields(full_text, model):
73 |
output = chain.invoke(full_text)
74 |
75 |
return output
76 |
except ValidationError as e:
77 |
if attempt == max_attempts:
78 |
raise e
79 |
80 |
81 |
attempt += 1
82 |
83 |
return None
84 |
85 |
# try:
86 |
# parsed_output = parser.parse(output.content)
87 |
# json_output = parsed_output.json()
88 |
# print(json_output)
89 |
# return json_output
90 |
91 |
# except ValidationError as e:
92 |
# print(f"Validation error: {e}")
93 |
# print(output)
94 |
# return output.content
95 |
96 |
# except JSONDecodeError as e:
97 |
# print(f"JSONDecodeError error: {e}")
98 |
# print(output)
99 |
# return output.content
100 |
101 |
def display_extracted_fields(obj, section_title=None, indent=0):
102 |
if section_title:
103 |
@@ -117,33 +109,59 @@ def display_extracted_fields(obj, section_title=None, indent=0):
117 |
118 |
st.write(" " * indent + f"{field_name.replace('_', ' ').title()}: " + str(field_value))
119 |
120 |
121 |
st.title("Resume Parser")
122 |
123 |
llm_dict = {
124 |
"GPT 3.5 turbo": ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo"),
125 |
"Anthropic Sonnet": ChatAnthropic(model_name="claude-3-sonnet-20240229"),
126 |
127 |
128 |
129 |
130 |
uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")
131 |
132 |
if uploaded_file is not None:
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 |
141 |
142 |
143 |
144 |
145 |
146 |
147 |
148 |
149 |
1 |
from dotenv import load_dotenv
2 |
import io
3 |
import streamlit as st
4 |
import streamlit.components.v1 as components
5 |
import base64
6 |
7 |
from langchain.prompts import PromptTemplate
8 |
from langchain_core.output_parsers import PydanticOutputParser
9 |
from langchain_anthropic import ChatAnthropic
10 |
from langchain_openai import ChatOpenAI
11 |
from langchain_groq import ChatGroq
12 |
from langchain_google_genai import ChatGoogleGenerativeAI
13 |
from langchain_core.exceptions import OutputParserException
14 |
from pydantic import ValidationError
15 |
from langchain_core.pydantic_v1 import BaseModel, Field
16 |
from resume_template import Resume
19 |
import json
20 |
import time
21 |
import os
22 |
23 |
24 |
# Set the LANGCHAIN_TRACING_V2 environment variable to 'true'
25 |
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
26 |
81 |
output = chain.invoke(full_text)
82 |
83 |
return output
84 |
except (OutputParserException, ValidationError) as e:
85 |
if attempt == max_attempts:
86 |
raise e
87 |
88 |
print(f"Parsing error occurred. Retrying (attempt {attempt + 1}/{max_attempts})...")
89 |
attempt += 1
90 |
91 |
return None
92 |
93 |
def display_extracted_fields(obj, section_title=None, indent=0):
94 |
if section_title:
95 |
109 |
110 |
st.write(" " * indent + f"{field_name.replace('_', ' ').title()}: " + str(field_value))
111 |
112 |
def get_json_download_link(json_str, download_name):
    """Build an HTML anchor that downloads pretty-printed JSON as a file.

    The JSON text is parsed and re-serialized with 4-space indentation,
    base64-encoded, and embedded in a ``data:`` URI, so the link carries
    the whole payload itself.

    Args:
        json_str: A string containing a valid JSON document.
        download_name: Base name for the downloaded file; ``.json`` is
            appended to it.

    Returns:
        A string holding an ``<a>`` element whose ``href`` is the
        base64 ``data:`` URI and whose ``download`` attribute is
        ``<download_name>.json``.

    Raises:
        json.JSONDecodeError: If ``json_str`` is not valid JSON.
    """
    # Local import so this function does not depend on a new file-level import.
    from html import escape

    # Round-trip through a Python object to normalize the formatting.
    json_str_formatted = json.dumps(json.loads(json_str), indent=4)

    b64 = base64.b64encode(json_str_formatted.encode("utf-8")).decode("ascii")

    # The name lands inside a double-quoted HTML attribute; escape it so a
    # quote or angle bracket in the name cannot break out of the attribute.
    safe_name = escape(str(download_name), quote=True)

    # "application/json" is the registered media type for JSON.
    return (
        f'<a href="data:application/json;base64,{b64}" '
        f'download="{safe_name}.json">Click here to download the JSON file</a>'
    )
122 |
123 |
124 |
125 |
st.title("Resume Parser")
126 |
127 |
llm_dict = {
128 |
"GPT 3.5 turbo": ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo"),
129 |
"Anthropic Sonnet": ChatAnthropic(model_name="claude-3-sonnet-20240229"),
130 |
"Llama 3": ChatGroq(model_name="llama3-70b-8192"),
131 |
"Gemma": ChatGroq(model_name="gemma-7b-it"),
132 |
"Mistral": ChatGroq(model_name="mixtral-8x7b-32768"),
133 |
# "Gemini 1.5 Pro": ChatGoogleGenerativeAI(model_name="gemini-1.5-pro-latest"),
134 |
135 |
136 |
137 |
138 |
uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")
139 |
col1, col2 = st.columns(2)
140 |
141 |
with col1:
142 |
selected_model1 = st.selectbox("Select Model 1", list(llm_dict.keys()), index=list(llm_dict.keys()).index("Llama 3"))
143 |
144 |
with col2:
145 |
selected_model2 = st.selectbox("Select Model 2", list(llm_dict.keys()), index=list(llm_dict.keys()).index("Mistral"))
146 |
147 |
if uploaded_file is not None:
148 |
text = pdf_to_string(uploaded_file)
149 |
150 |
if st.button("Extract Resume Fields"):
151 |
col1, col2 = st.columns(2)
152 |
153 |
with col1:
154 |
start_time = time.time()
155 |
extracted_fields1 = extract_resume_fields(text, selected_model1)
156 |
end_time = time.time()
157 |
elapsed_time = end_time - start_time
158 |
st.write(f"Extraction completed in {elapsed_time:.2f} seconds")
159 |
display_extracted_fields(extracted_fields1, "Extracted Resume Fields (Model 1)")
160 |
161 |
with col2:
162 |
start_time = time.time()
163 |
extracted_fields2 = extract_resume_fields(text, selected_model2)
164 |
end_time = time.time()
165 |
elapsed_time = end_time - start_time
166 |
st.write(f"Extraction completed in {elapsed_time:.2f} seconds")
167 |
display_extracted_fields(extracted_fields2, "Extracted Resume Fields (Model 2)")