Spaces:
Running
Running
app.py: Extended the available models to Llama 3, Gemma, and Mistral. Gemini is still not working.
Browse files
app.py
CHANGED
@@ -1,10 +1,16 @@
|
|
1 |
from dotenv import load_dotenv
|
2 |
import io
|
3 |
import streamlit as st
|
|
|
|
|
|
|
4 |
from langchain.prompts import PromptTemplate
|
5 |
from langchain_core.output_parsers import PydanticOutputParser
|
6 |
from langchain_anthropic import ChatAnthropic
|
7 |
from langchain_openai import ChatOpenAI
|
|
|
|
|
|
|
8 |
from pydantic import ValidationError
|
9 |
from langchain_core.pydantic_v1 import BaseModel, Field
|
10 |
from resume_template import Resume
|
@@ -13,6 +19,8 @@ import PyPDF2
|
|
13 |
import json
|
14 |
import time
|
15 |
import os
|
|
|
|
|
16 |
# Set the LANGCHAIN_TRACING_V2 environment variable to 'true'
|
17 |
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
|
18 |
|
@@ -73,31 +81,15 @@ def extract_resume_fields(full_text, model):
|
|
73 |
output = chain.invoke(full_text)
|
74 |
print(output)
|
75 |
return output
|
76 |
-
except ValidationError as e:
|
77 |
if attempt == max_attempts:
|
78 |
raise e
|
79 |
else:
|
80 |
-
print(f"
|
81 |
attempt += 1
|
82 |
|
83 |
return None
|
84 |
|
85 |
-
# try:
|
86 |
-
# parsed_output = parser.parse(output.content)
|
87 |
-
# json_output = parsed_output.json()
|
88 |
-
# print(json_output)
|
89 |
-
# return json_output
|
90 |
-
|
91 |
-
# except ValidationError as e:
|
92 |
-
# print(f"Validation error: {e}")
|
93 |
-
# print(output)
|
94 |
-
# return output.content
|
95 |
-
|
96 |
-
# except JSONDecodeError as e:
|
97 |
-
# print(f"JSONDecodeError error: {e}")
|
98 |
-
# print(output)
|
99 |
-
# return output.content
|
100 |
-
|
101 |
def display_extracted_fields(obj, section_title=None, indent=0):
|
102 |
if section_title:
|
103 |
st.subheader(section_title)
|
@@ -117,33 +109,59 @@ def display_extracted_fields(obj, section_title=None, indent=0):
|
|
117 |
else:
|
118 |
st.write(" " * indent + f"{field_name.replace('_', ' ').title()}: " + str(field_value))
|
119 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
120 |
|
121 |
st.title("Resume Parser")
|
122 |
|
123 |
llm_dict = {
|
124 |
"GPT 3.5 turbo": ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo"),
|
125 |
"Anthropic Sonnet": ChatAnthropic(model_name="claude-3-sonnet-20240229"),
|
|
|
|
|
|
|
|
|
126 |
}
|
127 |
|
128 |
-
|
129 |
|
130 |
uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
131 |
|
132 |
if uploaded_file is not None:
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
|
|
|
|
|
|
|
1 |
from dotenv import load_dotenv
|
2 |
import io
|
3 |
import streamlit as st
|
4 |
+
import streamlit.components.v1 as components
|
5 |
+
import base64
|
6 |
+
|
7 |
from langchain.prompts import PromptTemplate
|
8 |
from langchain_core.output_parsers import PydanticOutputParser
|
9 |
from langchain_anthropic import ChatAnthropic
|
10 |
from langchain_openai import ChatOpenAI
|
11 |
+
from langchain_groq import ChatGroq
|
12 |
+
from langchain_google_genai import ChatGoogleGenerativeAI
|
13 |
+
from langchain_core.exceptions import OutputParserException
|
14 |
from pydantic import ValidationError
|
15 |
from langchain_core.pydantic_v1 import BaseModel, Field
|
16 |
from resume_template import Resume
|
|
|
19 |
import json
|
20 |
import time
|
21 |
import os
|
22 |
+
|
23 |
+
|
24 |
# Set the LANGCHAIN_TRACING_V2 environment variable to 'true'
|
25 |
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
|
26 |
|
|
|
81 |
output = chain.invoke(full_text)
|
82 |
print(output)
|
83 |
return output
|
84 |
+
except (OutputParserException, ValidationError) as e:
|
85 |
if attempt == max_attempts:
|
86 |
raise e
|
87 |
else:
|
88 |
+
print(f"Parsing error occurred. Retrying (attempt {attempt + 1}/{max_attempts})...")
|
89 |
attempt += 1
|
90 |
|
91 |
return None
|
92 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
93 |
def display_extracted_fields(obj, section_title=None, indent=0):
|
94 |
if section_title:
|
95 |
st.subheader(section_title)
|
|
|
109 |
else:
|
110 |
st.write(" " * indent + f"{field_name.replace('_', ' ').title()}: " + str(field_value))
|
111 |
|
def get_json_download_link(json_str, download_name):
    """Build an HTML anchor that downloads *json_str* as a ``.json`` file.

    The incoming JSON text is re-serialized with 4-space indentation so the
    downloaded file is human-readable, then base64-encoded and embedded in a
    ``data:`` URI. The returned string is raw HTML meant to be rendered with
    ``unsafe_allow_html``-style output.

    Args:
        json_str: A valid JSON document as a string.
        download_name: Base name (without extension) for the downloaded file.

    Returns:
        An ``<a>`` tag whose href is a base64 data URI of the formatted JSON.

    Raises:
        json.JSONDecodeError: If *json_str* is not valid JSON.
    """
    # Round-trip through the parser purely to normalize formatting.
    pretty = json.dumps(json.loads(json_str), indent=4)
    encoded = base64.b64encode(pretty.encode()).decode()
    return (
        f'<a href="data:file/json;base64,{encoded}" '
        f'download="{download_name}.json">Click here to download the JSON file</a>'
    )
# --- Streamlit page: upload a resume PDF and compare extraction across two models ---
st.set_page_config(layout="wide")

st.title("Resume Parser")

# Registry of selectable chat models, keyed by their display label.
llm_dict = {
    "GPT 3.5 turbo": ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo"),
    "Anthropic Sonnet": ChatAnthropic(model_name="claude-3-sonnet-20240229"),
    "Llama 3": ChatGroq(model_name="llama3-70b-8192"),
    "Gemma": ChatGroq(model_name="gemma-7b-it"),
    "Mistral": ChatGroq(model_name="mixtral-8x7b-32768"),
    # "Gemini 1.5 Pro": ChatGoogleGenerativeAI(model_name="gemini-1.5-pro-latest"),
}


uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")
col1, col2 = st.columns(2)

model_labels = list(llm_dict.keys())
with col1:
    selected_model1 = st.selectbox("Select Model 1", model_labels, index=model_labels.index("Llama 3"))

with col2:
    selected_model2 = st.selectbox("Select Model 2", model_labels, index=model_labels.index("Mistral"))

if uploaded_file is not None:
    text = pdf_to_string(uploaded_file)

    if st.button("Extract Resume Fields"):
        col1, col2 = st.columns(2)

        # Run each selected model in its own column and report wall-clock time.
        # NOTE(review): the selectbox value (the label string) is passed straight
        # to extract_resume_fields — presumably that function resolves it via
        # llm_dict; confirm against its definition.
        with col1:
            started = time.time()
            extracted_fields1 = extract_resume_fields(text, selected_model1)
            elapsed = time.time() - started
            st.write(f"Extraction completed in {elapsed:.2f} seconds")
            display_extracted_fields(extracted_fields1, "Extracted Resume Fields (Model 1)")

        with col2:
            started = time.time()
            extracted_fields2 = extract_resume_fields(text, selected_model2)
            elapsed = time.time() - started
            st.write(f"Extraction completed in {elapsed:.2f} seconds")
            display_extracted_fields(extracted_fields2, "Extracted Resume Fields (Model 2)")