Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -3,9 +3,24 @@ import os
|
|
3 |
import tempfile
|
4 |
from pathlib import Path
|
5 |
import time
|
6 |
-
from typing import List, Dict
|
7 |
import pandas as pd
|
8 |
from streamlit.runtime.uploaded_file_manager import UploadedFile
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
|
10 |
# Initialize Streamlit app
|
11 |
st.set_page_config(
|
@@ -14,15 +29,90 @@ st.set_page_config(
|
|
14 |
layout="wide"
|
15 |
)
|
16 |
|
17 |
-
def
|
18 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
try:
|
20 |
-
|
21 |
-
|
22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
except Exception as e:
|
24 |
-
st.error(f"Error
|
25 |
-
return
|
26 |
|
27 |
def process_resumes(uploaded_files: List[UploadedFile]) -> Dict:
|
28 |
"""Process multiple resumes and return results."""
|
@@ -34,21 +124,16 @@ def process_resumes(uploaded_files: List[UploadedFile]) -> Dict:
|
|
34 |
st.warning(f"Skipping {file.name}: Not a PDF file")
|
35 |
continue
|
36 |
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
name,
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
except Exception as e:
|
48 |
-
st.error(f"Error processing {file.name}: {e}")
|
49 |
-
finally:
|
50 |
-
# Clean up temporary file
|
51 |
-
os.unlink(temp_path)
|
52 |
|
53 |
return results
|
54 |
|
|
|
3 |
import tempfile
|
4 |
from pathlib import Path
|
5 |
import time
|
6 |
+
from typing import List, Dict, Tuple
|
7 |
import pandas as pd
|
8 |
from streamlit.runtime.uploaded_file_manager import UploadedFile
|
9 |
+
from anthropic import Anthropic
|
10 |
+
import pymongo
|
11 |
+
from dotenv import load_dotenv
|
12 |
+
|
13 |
+
# Load environment variables
|
14 |
+
load_dotenv()
|
15 |
+
|
16 |
+
# Initialize MongoDB client
|
17 |
+
MONGO_URI = os.getenv('MONGO_URI')
|
18 |
+
mongo_client = pymongo.MongoClient(MONGO_URI)
|
19 |
+
db = mongo_client['intratalent']
|
20 |
+
resume_collection = db['resumes']
|
21 |
+
|
22 |
+
# Initialize Anthropic client
|
23 |
+
anthropic = Anthropic(api_key=os.getenv('ANTHROPIC_API_KEY'))
|
24 |
|
25 |
# Initialize Streamlit app
|
26 |
st.set_page_config(
|
|
|
29 |
layout="wide"
|
30 |
)
|
31 |
|
32 |
+
def extract_info_with_claude(resume_content: bytes) -> str:
    """Extract the candidate's name and experiences from a PDF resume using Claude.

    The PDF bytes are base64-encoded and sent inline as a ``document`` content
    block together with the extraction prompt. The Messages API cannot read
    paths on the local filesystem, so no temporary file is written.

    Args:
        resume_content: Raw bytes of the uploaded PDF resume.

    Returns:
        The text of the first content block of Claude's reply. If the input is
        empty or the request fails for any reason, a string of the form
        ``"An error occurred: <details>"`` is returned instead of raising.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import section.
    import base64

    if not resume_content:
        # Nothing to analyse; report it through the same error-string contract
        # instead of spending an API call on an empty document.
        return "An error occurred: resume file is empty"

    prompt = """
    Extract the following information from the given resume:
    1. Full Name
    2. List of all experiences with their descriptions (copy exactly from resume)
    Please format the output as follows:
    Name: [Full Name]
    Projects:
    1. [Project Name]: [Project Description]
    2. [Project Name]: [Project Description]
    ...
    Extract all experiences, including projects, leadership, work experience, research, etc.
    """

    try:
        encoded_pdf = base64.standard_b64encode(resume_content).decode("ascii")
        message = anthropic.messages.create(
            # NOTE(review): the model must accept PDF document input; confirm
            # that this model id does, or switch to one that supports it.
            model="claude-3-haiku-20240307",
            max_tokens=4096,
            system="You are a helpful assistant that extracts information from resumes.",
            messages=[{
                "role": "user",
                "content": [
                    {
                        "type": "document",
                        "source": {
                            "type": "base64",
                            "media_type": "application/pdf",
                            "data": encoded_pdf,
                        },
                    },
                    {
                        "type": "text",
                        "text": prompt,
                    },
                ],
            }],
        )
        extracted_info = message.content[0].text
    except Exception as e:
        # Callers receive failures as text rather than as an exception.
        extracted_info = f"An error occurred: {e}"

    return extracted_info
|
79 |
+
|
80 |
+
def _parse_extracted_info(extracted_info: str) -> Tuple[str, List[Dict]]:
    """Split a "Name: ... / Projects: ..." reply into a name and a project list."""
    name = "Unknown"
    projects = []
    in_projects = False

    for raw_line in extracted_info.split('\n'):
        line = raw_line.strip()
        if not line:
            continue

        if line == "Projects:":
            in_projects = True
            continue

        if not in_projects:
            # Accept the first "Name:" / "Full Name:" style line anywhere above
            # the projects header, so a blank first line or a short preamble
            # does not turn the name into "Unknown".
            label, colon, value = line.partition(':')
            if name == "Unknown" and colon and label.strip().lower().endswith("name"):
                name = value.strip() or "Unknown"
            continue

        title, separator, description = line.partition(': ')
        if not separator:
            # Lines without a "title: description" shape are ignored.
            continue

        # Drop a leading list number ("3. ") only when it really is a number,
        # so titles such as "Dr. Smith Lab" are kept intact.
        number, dot, remainder = title.partition('. ')
        if dot and number.isdigit():
            title = remainder

        projects.append({"name": title, "description": description})

    return name, projects


def parse_resume(uploaded_file: UploadedFile) -> Tuple[str, List[Dict]]:
    """Parse an uploaded resume and return the candidate name and projects.

    The file's bytes are passed to ``extract_info_with_claude``; the reply is
    parsed into a name and a list of ``{"name", "description"}`` dicts, and the
    result is inserted into ``resume_collection`` before being returned.

    Args:
        uploaded_file: The Streamlit upload to process.

    Returns:
        ``(name, projects)``. On any failure the error is shown with
        ``st.error`` and ``("Unknown", [])`` is returned; nothing is stored.
    """
    try:
        resume_content = uploaded_file.getvalue()
        extracted_info = extract_info_with_claude(resume_content)

        # The extractor reports failures as text rather than raising. Without
        # this check the error message would be parsed as the candidate's name
        # and written to the database.
        if extracted_info.startswith("An error occurred:"):
            st.error(f"Error processing resume: {extracted_info}")
            return "Unknown", []

        name, projects = _parse_extracted_info(extracted_info)

        # Store in MongoDB
        resume_data = {
            "name": name,
            "projects": projects,
            # NOTE(review): the upload is raw file bytes; decoding them as
            # UTF-8 with errors ignored is lossy for binary PDFs. Kept as-is
            # because readers of this field are not visible here - confirm.
            "full_content": resume_content.decode('utf-8', errors='ignore')
        }
        resume_collection.insert_one(resume_data)

        return name, projects

    except Exception as e:
        st.error(f"Error processing resume: {e}")
        return "Unknown", []
|
116 |
|
117 |
def process_resumes(uploaded_files: List[UploadedFile]) -> Dict:
|
118 |
"""Process multiple resumes and return results."""
|
|
|
124 |
st.warning(f"Skipping {file.name}: Not a PDF file")
|
125 |
continue
|
126 |
|
127 |
+
try:
|
128 |
+
name, projects = parse_resume(file)
|
129 |
+
results[file.name] = {
|
130 |
+
"name": name,
|
131 |
+
"projects": projects
|
132 |
+
}
|
133 |
+
# Update progress
|
134 |
+
progress_bar.progress((idx + 1) / len(uploaded_files))
|
135 |
+
except Exception as e:
|
136 |
+
st.error(f"Error processing {file.name}: {e}")
|
|
|
|
|
|
|
|
|
|
|
137 |
|
138 |
return results
|
139 |
|