rushankg committed on
Commit
ea97af8
·
verified ·
1 Parent(s): 6e7206a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +108 -23
app.py CHANGED
@@ -3,9 +3,24 @@ import os
3
  import tempfile
4
  from pathlib import Path
5
  import time
6
- from typing import List, Dict
7
  import pandas as pd
8
  from streamlit.runtime.uploaded_file_manager import UploadedFile
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
  # Initialize Streamlit app
11
  st.set_page_config(
@@ -14,15 +29,90 @@ st.set_page_config(
14
  layout="wide"
15
  )
16
 
17
- def save_uploaded_file(uploaded_file) -> str:
18
- """Save uploaded file to temporary directory and return path."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  try:
20
- with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
21
- tmp_file.write(uploaded_file.getvalue())
22
- return tmp_file.name
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  except Exception as e:
24
- st.error(f"Error saving file: {e}")
25
- return None
26
 
27
  def process_resumes(uploaded_files: List[UploadedFile]) -> Dict:
28
  """Process multiple resumes and return results."""
@@ -34,21 +124,16 @@ def process_resumes(uploaded_files: List[UploadedFile]) -> Dict:
34
  st.warning(f"Skipping {file.name}: Not a PDF file")
35
  continue
36
 
37
- temp_path = save_uploaded_file(file)
38
- if temp_path:
39
- try:
40
- name, projects = parse_resume(temp_path)
41
- results[file.name] = {
42
- "name": name,
43
- "projects": projects
44
- }
45
- # Update progress
46
- progress_bar.progress((idx + 1) / len(uploaded_files))
47
- except Exception as e:
48
- st.error(f"Error processing {file.name}: {e}")
49
- finally:
50
- # Clean up temporary file
51
- os.unlink(temp_path)
52
 
53
  return results
54
 
 
3
  import tempfile
4
  from pathlib import Path
5
  import time
6
+ from typing import List, Dict, Tuple
7
  import pandas as pd
8
  from streamlit.runtime.uploaded_file_manager import UploadedFile
9
+ from anthropic import Anthropic
10
+ import pymongo
11
+ from dotenv import load_dotenv
12
+
13
+ # Load environment variables
14
+ load_dotenv()
15
+
16
+ # Initialize MongoDB client
17
+ MONGO_URI = os.getenv('MONGO_URI')
18
+ mongo_client = pymongo.MongoClient(MONGO_URI)
19
+ db = mongo_client['intratalent']
20
+ resume_collection = db['resumes']
21
+
22
+ # Initialize Anthropic client
23
+ anthropic = Anthropic(api_key=os.getenv('ANTHROPIC_API_KEY'))
24
 
25
  # Initialize Streamlit app
26
  st.set_page_config(
 
29
  layout="wide"
30
  )
31
 
32
def extract_info_with_claude(
    resume_content: bytes,
    model: str = "claude-3-haiku-20240307",
) -> str:
    """Extract the candidate's name and experiences from a PDF resume via Claude.

    The PDF bytes are sent inline as a base64-encoded ``document`` content
    block. The Messages API does not read local file paths, so no temporary
    file is written.

    Args:
        resume_content: Raw bytes of the PDF resume.
        model: Anthropic model identifier to use for the request.
            NOTE(review): the default model may not accept PDF document
            input; confirm against the Anthropic PDF-support documentation
            and pass a PDF-capable model if needed.

    Returns:
        The text of the first content block of Claude's reply, which the
        prompt asks to be laid out as ``Name: ...`` followed by a
        ``Projects:`` list. If anything fails, the string
        ``"An error occurred: <details>"`` is returned instead of raising.
    """
    import base64  # local import: only this function needs it

    prompt = """
Extract the following information from the given resume:
1. Full Name
2. List of all experiences with their descriptions (copy exactly from resume)
Please format the output as follows:
Name: [Full Name]
Projects:
1. [Project Name]: [Project Description]
2. [Project Name]: [Project Description]
...
Extract all experiences, including projects, leadership, work experience, research, etc.
"""

    try:
        encoded_pdf = base64.standard_b64encode(resume_content).decode("ascii")
        message = anthropic.messages.create(
            model=model,
            max_tokens=4096,
            system="You are a helpful assistant that extracts information from resumes.",
            messages=[{
                "role": "user",
                "content": [
                    # The document goes first so the instructions that
                    # follow can refer to it.
                    {
                        "type": "document",
                        "source": {
                            "type": "base64",
                            "media_type": "application/pdf",
                            "data": encoded_pdf,
                        },
                    },
                    {
                        "type": "text",
                        "text": prompt,
                    },
                ],
            }],
        )
        extracted_info = message.content[0].text
    except Exception as e:
        # Callers expect a string back, so the failure is reported in-band.
        extracted_info = f"An error occurred: {e}"

    return extracted_info
79
+
80
def _parse_extracted_info(extracted_info: str) -> Tuple[str, List[Dict]]:
    """Split a ``Name: ... / Projects: ...`` reply into a name and project list."""
    import re  # local import: only this helper needs it

    name = "Unknown"
    projects = []
    in_projects = False

    for raw_line in extracted_info.splitlines():
        line = raw_line.strip()
        if not line:
            continue
        if line == "Projects:":
            in_projects = True
            continue
        if not in_projects:
            # The model may emit a preamble line before "Name:", so look for
            # the label rather than assuming it is the first line.
            if name == "Unknown" and line.startswith("Name:"):
                name = line[len("Name:"):].strip() or "Unknown"
            continue

        label, separator, description = line.partition(": ")
        if not separator:
            continue
        # Strip only a leading list number ("1. " / "2) ") so names that
        # contain ". " themselves (e.g. "Acme Inc. Intern") stay intact.
        project_name = re.sub(r"^\d+[.)]\s*", "", label).strip()
        projects.append({"name": project_name, "description": description})

    return name, projects


def parse_resume(uploaded_file: UploadedFile) -> Tuple[str, List[Dict]]:
    """Parse an uploaded resume, store the result in MongoDB, and return it.

    Args:
        uploaded_file: The Streamlit upload whose bytes are sent to
            ``extract_info_with_claude``.

    Returns:
        A ``(name, projects)`` tuple where ``projects`` is a list of
        ``{"name": ..., "description": ...}`` dicts. On any failure an error
        is shown with ``st.error`` and ``("Unknown", [])`` is returned;
        nothing is written to MongoDB in that case.
    """
    try:
        resume_content = uploaded_file.getvalue()
        extracted_info = extract_info_with_claude(resume_content)

        # The extractor reports failures in-band as a string. Treat that as
        # an error instead of parsing it as resume text and storing it.
        if extracted_info.startswith("An error occurred: "):
            raise RuntimeError(extracted_info)

        name, projects = _parse_extracted_info(extracted_info)

        # Store in MongoDB
        resume_data = {
            "name": name,
            "projects": projects,
            # NOTE(review): the upload is PDF bytes, so decoding as UTF-8
            # with errors ignored is unlikely to yield readable text.
            # Kept as-is for compatibility with existing stored records.
            "full_content": resume_content.decode('utf-8', errors='ignore')
        }
        resume_collection.insert_one(resume_data)

        return name, projects

    except Exception as e:
        st.error(f"Error processing resume: {e}")
        return "Unknown", []
116
 
117
  def process_resumes(uploaded_files: List[UploadedFile]) -> Dict:
118
  """Process multiple resumes and return results."""
 
124
  st.warning(f"Skipping {file.name}: Not a PDF file")
125
  continue
126
 
127
+ try:
128
+ name, projects = parse_resume(file)
129
+ results[file.name] = {
130
+ "name": name,
131
+ "projects": projects
132
+ }
133
+ # Update progress
134
+ progress_bar.progress((idx + 1) / len(uploaded_files))
135
+ except Exception as e:
136
+ st.error(f"Error processing {file.name}: {e}")
 
 
 
 
 
137
 
138
  return results
139