rushankg committed on
Commit
33c7116
·
verified ·
1 Parent(s): e07ba78

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -27
app.py CHANGED
@@ -9,6 +9,7 @@ from streamlit.runtime.uploaded_file_manager import UploadedFile
9
  from anthropic import Anthropic
10
  import pymongo
11
  from dotenv import load_dotenv
 
12
 
13
  # Load environment variables
14
  load_dotenv()
@@ -29,15 +30,34 @@ st.set_page_config(
29
  layout="wide"
30
  )
31
 
32
- def extract_info_with_claude(resume_content: bytes) -> str:
33
- """Extract information from resume using Claude."""
34
- # Create a temporary file to store the resume content
35
- with tempfile.NamedTemporaryFile(mode='w+b', suffix='.pdf', delete=False) as temp_file:
36
- temp_file.write(resume_content)
37
- temp_file_path = temp_file.name
 
 
 
 
 
 
 
 
 
38
 
39
- st.write("πŸ“„ Created temporary PDF file for Claude analysis")
 
 
 
 
 
 
40
 
 
 
 
 
41
  prompt = """
42
  Extract the following information from the given resume:
43
  1. Full Name
@@ -49,27 +69,19 @@ def extract_info_with_claude(resume_content: bytes) -> str:
49
  2. [Project Name]: [Project Description]
50
  ...
51
  Extract all experiences, including projects, leadership, work experience, research, etc.
52
- """
53
 
54
- st.write("πŸ€– Sending request to Claude API...")
 
 
55
 
56
  try:
57
  message = anthropic.messages.create(
58
- model="claude-3-5-sonnet-20240620",
59
  max_tokens=4096,
60
  system="You are a helpful assistant that extracts information from resumes.",
61
  messages=[{
62
  "role": "user",
63
- "content": [
64
- {
65
- "type": "text",
66
- "text": prompt
67
- },
68
- {
69
- "type": "file",
70
- "source": temp_file_path
71
- }
72
- ]
73
  }]
74
  )
75
  extracted_info = message.content[0].text
@@ -79,10 +91,6 @@ def extract_info_with_claude(resume_content: bytes) -> str:
79
  except Exception as e:
80
  extracted_info = f"An error occurred: {e}"
81
  st.error(f"❌ API Error: {e}")
82
- finally:
83
- # Clean up the temporary file
84
- os.unlink(temp_file_path)
85
- st.write("πŸ—‘οΈ Cleaned up temporary file")
86
 
87
  return extracted_info
88
 
@@ -91,9 +99,13 @@ def parse_resume(uploaded_file: UploadedFile) -> Tuple[str, List[Dict]]:
91
  try:
92
  st.write(f"πŸ“ Processing resume: {uploaded_file.name}")
93
  resume_content = uploaded_file.getvalue()
94
- st.write("πŸ“Š Extracted raw content from PDF")
 
 
 
 
95
 
96
- extracted_info = extract_info_with_claude(resume_content)
97
  st.write("πŸ” Parsing extracted information...")
98
 
99
  # Parse the extracted information
@@ -121,7 +133,7 @@ def parse_resume(uploaded_file: UploadedFile) -> Tuple[str, List[Dict]]:
121
  resume_data = {
122
  "name": name,
123
  "projects": projects,
124
- "full_content": resume_content.decode('utf-8', errors='ignore')
125
  }
126
  resume_collection.insert_one(resume_data)
127
  st.write("πŸ’Ύ Stored data in MongoDB")
 
9
  from anthropic import Anthropic
10
  import pymongo
11
  from dotenv import load_dotenv
12
+ import fitz # PyMuPDF
13
 
14
  # Load environment variables
15
  load_dotenv()
 
30
  layout="wide"
31
  )
32
 
33
def extract_text_from_pdf(pdf_content: bytes) -> str:
    """Extract the plain text of every page of a PDF.

    The PDF is opened directly from the in-memory bytes, so no temporary
    file is written and nothing is left on disk if parsing fails.

    Args:
        pdf_content: Raw bytes of the PDF document.

    Returns:
        The text of all pages concatenated in page order, each page followed
        by a newline. Returns an empty string if the PDF cannot be opened or
        read; in that case the error is reported through ``st.error``.
    """
    try:
        # The context manager closes the document even when a page fails
        # to load or its text cannot be extracted.
        with fitz.open(stream=pdf_content, filetype="pdf") as doc:
            return "".join(page.get_text() + "\n" for page in doc)
    except Exception as e:
        # UI boundary: report the problem to the user and fall back to
        # empty text instead of crashing the Streamlit app.
        st.error(f"Error extracting text from PDF: {e}")
        return ""
56
 
57
+ def extract_info_with_claude(resume_text: str) -> str:
58
+ """Extract information from resume text using Claude."""
59
+ st.write("πŸ€– Sending request to Claude API...")
60
+
61
  prompt = """
62
  Extract the following information from the given resume:
63
  1. Full Name
 
69
  2. [Project Name]: [Project Description]
70
  ...
71
  Extract all experiences, including projects, leadership, work experience, research, etc.
 
72
 
73
+ Here's the resume text:
74
+ {resume_text}
75
+ """.format(resume_text=resume_text)
76
 
77
  try:
78
  message = anthropic.messages.create(
79
+ model="claude-3-haiku-20240307",
80
  max_tokens=4096,
81
  system="You are a helpful assistant that extracts information from resumes.",
82
  messages=[{
83
  "role": "user",
84
+ "content": prompt
 
 
 
 
 
 
 
 
 
85
  }]
86
  )
87
  extracted_info = message.content[0].text
 
91
  except Exception as e:
92
  extracted_info = f"An error occurred: {e}"
93
  st.error(f"❌ API Error: {e}")
 
 
 
 
94
 
95
  return extracted_info
96
 
 
99
  try:
100
  st.write(f"πŸ“ Processing resume: {uploaded_file.name}")
101
  resume_content = uploaded_file.getvalue()
102
+ st.write("πŸ“Š Extracting text from PDF...")
103
+
104
+ resume_text = extract_text_from_pdf(resume_content)
105
+ st.write("πŸ“„ Extracted text from PDF:")
106
+ st.code(resume_text[:500] + "..." if len(resume_text) > 500 else resume_text)
107
 
108
+ extracted_info = extract_info_with_claude(resume_text)
109
  st.write("πŸ” Parsing extracted information...")
110
 
111
  # Parse the extracted information
 
133
  resume_data = {
134
  "name": name,
135
  "projects": projects,
136
+ "full_content": resume_text
137
  }
138
  resume_collection.insert_one(resume_data)
139
  st.write("πŸ’Ύ Stored data in MongoDB")