Prernas19 committed on
Commit
2849213
·
verified ·
1 Parent(s): 3bfbf07

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +249 -0
app.py ADDED
@@ -0,0 +1,249 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""app.py - Streamlit resume parser.

Extracts text from an uploaded PDF, DOCX, or image resume and asks a Gemini
model to return the candidate's details as JSON.
"""
2
+
3
+ import streamlit as st
4
+ import google.generativeai as genai
5
+ from PIL import Image
6
+ import fitz # PyMuPDF
7
+ from docx import Document
8
+ import json
9
+ from pathlib import Path
10
+ from datetime import datetime
11
+ import re
12
+ import pytesseract
13
+ import io
14
+
15
def extract_text_from_pdf(pdf_file):
    """Extract the plain text of every page of an uploaded PDF.

    Args:
        pdf_file: Binary file-like object exposing ``read()`` that yields the
            raw bytes of the PDF.

    Returns:
        The text of all pages joined with newlines, or ``""`` when the file
        cannot be read or parsed (the error is reported through ``st.error``).
    """
    try:
        pdf_bytes = pdf_file.read()
        # The context manager closes the document even when a page fails to
        # extract; previously the handle was never released.
        with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
            return "\n".join(page.get_text() for page in doc)
    except Exception as e:
        # UI boundary: show the problem to the user instead of crashing.
        st.error(f"Error in PDF extraction: {str(e)}")
        return ""
28
+
29
def extract_text_from_docx(docx_file):
    """Return the paragraph text of an uploaded DOCX file, one paragraph per line.

    Any failure while opening or reading the document is shown through
    ``st.error`` and an empty string is returned instead.
    """
    try:
        document = Document(docx_file)
        return "\n".join(para.text for para in document.paragraphs)
    except Exception as err:
        st.error(f"Error in DOCX extraction: {str(err)}")
        return ""
40
+
41
def parse_date(date_str):
    """Parse one endpoint of an employment period into a ``datetime``.

    Args:
        date_str: Text such as ``"Jan 2020"``, ``"03/2019"``, ``"2018"`` or
            ``"Present"``. Surrounding whitespace and letter case are ignored.

    Returns:
        ``datetime.now()`` for the open-ended markers "present", "current"
        and "now"; otherwise the first day of the parsed month (January when
        only a year is recognised). ``None`` when nothing date-like is found
        or ``date_str`` is not a string.
    """
    if not isinstance(date_str, str):
        return None

    # Normalise before comparing: the marker check used to run on the raw
    # value, so " Present" or "Present " was never treated as open-ended.
    date_str = date_str.strip()
    if date_str.casefold() in ('present', 'current', 'now'):
        return datetime.now()

    formats = (
        '%Y', '%b %Y', '%B %Y', '%m/%Y', '%m-%Y',
        '%Y/%m', '%Y-%m',
    )
    for fmt in formats:
        try:
            return datetime.strptime(date_str, fmt)
        except ValueError:
            continue

    # Last resort: pick out a bare four-digit year. 19xx is accepted as well
    # as 20xx so that older positions are not silently dropped.
    year_match = re.search(r'\b(?:19|20)\d{2}\b', date_str)
    if year_match:
        return datetime.strptime(year_match.group(), '%Y')

    return None
68
+
69
def calculate_experience(work_history):
    """Sum the length of every job in ``work_history``, in years.

    Args:
        work_history: Iterable of dicts, each optionally carrying a
            ``'duration'`` string such as ``"Jan 2020 - Present"``,
            ``"2018-2020"`` or ``"03/2019 to 06/2021"``. ``None`` is treated
            as an empty history.

    Returns:
        Total experience rounded to one decimal place. Entries whose duration
        is missing, is not a string, or cannot be split into exactly two
        parseable dates contribute nothing. Overlapping jobs are each counted
        in full.
    """
    total_experience = 0

    for job in work_history or []:
        duration = job.get('duration', '') if isinstance(job, dict) else ''
        if not duration or not isinstance(duration, str):
            continue
        duration = duration.strip()

        # Prefer separators surrounded by whitespace so hyphenated dates
        # ("2018-01 - 2020-05") stay intact. En and em dashes are accepted
        # alongside the plain hyphen.
        parts = re.split(r'\s+[-\u2013\u2014]\s+|\s+to\s+', duration)
        if len(parts) != 2:
            # Fall back to an unspaced separator, e.g. "2018-2020".
            parts = re.split(r'\s*[-\u2013\u2014]\s*|\s+to\s+', duration)
        if len(parts) != 2:
            continue

        start_date = parse_date(parts[0])
        end_date = parse_date(parts[1])

        if start_date and end_date:
            # Month-granular difference expressed in fractional years.
            years = (end_date.year - start_date.year) + \
                (end_date.month - start_date.month) / 12
            # A reversed range (end before start) adds nothing rather than
            # subtracting from the total.
            total_experience += max(0, years)

    return round(total_experience, 1)
92
+
93
def parse_resume(file_uploaded, api_key):
    """Extract structured resume data from an uploaded file using Gemini.

    The file's text is obtained with the extractor matching its extension
    (PDF, DOCX/DOC, or OCR for JPG/JPEG/PNG), sent to the
    ``gemini-1.5-flash`` model together with a JSON-only prompt, and the JSON
    object found in the reply is decoded.

    Args:
        file_uploaded: Uploaded file object with a ``name`` attribute and
            file-like read access.
        api_key: Gemini API key passed to ``genai.configure``.

    Returns:
        The decoded dict with an added ``'total_years_experience'`` entry, or
        ``None`` when the format is unsupported, no text could be extracted,
        or the model reply does not contain a decodable JSON object. Every
        failure is reported through ``st.error``.
    """
    genai.configure(api_key=api_key)
    model = genai.GenerativeModel('gemini-1.5-flash')

    # The prompt body is kept flush-left so the text sent to the model is
    # exactly the literal below, with no indentation added by the code layout.
    prompt = """Extract the following information from this resume:
1. Summarize the following resume in 100 words, focusing on key skills, experience, and qualifications
2. Full Name
3. Email Address
4. Phone Number
5. Education History (including degree, institution, graduation year, and field of study)
6. Companies worked at with positions and EXACT duration (e.g., "Jan 2020 - Present" or "2018-2020")
7. Skills
8. LinkedIn Profile URL

Return the information in this JSON format:
{
"summary": "",
"name": "",
"email": "",
"phone": "",
"education": [
{
"degree": "",
"institution": "",
"year": "",
"field": "",
"gpa": ""
}
],
"work_experience": [
{
"company": "",
"position": "",
"duration": ""
}
],
"skills": [],
"linkedin": ""
}
For skills include tools and technologies in output if present any in resume.
For work experience durations, please specify exact dates in format: "MMM YYYY - MMM YYYY" or "YYYY - Present" , please return in one order either in ascending or descending.
Only return the JSON object, nothing else. If any field is not found, leave it empty."""

    try:
        file_extension = Path(file_uploaded.name).suffix.lower()

        if file_extension == '.pdf':
            text_content = extract_text_from_pdf(file_uploaded)
        elif file_extension in ['.docx', '.doc']:
            text_content = extract_text_from_docx(file_uploaded)
        elif file_extension in ['.jpg', '.jpeg', '.png']:
            image = Image.open(file_uploaded)
            text_content = pytesseract.image_to_string(image)
        else:
            st.error(f"Unsupported file format: {file_extension}")
            return None

        # Do not spend an API call on an empty document: the extractors
        # return "" on failure, and the model would only be able to answer
        # with blank or invented fields.
        if not text_content or not text_content.strip():
            st.error("No text could be extracted from the uploaded file.")
            return None

        response = model.generate_content(f"{prompt}\n\nResume Text:\n{text_content}")
        response_text = response.text

        # The reply may wrap the JSON in prose or code fences, so keep only
        # the span from the first '{' to the last '}'.
        json_start = response_text.find('{')
        json_end = response_text.rfind('}') + 1
        if json_start == -1 or json_end <= json_start:
            st.error("Error parsing response: no JSON object found in model output")
            return None

        try:
            result = json.loads(response_text[json_start:json_end])
        except json.JSONDecodeError as e:
            st.error(f"Error parsing response: {str(e)}")
            return None

        total_exp = calculate_experience(result.get('work_experience', []))
        result['total_years_experience'] = total_exp
        return result

    except Exception as e:
        # UI boundary: any extraction, OCR or API failure is shown to the
        # user rather than crashing the app.
        st.error(f"Error processing resume: {str(e)}")
        return None
171
+
172
def format_education(edu):
    """Build a one-line, human-readable description of an education entry.

    Args:
        edu: Mapping that may contain ``'degree'``, ``'field'``,
            ``'institution'``, ``'year'`` and ``'gpa'``. Missing or empty
            values are skipped; values may be strings or numbers.

    Returns:
        The present pieces joined by single spaces, e.g.
        ``"BSc in Physics from MIT (2020) - GPA: 3.8"``, or ``""`` when no
        field is set.
    """
    parts = []
    if edu.get('degree'):
        parts.append(edu['degree'])
    if edu.get('field'):
        parts.append(f"in {edu['field']}")
    if edu.get('institution'):
        parts.append(f"from {edu['institution']}")
    if edu.get('year'):
        parts.append(f"({edu['year']})")

    gpa = edu.get('gpa')
    # The GPA can arrive as a JSON number; going through str() keeps the
    # whitespace check from raising AttributeError on non-string values.
    if gpa and str(gpa).strip():
        parts.append(f"- GPA: {gpa}")

    return " ".join(parts)
186
+
187
def main():
    """Render the Streamlit page: collect a key and a resume, then show results.

    The Gemini API key is read from ``st.secrets["GEMINI_API_KEY"]`` when
    available, otherwise from a password text input. Once both a key and a
    file are present, the resume is parsed with ``parse_resume`` and the
    summary, contact details, total experience, education, work experience,
    skills and LinkedIn URL are displayed. Total experience is shown in
    months when it is under one year.
    """

    def _or_not_found(value):
        """Return ``value``, or the 'Not found' placeholder when it is empty."""
        # The prompt tells the model to leave unknown fields empty, so the
        # keys are usually present with "" and a dict.get default never fires.
        return value if value else 'Not found'

    st.title("Resume Parser")
    st.write("Upload a resume (PDF, DOCX, or Image) to extract information")

    # Get API key from secrets or user input
    try:
        api_key = st.secrets["GEMINI_API_KEY"] if "GEMINI_API_KEY" in st.secrets else None
    except FileNotFoundError:
        # NOTE(review): Streamlit raises on any st.secrets access when no
        # secrets file is configured; believed to be a FileNotFoundError
        # (or subclass) - confirm against the deployed Streamlit version.
        api_key = None
    if not api_key:
        api_key = st.text_input("Enter Gemini API Key", type="password")

    uploaded_file = st.file_uploader("Choose a resume file", type=["pdf", "docx", "doc", "jpg", "jpeg", "png"])

    if uploaded_file and api_key:
        with st.spinner('Analyzing resume...'):
            result = parse_resume(uploaded_file, api_key)

        if result:
            st.subheader("Extracted Information")

            # Display summary in a text area
            st.text_area("Summary", _or_not_found(result.get('summary')), height=100)

            # Display personal information
            col1, col2, col3 = st.columns(3)
            with col1:
                st.write("*Name:*", _or_not_found(result.get('name')))
            with col2:
                st.write("*Email:*", _or_not_found(result.get('email')))
            with col3:
                st.write("*Phone:*", _or_not_found(result.get('phone')))

            # Display total experience
            total_exp = result.get('total_years_experience', 0)
            exp_text = f"{total_exp:.1f} years" if total_exp >= 1 else f"{total_exp * 12:.0f} months"
            st.write("*Total Experience:*", exp_text)

            # Display education
            st.subheader("Education")
            if result.get('education'):
                for edu in result['education']:
                    st.write(f"- {format_education(edu)}")
            else:
                st.write("No education information found")

            # Display work experience
            st.subheader("Work Experience")
            if result.get('work_experience'):
                for exp in result['work_experience']:
                    duration = f" ({exp['duration']})" if exp.get('duration') else ""
                    position = exp.get('position') or 'Role not found'
                    company = exp.get('company') or 'Company not found'
                    st.write(f"- {position} at {company}{duration}")
            else:
                st.write("No work experience found")

            # Display Skills
            st.subheader("Skills:")
            if result.get('skills'):
                for skill in result['skills']:
                    st.write(f"- {skill}")
            else:
                st.write("- No skills found")

            # Display LinkedIn profile
            st.write("*LinkedIn Profile:*", _or_not_found(result.get('linkedin')))


# Run the app only when executed as a script (e.g. by `streamlit run`).
if __name__ == "__main__":
    main()