Prernas19 committed on
Commit
1ca78ca
·
verified ·
1 Parent(s): 3357655

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +244 -38
src/streamlit_app.py CHANGED
@@ -1,40 +1,246 @@
1
- import altair as alt
2
- import numpy as np
3
- import pandas as pd
4
  import streamlit as st
 
 
 
 
 
 
 
 
 
 
5
 
6
- """
7
- # Welcome to Streamlit!
8
-
9
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
10
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
11
- forums](https://discuss.streamlit.io).
12
-
13
- In the meantime, below is an example of what you can do with just a few lines of code:
14
- """
15
-
16
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
17
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
18
-
19
- indices = np.linspace(0, 1, num_points)
20
- theta = 2 * np.pi * num_turns * indices
21
- radius = indices
22
-
23
- x = radius * np.cos(theta)
24
- y = radius * np.sin(theta)
25
-
26
- df = pd.DataFrame({
27
- "x": x,
28
- "y": y,
29
- "idx": indices,
30
- "rand": np.random.randn(num_points),
31
- })
32
-
33
- st.altair_chart(alt.Chart(df, height=700, width=700)
34
- .mark_point(filled=True)
35
- .encode(
36
- x=alt.X("x", axis=None),
37
- y=alt.Y("y", axis=None),
38
- color=alt.Color("idx", legend=None, scale=alt.Scale()),
39
- size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
40
- ))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
+ import google.generativeai as genai
3
+ from PIL import Image
4
+ import fitz # PyMuPDF
5
+ from docx import Document
6
+ import json
7
+ from pathlib import Path
8
+ from datetime import datetime
9
+ import re
10
+ import pytesseract
11
+ import io
12
 
13
def extract_text_from_pdf(pdf_file):
    """Extract the plain text of every page of an uploaded PDF.

    Args:
        pdf_file: A binary file-like object exposing ``read()`` whose
            content is a PDF document.

    Returns:
        The text of all pages joined with newlines, or an empty string
        when the document cannot be opened or read (the error is shown
        to the user via ``st.error``).
    """
    try:
        pdf_bytes = pdf_file.read()
        # The context manager closes the document even if a page fails to
        # render; the original left the document open.
        with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
            return "\n".join(page.get_text() for page in doc)
    except Exception as e:
        # Best-effort boundary: any failure is reported in the UI and the
        # caller receives an empty string.
        st.error(f"Error in PDF extraction: {str(e)}")
        return ""
26
+
27
def extract_text_from_docx(docx_file):
    """Return the text of all paragraphs in an uploaded DOCX file.

    Args:
        docx_file: A path or binary file-like object accepted by
            ``Document``.

    Returns:
        The paragraph texts joined with newlines, or an empty string if
        the file could not be read (the error is shown via ``st.error``).
    """
    try:
        parsed = Document(docx_file)
        return "\n".join(para.text for para in parsed.paragraphs)
    except Exception as e:
        st.error(f"Error in DOCX extraction: {str(e)}")
        return ""
38
+
39
def parse_date(date_str):
    """Parse a loosely formatted resume date into a ``datetime``.

    Recognised inputs, checked in this order:

    * open-ended markers such as "Present", "Current" or "Now"
      (case-insensitive), which map to the current date and time;
    * year-only and month/year forms: ``2020``, ``Jan 2020``,
      ``January 2020``, ``01/2020``, ``01-2020``, ``2020/01``, ``2020-01``;
    * any other text containing a four-digit year from 1900-2099, which
      maps to 1 January of that year.

    Args:
        date_str: The text to parse. Non-string values are rejected.

    Returns:
        A naive ``datetime``, or ``None`` when no date can be recognised.
    """
    if not isinstance(date_str, str):
        return None

    # Strip first so that " Present " is still recognised as open-ended.
    cleaned = date_str.strip()
    if not cleaned:
        return None

    if cleaned.lower() in ('present', 'current', 'now', 'today', 'ongoing', 'till date'):
        return datetime.now()

    formats = (
        '%Y', '%b %Y', '%B %Y', '%m/%Y', '%m-%Y',
        '%Y/%m', '%Y-%m',
    )
    for fmt in formats:
        try:
            return datetime.strptime(cleaned, fmt)
        except ValueError:
            continue

    # Fall back to the first plausible year; 19xx is accepted as well so
    # older positions are not silently dropped.
    year_match = re.search(r'\b(?:19|20)\d{2}\b', cleaned)
    if year_match:
        return datetime(int(year_match.group()), 1, 1)

    return None
66
+
67
def _split_duration(duration):
    """Split a duration such as "Jan 2020 - Present" into [start, end].

    Returns an empty list when the text is not a two-sided range.
    """
    text = duration.strip()

    # Prefer separators surrounded by whitespace so that hyphens inside a
    # date ("01-2020 - 03-2021", "2020-01 to 2021-06") are left intact.
    parts = re.split(r'\s+(?:[-\u2013\u2014]|to)\s+', text, flags=re.IGNORECASE)
    if len(parts) == 2:
        return parts

    # Otherwise accept a bare hyphen, en dash or em dash ("2018-2020").
    parts = re.split(r'\s*[-\u2013\u2014]\s*|\s+to\s+', text)
    return parts if len(parts) == 2 else []


def calculate_experience(work_history):
    """Calculate total years of experience from a work history.

    Args:
        work_history: Iterable of job dicts; each job's ``'duration'``
            value is expected to be a string range such as
            "Jan 2020 - Present", "2018-2020" or "2019 to 2021".
            Entries that are not dicts, have no string duration, or whose
            dates cannot be parsed are skipped.

    Returns:
        The summed length of all parsable ranges in years, rounded to one
        decimal place. Ranges are simply added together, so overlapping
        jobs are counted more than once.
    """
    total_experience = 0

    for job in work_history:
        if not isinstance(job, dict):
            continue

        duration = job.get('duration', '')
        if not duration or not isinstance(duration, str):
            continue

        parts = _split_duration(duration)
        if not parts:
            continue

        start_date = parse_date(parts[0])
        end_date = parse_date(parts[1])

        if start_date and end_date:
            years = (end_date.year - start_date.year) + \
                (end_date.month - start_date.month) / 12
            # A reversed range contributes nothing rather than a negative span.
            total_experience += max(0, years)

    return round(total_experience, 1)
90
+
91
def parse_resume(file_uploaded, api_key):
    """Extract structured information from an uploaded resume with Gemini.

    The file's text is extracted according to its extension (PDF, DOCX/DOC
    or image via OCR), sent to the model together with a fixed extraction
    prompt, and the JSON object found in the reply is decoded.

    Args:
        file_uploaded: Uploaded file object exposing ``name`` and the
            file-like interface the extractors need.
        api_key: Gemini API key used to configure the client.

    Returns:
        A dict with the fields requested in the prompt plus
        ``'total_years_experience'``, or ``None`` when the format is
        unsupported, no text could be extracted, or the reply could not be
        decoded. Every failure is reported through ``st.error``.
    """
    genai.configure(api_key=api_key)
    model = genai.GenerativeModel('gemini-1.5-flash')

    prompt = """Extract the following information from this resume:
    1. Summarize the following resume in 100 words, focusing on key skills, experience, and qualifications
    2. Full Name
    3. Email Address
    4. Phone Number
    5. Education History (including degree, institution, graduation year, and field of study)
    6. Companies worked at with positions and EXACT duration (e.g., "Jan 2020 - Present" or "2018-2020")
    7. Skills
    8. LinkedIn Profile URL
    Return the information in this JSON format:
    {
        "summary": "",
        "name": "",
        "email": "",
        "phone": "",
        "education": [
            {
                "degree": "",
                "institution": "",
                "year": "",
                "field": "",
                "gpa": ""
            }
        ],
        "work_experience": [
            {
                "company": "",
                "position": "",
                "duration": ""
            }
        ],
        "skills": [],
        "linkedin": ""
    }
    For skills include tools and technologies in output if present any in resume.
    For work experience durations, please specify exact dates in format: "MMM YYYY - MMM YYYY" or "YYYY - Present" , please return in one order either in ascending or descending.
    Only return the JSON object, nothing else. If any field is not found, leave it empty."""

    try:
        file_extension = Path(file_uploaded.name).suffix.lower()

        # Rewind in case the buffer was already consumed by an earlier read.
        if hasattr(file_uploaded, 'seek'):
            file_uploaded.seek(0)

        if file_extension == '.pdf':
            text_content = extract_text_from_pdf(file_uploaded)
        elif file_extension in ['.docx', '.doc']:
            text_content = extract_text_from_docx(file_uploaded)
        elif file_extension in ['.jpg', '.jpeg', '.png']:
            image = Image.open(file_uploaded)
            text_content = pytesseract.image_to_string(image)
        else:
            st.error(f"Unsupported file format: {file_extension}")
            return None

        # Without any text the model has nothing to extract from, so stop
        # here instead of spending an API call on an empty resume.
        if not text_content or not text_content.strip():
            st.error("No text could be extracted from the uploaded file.")
            return None

        response = model.generate_content(f"{prompt}\n\nResume Text:\n{text_content}")

        try:
            response_text = response.text
            # Keep only the outermost {...} span; the reply may wrap the
            # JSON in extra text such as a Markdown code fence.
            json_start = response_text.find('{')
            json_end = response_text.rfind('}') + 1
            json_str = response_text[json_start:json_end]

            result = json.loads(json_str)
        except json.JSONDecodeError as e:
            st.error(f"Error parsing response: {str(e)}")
            return None

        if not isinstance(result, dict):
            st.error("Error parsing response: expected a JSON object")
            return None

        # "work_experience" may be present but null; treat that as no jobs.
        work_history = result.get('work_experience') or []
        result['total_years_experience'] = calculate_experience(work_history)

        return result

    except Exception as e:
        # Top-level boundary for extraction and API failures: report the
        # problem in the UI instead of crashing the app.
        st.error(f"Error processing resume: {str(e)}")
        return None
168
+
169
def format_education(edu):
    """Format one education entry as a single display line.

    Args:
        edu: Dict that may contain ``'degree'``, ``'field'``,
            ``'institution'``, ``'year'`` and ``'gpa'``. Missing or empty
            fields are omitted. Values need not be strings (a GPA given as
            a number is accepted). A non-dict entry is rendered with
            ``str()``.

    Returns:
        A string such as "BSc in CS from MIT (2020) - GPA: 3.9", or an
        empty string when nothing is available.
    """
    if not isinstance(edu, dict):
        return str(edu).strip() if edu else ""

    parts = []
    if edu.get('degree'):
        parts.append(edu['degree'])
    if edu.get('field'):
        parts.append(f"in {edu['field']}")
    if edu.get('institution'):
        parts.append(f"from {edu['institution']}")
    if edu.get('year'):
        parts.append(f"({edu['year']})")

    # The GPA may arrive as a number rather than a string, so normalise it
    # before stripping instead of calling .strip() on the raw value.
    gpa = edu.get('gpa')
    gpa_text = str(gpa).strip() if gpa else ""
    if gpa_text:
        parts.append(f"- GPA: {gpa_text}")

    return " ".join(str(part) for part in parts)
183
+
184
def _get_api_key():
    """Return the Gemini API key from Streamlit secrets, else ask the user."""
    try:
        if "GEMINI_API_KEY" in st.secrets:
            return st.secrets["GEMINI_API_KEY"]
    except (FileNotFoundError, KeyError):
        # Probing st.secrets can raise when no secrets file is configured;
        # that simply means the key has to be typed in below.
        pass
    return st.text_input("Enter Gemini API Key", type="password")


def main():
    """Render the resume parser page.

    Asks for an API key (unless one is configured in secrets) and a resume
    file, runs ``parse_resume`` on the upload, and displays the extracted
    summary, contact details, experience, education, skills and LinkedIn
    profile. Nothing is displayed until both a file and a key are present.
    """
    st.title("Resume Parser")
    st.write("Upload a resume (PDF, DOCX, or Image) to extract information")

    # Get API key from secrets or user input
    api_key = _get_api_key()

    uploaded_file = st.file_uploader("Choose a resume file", type=["pdf", "docx", "doc", "jpg", "jpeg", "png"])

    if not (uploaded_file and api_key):
        return

    with st.spinner('Analyzing resume...'):
        result = parse_resume(uploaded_file, api_key)

    if not result:
        return

    st.subheader("Extracted Information")

    # The prompt asks for missing fields to be left empty, so fall back on
    # falsy values (not only on missing keys) when showing 'Not found'.

    # Display summary in a text area
    st.text_area("Summary", result.get('summary') or 'Not found', height=100)

    # Display personal information
    col1, col2, col3 = st.columns(3)
    with col1:
        st.write("*Name:*", result.get('name') or 'Not found')
    with col2:
        st.write("*Email:*", result.get('email') or 'Not found')
    with col3:
        st.write("*Phone:*", result.get('phone') or 'Not found')

    # Display total experience (shown in months when under one year)
    total_exp = result.get('total_years_experience') or 0
    exp_text = f"{total_exp:.1f} years" if total_exp >= 1 else f"{total_exp * 12:.0f} months"
    st.write("*Total Experience:*", exp_text)

    # Display education
    st.subheader("Education")
    if result.get('education'):
        for edu in result['education']:
            # Entries are expected to be dicts; show anything else verbatim.
            line = format_education(edu) if isinstance(edu, dict) else str(edu)
            st.write(f"- {line}")
    else:
        st.write("No education information found")

    # Display work experience
    st.subheader("Work Experience")
    if result.get('work_experience'):
        for exp in result['work_experience']:
            if not isinstance(exp, dict):
                st.write(f"- {exp}")
                continue
            duration = f" ({exp['duration']})" if exp.get('duration') else ""
            position = exp.get('position') or 'Role not found'
            company = exp.get('company') or 'Company not found'
            st.write(f"- {position} at {company}{duration}")
    else:
        st.write("No work experience found")

    # Display Skills
    st.subheader("Skills:")
    if result.get('skills'):
        for skill in result['skills']:
            st.write(f"- {skill}")
    else:
        st.write("- No skills found")

    # Display LinkedIn profile
    st.write("*LinkedIn Profile:*", result.get('linkedin') or 'Not found')


if __name__ == "__main__":
    main()