Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,13 +1,11 @@
|
|
1 |
import os
|
2 |
import re
|
3 |
-
import torch # Explicitly imported if you want to use torch directly
|
4 |
-
from io import BytesIO
|
5 |
-
|
6 |
import streamlit as st
|
7 |
from PIL import Image
|
8 |
from transformers import pipeline
|
|
|
9 |
|
10 |
-
#
|
11 |
@st.cache_resource(show_spinner=False)
|
12 |
def load_ocr_pipeline():
|
13 |
try:
|
@@ -18,22 +16,30 @@ def load_ocr_pipeline():
|
|
18 |
st.error(f"Error loading model: {e}")
|
19 |
st.stop()
|
20 |
|
21 |
-
# Load the model at startup
|
22 |
ocr_pipeline = load_ocr_pipeline()
|
23 |
st.write("Model loaded successfully!")
|
24 |
|
25 |
#####################################
|
26 |
-
#
|
27 |
#####################################
|
28 |
def extract_text_from_file(file_obj):
|
29 |
full_text = ""
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
return full_text
|
38 |
|
39 |
#####################################
|
@@ -94,9 +100,9 @@ def process_resume(file_obj):
|
|
94 |
if file_obj is None:
|
95 |
return None, None
|
96 |
|
97 |
-
# Extract text
|
98 |
resume_text = extract_text_from_file(file_obj)
|
99 |
-
# Parse basic resume info
|
100 |
resume_info = extract_basic_resume_info(resume_text)
|
101 |
return resume_text, resume_info
|
102 |
|
@@ -105,14 +111,14 @@ def process_resume(file_obj):
|
|
105 |
#####################################
|
106 |
st.title("Resume Extraction and Basic Info Parsing")
|
107 |
st.markdown("""
|
108 |
-
Upload
|
109 |
""")
|
110 |
|
111 |
-
uploaded_file = st.file_uploader("Upload Resume
|
112 |
|
113 |
if st.button("Extract Info"):
|
114 |
if uploaded_file is None:
|
115 |
-
st.error("Please upload
|
116 |
else:
|
117 |
with st.spinner("Processing..."):
|
118 |
resume_text, resume_info = process_resume(uploaded_file)
|
|
|
1 |
import os
|
2 |
import re
|
|
|
|
|
|
|
3 |
import streamlit as st
|
4 |
from PIL import Image
|
5 |
from transformers import pipeline
|
6 |
+
from pdfminer.high_level import extract_text
|
7 |
|
8 |
+
# Load and cache the OCR model once at startup
|
9 |
@st.cache_resource(show_spinner=False)
|
10 |
def load_ocr_pipeline():
|
11 |
try:
|
|
|
16 |
st.error(f"Error loading model: {e}")
|
17 |
st.stop()
|
18 |
|
|
|
19 |
ocr_pipeline = load_ocr_pipeline()
|
20 |
st.write("Model loaded successfully!")
|
21 |
|
22 |
#####################################
|
23 |
+
# Extract Text from File Function
|
24 |
#####################################
|
25 |
def extract_text_from_file(file_obj):
    """Extract the text content of an uploaded resume file.

    Files whose name ends in ``.pdf`` (case-insensitive) are read with
    pdfminer's ``extract_text``.  Every other file is opened as an image with
    PIL and passed through the module-level ``ocr_pipeline``.

    Args:
        file_obj: File-like object exposing a ``name`` attribute; it is
            handed directly to ``extract_text`` or ``Image.open``.

    Returns:
        The extracted text.  An empty string when the OCR output contains no
        usable text entry.  When extraction raises, the exception is not
        propagated; a string of the form ``"Error processing PDF: ..."`` or
        ``"Error processing image: ..."`` is returned instead.
    """
    file_extension = os.path.splitext(file_obj.name)[1].lower()

    if file_extension == ".pdf":
        try:
            # Use pdfminer.six to extract text from PDF files.
            return extract_text(file_obj)
        except Exception as e:
            # Best-effort contract: report the failure as text, never raise.
            return f"Error processing PDF: {e}"

    try:
        # The context manager releases PIL's handle once OCR has finished.
        with Image.open(file_obj) as img:
            result = ocr_pipeline(img)
    except Exception as e:
        return f"Error processing image: {e}"

    # Guard against an empty result list explicitly instead of relying on an
    # IndexError being swallowed by a broad ``except``.
    if isinstance(result, list) and result and isinstance(result[0], dict):
        first_entry = result[0]
        # NOTE(review): "text" is the key the original code read.  Hugging
        # Face image-to-text pipelines emit "generated_text" instead, so it is
        # accepted as a fallback -- confirm against the task configured in
        # load_ocr_pipeline().
        for key in ("text", "generated_text"):
            if key in first_entry:
                return first_entry[key]
    return ""
|
44 |
|
45 |
#####################################
|
|
|
100 |
if file_obj is None:
|
101 |
return None, None
|
102 |
|
103 |
+
# Extract text based on file type (PDF or image)
|
104 |
resume_text = extract_text_from_file(file_obj)
|
105 |
+
# Parse basic resume info using heuristics
|
106 |
resume_info = extract_basic_resume_info(resume_text)
|
107 |
return resume_text, resume_info
|
108 |
|
|
|
111 |
#####################################
|
112 |
st.title("Resume Extraction and Basic Info Parsing")
|
113 |
st.markdown("""
|
114 |
+
Upload a resume file (PDF, PNG, JPG, or JPEG) to extract basic text and candidate information.
|
115 |
""")
|
116 |
|
117 |
+
uploaded_file = st.file_uploader("Upload Resume", type=["pdf", "png", "jpg", "jpeg"])
|
118 |
|
119 |
if st.button("Extract Info"):
|
120 |
if uploaded_file is None:
|
121 |
+
st.error("Please upload a file first.")
|
122 |
else:
|
123 |
with st.spinner("Processing..."):
|
124 |
resume_text, resume_info = process_resume(uploaded_file)
|