Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -6,9 +6,8 @@ from io import BytesIO
|
|
6 |
import streamlit as st
|
7 |
from PIL import Image
|
8 |
from transformers import pipeline
|
9 |
-
from pdf2image import convert_from_bytes
|
10 |
|
11 |
-
# Use st.cache_resource (Streamlit 1.18+) to load and cache the
|
12 |
@st.cache_resource(show_spinner=False)
|
13 |
def load_ocr_pipeline():
|
14 |
try:
|
@@ -23,40 +22,18 @@ def load_ocr_pipeline():
|
|
23 |
ocr_pipeline = load_ocr_pipeline()
|
24 |
st.write("Model loaded successfully!")
|
25 |
|
26 |
-
#####################################
|
27 |
-
# Utility: Convert PDF to Images
|
28 |
-
#####################################
|
29 |
-
def convert_pdf_to_images(pdf_bytes):
|
30 |
-
try:
|
31 |
-
images = convert_from_bytes(pdf_bytes)
|
32 |
-
return images
|
33 |
-
except Exception as e:
|
34 |
-
st.error(f"PDF conversion error: {e}")
|
35 |
-
return []
|
36 |
-
|
37 |
#####################################
|
38 |
# Pipeline: Extract Text with OCR Pipeline
|
39 |
#####################################
|
40 |
def extract_text_from_file(file_obj):
|
41 |
-
file_extension = os.path.splitext(file_obj.name)[1].lower()
|
42 |
full_text = ""
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
full_text += result[0]["text"] + "\n"
|
51 |
-
else:
|
52 |
-
try:
|
53 |
-
img = Image.open(file_obj)
|
54 |
-
result = ocr_pipeline(img)
|
55 |
-
if isinstance(result, list) and "text" in result[0]:
|
56 |
-
full_text = result[0]["text"]
|
57 |
-
except Exception as e:
|
58 |
-
full_text = f"Error processing image: {e}"
|
59 |
-
|
60 |
return full_text
|
61 |
|
62 |
#####################################
|
@@ -117,7 +94,7 @@ def process_resume(file_obj):
|
|
117 |
if file_obj is None:
|
118 |
return None, None
|
119 |
|
120 |
-
# Extract text
|
121 |
resume_text = extract_text_from_file(file_obj)
|
122 |
# Parse basic resume info
|
123 |
resume_info = extract_basic_resume_info(resume_text)
|
@@ -128,14 +105,14 @@ def process_resume(file_obj):
|
|
128 |
#####################################
|
129 |
st.title("Resume Extraction and Basic Info Parsing")
|
130 |
st.markdown("""
|
131 |
-
Upload
|
132 |
""")
|
133 |
|
134 |
-
uploaded_file = st.file_uploader("Upload Resume (
|
135 |
|
136 |
if st.button("Extract Info"):
|
137 |
if uploaded_file is None:
|
138 |
-
st.error("Please upload
|
139 |
else:
|
140 |
with st.spinner("Processing..."):
|
141 |
resume_text, resume_info = process_resume(uploaded_file)
|
|
|
6 |
import streamlit as st
|
7 |
from PIL import Image
|
8 |
from transformers import pipeline
|
|
|
9 |
|
10 |
+
# Use st.cache_resource (Streamlit 1.18+) to load and cache the OCR pipeline once
|
11 |
@st.cache_resource(show_spinner=False)
|
12 |
def load_ocr_pipeline():
|
13 |
try:
|
|
|
22 |
ocr_pipeline = load_ocr_pipeline()
|
23 |
st.write("Model loaded successfully!")
|
24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
#####################################
|
26 |
# Pipeline: Extract Text with OCR Pipeline
|
27 |
#####################################
|
28 |
def extract_text_from_file(file_obj):
|
|
|
29 |
full_text = ""
|
30 |
+
try:
|
31 |
+
img = Image.open(file_obj)
|
32 |
+
result = ocr_pipeline(img)
|
33 |
+
if isinstance(result, list) and "text" in result[0]:
|
34 |
+
full_text = result[0]["text"]
|
35 |
+
except Exception as e:
|
36 |
+
full_text = f"Error processing image: {e}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
return full_text
|
38 |
|
39 |
#####################################
|
|
|
94 |
if file_obj is None:
|
95 |
return None, None
|
96 |
|
97 |
+
# Extract text using only the image-based OCR pipeline
|
98 |
resume_text = extract_text_from_file(file_obj)
|
99 |
# Parse basic resume info
|
100 |
resume_info = extract_basic_resume_info(resume_text)
|
|
|
105 |
#####################################
|
106 |
st.title("Resume Extraction and Basic Info Parsing")
|
107 |
st.markdown("""
|
108 |
+
Upload an image file (PNG, JPG, or JPEG) to extract basic text and candidate information.
|
109 |
""")
|
110 |
|
111 |
+
uploaded_file = st.file_uploader("Upload Resume (Image Only)", type=["png", "jpg", "jpeg"])
|
112 |
|
113 |
if st.button("Extract Info"):
|
114 |
if uploaded_file is None:
|
115 |
+
st.error("Please upload an image file first.")
|
116 |
else:
|
117 |
with st.spinner("Processing..."):
|
118 |
resume_text, resume_info = process_resume(uploaded_file)
|