Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,7 +1,6 @@
|
|
1 |
import os
|
2 |
import re
|
3 |
import torch # Explicitly imported if you want to use torch directly
|
4 |
-
import tempfile
|
5 |
from io import BytesIO
|
6 |
|
7 |
import streamlit as st
|
@@ -9,16 +8,20 @@ from PIL import Image
|
|
9 |
from transformers import pipeline
|
10 |
from pdf2image import convert_from_bytes
|
11 |
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
try:
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
except Exception as e:
|
20 |
-
|
21 |
-
|
|
|
|
|
|
|
|
|
22 |
|
23 |
#####################################
|
24 |
# Utility: Convert PDF to Images
|
@@ -59,7 +62,8 @@ def extract_text_from_file(file_obj):
|
|
59 |
#####################################
|
60 |
# Information Extraction Functions
|
61 |
#####################################
|
62 |
-
def
|
|
|
63 |
info = {
|
64 |
"Name": None,
|
65 |
"Age": None,
|
@@ -113,16 +117,18 @@ def process_resume(file_obj):
|
|
113 |
if file_obj is None:
|
114 |
return None, None
|
115 |
|
|
|
116 |
resume_text = extract_text_from_file(file_obj)
|
117 |
-
|
|
|
118 |
return resume_text, resume_info
|
119 |
|
120 |
#####################################
|
121 |
-
# Streamlit
|
122 |
#####################################
|
123 |
-
st.title("Resume Extraction and
|
124 |
st.markdown("""
|
125 |
-
Upload a resume file (
|
126 |
""")
|
127 |
|
128 |
uploaded_file = st.file_uploader("Upload Resume (PDF or Image)", type=["pdf", "png", "jpg", "jpeg"])
|
@@ -135,5 +141,5 @@ if st.button("Extract Info"):
|
|
135 |
resume_text, resume_info = process_resume(uploaded_file)
|
136 |
st.subheader("Extracted Resume Text")
|
137 |
st.text_area("", resume_text, height=200)
|
138 |
-
st.subheader("Parsed Resume Information")
|
139 |
st.json(resume_info)
|
|
|
1 |
import os
|
2 |
import re
|
3 |
import torch # Explicitly imported if you want to use torch directly
|
|
|
4 |
from io import BytesIO
|
5 |
|
6 |
import streamlit as st
|
|
|
8 |
from transformers import pipeline
|
9 |
from pdf2image import convert_from_bytes
|
10 |
|
11 |
+
# Use st.cache_resource (Streamlit 1.18+) to load and cache the model/pipeline once
|
12 |
+
@st.cache_resource(show_spinner=False)
def load_ocr_pipeline():
    """Build the image-to-text pipeline used for OCR.

    The function is wrapped in ``st.cache_resource`` so the pipeline is
    loaded and cached once instead of being rebuilt on each call.

    Returns:
        The object produced by ``pipeline("image-to-text", ...)``.

    If construction raises, the error message is shown in the app via
    ``st.error`` and the current script run is halted with ``st.stop()``.
    """
    try:
        # NOTE: requires a reasonably recent transformers release (>=4.x).
        loaded_pipe = pipeline(
            "image-to-text",
            model="YouLiXiya/tinyllava-v1.0-1.1b-hf",
        )
    except Exception as exc:
        # Surface the failure to the user, then stop executing the script.
        st.error(f"Error loading model: {exc}")
        st.stop()
    else:
        return loaded_pipe
|
21 |
+
|
22 |
+
# Load the model at startup
|
23 |
+
ocr_pipeline = load_ocr_pipeline()
|
24 |
+
st.write("Model loaded successfully!")
|
25 |
|
26 |
#####################################
|
27 |
# Utility: Convert PDF to Images
|
|
|
62 |
#####################################
|
63 |
# Information Extraction Functions
|
64 |
#####################################
|
65 |
+
def extract_basic_resume_info(text):
|
66 |
+
"""Extract basic resume info: Name, Age, Job Experience, Skills, Expected Industry/Direction."""
|
67 |
info = {
|
68 |
"Name": None,
|
69 |
"Age": None,
|
|
|
117 |
if file_obj is None:
|
118 |
return None, None
|
119 |
|
120 |
+
# Extract text from PDF or image using the preloaded OCR pipeline
|
121 |
resume_text = extract_text_from_file(file_obj)
|
122 |
+
# Parse basic resume info
|
123 |
+
resume_info = extract_basic_resume_info(resume_text)
|
124 |
return resume_text, resume_info
|
125 |
|
126 |
#####################################
|
127 |
+
# Streamlit Interface
|
128 |
#####################################
|
129 |
+
st.title("Resume Extraction and Basic Info Parsing")
|
130 |
st.markdown("""
|
131 |
+
Upload a resume file (PDF or image) to extract basic text and candidate information.
|
132 |
""")
|
133 |
|
134 |
uploaded_file = st.file_uploader("Upload Resume (PDF or Image)", type=["pdf", "png", "jpg", "jpeg"])
|
|
|
141 |
resume_text, resume_info = process_resume(uploaded_file)
|
142 |
st.subheader("Extracted Resume Text")
|
143 |
st.text_area("", resume_text, height=200)
|
144 |
+
st.subheader("Parsed Basic Resume Information")
|
145 |
st.json(resume_info)
|