CR7CAD committed on
Commit
586dcd2
·
verified ·
1 Parent(s): 6088e9d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -17
app.py CHANGED
@@ -1,7 +1,6 @@
1
  import os
2
  import re
3
  import torch # Explicitly imported if you want to use torch directly
4
- import tempfile
5
  from io import BytesIO
6
 
7
  import streamlit as st
@@ -9,16 +8,20 @@ from PIL import Image
9
  from transformers import pipeline
10
  from pdf2image import convert_from_bytes
11
 
12
- #####################################
13
- # Load the OCR Pipeline (Uses Torch)
14
- #####################################
15
- try:
16
- # Ensure your transformers library is updated (>=4.x)
17
- ocr_pipeline = pipeline("image-to-text", model="YouLiXiya/tinyllava-v1.0-1.1b-hf")
18
- st.write("Model loaded successfully!")
19
- except Exception as e:
20
- st.error(f"Error loading model: {e}")
21
- st.stop()
 
 
 
 
22
 
23
  #####################################
24
  # Utility: Convert PDF to Images
@@ -59,7 +62,8 @@ def extract_text_from_file(file_obj):
59
  #####################################
60
  # Information Extraction Functions
61
  #####################################
62
- def extract_resume_info(text):
 
63
  info = {
64
  "Name": None,
65
  "Age": None,
@@ -113,16 +117,18 @@ def process_resume(file_obj):
113
  if file_obj is None:
114
  return None, None
115
 
 
116
  resume_text = extract_text_from_file(file_obj)
117
- resume_info = extract_resume_info(resume_text)
 
118
  return resume_text, resume_info
119
 
120
  #####################################
121
- # Streamlit UI
122
  #####################################
123
- st.title("Resume Extraction and Information Parsing")
124
  st.markdown("""
125
- Upload a resume file (in PDF or image format) and the app will extract its text and parse critical candidate information.
126
  """)
127
 
128
  uploaded_file = st.file_uploader("Upload Resume (PDF or Image)", type=["pdf", "png", "jpg", "jpeg"])
@@ -135,5 +141,5 @@ if st.button("Extract Info"):
135
  resume_text, resume_info = process_resume(uploaded_file)
136
  st.subheader("Extracted Resume Text")
137
  st.text_area("", resume_text, height=200)
138
- st.subheader("Parsed Resume Information")
139
  st.json(resume_info)
 
1
  import os
2
  import re
3
  import torch # Explicitly imported if you want to use torch directly
 
4
  from io import BytesIO
5
 
6
  import streamlit as st
 
8
  from transformers import pipeline
9
  from pdf2image import convert_from_bytes
10
 
11
+ # Use st.cache_resource (Streamlit 1.18+) to load and cache the model/pipeline once
12
+ @st.cache_resource(show_spinner=False)
13
+ def load_ocr_pipeline():
14
+ try:
15
+ # Ensure your transformers library is up-to-date (>=4.x)
16
+ ocr_pipe = pipeline("image-to-text", model="YouLiXiya/tinyllava-v1.0-1.1b-hf")
17
+ return ocr_pipe
18
+ except Exception as e:
19
+ st.error(f"Error loading model: {e}")
20
+ st.stop()
21
+
22
+ # Load the model at startup
23
+ ocr_pipeline = load_ocr_pipeline()
24
+ st.write("Model loaded successfully!")
25
 
26
  #####################################
27
  # Utility: Convert PDF to Images
 
62
  #####################################
63
  # Information Extraction Functions
64
  #####################################
65
+ def extract_basic_resume_info(text):
66
+ """Extract basic resume info: Name, Age, Job Experience, Skills, Expected Industry/Direction."""
67
  info = {
68
  "Name": None,
69
  "Age": None,
 
117
  if file_obj is None:
118
  return None, None
119
 
120
+ # Extract text from PDF or image using the preloaded OCR pipeline
121
  resume_text = extract_text_from_file(file_obj)
122
+ # Parse basic resume info
123
+ resume_info = extract_basic_resume_info(resume_text)
124
  return resume_text, resume_info
125
 
126
  #####################################
127
+ # Streamlit Interface
128
  #####################################
129
+ st.title("Resume Extraction and Basic Info Parsing")
130
  st.markdown("""
131
+ Upload a resume file (PDF or image) to extract basic text and candidate information.
132
  """)
133
 
134
  uploaded_file = st.file_uploader("Upload Resume (PDF or Image)", type=["pdf", "png", "jpg", "jpeg"])
 
141
  resume_text, resume_info = process_resume(uploaded_file)
142
  st.subheader("Extracted Resume Text")
143
  st.text_area("", resume_text, height=200)
144
+ st.subheader("Parsed Basic Resume Information")
145
  st.json(resume_info)