CR7CAD committed on
Commit
294af95
·
verified ·
1 Parent(s): 586dcd2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -35
app.py CHANGED
@@ -6,9 +6,8 @@ from io import BytesIO
6
  import streamlit as st
7
  from PIL import Image
8
  from transformers import pipeline
9
- from pdf2image import convert_from_bytes
10
 
11
- # Use st.cache_resource (Streamlit 1.18+) to load and cache the model/pipeline once
12
  @st.cache_resource(show_spinner=False)
13
  def load_ocr_pipeline():
14
  try:
@@ -23,40 +22,18 @@ def load_ocr_pipeline():
23
  ocr_pipeline = load_ocr_pipeline()
24
  st.write("Model loaded successfully!")
25
 
26
- #####################################
27
- # Utility: Convert PDF to Images
28
- #####################################
29
- def convert_pdf_to_images(pdf_bytes):
30
- try:
31
- images = convert_from_bytes(pdf_bytes)
32
- return images
33
- except Exception as e:
34
- st.error(f"PDF conversion error: {e}")
35
- return []
36
-
37
  #####################################
38
  # Pipeline: Extract Text with OCR Pipeline
39
  #####################################
40
  def extract_text_from_file(file_obj):
41
- file_extension = os.path.splitext(file_obj.name)[1].lower()
42
  full_text = ""
43
-
44
- if file_extension == ".pdf":
45
- file_bytes = file_obj.read()
46
- images = convert_pdf_to_images(file_bytes)
47
- for img in images:
48
- result = ocr_pipeline(img)
49
- if isinstance(result, list) and "text" in result[0]:
50
- full_text += result[0]["text"] + "\n"
51
- else:
52
- try:
53
- img = Image.open(file_obj)
54
- result = ocr_pipeline(img)
55
- if isinstance(result, list) and "text" in result[0]:
56
- full_text = result[0]["text"]
57
- except Exception as e:
58
- full_text = f"Error processing image: {e}"
59
-
60
  return full_text
61
 
62
  #####################################
@@ -117,7 +94,7 @@ def process_resume(file_obj):
117
  if file_obj is None:
118
  return None, None
119
 
120
- # Extract text from PDF or image using the preloaded OCR pipeline
121
  resume_text = extract_text_from_file(file_obj)
122
  # Parse basic resume info
123
  resume_info = extract_basic_resume_info(resume_text)
@@ -128,14 +105,14 @@ def process_resume(file_obj):
128
  #####################################
129
  st.title("Resume Extraction and Basic Info Parsing")
130
  st.markdown("""
131
- Upload a resume file (PDF or image) to extract basic text and candidate information.
132
  """)
133
 
134
- uploaded_file = st.file_uploader("Upload Resume (PDF or Image)", type=["pdf", "png", "jpg", "jpeg"])
135
 
136
  if st.button("Extract Info"):
137
  if uploaded_file is None:
138
- st.error("Please upload a file first.")
139
  else:
140
  with st.spinner("Processing..."):
141
  resume_text, resume_info = process_resume(uploaded_file)
 
6
  import streamlit as st
7
  from PIL import Image
8
  from transformers import pipeline
 
9
 
10
+ # Use st.cache_resource (Streamlit 1.18+) to load and cache the OCR pipeline once
11
  @st.cache_resource(show_spinner=False)
12
  def load_ocr_pipeline():
13
  try:
 
22
  ocr_pipeline = load_ocr_pipeline()
23
  st.write("Model loaded successfully!")
24
 
 
 
 
 
 
 
 
 
 
 
 
25
  #####################################
26
  # Pipeline: Extract Text with OCR Pipeline
27
  #####################################
28
  def extract_text_from_file(file_obj):
 
29
  full_text = ""
30
+ try:
31
+ img = Image.open(file_obj)
32
+ result = ocr_pipeline(img)
33
+ if isinstance(result, list) and "text" in result[0]:
34
+ full_text = result[0]["text"]
35
+ except Exception as e:
36
+ full_text = f"Error processing image: {e}"
 
 
 
 
 
 
 
 
 
 
37
  return full_text
38
 
39
  #####################################
 
94
  if file_obj is None:
95
  return None, None
96
 
97
+ # Extract text using only the image-based OCR pipeline
98
  resume_text = extract_text_from_file(file_obj)
99
  # Parse basic resume info
100
  resume_info = extract_basic_resume_info(resume_text)
 
105
  #####################################
106
  st.title("Resume Extraction and Basic Info Parsing")
107
  st.markdown("""
108
+ Upload an image file (PNG, JPG, or JPEG) to extract basic text and candidate information.
109
  """)
110
 
111
+ uploaded_file = st.file_uploader("Upload Resume (Image Only)", type=["png", "jpg", "jpeg"])
112
 
113
  if st.button("Extract Info"):
114
  if uploaded_file is None:
115
+ st.error("Please upload an image file first.")
116
  else:
117
  with st.spinner("Processing..."):
118
  resume_text, resume_info = process_resume(uploaded_file)