Spaces:

CR7CAD
/

ISOM5240FinalProject

Sleeping

App Files Files Community

CR7CAD committed on Mar 16

Commit

294af95

verified ·

1 Parent(s): 586dcd2

Update app.py

Browse files

Files changed (1) hide show

app.py +12 -35

app.py CHANGED Viewed

@@ -6,9 +6,8 @@ from io import BytesIO
 import streamlit as st
 from PIL import Image
 from transformers import pipeline
-from pdf2image import convert_from_bytes
-# Use st.cache_resource (Streamlit 1.18+) to load and cache the model/pipeline once
 @st.cache_resource(show_spinner=False)
 def load_ocr_pipeline():
     try:
@@ -23,40 +22,18 @@ def load_ocr_pipeline():
 ocr_pipeline = load_ocr_pipeline()
 st.write("Model loaded successfully!")
-#####################################
-# Utility: Convert PDF to Images
-#####################################
-def convert_pdf_to_images(pdf_bytes):
-    try:
-        images = convert_from_bytes(pdf_bytes)
-        return images
-    except Exception as e:
-        st.error(f"PDF conversion error: {e}")
-        return []
 #####################################
 # Pipeline: Extract Text with OCR Pipeline
 #####################################
 def extract_text_from_file(file_obj):
-    file_extension = os.path.splitext(file_obj.name)[1].lower()
     full_text = ""
-    if file_extension == ".pdf":
-        file_bytes = file_obj.read()
-        images = convert_pdf_to_images(file_bytes)
-        for img in images:
-            result = ocr_pipeline(img)
-            if isinstance(result, list) and "text" in result[0]:
-                full_text += result[0]["text"] + "\n"
-    else:
-        try:
-            img = Image.open(file_obj)
-            result = ocr_pipeline(img)
-            if isinstance(result, list) and "text" in result[0]:
-                full_text = result[0]["text"]
-        except Exception as e:
-            full_text = f"Error processing image: {e}"
     return full_text
 #####################################
@@ -117,7 +94,7 @@ def process_resume(file_obj):
     if file_obj is None:
         return None, None
-    # Extract text from PDF or image using the preloaded OCR pipeline
     resume_text = extract_text_from_file(file_obj)
     # Parse basic resume info
     resume_info = extract_basic_resume_info(resume_text)
@@ -128,14 +105,14 @@ def process_resume(file_obj):
 #####################################
 st.title("Resume Extraction and Basic Info Parsing")
 st.markdown("""
-Upload a resume file (PDF or image) to extract basic text and candidate information.
 """)
-uploaded_file = st.file_uploader("Upload Resume (PDF or Image)", type=["pdf", "png", "jpg", "jpeg"])
 if st.button("Extract Info"):
     if uploaded_file is None:
-        st.error("Please upload a file first.")
     else:
         with st.spinner("Processing..."):
             resume_text, resume_info = process_resume(uploaded_file)

 import streamlit as st
 from PIL import Image
 from transformers import pipeline
+# Use st.cache_resource (Streamlit 1.18+) to load and cache the OCR pipeline once
 @st.cache_resource(show_spinner=False)
 def load_ocr_pipeline():
     try:
 ocr_pipeline = load_ocr_pipeline()
 st.write("Model loaded successfully!")
 #####################################
 # Pipeline: Extract Text with OCR Pipeline
 #####################################
 def extract_text_from_file(file_obj):
     full_text = ""
+    try:
+        img = Image.open(file_obj)
+        result = ocr_pipeline(img)
+        if isinstance(result, list) and "text" in result[0]:
+            full_text = result[0]["text"]
+    except Exception as e:
+        full_text = f"Error processing image: {e}"
     return full_text
 #####################################
     if file_obj is None:
         return None, None
+    # Extract text using only the image-based OCR pipeline
     resume_text = extract_text_from_file(file_obj)
     # Parse basic resume info
     resume_info = extract_basic_resume_info(resume_text)
 #####################################
 st.title("Resume Extraction and Basic Info Parsing")
 st.markdown("""
+Upload an image file (PNG, JPG, or JPEG) to extract basic text and candidate information.
 """)
+uploaded_file = st.file_uploader("Upload Resume (Image Only)", type=["png", "jpg", "jpeg"])
 if st.button("Extract Info"):
     if uploaded_file is None:
+        st.error("Please upload an image file first.")
     else:
         with st.spinner("Processing..."):
             resume_text, resume_info = process_resume(uploaded_file)