Spaces:

CR7CAD
/

ISOM5240FinalProject

Sleeping

App Files Community

CR7CAD committed on Mar 16

Commit

586dcd2

verified ·

1 Parent(s): 6088e9d

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -17

app.py CHANGED Viewed

@@ -1,7 +1,6 @@
 import os
 import re
 import torch  # Explicitly imported if you want to use torch directly
-import tempfile
 from io import BytesIO
 import streamlit as st
@@ -9,16 +8,20 @@ from PIL import Image
 from transformers import pipeline
 from pdf2image import convert_from_bytes
-#####################################
-# Load the OCR Pipeline (Uses Torch)
-#####################################
-try:
-    # Ensure your transformers library is updated (>=4.x)
-    ocr_pipeline = pipeline("image-to-text", model="YouLiXiya/tinyllava-v1.0-1.1b-hf")
-    st.write("Model loaded successfully!")
-except Exception as e:
-    st.error(f"Error loading model: {e}")
-    st.stop()
 #####################################
 # Utility: Convert PDF to Images
@@ -59,7 +62,8 @@ def extract_text_from_file(file_obj):
 #####################################
 # Information Extraction Functions
 #####################################
-def extract_resume_info(text):
     info = {
         "Name": None,
         "Age": None,
@@ -113,16 +117,18 @@ def process_resume(file_obj):
     if file_obj is None:
         return None, None
     resume_text = extract_text_from_file(file_obj)
-    resume_info = extract_resume_info(resume_text)
     return resume_text, resume_info
 #####################################
-# Streamlit UI
 #####################################
-st.title("Resume Extraction and Information Parsing")
 st.markdown("""
-Upload a resume file (in PDF or image format) and the app will extract its text and parse critical candidate information.
 """)
 uploaded_file = st.file_uploader("Upload Resume (PDF or Image)", type=["pdf", "png", "jpg", "jpeg"])
@@ -135,5 +141,5 @@ if st.button("Extract Info"):
             resume_text, resume_info = process_resume(uploaded_file)
         st.subheader("Extracted Resume Text")
         st.text_area("", resume_text, height=200)
-        st.subheader("Parsed Resume Information")
         st.json(resume_info)

 import os
 import re
 import torch  # Explicitly imported if you want to use torch directly
 from io import BytesIO
 import streamlit as st
 from transformers import pipeline
 from pdf2image import convert_from_bytes
+# Use st.cache_resource (Streamlit 1.18+) to load and cache the model/pipeline once
+@st.cache_resource(show_spinner=False)
+def load_ocr_pipeline():
+    try:
+        # Ensure your transformers library is up-to-date (>=4.x)
+        ocr_pipe = pipeline("image-to-text", model="YouLiXiya/tinyllava-v1.0-1.1b-hf")
+        return ocr_pipe
+    except Exception as e:
+        st.error(f"Error loading model: {e}")
+        st.stop()
+# Load the model at startup
+ocr_pipeline = load_ocr_pipeline()
+st.write("Model loaded successfully!")
 #####################################
 # Utility: Convert PDF to Images
 #####################################
 # Information Extraction Functions
 #####################################
+def extract_basic_resume_info(text):
+    """Extract basic resume info: Name, Age, Job Experience, Skills, Expected Industry/Direction."""
     info = {
         "Name": None,
         "Age": None,
     if file_obj is None:
         return None, None
+    # Extract text from PDF or image using the preloaded OCR pipeline
     resume_text = extract_text_from_file(file_obj)
+    # Parse basic resume info
+    resume_info = extract_basic_resume_info(resume_text)
     return resume_text, resume_info
 #####################################
+# Streamlit Interface
 #####################################
+st.title("Resume Extraction and Basic Info Parsing")
 st.markdown("""
+Upload a resume file (PDF or image) to extract basic text and candidate information.
 """)
 uploaded_file = st.file_uploader("Upload Resume (PDF or Image)", type=["pdf", "png", "jpg", "jpeg"])
             resume_text, resume_info = process_resume(uploaded_file)
         st.subheader("Extracted Resume Text")
         st.text_area("", resume_text, height=200)
+        st.subheader("Parsed Basic Resume Information")
         st.json(resume_info)