Spaces:
Sleeping
Sleeping
File size: 4,021 Bytes
cf8a522 92f45fe 8e1d297 92f45fe 8e1d297 92f45fe 586dcd2 92f45fe 586dcd2 92f45fe 586dcd2 92f45fe 586dcd2 92f45fe 8e1d297 92f45fe 8e1d297 92f45fe 9753cc9 92f45fe 9753cc9 92f45fe 9753cc9 92f45fe 9753cc9 92f45fe 9753cc9 92f45fe 8e1d297 92f45fe 8e1d297 92f45fe 08361f0 92f45fe 08361f0 92f45fe 8e1d297 92f45fe 8e1d297 6088e9d 8e1d297 6088e9d 8e1d297 92f45fe 8e1d297 586dcd2 8e1d297 92f45fe 8e1d297 92f45fe 8e1d297 92f45fe 8e1d297 9753cc9 8e1d297 92f45fe 8e1d297 92f45fe |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 |
import os
import tempfile
import streamlit as st
from transformers import pipeline
import docx
import textract
#####################################
# Summarization Pipeline Setup
#####################################
@st.cache_resource(show_spinner=False)
def load_summarization_pipeline():
    """Build the summarization pipeline used by the app.

    The function is wrapped in ``st.cache_resource`` with the spinner
    disabled. On success the pipeline object is returned. If building it
    raises, the error is shown in the app via ``st.error`` and the script
    run is halted with ``st.stop()``.
    """
    try:
        return pipeline(
            "summarization",
            model="recogna-nlp/ptt5-base-summ-xlsum",
        )
    except Exception as exc:
        st.error(f"Error loading summarization model: {exc}")
        st.stop()
# Module-level pipeline instance; summarize_text() reads this global.
# load_summarization_pipeline() is decorated with st.cache_resource.
summarizer = load_summarization_pipeline()
# Status message written to the page after the loader has returned.
st.write("Summarization model loaded successfully!")
#####################################
# Function to Extract Text from File
#####################################
def extract_text_from_file(file_obj):
    """Extract plain text from a .txt, .docx, or .doc file.

    Args:
        file_obj: Binary file-like object exposing a ``name`` attribute and
            a ``read()`` method (the caller passes the object returned by
            ``st.file_uploader``).

    Returns:
        The extracted text as a string. This function does not raise on
        extraction failures: it returns a message starting with
        "Error ..." instead, and returns "Unsupported file type." for any
        extension other than the three listed above.
    """
    ext = os.path.splitext(file_obj.name)[1].lower()

    if ext == ".txt":
        try:
            # "utf-8-sig" decodes ordinary UTF-8 unchanged but also strips a
            # leading byte-order mark, which would otherwise leak into the
            # text as "\ufeff".
            return file_obj.read().decode("utf-8-sig")
        except Exception as e:
            return f"Error reading text file: {e}"

    if ext == ".docx":
        try:
            # python-docx reads directly from the file-like object.
            document = docx.Document(file_obj)
            return "\n".join(para.text for para in document.paragraphs)
        except Exception as e:
            return f"Error processing DOC file: {e}".replace("DOC", "DOCX", 1)

    if ext == ".doc":
        # textract expects a filename, so the upload is spooled to a
        # temporary file first.
        tmp_filename = None
        try:
            with tempfile.NamedTemporaryFile(delete=False, suffix=".doc") as tmp:
                # Record the path before writing so cleanup still happens
                # if the write itself fails.
                tmp_filename = tmp.name
                tmp.write(file_obj.read())
            # The temp file is closed (and therefore flushed) at this point,
            # so textract can reopen it by name.
            return textract.process(tmp_filename).decode("utf-8")
        except Exception as e:
            return f"Error processing DOC file: {e}"
        finally:
            if tmp_filename is not None:
                try:
                    os.remove(tmp_filename)
                except OSError:
                    # Best-effort cleanup: a leftover temp file must not
                    # mask the extraction result.
                    pass

    return "Unsupported file type."
#####################################
# Function to Summarize Extracted Text
#####################################
def summarize_text(text):
    """Summarize *text* with the module-level ``summarizer`` pipeline.

    Args:
        text: The text to summarize. ``None``, an empty string, or a
            whitespace-only string is treated as "nothing to summarize".

    Returns:
        The generated summary string. When there is no text, returns
        "No text available to summarize."; when the pipeline raises,
        returns a message starting with "Error during summarization:".
        Adjust ``max_length`` and ``min_length`` in the call below as
        needed.
    """
    # Guard against None as well as blank input: calling .strip() on None
    # would raise AttributeError before the friendly message is returned.
    if text is None or not text.strip():
        return "No text available to summarize."
    try:
        # Note: the summarization pipeline can have limitations on text
        # length. If long documents cause issues, consider summarizing in
        # chunks.
        summary = summarizer(text, max_length=150, min_length=40, do_sample=False)
        return summary[0]["summary_text"]
    except Exception as e:
        return f"Error during summarization: {e}"
#####################################
# Main Processing Logic
#####################################
def process_resume(file_obj):
    """Extract the text of an uploaded resume and summarize it.

    Returns a ``(resume_text, summary_text)`` pair, or ``(None, None)``
    when *file_obj* is ``None``.
    """
    if file_obj is not None:
        extracted = extract_text_from_file(file_obj)
        return extracted, summarize_text(extracted)
    return None, None
#####################################
# Streamlit Interface
#####################################
# Page header and usage instructions.
st.title("Resume Summarization App")
st.markdown(
    """
Upload your resume file — supported formats: **.doc**, **.docx**, and **.txt**.
The app will extract the text content from your resume and generate a summarization.
"""
)

# The uploader is restricted to the extensions extract_text_from_file handles.
uploaded_file = st.file_uploader("Upload Resume", type=["doc", "docx", "txt"])

if st.button("Summarize Resume"):
    if uploaded_file is None:
        st.error("Please upload a file first.")
    else:
        with st.spinner("Processing..."):
            resume_text, summary_text = process_resume(uploaded_file)

        # Each text area gets a real label, hidden with
        # label_visibility="collapsed" because the subheader above it
        # already serves as the visible heading. An empty label ("") is
        # discouraged by Streamlit for accessibility reasons.
        st.subheader("Extracted Resume Text")
        st.text_area(
            "Extracted Resume Text",
            resume_text,
            height=250,
            label_visibility="collapsed",
        )
        st.subheader("Summarized Resume")
        st.text_area(
            "Summarized Resume",
            summary_text,
            height=150,
            label_visibility="collapsed",
        )