import os
import tempfile
import streamlit as st
from transformers import pipeline
import docx
import textract

#####################################
# Summarization Pipeline Setup
#####################################
@st.cache_resource(show_spinner=False)
def load_summarization_pipeline():
    """
    Build and return the Hugging Face summarization pipeline.

    The function is wrapped in ``st.cache_resource`` with the spinner
    disabled. If constructing the pipeline raises, the error is shown with
    ``st.error`` and ``st.stop()`` is called.
    """
    model_name = "recogna-nlp/ptt5-base-summ-xlsum"
    try:
        return pipeline("summarization", model=model_name)
    except Exception as load_error:
        # Surface the failure in the UI rather than as a raw traceback.
        st.error(f"Error loading summarization model: {load_error}")
        st.stop()

# Module-level pipeline instance; summarize_text() reads this global name.
# load_summarization_pipeline is decorated with st.cache_resource.
summarizer = load_summarization_pipeline()
# Status message written to the page whenever the script reaches this line.
st.write("Summarization model loaded successfully!")

#####################################
# Function to Extract Text from File
#####################################
def extract_text_from_file(file_obj):
    """
    Extract text from .txt, .docx, and .doc files.

    Args:
        file_obj: A binary file-like object exposing ``name`` and ``read()``
            (for example the object returned by ``st.file_uploader``).

    Returns:
        str: The extracted text. Failures are never raised to the caller:
        an ``"Error ..."`` message is returned instead, and
        ``"Unsupported file type."`` is returned for any other extension.
    """
    filename = file_obj.name
    ext = os.path.splitext(filename)[1].lower()
    text = ""

    # Rewind first: if the stream was already consumed earlier, read()
    # would otherwise silently return an empty result.
    try:
        file_obj.seek(0)
    except (AttributeError, OSError, ValueError):
        # Not seekable; fall back to reading from the current position.
        pass

    if ext == ".txt":
        # For text files, decode the byte stream into a string.
        # "utf-8-sig" decodes plain UTF-8 and also drops a leading BOM.
        try:
            text = file_obj.read().decode("utf-8-sig")
        except Exception as e:
            text = f"Error reading text file: {e}"

    elif ext == ".docx":
        try:
            # Use python-docx to read .docx files.
            document = docx.Document(file_obj)
            text = "\n".join(para.text for para in document.paragraphs)
        except Exception as e:
            text = f"Error processing DOCX file: {e}"

    elif ext == ".doc":
        # For .doc files, use textract. textract expects a filename, so save temporarily.
        tmp_filename = None
        try:
            with tempfile.NamedTemporaryFile(delete=False, suffix=".doc") as tmp:
                # Record the path before writing so a failed write still
                # gets cleaned up in the finally block below.
                tmp_filename = tmp.name
                tmp.write(file_obj.read())
            # The file is closed (and therefore flushed) before textract opens it.
            text = textract.process(tmp_filename).decode("utf-8")
        except Exception as e:
            text = f"Error processing DOC file: {e}"
        finally:
            if tmp_filename is not None:
                try:
                    os.remove(tmp_filename)
                except OSError:
                    # Best-effort cleanup; a leftover temp file is not fatal.
                    pass
    else:
        text = "Unsupported file type."

    return text

#####################################
# Function to Summarize Extracted Text
#####################################
def summarize_text(text):
    """
    Summarize the given text using the summarization pipeline.
    Adjust max_length and min_length as needed.

    Args:
        text: The text to summarize. ``None``, empty, or whitespace-only
            input is treated as "nothing to summarize".

    Returns:
        str: The generated summary, ``"No text available to summarize."``
        when there is no usable input, or an ``"Error during
        summarization: ..."`` message if the pipeline raises.
    """
    # Guard against None as well as blank strings so .strip() cannot fail.
    if not text or not text.strip():
        return "No text available to summarize."

    try:
        # Note: The summarization pipeline can have limitations on text length.
        # truncation=True asks the tokenizer to cut over-length input instead
        # of passing it through unbounded; for full coverage of long
        # documents, consider summarizing in chunks.
        summary = summarizer(
            text,
            max_length=150,
            min_length=40,
            do_sample=False,
            truncation=True,
        )
        return summary[0]["summary_text"]
    except Exception as e:
        return f"Error during summarization: {e}"

#####################################
# Main Processing Logic
#####################################
def process_resume(file_obj):
    """
    Run text extraction followed by summarization on an uploaded file.

    Returns a ``(resume_text, summary_text)`` tuple, or ``(None, None)``
    when no file object was supplied.
    """
    if file_obj is not None:
        extracted = extract_text_from_file(file_obj)
        return extracted, summarize_text(extracted)

    return None, None

#####################################
# Streamlit Interface
#####################################
# Page header and usage instructions.
st.title("Resume Summarization App")
st.markdown(
    """
    Upload your resume file — supported formats: **.doc**, **.docx**, and **.txt**.
    The app will extract the text content from your resume and generate a summarization.
    """
)

# The uploader restricts selectable files to the extensions that
# extract_text_from_file() knows how to handle.
uploaded_file = st.file_uploader("Upload Resume", type=["doc", "docx", "txt"])

if st.button("Summarize Resume"):
    if uploaded_file is None:
        st.error("Please upload a file first.")
    else:
        with st.spinner("Processing..."):
            resume_text, summary_text = process_resume(uploaded_file)
        # Each text area gets a distinct, non-empty label (empty labels are
        # discouraged by Streamlit); the label is hidden because the
        # subheader above already serves as the visible title.
        st.subheader("Extracted Resume Text")
        st.text_area(
            "Extracted resume text",
            resume_text,
            height=250,
            label_visibility="collapsed",
        )
        st.subheader("Summarized Resume")
        st.text_area(
            "Summarized resume",
            summary_text,
            height=150,
            label_visibility="collapsed",
        )