import streamlit as st
import docx
import PyPDF2
from transformers import pipeline


# Load the Hugging Face question-answering model once and cache it across reruns.
@st.cache_resource
def load_pipeline():
    return pipeline("question-answering", model="deepset/roberta-base-squad2")


qa_pipeline = load_pipeline()


def read_pdf(file):
    """Extract text from every page of a PDF file."""
    text = ""
    pdf_reader = PyPDF2.PdfReader(file)
    for page in pdf_reader.pages:
        # extract_text() can return None for pages with no extractable text.
        text += (page.extract_text() or "") + "\n"
    return text


def read_word(file):
    """Extract text from every paragraph of a Word (.docx) file."""
    doc = docx.Document(file)
    text = ""
    for para in doc.paragraphs:
        text += para.text + "\n"
    return text


def extract_text(uploaded_file):
    """Dispatch to the correct reader based on the uploaded file's extension."""
    file_type = uploaded_file.name.split('.')[-1].lower()
    if file_type == 'pdf':
        text = read_pdf(uploaded_file)
    elif file_type == 'docx':
        text = read_word(uploaded_file)
    else:
        st.error("Unsupported file type. Please upload a PDF or Word file.")
        text = None
    return text


# Streamlit interface
def main():
    st.title("📄 File Reader & Hugging Face Q&A Application")
    st.write("Upload a PDF or Word file and ask questions based on its content.")

    # File upload
    uploaded_file = st.file_uploader("Choose a PDF or Word file", type=["pdf", "docx"])

    if uploaded_file is not None:
        # Streamlit's UploadedFile is file-like, so it can be passed directly
        # to PyPDF2 and python-docx; no temporary file on disk is needed.
        file_text = extract_text(uploaded_file)

        if file_text:
            # Show a preview of the extracted text.
            preview = file_text[:1000]
            if len(file_text) > 1000:
                preview += "... (truncated for display)"
            st.text_area("File Content", preview)

            # Question answering
            question = st.text_input("Ask a question based on the file content:")
            if st.button("Get Answer"):
                if question.strip():
                    try:
                        result = qa_pipeline(question=question, context=file_text)
                        st.success(f"Answer: {result['answer']}")
                    except Exception as e:
                        st.error(f"Error generating answer: {str(e)}")
                else:
                    st.warning("Please enter a question.")


if __name__ == "__main__":
    main()
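

# --- Usage notes (illustrative only, not executed) ---------------------------
# The app above is typically launched with Streamlit's CLI, e.g.
#     streamlit run app.py
# where app.py is whatever filename this script is saved under.
#
# A minimal standalone sketch of the same Hugging Face call, independent of
# Streamlit, assuming `transformers` and a PyTorch or TensorFlow backend are
# installed and the model can be downloaded; the question/context strings
# below are made up purely for illustration:
#
#     from transformers import pipeline
#
#     qa = pipeline("question-answering", model="deepset/roberta-base-squad2")
#     result = qa(
#         question="What does the report cover?",
#         context="The report covers quarterly revenue and staffing changes.",
#     )
#     # The pipeline returns a dict with "answer", "score", "start", and "end".
#     print(result["answer"], result["score"])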