import os
import tempfile
import gradio as gr
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredPDFLoader
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFaceHub
from PIL import Image
from transformers import pipeline
# Temporary directory for extracted figures
FIGURES_DIR = tempfile.mkdtemp(prefix="figures_")
# Configure Hugging Face
HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
# Initialize embeddings and vector store
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vector_store = None
# Initialize image captioning pipeline
captioner = pipeline("image-to-text", model="Salesforce/blip2-flan-t5-xl", use_auth_token=HUGGINGFACEHUB_API_TOKEN)
# Initialize LLM for QA
llm = HuggingFaceHub(
    repo_id="google/flan-t5-xxl",
    model_kwargs={"temperature": 0.0, "max_length": 256},
    huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
)
# Helper functions
def process_pdf(pdf_file):
    # Load text content from the uploaded PDF
    loader = UnstructuredPDFLoader(pdf_file.name)
    docs = loader.load()
    raw_text = "\n".join(d.page_content for d in docs)
    # Optionally, embedded images could be extracted and captioned here.
    # This demo skips actual image extraction; caption_pdf_images below
    # sketches how it could be done.
    captions = []
    # Combine text and (currently empty) captions
    combined = raw_text + "\n\n" + "\n".join(captions)
    return combined
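# Sketch only: one way to extract and caption embedded images, assuming
# PyMuPDF (`pip install pymupdf`) as an extra dependency. Extracted images
# are saved to FIGURES_DIR and run through the BLIP-2 captioner defined
# above. This helper is illustrative and not wired into process_pdf.
def caption_pdf_images(pdf_path):
    import io
    import fitz  # PyMuPDF; assumed dependency for this sketch
    captions = []
    doc = fitz.open(pdf_path)
    for page in doc:
        for img in page.get_images(full=True):
            xref = img[0]
            img_bytes = doc.extract_image(xref)["image"]
            image = Image.open(io.BytesIO(img_bytes)).convert("RGB")
            image.save(os.path.join(FIGURES_DIR, f"fig_{xref}.png"))
            # The image-to-text pipeline returns [{"generated_text": ...}]
            result = captioner(image)
            captions.append(result[0]["generated_text"])
    return captions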
def build_index(text):
    global vector_store
    # Split the document into overlapping chunks
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = splitter.split_text(text)
    # Build a fresh FAISS index over the chunks (replaces any existing index)
    vector_store = FAISS.from_texts(chunks, embeddings)
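# Sketch: FAISS.from_texts above replaces the index wholesale. To extend an
# existing index with an additional document instead, LangChain's FAISS
# wrapper exposes add_texts; a minimal variant using the same splitter
# settings (helper name is illustrative):
def extend_index(text):
    global vector_store
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = splitter.split_text(text)
    if vector_store is None:
        vector_store = FAISS.from_texts(chunks, embeddings)
    else:
        vector_store.add_texts(chunks)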
def answer_query(query):
    # Guard against querying before any document has been indexed
    if vector_store is None:
        return "Please upload a PDF first."
    qa = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=vector_store.as_retriever(),
    )
    return qa.run(query)
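# Sketch: if the retrieved chunks should be surfaced alongside the answer,
# RetrievalQA also accepts return_source_documents=True; the chain is then
# called with a dict and returns both the result and its source documents.
# This variant (name is illustrative) is not used by the UI below.
def answer_query_with_sources(query):
    qa = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=vector_store.as_retriever(),
        return_source_documents=True,
    )
    result = qa({"query": query})
    sources = "\n".join(doc.page_content[:200] for doc in result["source_documents"])
    return result["result"] + "\n\nSources:\n" + sources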
# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# Multimodal RAG QA App")
    with gr.Row():
        pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
        question_input = gr.Textbox(label="Ask a question", placeholder="Enter your question here...")
    output = gr.Textbox(label="Answer", interactive=False)

    def on_submit(pdf, question):
        # Index the uploaded PDF (if any) before answering
        if pdf is not None:
            text = process_pdf(pdf)
            build_index(text)
        if not question:
            return "Please enter a question."
        return answer_query(question)

    submit_btn = gr.Button("Get Answer")
    submit_btn.click(on_submit, inputs=[pdf_input, question_input], outputs=output)

if __name__ == "__main__":
    demo.launch()