Spaces:

Sasiraj01
/

MultiRAGClinical

Runtime error

App Files Files Community

Sasiraj01 committed on Apr 15

Commit

b85f85d

verified ·

1 Parent(s): 2e86476

Upload app-2.py

Browse files

Files changed (1) hide show

app-2.py +52 -0

app-2.py ADDED Viewed

	@@ -0,0 +1,52 @@

+import gradio as gr
+from transformers import AutoProcessor, LlavaForConditionalGeneration
+from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext, set_global_service_context
+from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+from llama_index.vector_stores.faiss import FaissVectorStore
+from llama_index.storage.storage_context import StorageContext
+import torch
+from PIL import Image
+import os
+# Load LLaVA model and processor
+model_id = "llava-hf/llava-1.5-7b-hf"
+processor = AutoProcessor.from_pretrained(model_id)
+model = LlavaForConditionalGeneration.from_pretrained(model_id, torch_dtype=torch.float16, low_cpu_mem_usage=True)
+model.to("cuda" if torch.cuda.is_available() else "cpu")
+# Load documents and build FAISS index
+documents = SimpleDirectoryReader("docs").load_data()
+embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en")
+service_context = ServiceContext.from_defaults(embed_model=embed_model)
+set_global_service_context(service_context)
+index = VectorStoreIndex.from_documents(documents, service_context=service_context)
+query_engine = index.as_query_engine()
+def multimodal_rag(image, question):
+    # Step 1: RAG to retrieve context
+    context = query_engine.query(question)
+    # Step 2: Process with LLaVA
+    prompt = f"Context: {context}
+Question: {question}"
+    inputs = processor(prompt, image, return_tensors="pt").to(model.device)
+    output = model.generate(**inputs, max_new_tokens=100)
+    answer = processor.decode(output[0], skip_special_tokens=True)
+    return answer
+demo = gr.Interface(
+    fn=multimodal_rag,
+    inputs=[
+        gr.Image(type="pil", label="Upload Image"),
+        gr.Textbox(label="Enter your question")
+    ],
+    outputs="text",
+    title="Multimodal RAG with LLaVA and FAISS",
+    description="Upload an image and ask a question. The system retrieves relevant text using FAISS and answers using LLaVA."
+)
+if __name__ == "__main__":
+    demo.launch()