dhairyashah committed (verified)
Commit ef4c75d · 1 Parent(s): 3cb5e70

Update app.py

Files changed (1): app.py (+24 -74)

app.py CHANGED
@@ -1,20 +1,14 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-
 import tqdm
 from PIL import Image
 import hashlib
 import torch
 import fitz
-import threading
 import gradio as gr
-import spaces
 import os
-from transformers import AutoModel
-from transformers import AutoTokenizer
+from transformers import AutoModel, AutoTokenizer
 import numpy as np
 import json
+import spaces

 cache_dir = 'kb_cache'
 os.makedirs(cache_dir, exist_ok=True)
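
Note on the import shuffle: `import spaces` moves below the other imports but is still needed, because `add_pdf_gradio` keeps the ZeroGPU decorator. A minimal sketch of that pattern, purely illustrative: `embed_batch` is a hypothetical function that is not part of app.py, and the decorator only has an effect inside a Hugging Face ZeroGPU Space.

```python
import spaces
import torch

@spaces.GPU(duration=100)  # on a ZeroGPU Space, a GPU is attached for up to 100 s per call
def embed_batch(x: torch.Tensor) -> torch.Tensor:
    # Hypothetical helper: run the compute on CUDA, hand the result back on CPU.
    x = x.to("cuda")
    return (x @ x.T).cpu()
```
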
@@ -33,10 +27,15 @@ def calculate_md5_from_binary(binary_data):

 @spaces.GPU(duration=100)
 def add_pdf_gradio(pdf_file_binary, progress=gr.Progress()):
+    if pdf_file_binary is None:
+        return "No PDF file uploaded."
+
     global model, tokenizer
     model.eval()

-    this_cache_dir = os.path.join(cache_dir, 'temp_cache')
+    knowledge_base_name = calculate_md5_from_binary(pdf_file_binary)
+
+    this_cache_dir = os.path.join(cache_dir, knowledge_base_name)
     os.makedirs(this_cache_dir, exist_ok=True)

     with open(os.path.join(this_cache_dir, f"src.pdf"), 'wb') as file:
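
The cache directory is now keyed on `calculate_md5_from_binary(pdf_file_binary)` instead of a shared `temp_cache`, so each uploaded PDF gets its own knowledge base. The helper itself sits outside this diff (it only appears in the hunk header); a minimal sketch of what it presumably does, assuming it simply hashes the uploaded bytes and the hex digest becomes the directory name under `kb_cache`:

```python
import hashlib
import os

def calculate_md5_from_binary(binary_data: bytes) -> str:
    # Assumed implementation: hex MD5 digest of the raw PDF bytes.
    return hashlib.md5(binary_data).hexdigest()

pdf_bytes = b"%PDF-1.4 ..."  # placeholder for the uploaded file's contents
kb_name = calculate_md5_from_binary(pdf_bytes)
print(os.path.join("kb_cache", kb_name))  # same bytes always map to the same cache directory
```
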
@@ -73,13 +72,16 @@ def add_pdf_gradio(pdf_file_binary, progress=gr.Progress()):

     return "PDF processed successfully!"

-@spaces.GPU(duration=50)
-def retrieve_gradio(query: str, topk: int):
+def retrieve_gradio(pdf_file_binary, query: str, topk: int):
     global model, tokenizer

     model.eval()

-    target_cache_dir = os.path.join(cache_dir, 'temp_cache')
+    if pdf_file_binary is None:
+        return "No PDF file uploaded."
+
+    knowledge_base_name = calculate_md5_from_binary(pdf_file_binary)
+    target_cache_dir = os.path.join(cache_dir, knowledge_base_name)

     if not os.path.exists(target_cache_dir):
         return None
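
Because `retrieve_gradio` now receives the uploaded PDF and re-derives the same MD5 key, retrieval lands in the directory that `add_pdf_gradio` populated. A hedged usage sketch of calling it directly, outside the Gradio UI; it assumes the embedding model, tokenizer, and cached page representations are loaded as in the rest of app.py, and `my_doc.pdf` is a hypothetical path:

```python
# Hypothetical direct call after the PDF has been processed once.
with open("my_doc.pdf", "rb") as f:  # placeholder path
    pdf_bytes = f.read()

pages = retrieve_gradio(pdf_bytes, "What are the main findings?", 3)
if pages is None:
    print("No knowledge base for this PDF yet - run add_pdf_gradio first.")
else:
    for i, page in enumerate(pages):  # PIL.Image pages, most similar first
        page.save(f"retrieved_page_{i}.png")
```
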
@@ -95,87 +97,35 @@ def retrieve_gradio(query: str, topk: int):
     with torch.no_grad():
         query_rep = model(text=[query_with_instruction], image=[None], tokenizer=tokenizer).reps.squeeze(0).cpu()

+    query_md5 = hashlib.md5(query.encode()).hexdigest()
+
     doc_reps_cat = torch.stack([torch.Tensor(i) for i in doc_reps], dim=0)

     similarities = torch.matmul(query_rep, doc_reps_cat.T)

     topk_values, topk_doc_ids = torch.topk(similarities, k=topk)

-    topk_doc_ids_np = topk_doc_ids.cpu().numpy()
-
-    images_topk = [Image.open(os.path.join(target_cache_dir, f"{md5s[idx]}.png")) for idx in topk_doc_ids_np]
+    images_topk = [Image.open(os.path.join(target_cache_dir, f"{md5s[idx]}.png")) for idx in topk_doc_ids.cpu().numpy()]

     return images_topk

-device = 'cuda'
-
-print("emb model load begin...")
-model_path = 'RhapsodyAI/minicpm-visual-embedding-v0' # replace with your local model path
-tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
-model = AutoModel.from_pretrained(model_path, trust_remote_code=True)
-model.eval()
-model.to(device)
-print("emb model load success!")
-
-print("gen model load begin...")
-gen_model_path = 'openbmb/MiniCPM-V-2_6'
-gen_tokenizer = AutoTokenizer.from_pretrained(gen_model_path, trust_remote_code=True)
-gen_model = AutoModel.from_pretrained(gen_model_path, trust_remote_code=True, attn_implementation='sdpa', torch_dtype=torch.bfloat16)
-gen_model.eval()
-gen_model.to(device)
-print("gen model load success!")
-
-@spaces.GPU(duration=50)
-def answer_question(images, question):
-    global gen_model, gen_tokenizer
-    images_ = [Image.open(image[0]).convert('RGB') for image in images]
-    msgs = [{'role': 'user', 'content': [question, *images_]}]
-    answer = gen_model.chat(
-        image=None,
-        msgs=msgs,
-        tokenizer=gen_tokenizer
-    )
-    print(answer)
-    return answer

 with gr.Blocks() as app:
-    gr.Markdown("# MiniCPMV-RAG-PDFQA: Two Vision Language Models Enable End-to-End RAG")
-
-    gr.Markdown("""
-    - A Vision Language Model Dense Retriever ([minicpm-visual-embedding-v0](https://huggingface.co/RhapsodyAI/minicpm-visual-embedding-v0)) **directly reads** your PDFs **without need of OCR**, produce **multimodal dense representations** and build your personal library.
-
-    - **Ask a question**, it retrieves the most relevant pages, then [MiniCPM-V-2.6](https://huggingface.co/spaces/openbmb/MiniCPM-V-2_6) will answer your question based on pages recalled, with strong multi-image understanding capability.
-
-    - It helps you read a long **visually-intensive** or **text-oriented** PDF document and find the pages that answer your question.
+    gr.Markdown("# MiniCPMV-RAG-PDFQA")

-    - It helps you build a personal library and retrieve book pages from a large collection of books.
-
-    - It works like a human: read, store, retrieve, and answer with full vision.
-    """)
-
-    gr.Markdown("- Currently online demo support PDF document with less than 50 pages due to GPU time limit. Deploy on your own machine for longer PDFs and books.")
-
     with gr.Row():
-        file_input = gr.File(type="binary", label="Step 1: Upload PDF")
+        file_input = gr.File(type="binary", label="Upload PDF")
         process_button = gr.Button("Process PDF")
-        file_result = gr.Textbox(label="PDF Process Status")

-    process_button.click(add_pdf_gradio, inputs=[file_input], outputs=file_result)
+    process_button.click(add_pdf_gradio, inputs=[file_input], outputs="text")

     with gr.Row():
         query_input = gr.Text(label="Your Question")
-        topk_input = gr.Number(value=5, minimum=1, maximum=10, step=1, label="Number of Pages to Retrieve")
+        topk_input = gr.Number(value=5, minimum=1, maximum=10, step=1, label="Number of pages to retrieve")
         retrieve_button = gr.Button("Retrieve Pages")
-        images_output = gr.Gallery(label="Retrieved Pages")

-    retrieve_button.click(retrieve_gradio, inputs=[query_input, topk_input], outputs=images_output)
-
-    with gr.Row():
-        answer_button = gr.Button("Answer Question")
-        gen_model_response = gr.Textbox(label="MiniCPM-V-2.6's Answer")
-
-    answer_button.click(fn=answer_question, inputs=[images_output, query_input], outputs=gen_model_response)
+        images_output = gr.Gallery(label="Retrieved Pages")

-    gr.Markdown("By using this demo, you agree to share your use data with us for research purpose, to help improve user experience.")
+    retrieve_button.click(retrieve_gradio, inputs=[file_input, query_input, topk_input], outputs=images_output)

-app.launch()
+app.launch(share=True)
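
The page scoring itself is unchanged by this commit: the query embedding is compared against the stacked page embeddings with a dot product, and `torch.topk` picks the pages to show. A self-contained sketch of that step with random stand-in embeddings; the sizes below are illustrative and not taken from the model:

```python
import torch

torch.manual_seed(0)
num_pages, dim, topk = 12, 2304, 3              # illustrative sizes only

query_rep = torch.randn(dim)                    # stand-in for model(...).reps.squeeze(0).cpu()
doc_reps_cat = torch.randn(num_pages, dim)      # stand-in for the cached page embeddings

similarities = torch.matmul(query_rep, doc_reps_cat.T)        # one score per page
topk_values, topk_doc_ids = torch.topk(similarities, k=topk)

print(topk_doc_ids.tolist())  # indices of the best-matching pages, highest score first
```
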