File size: 3,923 Bytes
d37e507
 
 
 
 
 
0ea826a
 
 
 
d37e507
0ea826a
d37e507
 
 
 
 
 
 
 
 
 
 
3ef4133
 
 
 
 
 
0ea826a
 
d37e507
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0ea826a
 
 
 
 
 
 
 
d37e507
 
 
 
 
 
 
 
 
 
0ea826a
d37e507
 
0ea826a
d37e507
 
 
 
 
0ea826a
d37e507
 
 
 
0ea826a
 
 
 
 
 
d37e507
 
 
3ef4133
 
 
 
 
 
 
 
 
d37e507
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import gradio as gr
import requests
from io import BytesIO
import pypdf
import os

# **IMPORTANT:** Set your Hugging Face Space URL here or through the SPACE_URL
# environment variable. This is the base URL of your Milvus Server Space.
# A trailing slash is stripped so the endpoint URLs built below never contain
# a double slash (e.g. "https://host//rag").
space_url = os.environ.get("SPACE_URL", "https://ruslanmv-milvus-server.hf.space").rstrip("/")
rag_url = space_url + "/rag"  # RAG query endpoint
insert_url = space_url + "/insert"  # PDF insertion endpoint

# Extract the text of a PDF that is supplied as raw bytes
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_file: The PDF document as a bytes-like object; it is wrapped in
            an in-memory stream before being handed to pypdf.

    Returns:
        One string made of each page's ``extract_text()`` result, joined
        with no separator between pages.
    """
    document = pypdf.PdfReader(BytesIO(pdf_file))
    return "".join(page.extract_text() for page in document.pages)

# Function to handle PDF upload and insertion into Milvus
def upload_and_index_pdf(pdf_file, server_url):
    """Upload a PDF to the server's ``/insert`` endpoint and report the outcome.

    Args:
        pdf_file: Either a path to the PDF (``str``) or an already-open binary
            file-like object exposing a ``name`` attribute.
        server_url: Base URL of the server; ``/insert`` is appended to it.
            When empty or ``None`` the module-level ``insert_url`` is used.

    Returns:
        A human-readable status string. Failures are reported through the
        returned message rather than raised, so the value can be shown
        directly in the UI.
    """
    # Nothing selected in the UI: give a clear hint instead of an
    # AttributeError message about NoneType.
    if pdf_file is None or pdf_file == "":
        return "Please upload a PDF file first."

    # Honour the URL supplied by the caller; fall back to the configured default.
    base_url = (server_url or "").strip().rstrip("/")
    target_url = f"{base_url}/insert" if base_url else insert_url

    try:
        if isinstance(pdf_file, str):
            # Open the path in a context manager so the handle is always
            # closed, and send only the base name so the local directory
            # layout is not leaked to the server.
            with open(pdf_file, "rb") as pdf_handle:
                files = {"file": (os.path.basename(pdf_file), pdf_handle, "application/pdf")}
                response = requests.post(target_url, files=files, timeout=600)
        else:
            # The caller owns an already-open file object; it is not closed here.
            files = {"file": (pdf_file.name, pdf_file, "application/pdf")}
            response = requests.post(target_url, files=files, timeout=600)

        response.raise_for_status()
        return "PDF uploaded and indexed successfully!"
    except requests.exceptions.RequestException as e:
        return f"Error during PDF upload: {e}"
    except Exception as e:
        # UI boundary: surface any other failure (e.g. unreadable file) as text.
        return f"An unexpected error occurred: {e}"

# Function to perform RAG query
def perform_rag_query(question, server_url):
    """Send a question to the server's ``/rag`` endpoint and return the answer text.

    Args:
        question: The question to ask, as a string.
        server_url: Base URL of the server; ``/rag`` is appended to it.
            When empty or ``None`` the module-level ``rag_url`` is used.

    Returns:
        The ``"result"`` field of the JSON response as display text: a string
        is returned unchanged, any other iterable has its items converted to
        ``str`` and joined with newlines. Failures are reported through the
        returned message rather than raised.
    """
    # Do not spend a (potentially slow) request on a blank question.
    if not question or not question.strip():
        return "Please enter a question."

    # Honour the URL supplied by the caller; fall back to the configured default.
    base_url = (server_url or "").strip().rstrip("/")
    target_url = f"{base_url}/rag" if base_url else rag_url

    try:
        response = requests.post(target_url, json={"question": question}, timeout=300)
        response.raise_for_status()
        # A missing or null "result" is treated as "no results".
        results = response.json().get("result") or []
        if isinstance(results, str):
            # Joining a plain string would put a newline between every character.
            return results
        return "\n".join(str(item) for item in results)
    except requests.exceptions.RequestException as e:
        return f"Error during RAG query: {e}"
    except Exception as e:
        # UI boundary: surface any other failure (e.g. malformed JSON) as text.
        return f"An unexpected error occurred: {e}"

# Sample prompts shown under "Example Questions" in the UI
example_questions = [
    "What are the enabling technologies for GPT?",
    "Explain the potential applications of GPT.",
    "What are some emerging challenges with GPT technology?",
    "Describe the future directions for GPT research.",
]

# Gradio interface setup
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # Milvus PDF Search Client
        Upload a PDF to index it in Milvus, then ask questions about its content.
        """
    )

    # Row 1: PDF upload controls on the left, upload status on the right.
    with gr.Row():
        with gr.Column():
            # type="filepath" makes Gradio pass the uploaded file to the
            # callback as a path string.
            pdf_input = gr.File(label="Upload PDF", type="filepath")
            server_url_input = gr.Textbox(
                label="Milvus Server URL",
                value=space_url,
                placeholder="Enter your Milvus Server URL"
            )
            upload_button = gr.Button("Upload and Index PDF")
        with gr.Column():
            upload_output = gr.Textbox(label="Upload Status")

    # Row 2: question entry and examples on the left, answer on the right.
    with gr.Row():
        with gr.Column():
            question_input = gr.Textbox(label="Ask a question about the PDF")
            query_button = gr.Button("Ask")
            # Example questions
            gr.Examples(
                examples=example_questions,
                inputs=question_input,
                label="Example Questions",
            )
        with gr.Column():
            answer_output = gr.Textbox(label="Answer")

    # Load and index the default PDF on startup (if it exists).
    # NOTE: this runs synchronously while the UI is being built, so the app
    # is not launched until the upload has finished or failed.
    if os.path.exists("transformers.pdf"):
        print("transformers.pdf exists")
        # upload_and_index_pdf reports failures through its return value, so
        # inspect it instead of unconditionally claiming success.
        startup_status = upload_and_index_pdf("transformers.pdf", space_url)
        if startup_status == "PDF uploaded and indexed successfully!":
            upload_output.value = "Default PDF (transformers.pdf) indexed on startup!"  # Update status
        else:
            print(startup_status)
            upload_output.value = f"Default PDF (transformers.pdf) was not indexed: {startup_status}"
    else:
        print("transformers.pdf does not exist")

    # Wire the buttons to their handlers.
    upload_button.click(
        fn=upload_and_index_pdf,
        inputs=[pdf_input, server_url_input],
        outputs=upload_output,
    )
    query_button.click(
        fn=perform_rag_query,
        inputs=[question_input, server_url_input],
        outputs=answer_output,
    )

demo.launch()