import os
from getpass import getpass

# Read the API key from the environment, prompting for it if it is not set
openai_api_key = os.getenv('OPENAI_API_KEY') or getpass("Enter your OpenAI API key: ")
os.environ['OPENAI_API_KEY'] = openai_api_key

from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import Settings

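# Configure the global LLM and embedding model used by all LlamaIndex components below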
Settings.llm = OpenAI(model="gpt-3.5-turbo", temperature=0.4)
Settings.embed_model = OpenAIEmbedding(model="text-embedding-ada-002")

from llama_index.core import SimpleDirectoryReader

# Load the initial documents from the "new_file" directory (the same folder used for uploads below)
documents = SimpleDirectoryReader("new_file").load_data()

from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.vector_stores.qdrant import QdrantVectorStore
import qdrant_client

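# Start an in-memory Qdrant instance; vectors are not persisted across runs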
client = qdrant_client.QdrantClient(
    location=":memory:",
)

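# Hybrid mode combines dense vectors with sparse (keyword-style) vectors;
# note: enable_hybrid=True relies on the optional sparse-embedding dependency (fastembed) being installed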
vector_store = QdrantVectorStore(
    collection_name="paper",
    client=client,
    enable_hybrid=True,
    batch_size=20,
)

storage_context = StorageContext.from_defaults(vector_store=vector_store)

index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
)

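# Standalone query engine over the same index (the chat engine below is what the UI actually uses)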
query_engine = index.as_query_engine(
    vector_store_query_mode="hybrid"
)

from llama_index.core.memory import ChatMemoryBuffer

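# Keep roughly the last 3000 tokens of conversation as chat memory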
memory = ChatMemoryBuffer.from_defaults(token_limit=3000)

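# "context" chat mode retrieves relevant chunks from the index on every turn
# and passes them to the LLM together with the chat history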
chat_engine = index.as_chat_engine(
    chat_mode="context",
    memory=memory,
    system_prompt=(
        """You are an AI assistant who answers the user questions,
           use the schema fields to generate appropriate and valid json queries"""
    ),
)

import gradio as gr

def chat_with_ai(user_input, chat_history):
    """Send the user message to the chat engine and append the response
    (with source file references, when available) to the chat history."""
    response = chat_engine.chat(user_input)

    # Collect the unique source file names of the retrieved chunks
    ref = []
    for node in response.source_nodes:
        file_name = node.metadata.get('file_name')
        if file_name and file_name not in ref:
            ref.append(file_name)

    complete_response = str(response)
    if ref:
        complete_response += "\n\nSources: " + ", ".join(ref)
    chat_history.append((user_input, complete_response))

    return chat_history, ""

def clear_history():
    return [], ""

import os
import PyPDF2
import docx
import pandas as pd

def extract_text_from_file(file_path):
    """
    Extracts text from the file based on its extension.
    Supports: PDF, DOC/DOCX, TXT, XLS/XLSX.
    """
    ext = os.path.splitext(file_path)[1].lower()
    text = ""
    
    if ext == ".pdf":
        try:
            with open(file_path, "rb") as f:
                pdf_reader = PyPDF2.PdfReader(f)
                for page in pdf_reader.pages:
                    page_text = page.extract_text()
                    if page_text:
                        text += page_text + "\n"
        except Exception as e:
            text = f"Error processing PDF: {e}"
    
    elif ext in [".doc", ".docx"]:
        try:
            doc = docx.Document(file_path)
            text = "\n".join([para.text for para in doc.paragraphs])
        except Exception as e:
            text = f"Error processing Word document: {e}"
    
    elif ext == ".txt":
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                text = f.read()
        except Exception as e:
            text = f"Error processing TXT file: {e}"
    
    elif ext in [".xls", ".xlsx"]:
        try:
            # Read the first sheet of the Excel file
            df = pd.read_excel(file_path)
            # Convert the dataframe to CSV format (or any format you prefer)
            text = df.to_csv(index=False)
        except Exception as e:
            text = f"Error processing Excel file: {e}"
    
    else:
        text = "Unsupported file type for text extraction."
    
    return text

def upload_file(file):
    """
    Handles file upload from Gradio.
    Saves the file to the "new_file" directory and extracts text content based on file type.
    Supports file-like objects, dictionaries, or file paths.
    """
    # Check if a file was uploaded
    if file is None:
        return "No file uploaded!"
    
    # If file is a list (multiple files), take the first one
    if isinstance(file, list):
        file = file[0]
    
    # Initialize file_name and file_data based on the type of 'file'
    if hasattr(file, 'read'):
        # file is a file-like object
        file_data = file.read()
        file_name = getattr(file, 'name', "uploaded_file")
    elif isinstance(file, dict):
        # file is a dictionary with "name" and "data" keys
        file_name = file.get("name", "uploaded_file")
        file_data = file.get("data")
    elif isinstance(file, str):
        # file is a string (e.g., a NamedString representing a file path)
        file_name = os.path.basename(file)
        try:
            with open(file, "rb") as f:
                file_data = f.read()
        except Exception as e:
            return f"Error reading file from path: {e}"
    else:
        return "Uploaded file format not recognized."
    
    # Validate that file_data is available
    if file_data is None:
        return "Uploaded file data not found!"
    
    # Ensure the "new_file" directory exists
    if not os.path.exists("new_file"):
        os.makedirs("new_file")
    
    # Save the file to the "new_file" directory
    file_path = os.path.join("new_file", file_name)
    try:
        with open(file_path, "wb") as f:
            f.write(file_data)
    except Exception as e:
        return f"Error saving file: {e}"
    
    # Extract text from the file for further processing
    extracted_text = extract_text_from_file(file_path)
    
    # Create a preview of the extracted text
    preview = extracted_text[:200] + "..." if len(extracted_text) > 200 else extracted_text
    return f"File {file_name} uploaded and processed successfully!\nExtracted text preview:\n{preview}"



def gradio_chatbot():
    with gr.Blocks() as demo:
        gr.Markdown("# Chat Interface for LlamaIndex")

        chatbot = gr.Chatbot(label="LlamaIndex Chatbot")
        user_input = gr.Textbox(
            placeholder="Ask a question...", label="Enter your question"
        )

        submit_button = gr.Button("Send")
        btn_clear = gr.Button("Delete Context")

        # Add a file upload component
        file_upload = gr.File(label="Upload a file")

        # Add a button to handle file upload
        upload_button = gr.Button("Upload File")

        chat_history = gr.State([])

        # Define the file upload action
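        # Note: uploading only saves and previews the file; the vector index built at startup
        # is not refreshed with the newly uploaded document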
        upload_button.click(upload_file, inputs=file_upload, outputs=user_input)

        # Define the chat interaction
        submit_button.click(chat_with_ai, inputs=[user_input, chat_history], outputs=[chatbot, user_input])

        user_input.submit(chat_with_ai, inputs=[user_input, chat_history], outputs=[chatbot, user_input])
        btn_clear.click(fn=clear_history, outputs=[chatbot, user_input])

    return demo

gradio_chatbot().launch(debug=True)