# Gradio chatbot over a LlamaIndex + Qdrant hybrid-search index.
# Loads documents from the "new_file" directory, indexes them into an
# in-memory Qdrant collection, and serves a chat UI with file upload.
import os
from getpass import getpass

# Resolve the OpenAI API key: prefer the environment, fall back to an
# interactive prompt so the script still works in a fresh shell.
# (llama_index's OpenAI clients read OPENAI_API_KEY from the environment.)
openai_api_key = os.getenv('OPENAI_API_KEY')
if not openai_api_key:
    openai_api_key = getpass("Enter your OpenAI API key: ")
    os.environ['OPENAI_API_KEY'] = openai_api_key

from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import Settings

# Global model configuration used by every index/engine built below.
Settings.llm = OpenAI(model="gpt-3.5-turbo", temperature=0.4)
Settings.embed_model = OpenAIEmbedding(model="text-embedding-ada-002")
from llama_index.core import SimpleDirectoryReader

# Load initial documents from the local "new_file" directory (the same
# directory upload_file() below saves uploads into).
documents = SimpleDirectoryReader("new_file").load_data()

from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.vector_stores.qdrant import QdrantVectorStore
import qdrant_client

# In-memory Qdrant instance: the collection lives only for this process.
client = qdrant_client.QdrantClient(
    location=":memory:",
)

# Hybrid (dense + sparse) search over the "paper" collection; points are
# upserted in batches of 20.
vector_store = QdrantVectorStore(
    collection_name="paper",
    client=client,
    enable_hybrid=True,
    batch_size=20,
)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Embed the documents and persist them into the Qdrant-backed store.
index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
)

# NOTE(review): query_engine is never used later in this file (the chat
# engine is used instead) — kept as-is for parity with the original script.
query_engine = index.as_query_engine(
    vector_store_query_mode="hybrid"
)
from llama_index.core.memory import ChatMemoryBuffer

# Rolling conversation memory; turns beyond ~3000 tokens are evicted.
memory = ChatMemoryBuffer.from_defaults(token_limit=3000)

# "context" chat mode: on every turn, relevant chunks are retrieved from
# the index and injected into the prompt alongside the chat memory.
chat_engine = index.as_chat_engine(
    chat_mode="context",
    memory=memory,
    system_prompt=(
        """You are an AI assistant who answers the user questions,
use the schema fields to generate appropriate and valid json queries"""
    ),
)
import gradio as gr
def chat_with_ai(user_input, chat_history):
    """Send *user_input* to the chat engine and append the turn to history.

    Returns the updated ``chat_history`` plus an empty string, which the
    Gradio wiring uses to clear the input textbox.
    """
    response = chat_engine.chat(user_input)

    # Collect the distinct source file names backing the answer, in order.
    # (The original code collected these but never displayed them, and had
    # a dead `pages` list plus an if/elif pair that was always satisfied.)
    sources = []
    for node in response.source_nodes:
        name = node.metadata.get('file_name')
        if name and name not in sources:
            sources.append(name)

    answer = str(response)
    if sources:
        # Surface the references so the user can see where the answer came from.
        answer += "\n\nReferences: " + ", ".join(sources)

    chat_history.append((user_input, answer))
    return chat_history, ""
def clear_history():
    """Reset the Gradio chat UI: empty history and a cleared input box."""
    empty_history, empty_input = [], ""
    return empty_history, empty_input
import os
import PyPDF2
import docx
import pandas as pd
def extract_text_from_file(file_path):
    """
    Return the textual content of *file_path*, dispatching on extension.

    Supported: .pdf, .doc/.docx, .txt, .xls/.xlsx; any other extension
    yields an "unsupported" message. Per-format failures are reported in
    the returned string rather than raised.
    """
    extension = os.path.splitext(file_path)[1].lower()

    if extension == ".pdf":
        try:
            with open(file_path, "rb") as handle:
                reader = PyPDF2.PdfReader(handle)
                # Extract while the file is still open; pages may read lazily.
                pieces = [page.extract_text() for page in reader.pages]
            return "".join(piece + "\n" for piece in pieces if piece)
        except Exception as e:
            return f"Error processing PDF: {e}"

    if extension in (".doc", ".docx"):
        try:
            paragraphs = docx.Document(file_path).paragraphs
            return "\n".join(para.text for para in paragraphs)
        except Exception as e:
            return f"Error processing Word document: {e}"

    if extension == ".txt":
        try:
            with open(file_path, "r", encoding="utf-8") as handle:
                return handle.read()
        except Exception as e:
            return f"Error processing TXT file: {e}"

    if extension in (".xls", ".xlsx"):
        try:
            # First sheet only, serialized back out as CSV text.
            return pd.read_excel(file_path).to_csv(index=False)
        except Exception as e:
            return f"Error processing Excel file: {e}"

    return "Unsupported file type for text extraction."
def upload_file(file):
    """
    Handle a Gradio file upload: persist it under ``new_file/`` and return
    a status message including a short preview of the extracted text.

    Accepts a file-like object, a dict with "name"/"data" keys, a path
    string, or a list of any of those (only the first entry is used).
    """
    if file is None:
        return "No file uploaded!"

    # Gradio may hand over a list when multiple files are selected.
    if isinstance(file, list):
        file = file[0]

    # Normalize the supported shapes into (file_name, file_data).
    if hasattr(file, 'read'):
        # File-like object.
        file_data = file.read()
        file_name = getattr(file, 'name', "uploaded_file")
    elif isinstance(file, dict):
        # Dict payload with "name" and "data" keys.
        file_name = file.get("name", "uploaded_file")
        file_data = file.get("data")
    elif isinstance(file, str):
        # Path string (e.g. a Gradio NamedString temp-file path).
        file_name = file
        try:
            with open(file, "rb") as f:
                file_data = f.read()
        except Exception as e:
            return f"Error reading file from path: {e}"
    else:
        return "Uploaded file format not recognized."

    if file_data is None:
        return "Uploaded file data not found!"

    # Gradio file-like objects often carry a full temp path in .name, so
    # always reduce to the bare file name before joining — otherwise the
    # write could land outside "new_file".
    file_name = os.path.basename(file_name)

    # Ensure the target directory exists (race-free, unlike exists+makedirs).
    os.makedirs("new_file", exist_ok=True)
    file_path = os.path.join("new_file", file_name)
    try:
        with open(file_path, "wb") as f:
            f.write(file_data)
    except Exception as e:
        return f"Error saving file: {e}"

    # Extract text for the preview shown back to the user.
    extracted_text = extract_text_from_file(file_path)
    preview = extracted_text[:200] + "..." if len(extracted_text) > 200 else extracted_text
    return f"File {file_name} uploaded and processed successfully!\nExtracted text preview:\n{preview}"
def gradio_chatbot():
    """Build and return the Gradio Blocks UI for the chatbot."""
    with gr.Blocks() as demo:
        gr.Markdown("# Chat Interface for LlamaIndex")

        # Widgets are created in display order.
        chatbot = gr.Chatbot(label="LlamaIndex Chatbot")
        question_box = gr.Textbox(
            placeholder="Ask a question...", label="Enter your question"
        )
        send_btn = gr.Button("Send")
        clear_btn = gr.Button("Delete Context")
        file_picker = gr.File(label="Upload a file")
        upload_btn = gr.Button("Upload File")
        history_state = gr.State([])

        # Upload writes its status message into the question textbox.
        upload_btn.click(upload_file, inputs=file_picker, outputs=question_box)
        # Sending — via button or Enter — routes through the chat engine.
        send_btn.click(chat_with_ai, inputs=[question_box, history_state], outputs=[chatbot, question_box])
        question_box.submit(chat_with_ai, inputs=[question_box, history_state], outputs=[chatbot, question_box])
        # Clearing wipes both the visible chat and the input box.
        clear_btn.click(fn=clear_history, outputs=[chatbot, question_box])
    return demo
# Guarded entry point so importing this module doesn't start the server.
# (Also removes a stray trailing "|" that made the original line a syntax error.)
if __name__ == "__main__":
    gradio_chatbot().launch(debug=True)