# Source: Hugging Face Space by anasmkh — app.py (commit 7600854, 6.94 kB).
# (Original file-viewer header converted to a comment so the module parses.)
import os
from getpass import getpass

# Read the OpenAI key from the environment; the llama-index OpenAI classes
# pick OPENAI_API_KEY up from the environment themselves, this name is kept
# for visibility/debugging. (Removed a redundant self-assignment here.)
openai_api_key = os.getenv('OPENAI_API_KEY')

from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import Settings

# Global LLM / embedding configuration used by every index built below.
Settings.llm = OpenAI(model="gpt-3.5-turbo", temperature=0.4)
Settings.embed_model = OpenAIEmbedding(model="text-embedding-ada-002")

from llama_index.core import SimpleDirectoryReader

# Load initial documents from "new_file" — the same directory that
# upload_file() saves user uploads into.
documents = SimpleDirectoryReader("new_file").load_data()

from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.vector_stores.qdrant import QdrantVectorStore
import qdrant_client

# In-memory Qdrant instance: nothing is persisted across restarts.
client = qdrant_client.QdrantClient(
    location=":memory:",
)
vector_store = QdrantVectorStore(
    collection_name="paper",
    client=client,
    enable_hybrid=True,  # dense + sparse hybrid retrieval
    batch_size=20,
)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
)
query_engine = index.as_query_engine(
    vector_store_query_mode="hybrid"
)

from llama_index.core.memory import ChatMemoryBuffer

# Conversation memory for the chat engine, capped at ~3000 tokens of history.
memory = ChatMemoryBuffer.from_defaults(token_limit=3000)
chat_engine = index.as_chat_engine(
    chat_mode="context",
    memory=memory,
    system_prompt=(
        """You are an AI assistant who answers the user questions,
use the schema fields to generate appropriate and valid json queries"""
    ),
)
import gradio as gr
def chat_with_ai(user_input, chat_history):
    """Send *user_input* to the chat engine and append the turn to history.

    Returns ``(chat_history, "")`` — the empty string clears the Gradio
    textbox that triggered the call.
    """
    response = chat_engine.chat(user_input)

    # Collect the distinct source file names backing the answer, keeping
    # first-seen order. (Removed a dead, always-empty `pages` list and a
    # pointless post-append reset that the original carried.)
    sources = []
    for node in response.source_nodes:
        file_name = node.metadata['file_name']
        if file_name not in sources:
            sources.append(file_name)

    if sources:
        # Trailing blank lines leave room under the answer in the chat widget.
        chat_history.append((user_input, str(response) + "\n\n"))
    else:
        chat_history.append((user_input, str(response)))
    return chat_history, ""
def clear_history():
    """Reset the chat UI: an empty chatbot history and a blank input box."""
    empty_history, empty_textbox = [], ""
    return empty_history, empty_textbox
import os
import PyPDF2
import docx
import pandas as pd
def _extract_pdf(file_path):
    # Concatenate one chunk per page; pages with no extractable text are skipped.
    try:
        with open(file_path, "rb") as handle:
            reader = PyPDF2.PdfReader(handle)
            collected = ""
            for page in reader.pages:
                chunk = page.extract_text()
                if chunk:
                    collected += chunk + "\n"
            return collected
    except Exception as e:
        return f"Error processing PDF: {e}"


def _extract_word(file_path):
    # One line per paragraph, joined with newlines.
    try:
        document = docx.Document(file_path)
        return "\n".join(para.text for para in document.paragraphs)
    except Exception as e:
        return f"Error processing Word document: {e}"


def _extract_txt(file_path):
    # Plain UTF-8 read.
    try:
        with open(file_path, "r", encoding="utf-8") as handle:
            return handle.read()
    except Exception as e:
        return f"Error processing TXT file: {e}"


def _extract_excel(file_path):
    # First sheet only, rendered as CSV text.
    try:
        return pd.read_excel(file_path).to_csv(index=False)
    except Exception as e:
        return f"Error processing Excel file: {e}"


# Extension -> extractor dispatch table.
_TEXT_EXTRACTORS = {
    ".pdf": _extract_pdf,
    ".doc": _extract_word,
    ".docx": _extract_word,
    ".txt": _extract_txt,
    ".xls": _extract_excel,
    ".xlsx": _extract_excel,
}


def extract_text_from_file(file_path):
    """
    Extracts text from the file based on its extension.
    Supports: PDF, DOC/DOCX, TXT, XLS/XLSX.

    On any per-format failure, returns an error-description string instead
    of raising, matching the UI's "show problems as text" design.
    """
    extension = os.path.splitext(file_path)[1].lower()
    extractor = _TEXT_EXTRACTORS.get(extension)
    if extractor is None:
        return "Unsupported file type for text extraction."
    return extractor(file_path)
def upload_file(file):
    """
    Handles file upload from Gradio.

    Accepts a file-like object, a dict with "name"/"data" keys, a path
    string, or a list of any of those (only the first element is used).
    Saves the payload into the "new_file" directory and extracts its text.

    Returns a human-readable status string: a success message with a text
    preview, or an error description.
    """
    # Check if a file was uploaded
    if file is None:
        return "No file uploaded!"

    # If file is a list (multiple files), take the first one
    if isinstance(file, list):
        file = file[0]

    # Normalize the supported shapes into (file_name, file_data).
    if hasattr(file, 'read'):
        # file is a file-like object
        file_data = file.read()
        file_name = getattr(file, 'name', "uploaded_file")
    elif isinstance(file, dict):
        # file is a dictionary with "name" and "data" keys
        file_name = file.get("name", "uploaded_file")
        file_data = file.get("data")
    elif isinstance(file, str):
        # file is a string (e.g., a NamedString representing a file path)
        file_name = file
        try:
            with open(file, "rb") as f:
                file_data = f.read()
        except Exception as e:
            return f"Error reading file from path: {e}"
    else:
        return "Uploaded file format not recognized."

    # Validate that file_data is available
    if file_data is None:
        return "Uploaded file data not found!"

    # Gradio file objects often expose a full temp path as .name; the original
    # only applied basename on the string branch, so the other branches could
    # fail to save or write outside "new_file". Strip directories uniformly.
    file_name = os.path.basename(file_name)

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs("new_file", exist_ok=True)

    # Save the file to the "new_file" directory
    file_path = os.path.join("new_file", file_name)
    try:
        with open(file_path, "wb") as f:
            f.write(file_data)
    except Exception as e:
        return f"Error saving file: {e}"

    # Extract text from the file and build a short preview for the UI.
    extracted_text = extract_text_from_file(file_path)
    preview = extracted_text[:200] + "..." if len(extracted_text) > 200 else extracted_text
    return f"File {file_name} uploaded and processed successfully!\nExtracted text preview:\n{preview}"
def gradio_chatbot():
    """Build and return the Gradio Blocks UI wired to the chat functions."""
    with gr.Blocks() as demo:
        gr.Markdown("# Chat Interface for LlamaIndex")

        chat_display = gr.Chatbot(label="LlamaIndex Chatbot")
        question_box = gr.Textbox(
            placeholder="Ask a question...", label="Enter your question"
        )
        send_btn = gr.Button("Send")
        clear_btn = gr.Button("Delete Context")

        # File upload widgets; the upload status string is written into the
        # question textbox.
        upload_widget = gr.File(label="Upload a file")
        upload_btn = gr.Button("Upload File")

        # Per-session list of (question, answer) tuples.
        session_history = gr.State([])

        # Wire the file upload action.
        upload_btn.click(upload_file, inputs=upload_widget, outputs=question_box)

        # Both the Send button and pressing Enter submit the question.
        chat_inputs = [question_box, session_history]
        chat_outputs = [chat_display, question_box]
        send_btn.click(chat_with_ai, inputs=chat_inputs, outputs=chat_outputs)
        question_box.submit(chat_with_ai, inputs=chat_inputs, outputs=chat_outputs)

        clear_btn.click(fn=clear_history, outputs=chat_outputs)
    return demo
# Launch only when executed as a script; importing this module for its
# functions no longer starts the web server as a side effect.
if __name__ == "__main__":
    gradio_chatbot().launch(debug=True)