# anasmkh's picture
# Update app.py
# 769d8f2 verified
# raw
# history blame
# 6.28 kB
import os
from getpass import getpass
# OpenAI credential taken from the environment; this is None when the
# OPENAI_API_KEY variable is not set. (The former `x = x` self-assignment
# that followed this line had no effect and has been dropped.)
openai_api_key = os.getenv('OPENAI_API_KEY')
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import Settings
# Process-wide LlamaIndex defaults: the chat model used to generate answers
# and the embedding model used to vectorise documents and queries.
Settings.llm = OpenAI(
    temperature=0.4,
    model="gpt-3.5-turbo",
)
Settings.embed_model = OpenAIEmbedding(
    model="text-embedding-ada-002",
)
from llama_index.core import SimpleDirectoryReader
# Seed corpus: read every file found in the local "new_file" folder, which is
# the same folder that uploaded files are written to.
documents = SimpleDirectoryReader(input_dir="new_file").load_data()
from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.vector_stores.qdrant import QdrantVectorStore
import qdrant_client
# Qdrant client created with location=":memory:", so nothing is persisted
# between runs of this script.
client = qdrant_client.QdrantClient(location=":memory:")

# Vector store for the "paper" collection with hybrid search switched on.
vector_store = QdrantVectorStore(
    client=client,
    collection_name="paper",
    batch_size=20,
    enable_hybrid=True,
)

# Build the index over the seed documents, storing vectors in Qdrant.
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)

# One-shot query interface in hybrid mode.
# NOTE(review): not referenced by the rest of the visible code, which goes
# through the chat engine instead - confirm before removing.
query_engine = index.as_query_engine(vector_store_query_mode="hybrid")
from llama_index.core.memory import ChatMemoryBuffer
# Conversation memory created with a 3000-token limit.
memory = ChatMemoryBuffer.from_defaults(token_limit=3000)

# Context-mode chat engine over the index, sharing the memory above.
chat_engine = index.as_chat_engine(
    system_prompt=(
        "You are an AI assistant who answers the user questions,\n"
        "use the schema fields to generate appropriate and valid json queries"
    ),
    memory=memory,
    chat_mode="context",
)
import gradio as gr
def chat_with_ai(user_input, chat_history):
    """Send one user message to the chat engine and record the exchange.

    Args:
        user_input: Text typed by the user.
        chat_history: List of ``(user_message, bot_reply)`` tuples. It is
            appended to in place and also returned.

    Returns:
        A ``(chat_history, "")`` pair: the updated history for the chatbot
        widget and an empty string that clears the input textbox.
    """
    # Nothing to ask: skip the model call rather than send a blank prompt.
    if user_input is None or not str(user_input).strip():
        return chat_history, ""

    response = chat_engine.chat(user_input)

    # True when at least one retrieved node names the file it came from.
    # Nodes without a "file_name" metadata entry are tolerated instead of
    # raising KeyError.
    has_source_file = any(
        "file_name" in node.metadata for node in (response.source_nodes or [])
    )

    answer = str(response)
    if has_source_file:
        # Preserved from the previous implementation: replies backed by a
        # source file end with a blank line.
        answer += "\n\n"

    chat_history.append((user_input, answer))
    return chat_history, ""
def clear_history():
    """Forget the conversation and blank the chat widgets.

    Resets the chat engine's conversation state so earlier turns no longer
    influence new answers, which is what the "Delete Context" button
    promises. Previously only the on-screen widgets were cleared.

    Returns:
        A ``([], "")`` pair: an empty message list for the chatbot widget and
        an empty string for the input textbox.
    """
    chat_engine.reset()
    return [], ""
import os
import PyPDF2
import docx
import pandas as pd
def extract_text_from_file(file_path):
    """Return the textual content of ``file_path``, chosen by file extension.

    Handled extensions (case-insensitive): ``.pdf``, ``.doc``/``.docx``,
    ``.txt`` (read as UTF-8) and ``.xls``/``.xlsx`` (rendered as CSV).

    Failures are not raised. The returned string is then an
    ``"Error processing ..."`` message, or a fixed notice when the extension
    is not one of the above.
    """
    suffix = os.path.splitext(file_path)[1].lower()

    if suffix == ".pdf":
        try:
            with open(file_path, "rb") as pdf_handle:
                page_texts = (
                    page.extract_text()
                    for page in PyPDF2.PdfReader(pdf_handle).pages
                )
                # Pages that yield no text are skipped; every kept page is
                # terminated by a newline.
                return "".join(chunk + "\n" for chunk in page_texts if chunk)
        except Exception as e:
            return f"Error processing PDF: {e}"

    if suffix in (".doc", ".docx"):
        try:
            paragraphs = docx.Document(file_path).paragraphs
            return "\n".join(paragraph.text for paragraph in paragraphs)
        except Exception as e:
            return f"Error processing Word document: {e}"

    if suffix == ".txt":
        try:
            with open(file_path, "r", encoding="utf-8") as txt_handle:
                return txt_handle.read()
        except Exception as e:
            return f"Error processing TXT file: {e}"

    if suffix in (".xls", ".xlsx"):
        try:
            # read_excel is called with defaults; the frame is emitted as CSV
            # without the index column.
            return pd.read_excel(file_path).to_csv(index=False)
        except Exception as e:
            return f"Error processing Excel file: {e}"

    return "Unsupported file type for text extraction."
def _read_uploaded_payload(file):
    """Return ``(source_name, data)`` for one upload, or None if unrecognised.

    Accepted shapes: a dict with ``"name"``/``"data"`` entries, a filesystem
    path, or an object exposing ``.name`` (and optionally ``.read()``).
    ``OSError`` from reading the source is left to the caller.
    """
    if isinstance(file, dict):
        return file.get("name", "uploaded_file"), file.get("data")

    if isinstance(file, (str, os.PathLike)):
        source_path = os.fsdecode(file)
        with open(source_path, "rb") as handle:
            return source_path, handle.read()

    source_name = getattr(file, "name", None)
    if source_name is None:
        return None
    # Prefer the on-disk copy named by ``.name``: it yields the whole file
    # regardless of where the object's own read position currently is.
    if isinstance(source_name, (str, os.PathLike)) and os.path.isfile(source_name):
        with open(source_name, "rb") as handle:
            return source_name, handle.read()
    if hasattr(file, "read"):
        return source_name, file.read()
    return None


def upload_file(file):
    """Save an uploaded file into ``new_file/`` and return a status message.

    Args:
        file: The uploaded item: a path, a file-like object with ``.name``,
            a ``{"name": ..., "data": ...}`` dict, or a list of those (only
            the first element is used). ``None`` or an empty list means
            nothing was uploaded.

    Returns:
        A human-readable string: either an error description or a success
        message followed by a preview (at most 200 characters) of the text
        extracted from the saved file.

    NOTE(review): the file is only written to disk and previewed here; this
    function does not add it to the index the chat engine was built from.
    """
    if isinstance(file, list):
        file = file[0] if file else None
    if file is None:
        return "No file uploaded!"

    try:
        payload = _read_uploaded_payload(file)
    except OSError as e:
        return f"Error reading uploaded file: {e}"
    if payload is None:
        return "Uploaded file format not recognized."

    source_name, file_data = payload
    if file_data is None:
        return "Uploaded file data not found!"

    # Keep only the final path component. An absolute source path would
    # otherwise make os.path.join discard "new_file" and overwrite the source,
    # and "../" segments could escape the folder.
    file_name = os.path.basename(str(source_name).replace("\\", "/")) or "uploaded_file"
    file_path = os.path.join("new_file", file_name)

    try:
        os.makedirs("new_file", exist_ok=True)
        with open(file_path, "wb") as f:
            f.write(file_data)
    except Exception as e:
        return f"Error saving file: {e}"

    extracted_text = extract_text_from_file(file_path)
    preview = extracted_text[:200] + "..." if len(extracted_text) > 200 else extracted_text
    return f"File {file_name} uploaded and processed successfully!\nExtracted text preview:\n{preview}"
def gradio_chatbot():
    """Build the Gradio Blocks UI for chatting and uploading files.

    Layout: a chatbot pane, a question textbox with a "Send" button, a
    "Delete Context" button, and a file picker with its own upload button and
    read-only status box.

    Returns:
        The assembled ``gr.Blocks`` app (not launched).
    """
    with gr.Blocks() as demo:
        gr.Markdown("# Chat Interface for LlamaIndex")
        chatbot = gr.Chatbot(label="LlamaIndex Chatbot")
        user_input = gr.Textbox(
            placeholder="Ask a question...", label="Enter your question"
        )
        submit_button = gr.Button("Send")
        btn_clear = gr.Button("Delete Context")

        # File upload controls.
        file_upload = gr.File(label="Upload a file")
        upload_button = gr.Button("Upload File")
        # Dedicated status box: the upload result used to be written into the
        # question textbox, where it could be sent to the model by mistake.
        upload_status = gr.Textbox(label="Upload status", interactive=False)

        # Per-session list of (question, answer) pairs shown in the chatbot.
        chat_history = gr.State([])

        upload_button.click(upload_file, inputs=file_upload, outputs=upload_status)

        # Both the button and pressing Enter submit the question.
        submit_button.click(chat_with_ai, inputs=[user_input, chat_history], outputs=[chatbot, user_input])
        user_input.submit(chat_with_ai, inputs=[user_input, chat_history], outputs=[chatbot, user_input])

        btn_clear.click(fn=clear_history, outputs=[chatbot, user_input])
        # Also empty the stored history; otherwise the cleared messages are
        # shown again as soon as the next question is answered.
        btn_clear.click(fn=lambda: [], outputs=chat_history)
    return demo
# Build the interface and start serving it with debug output enabled.
demo = gradio_chatbot()
demo.launch(debug=True)