|
import os |
|
from getpass import getpass |
|
|
|
# Read the OpenAI credential from the environment; the value is None when
# OPENAI_API_KEY is unset. (The former `openai_api_key = openai_api_key`
# self-assignment did nothing and has been dropped.)
openai_api_key = os.getenv("OPENAI_API_KEY")
|
|
|
from llama_index.llms.openai import OpenAI |
|
from llama_index.embeddings.openai import OpenAIEmbedding |
|
from llama_index.core import Settings |
|
|
|
# Register the default chat model and embedding model on llama_index's
# global Settings object.
Settings.llm = OpenAI(
    model="gpt-3.5-turbo",
    temperature=0.4,
)
Settings.embed_model = OpenAIEmbedding(
    model="text-embedding-ada-002",
)
|
|
|
from llama_index.core import SimpleDirectoryReader |
|
|
|
|
|
# Load every readable file found in the local "new_file" directory.
# NOTE(review): upload_file() only creates "new_file" lazily; presumably this
# call fails at startup when the directory is missing or empty - confirm.
documents = SimpleDirectoryReader(input_dir="new_file").load_data()
|
|
|
from llama_index.core import VectorStoreIndex, StorageContext |
|
from llama_index.vector_stores.qdrant import QdrantVectorStore |
|
import qdrant_client |
|
|
|
# Qdrant client pointed at the special ":memory:" location (no server
# address is configured here).
client = qdrant_client.QdrantClient(location=":memory:")

# Vector store backed by the "paper" collection, with hybrid mode enabled
# and a batch size of 20.
vector_store = QdrantVectorStore(
    client=client,
    collection_name="paper",
    enable_hybrid=True,
    batch_size=20,
)

# Route index storage through the Qdrant-backed vector store.
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Build the index from the documents loaded above.
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)

# Stand-alone query engine that asks the vector store for "hybrid" queries.
query_engine = index.as_query_engine(vector_store_query_mode="hybrid")
|
|
|
from llama_index.core.memory import ChatMemoryBuffer |
|
|
|
# Conversation buffer configured with a 3000-token limit.
memory = ChatMemoryBuffer.from_defaults(token_limit=3000)

# Chat engine in "context" mode over the index, sharing the buffer above and
# primed with a fixed system prompt (two lines joined by a newline).
chat_engine = index.as_chat_engine(
    chat_mode="context",
    memory=memory,
    system_prompt=(
        "You are an AI assistant who answers the user questions,\n"
        "use the schema fields to generate appropriate and valid json queries"
    ),
)
|
|
|
import gradio as gr |
|
|
|
def chat_with_ai(user_input, chat_history):
    """Send one user message to the chat engine and record the exchange.

    Args:
        user_input: Text typed by the user.
        chat_history: List of ``(user_message, assistant_reply)`` tuples.
            It is extended in place; ``None`` is treated as an empty list.

    Returns:
        A ``(chat_history, "")`` tuple: the updated history and an empty
        string (wired to the input textbox so it is cleared).
    """
    if chat_history is None:
        chat_history = []

    # A blank message has nothing to answer; skip the engine call entirely.
    if not user_input or not user_input.strip():
        return chat_history, ""

    response = chat_engine.chat(user_input)

    # Distinct file names of the nodes the answer was grounded on. `.get`
    # is used because a node's metadata may lack "file_name"; the previous
    # direct indexing raised KeyError in that case.
    source_files = []
    for node in getattr(response, "source_nodes", None) or []:
        file_name = (node.metadata or {}).get("file_name")
        if file_name and file_name not in source_files:
            source_files.append(file_name)

    reply = str(response)
    if source_files:
        # The file names are not rendered; as before, a reply that has
        # sources only gets a trailing blank-line separator.
        reply += "\n\n"

    chat_history.append((user_input, reply))
    return chat_history, ""
|
|
|
def clear_history():
    """Forget the current conversation.

    Resets the chat engine's conversation state so earlier turns no longer
    influence new answers, then returns the values used to blank the UI.

    Returns:
        A ``([], "")`` tuple: an empty chat display and an empty input box.
    """
    # Previously only the widgets were cleared while the engine kept its
    # memory, so "Delete Context" did not actually delete any context.
    chat_engine.reset()
    return [], ""
|
|
|
import os |
|
import PyPDF2 |
|
import docx |
|
import pandas as pd |
|
|
|
def extract_text_from_file(file_path):
    """Return the text of *file_path*, choosing a reader by file extension.

    The extension is compared case-insensitively:

    * ``.pdf`` - text of every page that yields any, each followed by a
      newline (read with ``PyPDF2``).
    * ``.doc`` / ``.docx`` - paragraph texts joined with newlines (read with
      the ``docx`` module).
    * ``.txt`` - the file content decoded as UTF-8.
    * ``.xls`` / ``.xlsx`` - the sheet returned by ``pandas.read_excel``,
      rendered as CSV without the index column.

    Errors are never raised to the caller: a failing reader yields an
    ``"Error processing ..."`` message, and any other extension yields
    ``"Unsupported file type for text extraction."``.
    """
    suffix = os.path.splitext(file_path)[1].lower()

    if suffix == ".pdf":
        try:
            with open(file_path, "rb") as stream:
                reader = PyPDF2.PdfReader(stream)
                # Pages must be read while the stream is still open.
                page_texts = (page.extract_text() for page in reader.pages)
                return "".join(chunk + "\n" for chunk in page_texts if chunk)
        except Exception as exc:
            return f"Error processing PDF: {exc}"

    if suffix in (".doc", ".docx"):
        try:
            document = docx.Document(file_path)
            return "\n".join(paragraph.text for paragraph in document.paragraphs)
        except Exception as exc:
            return f"Error processing Word document: {exc}"

    if suffix == ".txt":
        try:
            with open(file_path, "r", encoding="utf-8") as stream:
                return stream.read()
        except Exception as exc:
            return f"Error processing TXT file: {exc}"

    if suffix in (".xls", ".xlsx"):
        try:
            return pd.read_excel(file_path).to_csv(index=False)
        except Exception as exc:
            return f"Error processing Excel file: {exc}"

    return "Unsupported file type for text extraction."
|
|
|
def upload_file(file):
    """Save an uploaded file under ``new_file/`` and report what was read.

    Accepted shapes of *file*:

    * a path (``str`` or ``os.PathLike``) - the file at that path is copied;
    * a ``dict`` with ``"data"`` (bytes) and optionally ``"name"``;
    * a file-like object exposing ``.name`` and ``.read()``;
    * a list/tuple of the above, of which only the first item is used.

    Args:
        file: The uploaded file in one of the shapes listed above, or None.

    Returns:
        A human-readable status string. On success it contains the stored
        file name and a preview (at most 200 characters) of the text
        returned by ``extract_text_from_file``; otherwise it describes the
        problem. No exception is raised for the handled failure cases.
    """
    if isinstance(file, (list, tuple)):
        # Only the first upload is processed; an empty selection is "no file".
        file = file[0] if file else None

    if file is None:
        return "No file uploaded!"

    if isinstance(file, (str, os.PathLike)):
        # Path-style upload: there is no .read(), so load the bytes from disk.
        file_name = os.fspath(file)
        try:
            with open(file_name, "rb") as src:
                file_data = src.read()
        except OSError as e:
            return f"Error reading uploaded file: {e}"
    elif isinstance(file, dict):
        file_name = file.get("name", "uploaded_file")
        file_data = file.get("data")
    elif hasattr(file, "name") and hasattr(file, "read"):
        file_name = file.name
        file_data = file.read()
    else:
        return "Uploaded file format not recognized."

    if file_data is None:
        return "Uploaded file data not found!"

    # Keep only the final path component. Upload names can be absolute
    # temp paths, in which case os.path.join() would silently drop the
    # "new_file" prefix, and a crafted name could escape the directory.
    safe_name = os.path.basename(str(file_name))
    if safe_name in ("", ".", ".."):
        safe_name = "uploaded_file"

    file_path = os.path.join("new_file", safe_name)
    try:
        os.makedirs("new_file", exist_ok=True)
        with open(file_path, "wb") as f:
            f.write(file_data)
    except (OSError, TypeError) as e:
        # TypeError covers non-bytes payloads handed to a binary file.
        return f"Error saving file: {e}"

    # NOTE(review): the saved file is not inserted into the already-built
    # index here; presumably it is only picked up when "new_file" is re-read
    # at the next start - confirm whether that is intended.
    extracted_text = extract_text_from_file(file_path)

    if len(extracted_text) > 200:
        preview = extracted_text[:200] + "..."
    else:
        preview = extracted_text
    return (
        f"File {safe_name} uploaded and processed successfully!\n"
        f"Extracted text preview:\n{preview}"
    )
|
|
|
|
|
def gradio_chatbot():
    """Build the Gradio Blocks UI for chatting and uploading files.

    The layout contains a chatbot display, a question textbox, "Send" and
    "Delete Context" buttons, a file picker and an "Upload File" button.

    Returns:
        The assembled ``gr.Blocks`` app; the caller is expected to launch it.
    """
    with gr.Blocks() as demo:
        gr.Markdown("# Chat Interface for LlamaIndex")

        chatbot = gr.Chatbot(label="LlamaIndex Chatbot")
        user_input = gr.Textbox(
            placeholder="Ask a question...", label="Enter your question"
        )

        submit_button = gr.Button("Send")
        btn_clear = gr.Button("Delete Context")

        file_upload = gr.File(label="Upload a file")
        upload_button = gr.Button("Upload File")

        # List of (user, assistant) pairs handed to chat_with_ai on each turn.
        chat_history = gr.State([])

        def _clear_conversation():
            """Blank the widgets and also empty the stored history."""
            display, box_text = clear_history()
            # Without resetting the State, the old turns reappeared in the
            # chatbot as soon as the next message was sent.
            return display, box_text, []

        # NOTE(review): the upload status message is written into the
        # question textbox - confirm this is the intended target.
        upload_button.click(upload_file, inputs=file_upload, outputs=user_input)

        # Both the button and pressing Enter send the message.
        submit_button.click(
            chat_with_ai,
            inputs=[user_input, chat_history],
            outputs=[chatbot, user_input],
        )
        user_input.submit(
            chat_with_ai,
            inputs=[user_input, chat_history],
            outputs=[chatbot, user_input],
        )

        btn_clear.click(
            fn=_clear_conversation,
            outputs=[chatbot, user_input, chat_history],
        )

    return demo
|
|
|
# Build the UI and start serving it as soon as this module runs.
_app = gradio_chatbot()
_app.launch(debug=True)