import os
from dotenv import load_dotenv
import gradio as gr
from llama_index.core import StorageContext, load_index_from_storage, VectorStoreIndex, SimpleDirectoryReader, ChatPromptTemplate, Settings
from llama_index.llms.huggingface import HuggingFaceInferenceAPI
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
load_dotenv()
# Configure the global LlamaIndex settings: an LLM served via the
# Hugging Face Inference API and a local embedding model
Settings.llm = HuggingFaceInferenceAPI(
    model_name="google/gemma-1.1-7b-it",
    tokenizer_name="google/gemma-1.1-7b-it",
    context_window=3000,
    token=os.getenv("HF_TOKEN"),
    max_new_tokens=512,
    generate_kwargs={"temperature": 0.1},
)
Settings.embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5"
)
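# Note: HF_TOKEN is expected in the environment (e.g. via the .env file read
# by load_dotenv above); without it, Inference API calls to this gated Gemma
# model will be rejected.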
# Directories for the persisted index and the source documents
PERSIST_DIR = "db"
PDF_DIRECTORY = "data"  # Directory containing the PDF files to index
# Ensure both directories exist
os.makedirs(PDF_DIRECTORY, exist_ok=True)
os.makedirs(PERSIST_DIR, exist_ok=True)
def data_ingestion_from_directory():
    # Read every document in the PDF directory, embed it into a vector
    # index, and persist the index so handle_query can reload it later
    documents = SimpleDirectoryReader(PDF_DIRECTORY).load_data()
    index = VectorStoreIndex.from_documents(documents)
    index.storage_context.persist(persist_dir=PERSIST_DIR)
def handle_query(query, history):
    # The query engine fills {context_str} with the retrieved document
    # chunks and {query_str} with the user's question
    chat_text_qa_msgs = [
        (
            "user",
            """
            You are a RedfernsTech chatbot whose aim is to provide better service to users,
            using the provided context to deliver answers. Before answering, collect some
            basic information from the user, such as their name, email, and company name.
            {context_str}
            Question:
            {query_str}
            """,
        )
    ]
    text_qa_template = ChatPromptTemplate.from_messages(chat_text_qa_msgs)
    # Load the persisted index from storage and query it
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage_context)
    query_engine = index.as_query_engine(text_qa_template=text_qa_template)
    answer = query_engine.query(query)
    if hasattr(answer, "response"):
        response = answer.response
    elif isinstance(answer, dict) and "response" in answer:
        response = answer["response"]
    else:
        response = "Sorry, I couldn't find an answer."
    # gr.ChatInterface manages the chat history itself, so only the
    # response string is returned
    return response
# Ingest the PDFs and build the index at startup
print("Processing PDF ingestion from directory:", PDF_DIRECTORY)
data_ingestion_from_directory()
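# Note: this re-embeds the PDFs on every startup. A small guard such as
#   if not os.listdir(PERSIST_DIR):
#       data_ingestion_from_directory()
# (an assumption, not part of the original script) would reuse the
# persisted index instead of rebuilding it each time.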
# Example query
query = "How do I use the RedfernsTech Q&A assistant?"
print("Query:", query)
response = handle_query(query, [])
print("Answer:", response)
# Create the Gradio chatbot interface; ChatInterface passes
# (message, history) to fn and keeps the history itself
chatbot = gr.ChatInterface(
    fn=handle_query,
    title="RedfernsTech Q&A Chatbot",
    description="Ask me anything about the uploaded documents.",
)
chatbot.launch()
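# To run locally (assuming this file is saved as app.py):
#   python app.py
# Gradio serves the interface at http://127.0.0.1:7860 by default.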