Spaces:
Sleeping
Sleeping
File size: 4,531 Bytes
3430157 fada25c bd45407 fada25c bd45407 fada25c bd45407 fada25c 0af1213 fada25c 3430157 fada25c c005a2c fada25c c005a2c fada25c c005a2c fada25c ea19a9d 59f2473 ea19a9d fada25c d7e2267 c005a2c d7e2267 fada25c bd45407 fada25c bd45407 fada25c bd45407 c005a2c 3794682 c005a2c bd45407 fada25c 3794682 fada25c c005a2c 5d2f342 bd45407 3794682 c005a2c 3794682 fada25c 5d2f342 fada25c 3794682 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 |
import os
from dotenv import load_dotenv
import gradio as gr
from llama_index.core import StorageContext, load_index_from_storage, VectorStoreIndex, SimpleDirectoryReader, ChatPromptTemplate, Settings
from llama_index.llms.huggingface import HuggingFaceInferenceAPI
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from sentence_transformers import SentenceTransformer
# Load variables from a .env file so HF_TOKEN can be read from the environment.
load_dotenv()

# Global LlamaIndex settings: the hosted LLM and the embedding model.
_LLM_MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"
Settings.llm = HuggingFaceInferenceAPI(
    model_name=_LLM_MODEL_ID,
    tokenizer_name=_LLM_MODEL_ID,
    context_window=3000,
    token=os.getenv("HF_TOKEN"),
    max_new_tokens=512,
    generate_kwargs={"temperature": 0.1},
)
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# Folder the index is persisted to, and folder holding the source PDF files.
PERSIST_DIR = "db"
PDF_DIRECTORY = "data"

# Make sure both folders exist before anything reads from or writes to them.
for _required_dir in (PDF_DIRECTORY, PERSIST_DIR):
    os.makedirs(_required_dir, exist_ok=True)

# Running list of (query, response) tuples for the current conversation.
current_chat_history = []
def data_ingestion_from_directory():
    """Index the files in ``PDF_DIRECTORY`` and persist the index to disk.

    Loads the documents that ``SimpleDirectoryReader`` finds under
    ``PDF_DIRECTORY``, builds a ``VectorStoreIndex`` from them and writes the
    index's storage context to ``PERSIST_DIR``.

    Returns:
        None. The outcome is the persisted index under ``PERSIST_DIR``.
    """
    documents = SimpleDirectoryReader(PDF_DIRECTORY).load_data()
    # The index's own storage context is what gets persisted, so no separate
    # StorageContext needs to be created here.
    index = VectorStoreIndex.from_documents(documents)
    index.storage_context.persist(persist_dir=PERSIST_DIR)
def _format_chat_history(history, max_turns):
    """Render the last ``max_turns`` non-empty exchanges, oldest first."""
    if max_turns <= 0:
        return "(none)"
    recent_turns = [
        (past_query, past_response)
        for past_query, past_response in history
        if past_query.strip()
    ][-max_turns:]
    if not recent_turns:
        return "(none)"
    return "\n".join(
        f"User asked: '{past_query}'\nBot answered: '{past_response}'"
        for past_query, past_response in recent_turns
    )


def handle_query(query, max_history_turns=2):
    """Answer ``query`` from the persisted index and record the exchange.

    The index is loaded from ``PERSIST_DIR``, queried with a RedfernsTech
    prompt template, and the resulting text is appended to
    ``current_chat_history`` together with the query.

    Args:
        query: The user's question.
        max_history_turns: How many of the most recent exchanges to include
            in the prompt. Bounded so the prompt cannot grow without limit.

    Returns:
        The answer text, a fallback apology when the engine yields no text,
        or a short hint when ``query`` is empty or whitespace.
    """
    # Nothing to retrieve or answer for a blank question; skip the LLM call
    # and keep blank entries out of the history.
    if not query or not query.strip():
        return "Please enter a question."

    chat_text_qa_msgs = [
        (
            "user",
            """
To provide the best user experience, ensure all interactions reflect the professionalism and expertise of RedfernsTech. Always maintain a polite, professional, and helpful tone, using clear and concise language. Provide only one comprehensive and accurate answer to each question, ensuring the response is detailed enough to fully address the user’s inquiry. If users ask personal questions about the chatbot, redirect them to ask about the company, for example, "For more information about RedfernsTech, please ask me specific questions about our products or services." Focus on delivering information relevant to RedfernsTech’s products, services, and values, highlighting the benefits and features of RedfernsTech offerings whenever possible.
In short:
- Maintain a professional and polite tone.
- Provide one detailed and accurate answer per question.
- Give only the answer; do not include anything else.
- Redirect personal questions to company-related ones.
- Highlight RedfernsTech’s products, services, and values.
Previous conversation:
{chat_history}
Context:
{context_str}
Question:
{query_str}
""",
        )
    ]
    # Chat history gets its own template variable. `{context_str}` is filled
    # by the query engine with retrieved document text, and passing a
    # `context_str` keyword to `as_query_engine` did not reach the prompt.
    text_qa_template = ChatPromptTemplate.from_messages(
        chat_text_qa_msgs
    ).partial_format(
        chat_history=_format_chat_history(current_chat_history, max_history_turns)
    )

    # Load index from storage
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage_context)

    query_engine = index.as_query_engine(text_qa_template=text_qa_template)
    answer = query_engine.query(query)

    # Accept either an object exposing `.response` or a dict carrying it.
    response = getattr(answer, "response", None)
    if response is None and isinstance(answer, dict):
        response = answer.get("response")
    if not response:
        response = "Sorry, I couldn't find an answer."

    # NOTE(review): the history list is module-global, so it is shared by
    # every caller of this function in the process - confirm that is intended.
    current_chat_history.append((query, response))
    return response
# Ingest the files in PDF_DIRECTORY once, when the module is executed.
print(f"Processing PDF ingestion from directory: {PDF_DIRECTORY}")
data_ingestion_from_directory()

# Gradio widgets: an unlabeled question box and a plain text box for the answer.
input_component = gr.Textbox(
    placeholder="Ask me anything about the document...",
    show_label=False,
)
output_component = gr.Textbox()
def chatbot_handler(query):
    """Gradio callback: return whatever ``handle_query`` produces for ``query``."""
    return handle_query(query)
# Create the Gradio interface: one text input, one text output, served by
# chatbot_handler.
interface = gr.Interface(
    fn=chatbot_handler,
    inputs=input_component,
    outputs=output_component,
    title="RedfernsTech Q&A Chatbot",
    description="Ask me anything about the uploaded document.",
)

# Start the web server only when this file is run as a script, so that
# importing the module does not block on launch().
if __name__ == "__main__":
    interface.launch()
|