Spaces:

Moha782
/

gen-ai-project

Sleeping

App Files Files Community

Moha782 committed on Jun 26, 2024

Commit

4272192

verified ·

1 Parent(s): 16c59d8

Update app.py

Browse files

Files changed (1) hide show

app.py +105 -16

app.py CHANGED Viewed

@@ -1,11 +1,103 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 def respond(
     message,
@@ -25,19 +117,16 @@ def respond(
     messages.append({"role": "user", "content": message})
-    response = ""
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
-        response += token
-        yield response
 """
 For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface

 import gradio as gr
 from huggingface_hub import InferenceClient
+from langchain_community import document_loaders as dl
+from langchain_community import embeddings
+from langchain import text_splitter as ts
+from langchain_community import vectorstores as vs
+from langchain_community.llms import HuggingFacePipeline
+from langchain_huggingface import HuggingFaceEmbeddings
+from langchain.schema import StrOutputParser
+from langchain.schema.runnable import RunnablePassthrough
+from langchain.schema.runnable import RunnableParallel
+from langchain.prompts import PromptTemplate
+from operator import itemgetter
+document_path = "apexcustoms.pdf"
+def split_doc(document_path, chunk_size=500, chunk_overlap=20):
+    """Load a PDF and cut it into overlapping text chunks.
+
+    Args:
+        document_path: Path of the PDF handed to ``PyPDFLoader``.
+        chunk_size: Chunk size forwarded to ``RecursiveCharacterTextSplitter``.
+        chunk_overlap: Chunk overlap forwarded to the same splitter.
+
+    Returns:
+        The chunked documents returned by ``split_documents``.
+    """
+    pages = dl.PyPDFLoader(document_path).load()
+    splitter = ts.RecursiveCharacterTextSplitter(
+        chunk_size=chunk_size,
+        chunk_overlap=chunk_overlap,
+    )
+    return splitter.split_documents(documents=pages)
+# Chunk the source PDF once, at module load
+document_splitted = split_doc(document_path)
+def load_embedding_model():
+    """Build the CPU sentence-embedding model used to index the document.
+
+    Uses the ``HuggingFaceEmbeddings`` class that this file imports from
+    ``langchain_huggingface``; the same-named class under
+    ``langchain_community.embeddings`` is deprecated in favour of it.
+
+    Returns:
+        A ``HuggingFaceEmbeddings`` instance for
+        ``sentence-transformers/all-mpnet-base-v2`` running on CPU with
+        embedding normalization turned off.
+    """
+    return HuggingFaceEmbeddings(
+        model_name="sentence-transformers/all-mpnet-base-v2",
+        model_kwargs={'device': 'cpu'},
+        encode_kwargs={'normalize_embeddings': False},
+    )
+# Instantiate the embedding model
+embedding_model_instance = load_embedding_model()
+def create_db(document_splitted, embedding_model_instance):
+    """Embed the document chunks and index them in a FAISS vector store.
+
+    Args:
+        document_splitted: Iterable of chunked documents; each must expose
+            ``page_content`` and ``metadata``.
+        embedding_model_instance: Embedding model passed to ``FAISS.from_texts``.
+
+    Returns:
+        The populated FAISS vector store.
+
+    Raises:
+        RuntimeError: If the store cannot be built. The original error is
+            chained as the cause, instead of being printed and replaced by a
+            ``None`` return that only fails later when the store is used.
+    """
+    texts = [d.page_content for d in document_splitted]
+    # Each chunk's metadata dict is kept nested under a 'source' key.
+    metadatas = [{'source': d.metadata} for d in document_splitted]
+    try:
+        return vs.FAISS.from_texts(texts, embedding_model_instance, metadatas)
+    except Exception as error:
+        raise RuntimeError("Failed to build the FAISS vector store") from error
+db = create_db(document_splitted, embedding_model_instance)
+# Load the model and tokenizer
+from transformers import AutoTokenizer, AutoModelForCausalLM
+tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
+# device_map="auto" delegates weight placement to accelerate
+model = AutoModelForCausalLM.from_pretrained("HuggingFaceH4/zephyr-7b-beta", device_map="auto")
+# Create a pipeline with the loaded model
+from transformers import pipeline
+# No explicit `device` argument: the model is already placed through
+# device_map, and transformers does not accept moving an accelerate-dispatched
+# model to a specific device. Dropping it also avoids assuming GPU 0 exists.
+pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_new_tokens=1000)
+# Use the pipeline in Langchain
+llm = HuggingFacePipeline(pipeline=pipe, model_kwargs={'temperature': 0})
+# Retriever over the vector store: similarity search with a score threshold,
+# configured with k=6 and score_threshold=0.01
+retriever = db.as_retriever(
+    search_type="similarity_score_threshold",
+    search_kwargs={"k": 6, 'score_threshold': 0.01},
+)
+# Prompt that places the retrieved context ahead of the user's question
+template = """Use the following pieces of context to answer the question at the end.
+If you don't know the answer, just say that you don't know, don't try to make up an answer.
+{context}
+Question: {question}
+Helpful Answer:"""
+rag_prompt_custom = PromptTemplate.from_template(template)
+# Define the chains
+def format_docs(docs):
+    """Return the ``page_content`` of every document, separated by blank lines."""
+    page_texts = [doc.page_content for doc in docs]
+    return "\n\n".join(page_texts)
+# Answer chain: takes a mapping with "documents" and "question" keys, fills the
+# prompt, runs the LLM and parses the output to a string
+rag_chain_from_docs = (
+    {
+        "context": lambda payload: format_docs(payload["documents"]),
+        "question": itemgetter("question"),
+    }
+    | rag_prompt_custom
+    | llm
+    | StrOutputParser()
+)
+# Outer chain: run the retriever on the incoming input while passing that input
+# through as the question, then return the documents' metadata with the answer
+rag_chain_with_source = RunnableParallel(
+    documents=retriever,
+    question=RunnablePassthrough(),
+) | {
+    "documents": lambda payload: [doc.metadata for doc in payload["documents"]],
+    "answer": rag_chain_from_docs,
+}
 def respond(
     message,
     messages.append({"role": "user", "content": message})
+    # Query the LLM and postprocess the answer
+    resp = rag_chain_with_source.invoke(message)
+    if len(resp['documents']) == 0:
+        response = "No relevant information found in the provided context."
+    else:
+        stripped_resp = re.sub(r"\n+$", " ", resp['answer'])
+        response = stripped_resp
+    for chunk in [response[i:i+max_tokens] for i in range(0, len(response), max_tokens)]:
+        yield chunk
 """
 For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface