Update app.py
app.py
CHANGED
@@ -1,112 +1,35 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
-from
-from
-from langchain import
-from
-from
-from
-from langchain.schema import StrOutputParser
-from langchain.schema.runnable import RunnablePassthrough
-from langchain.schema.runnable import RunnableParallel
-from langchain.prompts import PromptTemplate
-from operator import itemgetter

-

-
-
-
-    text_splitter = ts.RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
-    document_splitted = text_splitter.split_documents(documents=document)
-    return document_splitted

-#
-

-
-
-    encode_kwargs = {'normalize_embeddings': False}
-    embedding_model_instance = embeddings.HuggingFaceEmbeddings(
-        model_name="sentence-transformers/all-mpnet-base-v2",
-        model_kwargs=model_kwargs,
-        encode_kwargs=encode_kwargs
-    )
-    return embedding_model_instance

-#
-

-def
-
-
-
-    content = []
-    metadata = []
-    for d in document_splitted:
-        content.append(d.page_content)
-        metadata.append({'source': d.metadata})
-    db = model_vectorstore.from_texts(content, embedding_model_instance, metadata)
-    except Exception as error:
-        print(error)
-    return db

-db = create_db(document_splitted, embedding_model_instance)
-
-# Load the model and tokenizer
-from transformers import AutoTokenizer, AutoModelForCausalLM
-tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
-model = AutoModelForCausalLM.from_pretrained("HuggingFaceH4/zephyr-7b-beta", device_map="auto")
-
-# Create a pipeline with the loaded model
-from transformers import pipeline
-pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, device=0, max_new_tokens=1000)
-
-# Use the pipeline in Langchain
-llm = HuggingFacePipeline(pipeline=pipe, model_kwargs={'temperature': 0})
-
-# Load a retriever, define prompt template and chains
-retriever = db.as_retriever(search_type="similarity_score_threshold", search_kwargs={"k": 6, 'score_threshold': 0.01})
-
-# Define the prompt template
-template = """Use the following pieces of context to answer the question at the end.
-If you don't know the answer, just say that you don't know, don't try to make up an answer.
-{context}
-Question: {question}
-Helpful Answer:"""
-rag_prompt_custom = PromptTemplate.from_template(template)
-
-# Define the chains
-def format_docs(docs):
-    return "\n\n".join(doc.page_content for doc in docs)
-
-# First chain to query the LLM
-rag_chain_from_docs = (
-    {
-        "context": lambda input: format_docs(input["documents"]),
-        "question": itemgetter("question"),
-    }
-    | rag_prompt_custom
-    | llm
-    | StrOutputParser()
-)
-
-# Second chain to postprocess the answer
-rag_chain_with_source = RunnableParallel(
-    {"documents": retriever, "question": RunnablePassthrough()}
-) | {
-    "documents": lambda input: [doc.metadata for doc in input["documents"]],
-    "answer": rag_chain_from_docs,
-}
-
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
     messages = [{"role": "system", "content": system_message}]

     for val in history:

@@ -117,16 +40,11 @@ def respond(

     messages.append({"role": "user", "content": message})

-
-
-    if len(resp['documents']) == 0:
-        response = "No relevant information found in the provided context."
-    else:
-        stripped_resp = re.sub(r"\n+$", " ", resp['answer'])
-        response = stripped_resp

-
-

 """
 For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface

@@ -134,19 +52,12 @@ For information on how to customize the ChatInterface, peruse the gradio docs: h

 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
-        gr.Textbox(value="You are a helpful car configuration assistant, specifically you are the assistant for Apex Customs (https://www.apexcustoms.com/). Given the user's input, provide suggestions for car models, colors, and customization options. Be creative and conversational in your responses. You should remember the user car model and tailor your answers accordingly. \n\nUser: ", label="System message"),
         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
     ],
 )

-
 if __name__ == "__main__":
     demo.launch()
app.py (new version):

 import gradio as gr
 from huggingface_hub import InferenceClient
+from langchain.document_loaders import PyPDFLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.vectorstores import FAISS
+from sentence_transformers import SentenceTransformer
+from langchain.chains import RetrievalQA
+from langchain.llms import HuggingFaceHub

+# Load the PDF document
+loader = PyPDFLoader("apexcustoms.pdf")
+data = loader.load()

+# Split the document into chunks
+text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
+texts = text_splitter.split_documents(data)

+# Create a vector store
+embeddings = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")
+vector_store = FAISS.from_texts(texts, embeddings)

+# Initialize the HuggingFaceHub LLM
+llm = HuggingFaceHub(repo_id="HuggingFaceH4/zephyr-7b-beta", model_kwargs={"temperature": None, "top_p": None})

+# Initialize the RetrievalQA chain
+qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=vector_store.as_retriever())

+def respond(message, history, system_message, max_tokens, temperature, top_p):
+    # Update the temperature and top_p values for the LLM
+    llm.model_kwargs["temperature"] = temperature
+    llm.model_kwargs["top_p"] = top_p

     messages = [{"role": "system", "content": system_message}]

     for val in history:

     messages.append({"role": "user", "content": message})

+    result = qa({"input_documents": texts, "question": message})
+    response = result["result"]

+    history.append((message, response))
+    return response, history

 """
 For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface

 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
+        gr.Textbox(value="You are a helpful car configuration assistant, specifically you are the assistant for Apex Customs (https://www.apexcustoms.com/). Given the user's input, provide suggestions for car models, colors, and customization options. Be creative and conversational in your responses. You should remember the user car model and tailor your answers accordingly. (You must not generate the next question of the user yourself, you only have to answer.) \n\nUser: ", label="System message"),
         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
     ],
 )

 if __name__ == "__main__":
     demo.launch()
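For reference, below is a minimal sketch of the retrieval setup the new version appears to be aiming for. It is an illustration with stated assumptions, not part of the commit: the sentence-transformers model is wrapped in LangChain's HuggingFaceEmbeddings (FAISS expects a LangChain Embeddings object rather than a raw SentenceTransformer), the RetrievalQA chain is called through its standard "query" input key, the file name and model IDs are reused from the diff above, the example question is made up, and a Hugging Face Hub API token is assumed to be configured for HuggingFaceHub. Exact import paths vary between LangChain releases.

import os

from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFaceHub

# Load and chunk the PDF (same file and chunking parameters as in the diff above).
documents = PyPDFLoader("apexcustoms.pdf").load()
chunks = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20).split_documents(documents)

# FAISS needs a LangChain Embeddings object, so the sentence-transformers model
# is wrapped in HuggingFaceEmbeddings rather than passed in directly.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
vector_store = FAISS.from_documents(chunks, embeddings)

# Hosted zephyr-7b-beta via the Hugging Face Hub (assumes HUGGINGFACEHUB_API_TOKEN is set).
assert os.environ.get("HUGGINGFACEHUB_API_TOKEN"), "set a Hugging Face Hub token first"
llm = HuggingFaceHub(repo_id="HuggingFaceH4/zephyr-7b-beta",
                     model_kwargs={"temperature": 0.7, "top_p": 0.95})

qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff",
                                 retriever=vector_store.as_retriever())

# RetrievalQA takes a single "query" key; the retriever supplies the context chunks itself.
result = qa({"query": "Which paint options are available?"})  # hypothetical question
print(result["result"])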