Moha782 committed
Commit bca5017 · verified · 1 Parent(s): 39f6f38

Update app.py

Files changed (1):
  app.py (+20 -26)
app.py CHANGED
```diff
@@ -1,11 +1,17 @@
 import gradio as gr
+from huggingface_hub import InferenceClient
 from pathlib import Path
-from transformers import RagTokenForGeneration, AutoTokenizer, AutoModelForCausalLM
+from transformers import RagTokenForGeneration, RagTokenizer, RagRetriever
 from pdfplumber import open as open_pdf
 from typing import List
 
+"""
+For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
+"""
+client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
+
 # Load the PDF file
-pdf_path = Path("apexcustoms.pdf")
+pdf_path = Path("path/to/your/pdf/file.pdf")
 with open_pdf(pdf_path) as pdf:
     text = "\n".join(page.extract_text() for page in pdf.pages)
 
```
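Two things worth noting in this hunk. The new `pdf_path` is a placeholder that replaces the real `apexcustoms.pdf`, so the Space will fail at startup until it is pointed back at an actual file. Separately, pdfplumber's `page.extract_text()` returns `None` for pages with no extractable text, so the bare `"\n".join(...)` kept as context here can raise a `TypeError` on scanned or image-only pages. A minimal hardening sketch (the `or ""` guard is my addition, not part of the commit):

```python
from pathlib import Path
from pdfplumber import open as open_pdf

pdf_path = Path("apexcustoms.pdf")  # the real file; the diff's new path is only a placeholder

with open_pdf(pdf_path) as pdf:
    # extract_text() can return None on image-only pages, which would break str.join
    text = "\n".join(page.extract_text() or "" for page in pdf.pages)
```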
```diff
@@ -15,19 +21,14 @@ text_chunks: List[str] = [text[i:i+chunk_size] for i in range(0, len(text), chun
 
 # Load the RAG model and tokenizer for retrieval
 rag_tokenizer = AutoTokenizer.from_pretrained("facebook/rag-token-nq")
-rag_model = RagTokenForGeneration.from_pretrained("facebook/rag-token-nq")
-
-# Load the DialoGPT model and tokenizer for generation
-dialogpt_tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-medium")
-dialogpt_model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-medium")
+retriever = RagRetriever.from_pretrained("facebook/rag-token-nq", index_name="exact", use_dummy_dataset=True)
+rag_model = RagTokenForGeneration.from_pretrained("facebook/rag-token-nq", retriever=retriever)
 
 def respond(
     message,
     history: list[tuple[str, str]],
     system_message,
     max_tokens,
-    num_beams,
-    no_repeat_ngram_size,
 ):
     messages = [{"role": "system", "content": system_message}]
 
```
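One flag on this hunk: the import change above drops `AutoTokenizer` from the `transformers` import, but the unchanged context line `rag_tokenizer = AutoTokenizer.from_pretrained(...)` still references it, which will raise a `NameError` at import time. A sketch of the stock RAG setup using only the names the new revision actually imports (retriever options copied from the diff; substituting `RagTokenizer` for `AutoTokenizer` is my reading of the intent):

```python
from transformers import RagRetriever, RagTokenForGeneration, RagTokenizer

# All three pieces come from the same facebook/rag-token-nq checkpoint
rag_tokenizer = RagTokenizer.from_pretrained("facebook/rag-token-nq")
retriever = RagRetriever.from_pretrained(
    "facebook/rag-token-nq", index_name="exact", use_dummy_dataset=True
)
rag_model = RagTokenForGeneration.from_pretrained(
    "facebook/rag-token-nq", retriever=retriever
)
```

With the retriever attached this way, retrieval happens inside `rag_model.generate(...)`; the later context line `rag_model(rag_input_ids, text_chunks, return_retrieved_inputs=True)` does not match any stock `RagTokenForGeneration` signature I am aware of.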
```diff
@@ -46,21 +47,16 @@ def respond(
     rag_output = rag_model(rag_input_ids, text_chunks, return_retrieved_inputs=True)
     retrieved_text = rag_output.retrieved_inputs
 
-    # Encode the context and user's message for DialoGPT
-    input_ids = dialogpt_tokenizer.encode(retrieved_text + "\n\n" + message, return_tensors="pt")
-
-    # Generate the response using the DialoGPT model
-    output = dialogpt_model.generate(
-        input_ids,
-        max_length=max_tokens,
-        num_beams=num_beams,
-        no_repeat_ngram_size=no_repeat_ngram_size,
-        early_stopping=True
-    )
-
-    response = dialogpt_tokenizer.decode(output[0], skip_special_tokens=True)
-
-    yield response
+    # Generate the response using the zephyr model
+    for message in client.chat_completion(
+        messages,
+        max_tokens=max_tokens,
+        stream=True,
+        files={"context": retrieved_text},  # Pass retrieved text as context
+    ):
+        token = message.choices[0].delta.content
+        response += token
+        yield response
 
 """
 For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
```
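The added loop has two problems as committed: `response += token` runs before `response` is ever bound, and `InferenceClient.chat_completion` takes no `files` parameter in any `huggingface_hub` release I know of, so this call should raise a `TypeError`. A hedged rework under those assumptions, folding the retrieved context into the message list instead and no longer shadowing the `message` argument:

```python
    # Attach the retrieved context as part of the conversation rather than the
    # nonexistent files= kwarg; this framing is illustrative, not from the commit.
    messages.append(
        {"role": "user", "content": f"Context:\n{retrieved_text}\n\n{message}"}
    )

    response = ""  # must be initialized before accumulating streamed tokens
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
    ):
        token = chunk.choices[0].delta.content
        if token:  # delta.content can be empty on the final chunk
            response += token
            yield response
```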
```diff
@@ -70,8 +66,6 @@ demo = gr.ChatInterface(
     additional_inputs=[
         gr.Textbox(value="You are a helpful car configuration assistant, specifically you are the assistant for Apex Customs (https://www.apexcustoms.com/). Given the user's input, provide suggestions for car models, colors, and customization options. Be conversational in your responses. You should remember the user car model and tailor your answers accordingly. You limit yourself to answering the given question and maybe propose a suggestion but not write the next question of the user. \n\nUser: ", label="System message"),
         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=1, maximum=10, value=4, step=1, label="Number of beams"),
-        gr.Slider(minimum=1, maximum=5, value=2, step=1, label="No repeat ngram size"),
     ],
 )
 
```
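Removing these two sliders correctly mirrors dropping `num_beams` and `no_repeat_ngram_size` from the `respond` signature. For reference, a minimal sketch of how the interface wires together after this commit (system prompt abbreviated; `demo.launch()` is the usual Gradio entry point, assumed rather than shown in this diff):

```python
import gradio as gr

demo = gr.ChatInterface(
    respond,  # the generator defined above; yielding partials gives streaming output
    additional_inputs=[
        gr.Textbox(value="You are a helpful car configuration assistant...", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
    ],
)

if __name__ == "__main__":
    demo.launch()
```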
 
71