Ankitajadhav committed
Commit 40167a9 · verified · 1 Parent(s): 4dde30e

Update app.py

Files changed (1):
  1. app.py +34 -37
app.py CHANGED
@@ -1,28 +1,23 @@
-import os
 import gradio as gr
 import copy
 from llama_cpp import Llama
 from huggingface_hub import hf_hub_download
 import chromadb
 from sentence_transformers import SentenceTransformer
-import logging
-
-# Initialize logging
-logging.basicConfig(level=logging.INFO)
 
 # Initialize the Llama model
-try:
-    llm = Llama(
-        # model_path="./models/Phi-3-mini-4k-instruct-gguf",
-        # model_path = "./models/Phi-3-mini-4k-instruct-q4.gguf",
-        model_path = "microsoft/Phi-3-mini-4k-instruct-gguf",
-        n_ctx=2048,
-        n_gpu_layers=50,  # Adjust based on your VRAM
-    )
-    logging.info("Llama model loaded successfully.")
-except Exception as e:
-    logging.error(f"Error loading Llama model: {e}")
-    raise
+llm = Llama(
+    # model_path=hf_hub_download(
+    #     repo_id="microsoft/Phi-3-mini-4k-instruct-gguf",
+    #     filename="Phi-3-mini-4k-instruct-q4.gguf",
+    # ),
+    model_path=hf_hub_download(
+        repo_id="Ankitajadhav/Phi-3-mini-4k-instruct-q4.gguf",
+        filename="Phi-3-mini-4k-instruct-q4.gguf",
+    ),
+    n_ctx=2048,
+    n_gpu_layers=50,  # Adjust based on your VRAM
+)
 
 # Initialize ChromaDB Vector Store
 class VectorStore:
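
The substantive fix in this hunk: Llama(model_path=...) expects a path to a local .gguf file, so the old model_path = "microsoft/Phi-3-mini-4k-instruct-gguf" (a Hub repo id) could never resolve. hf_hub_download turns a (repo_id, filename) pair into a locally cached file and returns that path. A minimal standalone sketch of the resolution, not part of the commit:

# Sketch only: resolve a GGUF file on the Hub to a local cached path.
from huggingface_hub import hf_hub_download

local_path = hf_hub_download(
    repo_id="Ankitajadhav/Phi-3-mini-4k-instruct-q4.gguf",  # repo id from the diff
    filename="Phi-3-mini-4k-instruct-q4.gguf",
)
# The file lands in the local HF cache (e.g. under ~/.cache/huggingface/hub/)
print(local_path)
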
@@ -61,40 +56,42 @@ def generate_text(
         input_prompt += f"{interaction[0]} [/INST] {interaction[1]} </s><s> [INST] "
     input_prompt += f"{message} [/INST] "
 
-    logging.info("Input prompt:\n%s", input_prompt)  # Debugging output
+    print("Input prompt:", input_prompt)  # Debugging output
 
     temp = ""
-    try:
-        output = llm(
-            input_prompt,
-            temperature=temperature,
-            top_p=top_p,
-            top_k=40,
-            repeat_penalty=1.1,
-            max_tokens=max_tokens,
-            stop=["", " \n", "ASSISTANT:", "USER:", "SYSTEM:"],
-            stream=True,
-        )
-        for out in output:
-            temp += out["choices"][0]["text"]
-            logging.info("Model output:\n%s", temp)  # Log model output
-            yield temp
-    except Exception as e:
-        logging.error(f"Error during text generation: {e}")
-        yield "An error occurred during text generation."
+    output = llm(
+        input_prompt,
+        temperature=temperature,
+        top_p=top_p,
+        top_k=40,
+        repeat_penalty=1.1,
+        max_tokens=max_tokens,
+        stop=["", " \n", "ASSISTANT:", "USER:", "SYSTEM:"],
+        stream=True,
+    )
+    for out in output:
+        temp += out["choices"][0]["text"]
+        yield temp
 
 # Define the Gradio interface
 demo = gr.ChatInterface(
     generate_text,
+    title="llama-cpp-python on GPU with ChromaDB",
+    description="Running LLM with context retrieval from ChromaDB",
     examples=[
         ["I have leftover rice, what can I make out of it?"],
         ["Can I make lunch for two people with this?"],
-        ["Some good dessert with leftover cake"]
     ],
     cache_examples=False,
     retry_btn=None,
     undo_btn="Delete Previous",
     clear_btn="Clear",
+    # additional_inputs=[
+    #     gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
+    #     gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+    #     gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+    #     gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
+    # ],
 )
 
 if __name__ == "__main__":
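
For context on the now-unguarded generation call: with stream=True, calling the Llama object returns an iterator of partial completions, each chunk shaped like {"choices": [{"text": ...}]}, and gr.ChatInterface re-renders the reply on every yield of the growing string. A sketch of that consumption pattern, assuming the llm instance defined above:

# Sketch, not from the commit: consuming llama-cpp-python streaming output.
# Assumes `llm` is the Llama instance constructed earlier in app.py.
partial = ""
for chunk in llm(
    "[INST] I have leftover rice, what can I make out of it? [/INST] ",
    max_tokens=128,
    stream=True,  # yield incremental chunks instead of one final dict
):
    partial += chunk["choices"][0]["text"]  # append the newly generated text
    print(partial)  # the same growing string that generate_text yields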
 
 
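The two unchanged lines at the top of the second hunk are the tail of the prompt builder: chat history is folded into Llama-2-style [INST] ... [/INST] turns separated by </s><s>. A self-contained sketch of that assembly; the opening "<s> [INST] " is an assumption, since the diff shows only the loop body:

# Sketch, not from the commit: assembling the [INST]-style prompt.
def build_prompt(message, history):
    prompt = "<s> [INST] "  # assumed opening tokens; not visible in the diff
    for user_turn, bot_turn in history:
        prompt += f"{user_turn} [/INST] {bot_turn} </s><s> [INST] "
    prompt += f"{message} [/INST] "
    return prompt

print(build_prompt(
    "Can I make lunch for two people with this?",
    [("I have leftover rice, what can I make out of it?", "Try fried rice.")],
))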
 
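The diff context cuts off at the __main__ guard; the launch call itself sits outside the hunk. In a Gradio Space the guard conventionally wraps:

# Conventional Gradio entry point; the actual line is truncated out of this diff.
if __name__ == "__main__":
    demo.launch()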