Ankitajadhav committed
Commit 790746b · verified · 1 Parent(s): 2cbda23

Update app.py

Files changed (1)
  1. app.py +50 -88
app.py CHANGED
@@ -12,9 +12,13 @@ llm = Llama(
     # repo_id="microsoft/Phi-3-mini-4k-instruct-gguf",
     # filename="Phi-3-mini-4k-instruct-q4.gguf",
     # ),
+    # model_path=hf_hub_download(
+    #     repo_id="Ankitajadhav/Phi-3-mini-4k-instruct-q4.gguf",
+    #     filename="Phi-3-mini-4k-instruct-q4.gguf",
+    # ),
     model_path=hf_hub_download(
-        repo_id="Ankitajadhav/Phi-3-mini-4k-instruct-q4.gguf",
-        filename="Phi-3-mini-4k-instruct-q4.gguf",
+        repo_id="TheBloke/CapybaraHermes-2.5-Mistral-7B-GGUF",
+        filename="capybarahermes-2.5-mistral-7b.Q2_K.gguf",
     ),
     n_ctx=2048,
     n_gpu_layers=50,  # Adjust based on your VRAM
@@ -26,26 +30,26 @@ class VectorStore:
         self.embedding_model = SentenceTransformer('sentence-transformers/multi-qa-MiniLM-L6-cos-v1')
         self.chroma_client = chromadb.Client()
         self.collection = self.chroma_client.create_collection(name=collection_name)
-
+    ## entire dataset
     # def populate_vectors(self, texts):
     #     embeddings = self.embedding_model.encode(texts, batch_size=32).tolist()
     #     for text, embedding in zip(texts, embeddings, ids):
     #         self.collection.add(embeddings=[embedding], documents=[text], ids=[doc_id])

-    # Method to populate the vector store with embeddings from a dataset
+    ## subsetting
     def populate_vectors(self, dataset):
         # Select the text columns to concatenate
-        # title = dataset['train']['title_cleaned'][:1000] # Limiting to 100 examples for the demo
-        recipe = dataset['train']['recipe_new'][:1000]
-        allergy = dataset['train']['allergy_type'][:1000]
-        ingredients = dataset['train']['ingredients_alternatives'][:1000]
+        title = dataset['train']['title_cleaned'][:5000]  # Limiting to 5000 examples for the demo
+        recipe = dataset['train']['recipe_new'][:5000]
+        allergy = dataset['train']['allergy_type'][:5000]
+        ingredients = dataset['train']['ingredients_alternatives'][:5000]

         # Concatenate the text from the selected columns
-        texts = [f"{rep} {ingr} {alle}" for rep, ingr, alle in zip(recipe, ingredients, allergy)]
+        texts = [f"{tit} {rep} {ingr} {alle}" for tit, rep, ingr, alle in zip(title, recipe, ingredients, allergy)]
         for i, item in enumerate(texts):
             embeddings = self.embedding_model.encode(item).tolist()
             self.collection.add(embeddings=[embeddings], documents=[item], ids=[str(i)])
-
+    ## Method to populate the vector store with embeddings from a dataset
     def search_context(self, query, n_results=1):
         query_embedding = self.embedding_model.encode([query]).tolist()
         results = self.collection.query(query_embeddings=query_embedding, n_results=n_results)
@@ -55,101 +59,59 @@ class VectorStore:
 dataset = load_dataset('Thefoodprocessor/recipe_new_with_features_full')
 vector_store = VectorStore("embedding_vector")
 vector_store.populate_vectors(dataset)
-def format_recipe(input_string):
-    # Clean up the input
-    cleaned_text = input_string.strip("[]'").replace('\\n', '\n')
-
-    # Split the text into lines
-    lines = cleaned_text.split('\n')
-
-    # Initialize sections
-    title = lines[0]
-    ingredients = []
-    instructions = []
-    substitutions = []

-    # Extract ingredients and instructions
-    in_instructions = False
-    for line in lines[1:]:
-        if line.startswith("Instructions:"):
-            in_instructions = True
-            continue
-
-        if in_instructions:
-            if line.strip():  # Check for non-empty lines
-                instructions.append(line.strip())
-        else:
-            if line.strip():  # Check for non-empty lines
-                ingredients.append(line.strip())
-
-    # Gather substitutions from the last few lines
-    for line in lines:
-        if ':' in line:
-            substitutions.append(line.strip())

-    # Format output
-    formatted_recipe = f"## {title}\n\n### Ingredients:\n"
-    formatted_recipe += '\n'.join(f"- {item}" for item in ingredients) + "\n\n"
-    formatted_recipe += "### Instructions:\n" + '\n'.join(f"{i + 1}. {line}" for i, line in enumerate(instructions)) + "\n\n"
-
-    if substitutions:
-        formatted_recipe += "### Substitutions:\n" + '\n'.join(f"- **{line.split(':')[0].strip()}**: {line.split(':')[1].strip()}" for line in substitutions) + "\n"
-    return formatted_recipe
-    # print(formatted_recipe)
-def generate_text(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
+def generate_text(message):
     # Retrieve context from vector store
     context_results = vector_store.search_context(message, n_results=1)
     context = context_results[0] if context_results else ""

-    input_prompt = f"[INST] <<SYS>>\n{system_message}\n<</SYS>>\n\n {context}\n"
-    for interaction in history:
-        input_prompt += f"{interaction[0]} [/INST] {interaction[1]} </s><s> [INST] "
-    input_prompt += f"{message} [/INST] "
-
-    print("Input prompt:", input_prompt)  # Debugging output
+    # Create the prompt template
+    prompt_template = (
+        f"SYSTEM: You are a recipe generating bot.\n"
+        f"SYSTEM: {context}\n"
+        f"USER: {message}\n"
+        f"ASSISTANT:\n"
+    )

-    temp = ""
+    # Generate text using the language model
     output = llm(
-        input_prompt,
-        temperature=temperature,
-        top_p=top_p,
-        top_k=40,
-        repeat_penalty=1.1,
-        max_tokens=max_tokens,
-        stop=["", " \n", "ASSISTANT:", "USER:", "SYSTEM:"],
-        stream=True,
-    )
-    for out in output:
-        temp += format_recipe(out["choices"][0]["text"])
-        yield temp
+        prompt_template,
+        # max_new_tokens=256,
+        temperature=0.3,
+        top_p=0.95,
+        top_k=40,
+        repeat_penalty=1.1,
+        max_tokens=600,
+        # repetition_penalty=1.1
+    )
+
+    # Process the output
+    input_string = output['choices'][0]['text'].strip()
+    cleaned_text = input_string.strip("[]'").replace('\\n', '\n')
+    continuous_text = '\n'.join(cleaned_text.split('\n'))
+    return continuous_text

 # Define the Gradio interface
-demo = gr.ChatInterface(
-    generate_text,
+demo = gr.Interface(
+    fn=generate_text,
+    inputs=[
+        gr.Textbox(lines=2, placeholder="Enter your message here...", label="Message"),
+    ],
+    outputs=gr.Textbox(label="Generated Text"),
     title="llama-cpp-python on GPU with ChromaDB",
     description="Running LLM with context retrieval from ChromaDB",
     examples=[
         ["I have leftover rice, what can I make out of it?"],
-        ["Can I make lunch for two people with this?"],
+        ["I just have some milk and chocolate, what dessert can I make?"],
+        ["I am allergic to coconut milk, what can I use instead in a Thai curry?"],
+        ["Can you suggest a vegan breakfast recipe?"],
+        ["How do I make a perfect scrambled egg?"],
+        ["Can you guide me through making a soufflé?"],
     ],
     cache_examples=False,
-    retry_btn=None,
-    undo_btn="Delete Previous",
-    clear_btn="Clear",
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
-    ],
 )

 if __name__ == "__main__":
     demo.launch()
+
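
A note on `populate_vectors`: the commented-out "entire dataset" variant hints at batch encoding, which is considerably faster than encoding one concatenated recipe at a time as the committed loop does. Below is a sketch of that batched form; the id handling is an assumption, since the original snippet referenced `ids` and `doc_id` without defining them.

```python
# Batched variant of populate_vectors, sketched from the commented-out
# version in this diff. The enumerate-based ids are an assumption; the
# original snippet used undefined ids/doc_id variables.
def populate_vectors_batched(self, texts):
    # encode() with batch_size embeds many documents per forward pass
    embeddings = self.embedding_model.encode(texts, batch_size=32).tolist()
    for doc_id, (text, embedding) in enumerate(zip(texts, embeddings)):
        self.collection.add(embeddings=[embedding],
                            documents=[text], ids=[str(doc_id)])
```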
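On the retrieval path: the diff cuts `search_context` off before its return statement. Chroma's `collection.query` returns a dict of lists (including `ids`, `documents`, and `distances`, one inner list per query embedding), so the method presumably unwraps `results['documents']` before `generate_text` indexes the result as `context_results[0]`. A minimal, self-contained sketch of that query pattern, with a toy collection name and documents standing in for the recipe dataset:

```python
# Minimal sketch of the Chroma query pattern behind search_context.
# The collection name and toy documents are illustrative, not from this commit.
import chromadb
from sentence_transformers import SentenceTransformer

model = SentenceTransformer('sentence-transformers/multi-qa-MiniLM-L6-cos-v1')
client = chromadb.Client()
collection = client.create_collection(name="embedding_vector_demo")

# Populate with a couple of toy documents, mirroring populate_vectors
for i, doc in enumerate(["fried rice with leftover rice and egg",
                         "chocolate milk pudding"]):
    collection.add(embeddings=[model.encode(doc).tolist()],
                   documents=[doc], ids=[str(i)])

# query() takes a list of embeddings and returns dicts of lists,
# one inner list per query embedding
results = collection.query(
    query_embeddings=model.encode(["what can I cook with rice?"]).tolist(),
    n_results=1,
)
print(results["documents"][0])  # e.g. ['fried rice with leftover rice and egg']
```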
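Finally, a hedged smoke test for the new generation path: one call to the CapybaraHermes model with the `SYSTEM/USER/ASSISTANT` prompt template, outside Gradio. The sampling values mirror the `llm()` call added in this commit; the `stop` list is an assumption to keep the completion bounded, since the committed call no longer sets one.

```python
# Smoke test for the model swap and prompt template in this commit.
# The stop list is an assumption; the committed llm() call omits it.
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

llm = Llama(
    model_path=hf_hub_download(
        repo_id="TheBloke/CapybaraHermes-2.5-Mistral-7B-GGUF",
        filename="capybarahermes-2.5-mistral-7b.Q2_K.gguf",
    ),
    n_ctx=2048,
    n_gpu_layers=50,  # adjust to your VRAM; 0 runs CPU-only
)

prompt = (
    "SYSTEM: You are a recipe generating bot.\n"
    "SYSTEM: fried rice with leftover rice and egg\n"  # stand-in for retrieved context
    "USER: I have leftover rice, what can I make out of it?\n"
    "ASSISTANT:\n"
)
output = llm(prompt, temperature=0.3, top_p=0.95, top_k=40,
             repeat_penalty=1.1, max_tokens=600, stop=["USER:"])
print(output["choices"][0]["text"].strip())
```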