Ankitajadhav committed
Commit 790746b · verified · 1 Parent(s): 2cbda23

Update app.py

Files changed (1)
  1. app.py +50 -88
app.py CHANGED
@@ -12,9 +12,13 @@ llm = Llama(
     # repo_id="microsoft/Phi-3-mini-4k-instruct-gguf",
     # filename="Phi-3-mini-4k-instruct-q4.gguf",
     # ),
+    # model_path=hf_hub_download(
+    #     repo_id="Ankitajadhav/Phi-3-mini-4k-instruct-q4.gguf",
+    #     filename="Phi-3-mini-4k-instruct-q4.gguf",
+    # ),
     model_path=hf_hub_download(
-        repo_id="Ankitajadhav/Phi-3-mini-4k-instruct-q4.gguf",
-        filename="Phi-3-mini-4k-instruct-q4.gguf",
+        repo_id="TheBloke/CapybaraHermes-2.5-Mistral-7B-GGUF",
+        filename="capybarahermes-2.5-mistral-7b.Q2_K.gguf",
     ),
     n_ctx=2048,
     n_gpu_layers=50,  # Adjust based on your VRAM
@@ -26,26 +30,26 @@ class VectorStore:
         self.embedding_model = SentenceTransformer('sentence-transformers/multi-qa-MiniLM-L6-cos-v1')
         self.chroma_client = chromadb.Client()
         self.collection = self.chroma_client.create_collection(name=collection_name)
-
+    ## entire dataset
     # def populate_vectors(self, texts):
     #     embeddings = self.embedding_model.encode(texts, batch_size=32).tolist()
     #     for text, embedding in zip(texts, embeddings, ids):
     #         self.collection.add(embeddings=[embedding], documents=[text], ids=[doc_id])

-    # Method to populate the vector store with embeddings from a dataset
+    ## subsetting
     def populate_vectors(self, dataset):
         # Select the text columns to concatenate
-        # title = dataset['train']['title_cleaned'][:1000] # Limiting to 100 examples for the demo
-        recipe = dataset['train']['recipe_new'][:1000]
-        allergy = dataset['train']['allergy_type'][:1000]
-        ingredients = dataset['train']['ingredients_alternatives'][:1000]
+        title = dataset['train']['title_cleaned'][:5000]  # Limiting to 5000 examples for the demo
+        recipe = dataset['train']['recipe_new'][:5000]
+        allergy = dataset['train']['allergy_type'][:5000]
+        ingredients = dataset['train']['ingredients_alternatives'][:5000]

         # Concatenate the text from the selected columns
-        texts = [f"{rep} {ingr} {alle}" for rep, ingr, alle in zip(recipe, ingredients, allergy)]
+        texts = [f"{tit} {rep} {ingr} {alle}" for tit, rep, ingr, alle in zip(title, recipe, ingredients, allergy)]
         for i, item in enumerate(texts):
             embeddings = self.embedding_model.encode(item).tolist()
             self.collection.add(embeddings=[embeddings], documents=[item], ids=[str(i)])
-
+    ## Method to populate the vector store with embeddings from a dataset
     def search_context(self, query, n_results=1):
         query_embedding = self.embedding_model.encode([query]).tolist()
         results = self.collection.query(query_embeddings=query_embedding, n_results=n_results)
@@ -55,101 +59,59 @@ class VectorStore:
 dataset = load_dataset('Thefoodprocessor/recipe_new_with_features_full')
 vector_store = VectorStore("embedding_vector")
 vector_store.populate_vectors(dataset)
-def format_recipe(input_string):
-    # Clean up the input
-    cleaned_text = input_string.strip("[]'").replace('\\n', '\n')
-
-    # Split the text into lines
-    lines = cleaned_text.split('\n')
-
-    # Initialize sections
-    title = lines[0]
-    ingredients = []
-    instructions = []
-    substitutions = []

-    # Extract ingredients and instructions
-    in_instructions = False
-    for line in lines[1:]:
-        if line.startswith("Instructions:"):
-            in_instructions = True
-            continue
-
-        if in_instructions:
-            if line.strip():  # Check for non-empty lines
-                instructions.append(line.strip())
-        else:
-            if line.strip():  # Check for non-empty lines
-                ingredients.append(line.strip())
-
-    # Gather substitutions from the last few lines
-    for line in lines:
-        if ':' in line:
-            substitutions.append(line.strip())

-    # Format output
-    formatted_recipe = f"## {title}\n\n### Ingredients:\n"
-    formatted_recipe += '\n'.join(f"- {item}" for item in ingredients) + "\n\n"
-    formatted_recipe += "### Instructions:\n" + '\n'.join(f"{i + 1}. {line}" for i, line in enumerate(instructions)) + "\n\n"
-
-    if substitutions:
-        formatted_recipe += "### Substitutions:\n" + '\n'.join(f"- **{line.split(':')[0].strip()}**: {line.split(':')[1].strip()}" for line in substitutions) + "\n"
-    return formatted_recipe
-    # print(formatted_recipe)
-def generate_text(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
+def generate_text(message):
     # Retrieve context from vector store
     context_results = vector_store.search_context(message, n_results=1)
     context = context_results[0] if context_results else ""

-    input_prompt = f"[INST] <<SYS>>\n{system_message}\n<</SYS>>\n\n {context}\n"
-    for interaction in history:
-        input_prompt += f"{interaction[0]} [/INST] {interaction[1]} </s><s> [INST] "
-    input_prompt += f"{message} [/INST] "
-
-    print("Input prompt:", input_prompt)  # Debugging output
+    # Create the prompt template
+    prompt_template = (
+        f"SYSTEM: You are a recipe generating bot.\n"
+        f"SYSTEM: {context}\n"
+        f"USER: {message}\n"
+        f"ASSISTANT:\n"
+    )

-    temp = ""
+    # Generate text using the language model
     output = llm(
-        input_prompt,
-        temperature=temperature,
-        top_p=top_p,
-        top_k=40,
-        repeat_penalty=1.1,
-        max_tokens=max_tokens,
-        stop=["", " \n", "ASSISTANT:", "USER:", "SYSTEM:"],
-        stream=True,
-    )
-    for out in output:
-        temp += format_recipe(out["choices"][0]["text"])
-        yield temp
+        prompt_template,
+        # max_new_tokens=256,
+        temperature=0.3,
+        top_p=0.95,
+        top_k=40,
+        repeat_penalty=1.1,
+        max_tokens=600,
+        # repetition_penalty=1.1
+    )
+
+    # Process the output
+    input_string = output['choices'][0]['text'].strip()
+    cleaned_text = input_string.strip("[]'").replace('\\n', '\n')
+    continuous_text = '\n'.join(cleaned_text.split('\n'))
+    return continuous_text

 # Define the Gradio interface
-demo = gr.ChatInterface(
-    generate_text,
+demo = gr.Interface(
+    fn=generate_text,
+    inputs=[
+        gr.Textbox(lines=2, placeholder="Enter your message here...", label="Message"),
+    ],
+    outputs=gr.Textbox(label="Generated Text"),
     title="llama-cpp-python on GPU with ChromaDB",
     description="Running LLM with context retrieval from ChromaDB",
     examples=[
         ["I have leftover rice, what can I make out of it?"],
-        ["Can I make lunch for two people with this?"],
+        ["I just have some milk and chocolate, what dessert can I make?"],
+        ["I am allergic to coconut milk, what can I use instead in a Thai curry?"],
+        ["Can you suggest a vegan breakfast recipe?"],
+        ["How do I make a perfect scrambled egg?"],
+        ["Can you guide me through making a soufflé?"],
     ],
     cache_examples=False,
-    retry_btn=None,
-    undo_btn="Delete Previous",
-    clear_btn="Clear",
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
-    ],
 )

 if __name__ == "__main__":
     demo.launch()
+
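
A note on `populate_vectors`: the commented-out "entire dataset" variant hints at batch encoding, which is considerably faster than encoding one concatenated recipe at a time as the committed loop does. Below is a sketch of that batched form; the id handling is an assumption, since the original snippet referenced `ids` and `doc_id` without defining them.

```python
# Batched variant of populate_vectors, sketched from the commented-out
# version in this diff. The enumerate-based ids are an assumption; the
# original snippet used undefined ids/doc_id variables.
def populate_vectors_batched(self, texts):
    # encode() with batch_size embeds many documents per forward pass
    embeddings = self.embedding_model.encode(texts, batch_size=32).tolist()
    for doc_id, (text, embedding) in enumerate(zip(texts, embeddings)):
        self.collection.add(embeddings=[embedding],
                            documents=[text], ids=[str(doc_id)])
```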
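On the retrieval path: the diff cuts `search_context` off before its return statement. Chroma's `collection.query` returns a dict of lists (including `ids`, `documents`, and `distances`, one inner list per query embedding), so the method presumably unwraps `results['documents']` before `generate_text` indexes the result as `context_results[0]`. A minimal, self-contained sketch of that query pattern, with a toy collection name and documents standing in for the recipe dataset:

```python
# Minimal sketch of the Chroma query pattern behind search_context.
# The collection name and toy documents are illustrative, not from this commit.
import chromadb
from sentence_transformers import SentenceTransformer

model = SentenceTransformer('sentence-transformers/multi-qa-MiniLM-L6-cos-v1')
client = chromadb.Client()
collection = client.create_collection(name="embedding_vector_demo")

# Populate with a couple of toy documents, mirroring populate_vectors
for i, doc in enumerate(["fried rice with leftover rice and egg",
                         "chocolate milk pudding"]):
    collection.add(embeddings=[model.encode(doc).tolist()],
                   documents=[doc], ids=[str(i)])

# query() takes a list of embeddings and returns dicts of lists,
# one inner list per query embedding
results = collection.query(
    query_embeddings=model.encode(["what can I cook with rice?"]).tolist(),
    n_results=1,
)
print(results["documents"][0])  # e.g. ['fried rice with leftover rice and egg']
```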
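Finally, a hedged smoke test for the new generation path: one call to the CapybaraHermes model with the `SYSTEM/USER/ASSISTANT` prompt template, outside Gradio. The sampling values mirror the `llm()` call added in this commit; the `stop` list is an assumption to keep the completion bounded, since the committed call no longer sets one.

```python
# Smoke test for the model swap and prompt template in this commit.
# The stop list is an assumption; the committed llm() call omits it.
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

llm = Llama(
    model_path=hf_hub_download(
        repo_id="TheBloke/CapybaraHermes-2.5-Mistral-7B-GGUF",
        filename="capybarahermes-2.5-mistral-7b.Q2_K.gguf",
    ),
    n_ctx=2048,
    n_gpu_layers=50,  # adjust to your VRAM; 0 runs CPU-only
)

prompt = (
    "SYSTEM: You are a recipe generating bot.\n"
    "SYSTEM: fried rice with leftover rice and egg\n"  # stand-in for retrieved context
    "USER: I have leftover rice, what can I make out of it?\n"
    "ASSISTANT:\n"
)
output = llm(prompt, temperature=0.3, top_p=0.95, top_k=40,
             repeat_penalty=1.1, max_tokens=600, stop=["USER:"])
print(output["choices"][0]["text"].strip())
```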