Ankitajadhav committed
Commit 40167a9 · verified · 1 Parent(s): 4dde30e

Update app.py

Files changed (1):
  1. app.py +34 -37
app.py CHANGED
@@ -1,28 +1,23 @@
-import os
 import gradio as gr
 import copy
 from llama_cpp import Llama
 from huggingface_hub import hf_hub_download
 import chromadb
 from sentence_transformers import SentenceTransformer
-import logging
-
-# Initialize logging
-logging.basicConfig(level=logging.INFO)
 
 # Initialize the Llama model
-try:
-    llm = Llama(
-        # model_path="./models/Phi-3-mini-4k-instruct-gguf",
-        # model_path = "./models/Phi-3-mini-4k-instruct-q4.gguf",
-        model_path = "microsoft/Phi-3-mini-4k-instruct-gguf",
-        n_ctx=2048,
-        n_gpu_layers=50,  # Adjust based on your VRAM
-    )
-    logging.info("Llama model loaded successfully.")
-except Exception as e:
-    logging.error(f"Error loading Llama model: {e}")
-    raise
+llm = Llama(
+    # model_path=hf_hub_download(
+    #     repo_id="microsoft/Phi-3-mini-4k-instruct-gguf",
+    #     filename="Phi-3-mini-4k-instruct-q4.gguf",
+    # ),
+    model_path=hf_hub_download(
+        repo_id="Ankitajadhav/Phi-3-mini-4k-instruct-q4.gguf",
+        filename="Phi-3-mini-4k-instruct-q4.gguf",
+    ),
+    n_ctx=2048,
+    n_gpu_layers=50,  # Adjust based on your VRAM
+)
 
 # Initialize ChromaDB Vector Store
 class VectorStore:
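
The substantive fix in this hunk: Llama(model_path=...) expects a path to a local .gguf file, so the old model_path = "microsoft/Phi-3-mini-4k-instruct-gguf" (a Hub repo id) could never resolve. hf_hub_download turns a (repo_id, filename) pair into a locally cached file and returns that path. A minimal standalone sketch of the resolution, not part of the commit:

# Sketch only: resolve a GGUF file on the Hub to a local cached path.
from huggingface_hub import hf_hub_download

local_path = hf_hub_download(
    repo_id="Ankitajadhav/Phi-3-mini-4k-instruct-q4.gguf",  # repo id from the diff
    filename="Phi-3-mini-4k-instruct-q4.gguf",
)
# The file lands in the local HF cache (e.g. under ~/.cache/huggingface/hub/)
print(local_path)
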
@@ -61,40 +56,42 @@ def generate_text(
         input_prompt += f"{interaction[0]} [/INST] {interaction[1]} </s><s> [INST] "
     input_prompt += f"{message} [/INST] "
 
-    logging.info("Input prompt:\n%s", input_prompt)  # Debugging output
+    print("Input prompt:", input_prompt)  # Debugging output
 
     temp = ""
-    try:
-        output = llm(
-            input_prompt,
-            temperature=temperature,
-            top_p=top_p,
-            top_k=40,
-            repeat_penalty=1.1,
-            max_tokens=max_tokens,
-            stop=["", " \n", "ASSISTANT:", "USER:", "SYSTEM:"],
-            stream=True,
-        )
-        for out in output:
-            temp += out["choices"][0]["text"]
-            logging.info("Model output:\n%s", temp)  # Log model output
-            yield temp
-    except Exception as e:
-        logging.error(f"Error during text generation: {e}")
-        yield "An error occurred during text generation."
+    output = llm(
+        input_prompt,
+        temperature=temperature,
+        top_p=top_p,
+        top_k=40,
+        repeat_penalty=1.1,
+        max_tokens=max_tokens,
+        stop=["", " \n", "ASSISTANT:", "USER:", "SYSTEM:"],
+        stream=True,
+    )
+    for out in output:
+        temp += out["choices"][0]["text"]
+        yield temp
 
 # Define the Gradio interface
 demo = gr.ChatInterface(
     generate_text,
+    title="llama-cpp-python on GPU with ChromaDB",
+    description="Running LLM with context retrieval from ChromaDB",
     examples=[
         ["I have leftover rice, what can I make out of it?"],
         ["Can I make lunch for two people with this?"],
-        ["Some good dessert with leftover cake"]
     ],
     cache_examples=False,
     retry_btn=None,
     undo_btn="Delete Previous",
     clear_btn="Clear",
+    # additional_inputs=[
+    #     gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
+    #     gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+    #     gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+    #     gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
+    # ],
 )
 
 if __name__ == "__main__":
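
For context on the now-unguarded generation call: with stream=True, calling the Llama object returns an iterator of partial completions, each chunk shaped like {"choices": [{"text": ...}]}, and gr.ChatInterface re-renders the reply on every yield of the growing string. A sketch of that consumption pattern, assuming the llm instance defined above:

# Sketch, not from the commit: consuming llama-cpp-python streaming output.
# Assumes `llm` is the Llama instance constructed earlier in app.py.
partial = ""
for chunk in llm(
    "[INST] I have leftover rice, what can I make out of it? [/INST] ",
    max_tokens=128,
    stream=True,  # yield incremental chunks instead of one final dict
):
    partial += chunk["choices"][0]["text"]  # append the newly generated text
    print(partial)  # the same growing string that generate_text yields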
 
 
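The two unchanged lines at the top of the second hunk are the tail of the prompt builder: chat history is folded into Llama-2-style [INST] ... [/INST] turns separated by </s><s>. A self-contained sketch of that assembly; the opening "<s> [INST] " is an assumption, since the diff shows only the loop body:

# Sketch, not from the commit: assembling the [INST]-style prompt.
def build_prompt(message, history):
    prompt = "<s> [INST] "  # assumed opening tokens; not visible in the diff
    for user_turn, bot_turn in history:
        prompt += f"{user_turn} [/INST] {bot_turn} </s><s> [INST] "
    prompt += f"{message} [/INST] "
    return prompt

print(build_prompt(
    "Can I make lunch for two people with this?",
    [("I have leftover rice, what can I make out of it?", "Try fried rice.")],
))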
 
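The diff context cuts off at the __main__ guard; the launch call itself sits outside the hunk. In a Gradio Space the guard conventionally wraps:

# Conventional Gradio entry point; the actual line is truncated out of this diff.
if __name__ == "__main__":
    demo.launch()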