Ankitajadhav committed on
Commit
bda7944
·
verified ·
1 Parent(s): 9ccd468

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -34
app.py CHANGED
@@ -5,18 +5,22 @@ from llama_cpp import Llama
5
  from huggingface_hub import hf_hub_download
6
  import chromadb
7
  from sentence_transformers import SentenceTransformer
 
 
 
 
8
 
9
  # Initialize the Llama model
10
- llm = Llama(
11
- # model_path=hf_hub_download(
12
- # repo_id="microsoft/Phi-3-mini-4k-instruct-gguf",
13
- # filename="Phi-3-mini-4k-instruct-q4.gguf",
14
- # ),
15
- model_path = "./models/Phi-3-mini-4k-instruct-gguf",
16
- # model_path = "NicholasJohn/OpenBioLLM-Llama3-8B-Q5_K_M.gguf",
17
- n_ctx=2048,
18
- n_gpu_layers=50, # Adjust based on your VRAM
19
- )
20
 
21
  # Initialize ChromaDB Vector Store
22
  class VectorStore:
@@ -38,9 +42,6 @@ class VectorStore:
38
  # Example initialization (assuming you've already populated the vector store)
39
  vector_store = VectorStore("embedding_vector")
40
 
41
- # Populate with your data if not already done
42
- # vector_store.populate_vectors(your_texts, your_ids)
43
-
44
  def generate_text(
45
  message,
46
  history: list[tuple[str, str]],
@@ -58,40 +59,40 @@ def generate_text(
58
  input_prompt += f"{interaction[0]} [/INST] {interaction[1]} </s><s> [INST] "
59
  input_prompt += f"{message} [/INST] "
60
 
 
 
61
  temp = ""
62
- output = llm(
63
- input_prompt,
64
- temperature=temperature,
65
- top_p=top_p,
66
- top_k=40,
67
- repeat_penalty=1.1,
68
- max_tokens=max_tokens,
69
- stop=["", " \n", "ASSISTANT:", "USER:", "SYSTEM:"],
70
- stream=True,
71
- )
72
- for out in output:
73
- temp += out["choices"][0]["text"]
74
- yield temp
 
 
 
 
 
75
 
76
  # Define the Gradio interface
77
  demo = gr.ChatInterface(
78
  generate_text,
79
- title="llama-cpp-python on GPU with ChromaDB",
80
- description="Running LLM with context retrieval from ChromaDB",
81
  examples=[
82
  ["I have leftover rice, what can I make out of it?"],
83
  ["Can I make lunch for two people with this?"],
 
84
  ],
85
  cache_examples=False,
86
  retry_btn=None,
87
  undo_btn="Delete Previous",
88
  clear_btn="Clear",
89
- additional_inputs=[
90
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
91
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
92
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
93
- gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
94
- ],
95
  )
96
 
97
  if __name__ == "__main__":
 
5
  from huggingface_hub import hf_hub_download
6
  import chromadb
7
  from sentence_transformers import SentenceTransformer
8
+ import logging
9
+
10
+ # Initialize logging
11
+ logging.basicConfig(level=logging.INFO)
12
 
13
  # Initialize the Llama model
14
+ try:
15
+ llm = Llama(
16
+ model_path="./models/Phi-3-mini-4k-instruct-gguf",
17
+ n_ctx=2048,
18
+ n_gpu_layers=50, # Adjust based on your VRAM
19
+ )
20
+ logging.info("Llama model loaded successfully.")
21
+ except Exception as e:
22
+ logging.error(f"Error loading Llama model: {e}")
23
+ raise
24
 
25
  # Initialize ChromaDB Vector Store
26
  class VectorStore:
 
42
  # Example initialization (assuming you've already populated the vector store)
43
  vector_store = VectorStore("embedding_vector")
44
 
 
 
 
45
  def generate_text(
46
  message,
47
  history: list[tuple[str, str]],
 
59
  input_prompt += f"{interaction[0]} [/INST] {interaction[1]} </s><s> [INST] "
60
  input_prompt += f"{message} [/INST] "
61
 
62
+ logging.info("Input prompt:\n%s", input_prompt) # Debugging output
63
+
64
  temp = ""
65
+ try:
66
+ output = llm(
67
+ input_prompt,
68
+ temperature=temperature,
69
+ top_p=top_p,
70
+ top_k=40,
71
+ repeat_penalty=1.1,
72
+ max_tokens=max_tokens,
73
+ stop=["", " \n", "ASSISTANT:", "USER:", "SYSTEM:"],
74
+ stream=True,
75
+ )
76
+ for out in output:
77
+ temp += out["choices"][0]["text"]
78
+ logging.info("Model output:\n%s", temp) # Log model output
79
+ yield temp
80
+ except Exception as e:
81
+ logging.error(f"Error during text generation: {e}")
82
+ yield "An error occurred during text generation."
83
 
84
  # Define the Gradio interface
85
  demo = gr.ChatInterface(
86
  generate_text,
 
 
87
  examples=[
88
  ["I have leftover rice, what can I make out of it?"],
89
  ["Can I make lunch for two people with this?"],
90
+ ["Some good dessert with leftover cake"]
91
  ],
92
  cache_examples=False,
93
  retry_btn=None,
94
  undo_btn="Delete Previous",
95
  clear_btn="Clear",
 
 
 
 
 
 
96
  )
97
 
98
  if __name__ == "__main__":