Tijmen2 committed
Commit 00e2bff · verified · 1 Parent(s): 198d160

Update app.py

Files changed (1)
  1. app.py +39 -40
app.py CHANGED
@@ -1,21 +1,19 @@
 import spaces
 import gradio as gr
-from llama_cpp import Llama
+from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
 from huggingface_hub import hf_hub_download
+import torch
 import random
 
-model_path = hf_hub_download(
-    repo_id="AstroMLab/AstroSage-8B-GGUF",
-    filename="AstroSage-8B-Q8_0.gguf"
-)
-
-llm = Llama(
-    model_path=model_path,
-    n_ctx=2048,
-    chat_format="llama-3",
-    n_gpu_layers=50,  # ensure all layers are on GPU
-    flash_attn=True,
+# Load model and tokenizer from Hugging Face
+model_name = "AstroMLab/AstroSage-8B-GGUF"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    torch_dtype=torch.float16,
+    device_map="auto"
 )
+streamer = TextStreamer(tokenizer)
 
 # Placeholder responses for when context is empty
 GREETING_MESSAGES = [
@@ -33,42 +31,43 @@ def user(user_message, history):
 
 @spaces.GPU(duration=20)
 def bot(history):
-    """Yield the chatbot response for streaming."""
+    """Generate the chatbot response."""
 
     if not history:
         history = []
-
-    # Prepare the messages for the model
-    messages = [
-        {
-            "role": "system",
-            "content": "You are AstroSage, an intelligent AI assistant specializing in astronomy, astrophysics, and cosmology. Provide accurate, scientific information while making complex concepts accessible. You're enthusiastic about space exploration and maintain a sense of wonder about the cosmos."
-        }
-    ]
-
-    # Add chat history
-    for message in history[:-1]:  # Exclude the last message which we just added
-        messages.append({"role": message["role"], "content": message["content"]})
-
-    # Add the current user message
-    messages.append({"role": "user", "content": history[-1]["content"]})
 
-    # Start generating the response
-    history.append({"role": "assistant", "content": ""})
+    # Prepare input prompt for the model
+    system_prompt = (
+        "You are AstroSage, an intelligent AI assistant specializing in astronomy, astrophysics, and cosmology. "
+        "Provide accurate, scientific information while making complex concepts accessible. "
+        "You're enthusiastic about space exploration and maintain a sense of wonder about the cosmos."
+    )
 
-    # Stream the response
-    response = llm.create_chat_completion(
-        messages=messages,
-        max_tokens=512,
+    # Construct the chat history as a single input string
+    prompt = system_prompt + "\n\n"
+    for message in history:
+        if message["role"] == "user":
+            prompt += f"User: {message['content']}\n"
+        else:
+            prompt += f"AstroSage: {message['content']}\n"
+    prompt += "AstroSage: "
+
+    # Generate response
+    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+    outputs = model.generate(
+        **inputs,
+        max_new_tokens=512,
         temperature=0.7,
        top_p=0.95,
-        stream=True,
+        do_sample=True,
+        streamer=streamer
    )
-
-    for chunk in response:
-        if chunk and "content" in chunk["choices"][0]["delta"]:
-            history[-1]["content"] += chunk["choices"][0]["delta"]["content"]
-            yield history
+
+    # Decode the generated output and update history
+    response_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    response_text = response_text[len(prompt):].strip()
+    history.append({"role": "assistant", "content": response_text})
+    yield history
 
 def initial_greeting():
     """Return properly formatted initial greeting."""