ShravanHN commited on
Commit
7ba6437
·
1 Parent(s): 8410c86

Modified the @spaces.GPU duration (120s → 110s) to stay within the limit

Browse files
Files changed (1) hide show
  1. app.py +1 -1
app.py CHANGED
@@ -153,7 +153,7 @@ def generate_response_for_chunk(chunk, history, temperature, max_new_tokens):
153
 
154
  return "".join(outputs)
155
 
156
- @spaces.GPU(duration=120)
157
  def chat_llama3_8b(message: str, history: list, temperature: float, max_new_tokens: int):
158
  """
159
  Generate a streaming response using the llama3-8b model with chunking.
 
153
 
154
  return "".join(outputs)
155
 
156
+ @spaces.GPU(duration=110)
157
  def chat_llama3_8b(message: str, history: list, temperature: float, max_new_tokens: int):
158
  """
159
  Generate a streaming response using the llama3-8b model with chunking.