hackergeek98 committed on
Commit
34ef334
·
verified ·
1 Parent(s): 142217d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -12
app.py CHANGED
@@ -6,7 +6,7 @@ from huggingface_hub import login
6
  # Fetch token from environment (automatically loaded from secrets)
7
  hf_token = os.getenv("gemma3")
8
  login(hf_token)
9
-
10
  client = InferenceClient("hackergeek98/gemma-finetuned")
11
 
12
  def respond(
@@ -30,21 +30,20 @@ def respond(
30
  # Adding the new user message
31
  messages.append({"role": "user", "content": message})
32
 
33
- # Initialize the response string
34
- response = ""
35
 
36
- # Corrected method for chat completion
37
- for message in client.chat_completion(
38
- messages=messages, # Argument should be named 'messages'
 
39
  max_tokens=max_tokens,
40
  temperature=temperature,
41
  top_p=top_p,
42
- stream=True # Stream the response
43
- ):
44
- # Accumulate the response from the streaming output
45
- token = message.choices[0].delta.content
46
- response += token
47
- yield response
48
 
49
  # Gradio interface setup
50
  demo = gr.ChatInterface(
 
6
  # Fetch token from environment (automatically loaded from secrets)
7
  hf_token = os.getenv("gemma3")
8
  login(hf_token)
9
+ # Initialize the client with your model
10
  client = InferenceClient("hackergeek98/gemma-finetuned")
11
 
12
  def respond(
 
30
  # Adding the new user message
31
  messages.append({"role": "user", "content": message})
32
 
33
+ # Prepare the prompt for generation
34
+ prompt = " ".join([msg["content"] for msg in messages])
35
 
36
+ # Call the Inference API for text generation (or chat completion if supported)
37
+ response = client.completion(
38
+ model="hackergeek98/gemma-finetuned", # Specify the model
39
+ prompt=prompt,
40
  max_tokens=max_tokens,
41
  temperature=temperature,
42
  top_p=top_p,
43
+ )
44
+
45
+ # The response will contain the generated text
46
+ return response["choices"][0]["text"]
 
 
47
 
48
  # Gradio interface setup
49
  demo = gr.ChatInterface(