yongdong committed on
Commit
9720765
·
1 Parent(s): 9ffc795

feat: streamline inference output to JSON-only

Browse files
Files changed (1) hide show
  1. app.py +6 -3
app.py CHANGED
@@ -109,7 +109,10 @@ def generate_response_gpu(prompt, max_tokens=200, temperature=0.7, top_p=0.9):
109
 
110
  try:
111
  # Format input
112
- formatted_prompt = f"### Human: {prompt.strip()}\n### Assistant:"
 
 
 
113
 
114
  # Encode input
115
  inputs = tokenizer(
@@ -138,8 +141,8 @@ def generate_response_gpu(prompt, max_tokens=200, temperature=0.7, top_p=0.9):
138
  response = tokenizer.decode(outputs[0], skip_special_tokens=True)
139
 
140
  # Extract generated part
141
- if "### Assistant:" in response:
142
- response = response.split("### Assistant:")[-1].strip()
143
  elif len(response) > len(formatted_prompt):
144
  response = response[len(formatted_prompt):].strip()
145
 
 
109
 
110
  try:
111
  # Format input
112
+ formatted_prompt = (
113
+ f"### Instruction:\n{prompt.strip()}\n\n"
114
+ "### Response:\n"
115
+ )
116
 
117
  # Encode input
118
  inputs = tokenizer(
 
141
  response = tokenizer.decode(outputs[0], skip_special_tokens=True)
142
 
143
  # Extract generated part
144
+ if "### Response:" in response:
145
+ response = response.split("### Response:")[-1].strip()
146
  elif len(response) > len(formatted_prompt):
147
  response = response[len(formatted_prompt):].strip()
148