Create app.py
app.py
CHANGED
@@ -86,7 +86,7 @@ def initialize_llm():
 
     try:
         tokenizer = AutoTokenizer.from_pretrained(HF_MODEL, token=HF_API_TOKEN)
-        model = AutoModelForCausalLM.from_pretrained(HF_MODEL, token=HF_API_TOKEN, device_map="
+        model = AutoModelForCausalLM.from_pretrained(HF_MODEL, token=HF_API_TOKEN, device_map="auto")
         logger.info(f"Initialized local Hugging Face model: {HF_MODEL}")
         return (model, tokenizer), "hf_local"
     except Exception as e:
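The removed line ended mid-string (`device_map="`), a syntax error that this hunk completes. For context, a minimal sketch of the loading pattern, assuming `transformers` and `accelerate` are installed; the model id and token below are illustrative placeholders standing in for the Space's `HF_MODEL` and `HF_API_TOKEN`:

# Minimal sketch of the loading pattern (assumes `transformers` and
# `accelerate` are installed; model id and token are placeholders).
from transformers import AutoModelForCausalLM, AutoTokenizer

HF_MODEL = "mistralai/Mistral-7B-Instruct-v0.2"  # placeholder model id
HF_API_TOKEN = None  # placeholder; pass a real token for gated models

tokenizer = AutoTokenizer.from_pretrained(HF_MODEL, token=HF_API_TOKEN)
# device_map="auto" lets accelerate place the weights on the available
# GPU(s), falling back to CPU when none are present.
model = AutoModelForCausalLM.from_pretrained(
    HF_MODEL, token=HF_API_TOKEN, device_map="auto"
)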
@@ -155,7 +155,7 @@ async def parse_question(state: JARVISState) -> JARVISState:
     inputs = tokenizer.apply_chat_template(
         [{"role": "system", "content": prompt[0].content}, {"role": "user", "content": prompt[1].content}],
         return_tensors="pt"
-    ).to(
+    ).to(model.device)
     outputs = model.generate(inputs, max_new_tokens=512, temperature=0.7)
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
     tools_needed = json.loads(response.strip())
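Here the removed line ended at `.to(` with no argument, another syntax error; the fix moves the tokenized prompt onto the device holding the model's weights, which `generate` requires. A runnable sketch of the fixed call path, reusing `model` and `tokenizer` from the loading sketch above (the messages are illustrative stand-ins for the Space's prompt objects):

# Sketch of the fixed generate path; messages are illustrative.
messages = [
    {"role": "system", "content": "Decide which tools are needed."},
    {"role": "user", "content": "What is the capital of France?"},
]
inputs = tokenizer.apply_chat_template(messages, return_tensors="pt")
inputs = inputs.to(model.device)  # inputs must sit on the same device as the weights
# Note: transformers warns that temperature is ignored unless do_sample=True.
outputs = model.generate(inputs, max_new_tokens=512, temperature=0.7)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)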
@@ -322,7 +322,7 @@ Document results: {document_results}""")
     try:
         if llm_type == "hf_local":
             model, tokenizer = llm_client
-            inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to(
+            inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to(model.device)
             outputs = model.generate(inputs, max_new_tokens=512, temperature=0.7)
             answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
         else:
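This third hunk applies the same `.to(model.device)` completion to the final-answer path. One caveat that holds for both generate calls, though the commit does not address it: `generate` returns the prompt tokens followed by the new tokens, so decoding `outputs[0]` whole also reproduces the rendered chat template. An optional refinement, continuing the sketch above:

# Optional refinement, not part of this commit: slice off the prompt
# tokens so the decoded string contains only the newly generated text.
prompt_len = inputs.shape[-1]  # length of the tokenized prompt
answer = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)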