Hjgugugjhuhjggg committed
Commit: a906c19
Parent: 4c939b4

Update app.py

Files changed (1): app.py (+5 -5)
app.py CHANGED
@@ -7,7 +7,7 @@ from langchain_community.llms import VLLM
 from gptcache import Cache
 from gptcache.manager.factory import manager_factory
 from gptcache.processor.pre import get_prompt
-from langchain.callbacks import get_openai_callback
+from langchain_community.callbacks.manager import get_openai_callback
 from sklearn.metrics.pairwise import cosine_similarity
 from sentence_transformers import SentenceTransformer
 import torch
@@ -27,10 +27,10 @@ cache = Cache()
 hf_token = os.environ.get("HF_TOKEN")
 
 llm_models = {
-    "TinyLlama": VLLM(model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", trust_remote_code=True, max_new_tokens=50, temperature=0.1, use_auth_token=hf_token),
-    "yi-coder": VLLM(model="01-ai/Yi-Coder-1.5B", trust_remote_code=True, max_new_tokens=50, temperature=0.6, use_auth_token=hf_token),
-    "llama": VLLM(model="meta-llama/Llama-3.2-3B-Instruct", trust_remote_code=True, max_new_tokens=50, temperature=0.1, use_auth_token=hf_token),
-    "qwen": VLLM(model="Qwen/Qwen2.5-1.5B-Instruct", trust_remote_code=True, max_new_tokens=50, temperature=0.6, use_auth_token=hf_token),
+    "TinyLlama": VLLM(model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", trust_remote_code=True, max_new_tokens=50, temperature=0.1, use_auth_token=hf_token, device="cpu"),
+    "yi-coder": VLLM(model="01-ai/Yi-Coder-1.5B", trust_remote_code=True, max_new_tokens=50, temperature=0.6, use_auth_token=hf_token, device="cpu"),
+    "llama": VLLM(model="meta-llama/Llama-3.2-3B-Instruct", trust_remote_code=True, max_new_tokens=50, temperature=0.1, use_auth_token=hf_token, device="cpu"),
+    "qwen": VLLM(model="Qwen/Qwen2.5-1.5B-Instruct", trust_remote_code=True, max_new_tokens=50, temperature=0.6, use_auth_token=hf_token, device="cpu"),
 }
 
 for llm_name, llm in llm_models.items():
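
The import fix tracks LangChain's package split: get_openai_callback moved out of the legacy langchain.callbacks namespace into langchain_community.callbacks.manager. A minimal sketch of how that callback is typically used, assuming app.py wraps its LLM calls this way (the helper name is hypothetical):

from langchain_community.callbacks.manager import get_openai_callback

def run_with_usage(llm, prompt: str) -> str:
    # Hypothetical helper: calls made inside the context manager have
    # their token counts and cost accumulated on `cb`.
    with get_openai_callback() as cb:
        result = llm.invoke(prompt)
    print(f"tokens used: {cb.total_tokens}, cost: ${cb.total_cost:.4f}")
    return result

Note that this callback only records usage reported by OpenAI-style models, so for the local vLLM models above the counters may simply stay at zero.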
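The second hunk is the substance of the commit: every VLLM instance is pinned to device="cpu", presumably so the Space runs on CPU-only hardware (this assumes a CPU-capable vLLM build). A short usage sketch against the llm_models dict from the diff, with an illustrative prompt:

prompt = "Explain semantic caching in one sentence."  # illustrative only

for llm_name, llm in llm_models.items():
    # LangChain's LLM interface: invoke(str) returns the completion string.
    print(f"[{llm_name}] {llm.invoke(prompt)}")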
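For context on the gptcache imports in the first hunk: Cache, manager_factory, and get_prompt are the standard pieces of a GPTCache setup. A minimal sketch of an init matching those imports (the data_dir value is a placeholder, not taken from app.py):

from gptcache import Cache
from gptcache.manager.factory import manager_factory
from gptcache.processor.pre import get_prompt

cache = Cache()
cache.init(
    # get_prompt extracts the prompt string to use as the cache key.
    pre_embedding_func=get_prompt,
    # "map" is the simplest exact-match data manager.
    data_manager=manager_factory("map", data_dir="llm_cache"),
)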
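Finally, the sklearn and sentence_transformers imports suggest the app also scores prompt similarity itself, on top of the cache. A hypothetical helper showing how those two imports combine (the encoder model name is an assumption, not from app.py):

from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

encoder = SentenceTransformer("all-MiniLM-L6-v2")  # assumed model name

def prompt_similarity(a: str, b: str) -> float:
    # Embed both prompts and return their cosine similarity in [-1, 1].
    emb = encoder.encode([a, b])
    return float(cosine_similarity([emb[0]], [emb[1]])[0][0])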