Akshayram1 committed on
Commit fd31789 · verified · 1 Parent(s): 54d4e0a

Update app.py

Files changed (1)
  1. app.py +3 -39
app.py CHANGED
@@ -25,7 +25,7 @@ import re
 
 
 # default_persist_directory = './chroma_HF/'
-list_llm = ["HuggingFaceH4/zephyr-7b-beta", "mistralai/Mistral-7B-Instruct-v0.2"]
+list_llm = ["HuggingFaceH4/zephyr-7b-beta"]
 list_llm_simple = [os.path.basename(llm) for llm in list_llm]
 
 
@@ -97,16 +97,7 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
     # Use of trust_remote_code as model_kwargs
     # Warning: langchain issue
     # URL: https://github.com/langchain-ai/langchain/issues/6080
-    if llm_model == "mistralai/Mixtral-8x7B-Instruct-v0.1":
-        llm = HuggingFaceEndpoint(
-            repo_id=llm_model,
-            # model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k, "load_in_8bit": True}
-            temperature = temperature,
-            max_new_tokens = max_tokens,
-            top_k = top_k,
-            load_in_8bit = True,
-        )
-    elif llm_model in ["HuggingFaceH4/zephyr-7b-gemma-v0.1","mosaicml/mpt-7b-instruct"]:
+    if llm_model in ["HuggingFaceH4/zephyr-7b-gemma-v0.1","mosaicml/mpt-7b-instruct"]:
         raise gr.Error("LLM model is too large to be loaded automatically on free inference endpoint")
         llm = HuggingFaceEndpoint(
             repo_id=llm_model,
@@ -114,34 +105,7 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
             max_new_tokens = max_tokens,
             top_k = top_k,
         )
-    elif llm_model == "microsoft/phi-2":
-        raise gr.Error("phi-2 model requires 'trust_remote_code=True', currently not supported by langchain HuggingFaceHub...")
-        llm = HuggingFaceEndpoint(
-            repo_id=llm_model,
-            # model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k, "trust_remote_code": True, "torch_dtype": "auto"}
-            temperature = temperature,
-            max_new_tokens = max_tokens,
-            top_k = top_k,
-            trust_remote_code = True,
-            torch_dtype = "auto",
-        )
-    elif llm_model == "TinyLlama/TinyLlama-1.1B-Chat-v1.0":
-        llm = HuggingFaceEndpoint(
-            repo_id=llm_model,
-            # model_kwargs={"temperature": temperature, "max_new_tokens": 250, "top_k": top_k}
-            temperature = temperature,
-            max_new_tokens = 250,
-            top_k = top_k,
-        )
-    elif llm_model == "meta-llama/Llama-2-7b-chat-hf":
-        raise gr.Error("Llama-2-7b-chat-hf model requires a Pro subscription...")
-        llm = HuggingFaceEndpoint(
-            repo_id=llm_model,
-            # model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k}
-            temperature = temperature,
-            max_new_tokens = max_tokens,
-            top_k = top_k,
-        )
+
     else:
         llm = HuggingFaceEndpoint(
             repo_id=llm_model,
 
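The surviving default branch builds the LLM with HuggingFaceEndpoint and the parameters visible in the hunks above. A minimal sketch of that path, assuming the import comes from langchain_community.llms (the import statement is outside these hunks) and using illustrative values in place of the UI-supplied parameters:

# Sketch of the default branch kept by this commit (illustrative, not part of the diff).
# Assumption: HuggingFaceEndpoint is imported from langchain_community.llms and a
# HUGGINGFACEHUB_API_TOKEN is available in the environment for the inference endpoint.
import os
from langchain_community.llms import HuggingFaceEndpoint

list_llm = ["HuggingFaceH4/zephyr-7b-beta"]  # the single model left after this change
list_llm_simple = [os.path.basename(llm) for llm in list_llm]

llm = HuggingFaceEndpoint(
    repo_id=list_llm[0],
    temperature=0.7,      # illustrative; the app passes the value chosen in the UI
    max_new_tokens=1024,  # illustrative
    top_k=3,              # illustrative
)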