Update app.py
app.py CHANGED
@@ -44,6 +44,8 @@ from llama_index.core.node_parser.relational.base_element import (
 )
 from llama_index.core.schema import BaseNode, TextNode
 
+
+api_token = os.getenv("HF_TOKEN")
 # Implementations
 
 # default_persist_directory = './chroma_HF/'
@@ -94,6 +96,7 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
     if llm_model == "mistralai/Mixtral-8x7B-Instruct-v0.3":
         llm = HuggingFaceEndpoint(
             repo_id=llm_model,
+            huggingfacehub_api_token = api_token,
             temperature = temperature,
             max_new_tokens = max_tokens,
             top_k = top_k,
@@ -103,6 +106,7 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
         raise gr.Error("LLM model is too large to be loaded automatically on free inference endpoint")
         llm = HuggingFaceEndpoint(
             repo_id=llm_model,
+            huggingfacehub_api_token = api_token,
             temperature = temperature,
             max_new_tokens = max_tokens,
             top_k = top_k,
@@ -110,6 +114,7 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
     elif llm_model == "microsoft/phi-2":
         llm = HuggingFaceEndpoint(
             repo_id=llm_model,
+            huggingfacehub_api_token = api_token,
             temperature = temperature,
             max_new_tokens = max_tokens,
             top_k = top_k,
@@ -118,7 +123,8 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
         )
     elif llm_model == "TinyLlama/TinyLlama-1.1B-Chat-v1.0":
         llm = HuggingFaceEndpoint(
-            repo_id=llm_model,
+            repo_id=llm_model,
+            huggingfacehub_api_token = api_token,
             temperature = temperature,
             max_new_tokens = 250,
             top_k = top_k,
@@ -127,6 +133,7 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
         raise gr.Error("Llama-2-7b-chat-hf model requires a Pro subscription...")
         llm = HuggingFaceEndpoint(
             repo_id=llm_model,
+            huggingfacehub_api_token = api_token,
             temperature = temperature,
             max_new_tokens = max_tokens,
             top_k = top_k,
@@ -134,6 +141,7 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
     else:
         llm = HuggingFaceEndpoint(
             repo_id=llm_model,
+            huggingfacehub_api_token = api_token,
             temperature = temperature,
             max_new_tokens = max_tokens,
             top_k = top_k,
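Every branch of initialize_llmchain repeats the same change: HF_TOKEN is read from the environment once at module level and passed to HuggingFaceEndpoint as huggingfacehub_api_token. A minimal sketch of that pattern, collapsed into one helper, follows; the build_llm name and the langchain_community import path are assumptions for illustration, not part of app.py.

import os

# Assumed import path; app.py may import HuggingFaceEndpoint from a
# different LangChain module.
from langchain_community.llms import HuggingFaceEndpoint

# Read the Hugging Face token once, as the commit does at module level.
api_token = os.getenv("HF_TOKEN")

def build_llm(llm_model, temperature, max_tokens, top_k):
    # Hypothetical helper: one endpoint construction instead of one per branch.
    # TinyLlama keeps the hard-coded 250-token cap used in app.py.
    if llm_model == "TinyLlama/TinyLlama-1.1B-Chat-v1.0":
        max_tokens = 250
    return HuggingFaceEndpoint(
        repo_id=llm_model,
        huggingfacehub_api_token=api_token,  # the argument added by this commit
        temperature=temperature,
        max_new_tokens=max_tokens,
        top_k=top_k,
    )

Passing the token explicitly means the endpoint client no longer relies on an ambient login, which is presumably why the commit threads api_token through every HuggingFaceEndpoint call on a Space where the token is only exposed as the HF_TOKEN secret.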