Update app.py
app.py CHANGED
@@ -26,9 +26,9 @@ HF_TOKEN = os.environ.get("Inference_Calls", None)
 # from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration, TextIteratorStreamer
 # processor = LlavaNextProcessor.from_pretrained("llava-hf/llava-v1.6-mistral-7b-hf")
 # model = LlavaNextForConditionalGeneration.from_pretrained("llava-hf/llava-v1.6-mistral-7b-hf", torch_dtype=torch.float16, low_cpu_mem_usage=True)
-from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
-model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
+"""from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 tokenizer = AutoTokenizer.from_pretrained(model_id, token=HF_TOKEN)
+model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
     device_map="auto",
@@ -37,7 +37,10 @@ model = AutoModelForCausalLM.from_pretrained(
 terminators = [
     tokenizer.eos_token_id,
     tokenizer.convert_tokens_to_ids("<|eot_id|>")
-]
+]"""
+from huggingface_hub import InferenceClient
+model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
+client = InferenceClient(model_id, api_key="HF_TOKEN")
 
 embeddings_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
 
@@ -46,12 +49,10 @@ ASR_BATCH_SIZE = 8
 ASR_CHUNK_LENGTH_S = 30
 TEMP_FILE_LIMIT_MB = 1024 #2048
 
-from huggingface_hub import InferenceClient
 """
 For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 """
 # client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 
 device = 0 if torch.cuda.is_available() else "cpu"
 
@@ -84,7 +85,7 @@ def respond(
             messages.append({"role": "assistant", "content": val[1]})
 
     messages.append({"role": "user", "content": message})
-
+    """
     input_ids = tokenizer.apply_chat_template(
         messages,
         add_generation_prompt=True,
@@ -128,7 +129,7 @@ def respond(
 
         response += token
         yield response
-
+
 
 @spaces.GPU
 def transcribe(asr_inputs, task):
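
Net effect of the commit: the local transformers path (AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer) is commented out behind a module-level triple-quoted string, and a serverless InferenceClient pointed at meta-llama/Meta-Llama-3-8B-Instruct is created in its place. One detail worth flagging: InferenceClient(model_id, api_key="HF_TOKEN") passes the literal string "HF_TOKEN" as the key rather than the token read from the environment at the top of the file (HF_TOKEN = os.environ.get("Inference_Calls", None), visible in the first hunk header). A minimal sketch of the presumable intent, passing the variable instead of the string:

import os
from huggingface_hub import InferenceClient

# Token is read from the Space secret named "Inference_Calls", as in the existing app.py.
HF_TOKEN = os.environ.get("Inference_Calls", None)

model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
# Pass the variable holding the token, not the literal string "HF_TOKEN".
client = InferenceClient(model_id, api_key=HF_TOKEN)

With the literal string, requests will most likely be rejected (401), since "HF_TOKEN" is not a valid token and Meta-Llama-3-8B-Instruct is a gated model that requires an authenticated, license-accepted account.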
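
The diff also wraps the body of respond that built input_ids with tokenizer.apply_chat_template and streamed from the local model in a triple-quoted string: the opening """ is added after messages.append({"role": "user", "content": message}), and the added line near yield response is shown empty in this view but is presumably the matching closing """, without which the file would not parse. The replacement generation path is not shown in this diff. Below is a sketch of how the message list built in the unchanged lines could be streamed through the client instead; the function signature (system_message, max_tokens, temperature, top_p) follows the standard Gradio ChatInterface template and is an assumption, not something this commit shows.

def respond(message, history, system_message, max_tokens, temperature, top_p):
    # Message building mirrors the unchanged lines visible in the diff.
    messages = [{"role": "system", "content": system_message}]
    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})
    messages.append({"role": "user", "content": message})

    # Stream chunks from the serverless endpoint instead of a local TextIteratorStreamer.
    response = ""
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        stream=True,
    ):
        token = chunk.choices[0].delta.content or ""
        response += token
        yield response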
|