kh-CHEUNG committed on
Commit 15e748c · verified · 1 Parent(s): 69ae2e7

Update app.py

Files changed (1)
  1. app.py +8 -7
app.py CHANGED
@@ -26,9 +26,9 @@ HF_TOKEN = os.environ.get("Inference_Calls", None)
 # from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration, TextIteratorStreamer
 # processor = LlavaNextProcessor.from_pretrained("llava-hf/llava-v1.6-mistral-7b-hf")
 # model = LlavaNextForConditionalGeneration.from_pretrained("llava-hf/llava-v1.6-mistral-7b-hf", torch_dtype=torch.float16, low_cpu_mem_usage=True)
-from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
-model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
+"""from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 tokenizer = AutoTokenizer.from_pretrained(model_id, token=HF_TOKEN)
+model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
     device_map="auto",
@@ -37,7 +37,10 @@ model = AutoModelForCausalLM.from_pretrained(
 terminators = [
     tokenizer.eos_token_id,
     tokenizer.convert_tokens_to_ids("<|eot_id|>")
-]
+]"""
+from huggingface_hub import InferenceClient
+model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
+client = InferenceClient(model_id, api_key="HF_TOKEN")
 
 embeddings_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
 
@@ -46,12 +49,10 @@ ASR_BATCH_SIZE = 8
 ASR_CHUNK_LENGTH_S = 30
 TEMP_FILE_LIMIT_MB = 1024 #2048
 
-from huggingface_hub import InferenceClient
 """
 For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 """
 # client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 
 device = 0 if torch.cuda.is_available() else "cpu"
 
@@ -84,7 +85,7 @@ def respond(
             messages.append({"role": "assistant", "content": val[1]})
 
     messages.append({"role": "user", "content": message})
-
+    """
     input_ids = tokenizer.apply_chat_template(
         messages,
         add_generation_prompt=True,
@@ -128,7 +129,7 @@ def respond(
 
         response += token
        yield response
-    """
+
 
 @spaces.GPU
 def transcribe(asr_inputs, task):
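
For context, this commit stubs out the local transformers generation path and instantiates a huggingface_hub InferenceClient for meta-llama/Meta-Llama-3-8B-Instruct instead. Below is a minimal sketch of how the respond() generator could stream from that client. It is an assumption, not part of the commit: the token is read from the same Inference_Calls secret used at the top of app.py (the committed line passes the literal string "HF_TOKEN"), and the respond() signature and sampling parameters follow the standard Gradio ChatInterface template rather than anything shown in this diff.

# Sketch only: streaming chat completion via huggingface_hub's InferenceClient.
# Assumes HF_TOKEN holds the value of the Inference_Calls secret, as in app.py.
import os

from huggingface_hub import InferenceClient

HF_TOKEN = os.environ.get("Inference_Calls", None)
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
client = InferenceClient(model_id, token=HF_TOKEN)


def respond(message, history, system_message, max_tokens, temperature, top_p):
    # Rebuild the chat history in the OpenAI-style message format.
    messages = [{"role": "system", "content": system_message}]
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})

    response = ""
    # With stream=True, chat_completion yields chunks whose delta carries the next token.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content
        if token:
            response += token
            yield response

Passing the actual token value (rather than the string "HF_TOKEN") is what would let the hosted endpoint authenticate against a gated model such as Meta-Llama-3-8B-Instruct.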