Heit39 committed on
Commit
05a7b40
·
verified ·
1 Parent(s): 1cdb992

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -44
app.py CHANGED
@@ -6,23 +6,13 @@ import os
6
  Copied from inference in colab notebook
7
  """
8
 
9
- from transformers import AutoTokenizer , AutoModelForSeq2SeqLM , TextIteratorStreamer
10
  from threading import Thread
11
 
12
  # Load model and tokenizer globally to avoid reloading for every request
13
- base_model = "google-t5/t5-small"
14
  model_path = "Mat17892/t5small_enfr_opus"
15
 
16
- # Load tokenizer
17
- tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=True, legacy=False)
18
- model = AutoModelForSeq2SeqLM.from_pretrained(model_path, token = os.getenv('huggingface_token'))
19
-
20
- # # Load the base model (e.g., LLaMA)
21
- # base_model = AutoModelForSeq2SeqLM.from_pretrained(base_model, token = os.getenv('huggingface_token'))
22
-
23
- # # Load LoRA adapter
24
- # from peft import PeftModel
25
- # model = PeftModel.from_pretrained(base_model, model_path, token = os.getenv('huggingface_token'))
26
 
27
  def respond(
28
  message: str,
@@ -32,40 +22,54 @@ def respond(
32
  temperature: float,
33
  top_p: float,
34
  ):
35
- # Combine system message and history into a single prompt
36
- messages = [{"role": "system", "content": system_message}]
37
- for val in history:
38
- if val[0]:
39
- messages.append({"role": "user", "content": val[0]})
40
- if val[1]:
41
- messages.append({"role": "assistant", "content": val[1]})
42
- messages.append({"role": "user", "content": message})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
- # Tokenize the messages
45
- inputs = tokenizer.apply_chat_template(
46
- messages,
47
- tokenize = True,
48
- add_generation_prompt = True, # Must add for generation
49
- return_tensors = "pt",
50
- )
51
- # Generate tokens incrementally
52
- streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
53
- generation_kwargs = {
54
- "input_ids": inputs,
55
- "max_new_tokens": max_tokens,
56
- "temperature": temperature,
57
- "top_p": top_p,
58
- "do_sample": True,
59
- "streamer": streamer,
60
- }
61
- thread = Thread(target=model.generate, kwargs=generation_kwargs)
62
- thread.start()
63
 
64
- # Yield responses as they are generated
65
- response = ""
66
- for token in streamer:
67
- response += token
68
- yield response
69
 
70
 
71
  """
 
6
  Copied from inference in colab notebook
7
  """
8
 
9
+ from transformers import TextIteratorStreamer , pipeline
10
  from threading import Thread
11
 
12
  # Load model and tokenizer globally to avoid reloading for every request
 
13
  model_path = "Mat17892/t5small_enfr_opus"
14
 
15
+ translator = pipeline("translation_xx_to_yy", model=model_path)
 
 
 
 
 
 
 
 
 
16
 
17
  def respond(
18
  message: str,
 
22
  temperature: float,
23
  top_p: float,
24
  ):
25
+ message = "translate English to French:" + message
26
+
27
+ response = translator(message)
28
+ print(response)
29
+ yield response
30
+
31
+ # def respond(
32
+ # message: str,
33
+ # history: list[tuple[str, str]],
34
+ # system_message: str,
35
+ # max_tokens: int,
36
+ # temperature: float,
37
+ # top_p: float,
38
+ # ):
39
+ # # Combine system message and history into a single prompt
40
+ # messages = [{"role": "system", "content": system_message}]
41
+ # for val in history:
42
+ # if val[0]:
43
+ # messages.append({"role": "user", "content": val[0]})
44
+ # if val[1]:
45
+ # messages.append({"role": "assistant", "content": val[1]})
46
+ # messages.append({"role": "user", "content": message})
47
 
48
+ # # Tokenize the messages
49
+ # inputs = tokenizer.apply_chat_template(
50
+ # messages,
51
+ # tokenize = True,
52
+ # add_generation_prompt = True, # Must add for generation
53
+ # return_tensors = "pt",
54
+ # )
55
+ # # Generate tokens incrementally
56
+ # streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
57
+ # generation_kwargs = {
58
+ # "input_ids": inputs,
59
+ # "max_new_tokens": max_tokens,
60
+ # "temperature": temperature,
61
+ # "top_p": top_p,
62
+ # "do_sample": True,
63
+ # "streamer": streamer,
64
+ # }
65
+ # thread = Thread(target=model.generate, kwargs=generation_kwargs)
66
+ # thread.start()
67
 
68
+ # # Yield responses as they are generated
69
+ # response = ""
70
+ # for token in streamer:
71
+ # response += token
72
+ # yield response
73
 
74
 
75
  """