Heit39 committed
Commit 8e036eb · verified · 1 Parent(s): b11843f

Upload app.py

Files changed (1): app.py (+45 -51)
app.py CHANGED
@@ -6,27 +6,12 @@ import os
  Copied from inference in colab notebook
  """

- from transformers import TextIteratorStreamer, pipeline
- from threading import Thread
+ from transformers import pipeline

  # Load model and tokenizer globally to avoid reloading for every request
  model_path = "Mat17892/t5small_enfr_opus"

- translator = pipeline("translation_xx_to_yy", model=model_path)
-
- def respond(
-     message: str,
-     history: list[tuple[str, str]],
-     system_message: str,
-     max_tokens: int,
-     temperature: float,
-     top_p: float,
- ):
-     message = "translate English to French:" + message
-
-     response = translator(message)[0]
-     print(response)
-     yield response['translation_text']
+ # translator = pipeline("translation_xx_to_yy", model=model_path)

  # def respond(
  #     message: str,
@@ -36,40 +21,49 @@ def respond(
  #     temperature: float,
  #     top_p: float,
  # ):
- #     # Combine system message and history into a single prompt
- #     messages = [{"role": "system", "content": system_message}]
- #     for val in history:
- #         if val[0]:
- #             messages.append({"role": "user", "content": val[0]})
- #         if val[1]:
- #             messages.append({"role": "assistant", "content": val[1]})
- #     messages.append({"role": "user", "content": message})
-
- #     # Tokenize the messages
- #     inputs = tokenizer.apply_chat_template(
- #         messages,
- #         tokenize = True,
- #         add_generation_prompt = True,  # Must add for generation
- #         return_tensors = "pt",
- #     )
- #     # Generate tokens incrementally
- #     streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
- #     generation_kwargs = {
- #         "input_ids": inputs,
- #         "max_new_tokens": max_tokens,
- #         "temperature": temperature,
- #         "top_p": top_p,
- #         "do_sample": True,
- #         "streamer": streamer,
- #     }
- #     thread = Thread(target=model.generate, kwargs=generation_kwargs)
- #     thread.start()
-
- #     # Yield responses as they are generated
- #     response = ""
- #     for token in streamer:
- #         response += token
- #         yield response
+ #     message = "translate English to French:" + message
+
+ #     response = translator(message)[0]
+ #     yield response['translation_text']
+
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, TextIteratorStreamer
+ import threading
+
+ tokenizer = AutoTokenizer.from_pretrained(model_path)
+ model = AutoModelForSeq2SeqLM.from_pretrained(model_path)
+
+ def respond(
+     message: str,
+     history: list[tuple[str, str]],
+     system_message: str,
+     max_tokens: int = 128,
+     temperature: float = 1.0,
+     top_p: float = 1.0,
+ ):
+     # Preprocess the input message
+     input_text = "translate English to French: " + message
+     input_ids = tokenizer(input_text, return_tensors="pt").input_ids
+
+     # Set up the streamer
+     streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)
+
+     # Generate in a separate thread to avoid blocking
+     generation_thread = threading.Thread(
+         target=model.generate,
+         kwargs={
+             "input_ids": input_ids,
+             "max_new_tokens": max_tokens,
+             "do_sample": True,
+             "temperature": temperature,
+             "top_p": top_p,
+             "streamer": streamer,
+         },
+     )
+     generation_thread.start()
+
+     # Stream the output progressively
+     for token in streamer:
+         yield token


  """