ghengx commited on
Commit
06cb66f
1 Parent(s): 1300754
Files changed (2) hide show
  1. app.py +85 -0
  2. requirements.txt +63 -0
app.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Gradio chat app serving the Merdeka-LLM instruct model.
#
# For more information on `huggingface_hub` Inference API support, please
# check the docs:
# https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
import spaces
import gradio as gr
from threading import Thread
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

# Hugging Face model repository to serve.
MODEL_NAME = "Merdeka-LLM/merdeka-llm-3.2b-128k-instruct"

# Load weights once at startup. device_map="auto" places layers on the
# available accelerator(s); torch_dtype="auto" keeps the checkpoint dtype.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype="auto",
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Module-level streamer used by respond(); yields decoded text pieces,
# skipping the prompt and special tokens.
streamer = TextIteratorStreamer(
    tokenizer, timeout=100.0, skip_prompt=True, skip_special_tokens=True
)
20
@spaces.GPU
def respond(
    message,
    history: list[tuple[str, str]],
    # system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream an assistant reply to `message` given the chat `history`.

    Args:
        message: Latest user message.
        history: Prior (user, assistant) turn pairs from gr.ChatInterface.
        max_tokens: Cap on newly generated tokens.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling cutoff.

    Yields:
        The accumulated response text after each streamed token.
    """
    messages = [{"role": "system", "content": "You are a professional lawyer who is familiar with Malaysia Law."}]

    # Replay prior turns; skip empty slots (e.g. an in-flight assistant turn).
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})

    messages.append({"role": "user", "content": message})

    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    # BUGFIX: build a fresh streamer per call. TextIteratorStreamer is
    # single-use per generation; sharing one module-level instance
    # interleaves or breaks output when requests overlap or repeat.
    local_streamer = TextIteratorStreamer(
        tokenizer, timeout=100.0, skip_prompt=True, skip_special_tokens=True
    )

    generate_kwargs = dict(
        model_inputs,
        max_new_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        # BUGFIX: without do_sample=True, generate() decodes greedily and the
        # temperature/top_p sliders have no effect.
        do_sample=True,
        streamer=local_streamer,
    )
    # Run generation on a worker thread so we can iterate the streamer here.
    t = Thread(target=model.generate, kwargs=generate_kwargs)
    t.start()

    response = ""
    for new_token in local_streamer:
        # Drop bare '<' fragments — presumably leftover special-token
        # markers; original filtering behavior kept as-is.
        if new_token != '<':
            response += new_token
            yield response
61
+
62
# For information on how to customize the ChatInterface, peruse the gradio
# docs: https://www.gradio.app/docs/chatinterface
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        # gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.1, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
)
80
+
81
+
82
if __name__ == "__main__":
    # Start the Gradio server when run as a script.
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accelerate==1.0.1
2
+ aiofiles==23.2.1
3
+ annotated-types==0.7.0
4
+ anyio==4.6.2.post1
5
+ certifi==2024.8.30
6
+ charset-normalizer==3.4.0
7
+ click==8.1.7
8
+ fastapi==0.115.4
9
+ ffmpy==0.4.0
10
+ filelock==3.16.1
11
+ fsspec==2024.10.0
12
+ gradio==5.4.0
13
+ gradio_client==1.4.2
14
+ h11==0.14.0
15
+ httpcore==1.0.6
16
+ httpx==0.27.2
17
+ huggingface-hub==0.26.2
18
+ idna==3.10
19
+ Jinja2==3.1.4
20
+ markdown-it-py==3.0.0
21
+ MarkupSafe==2.1.5
22
+ mdurl==0.1.2
23
+ mpmath==1.3.0
24
+ networkx==3.4.2
25
+ numpy==1.26.4
26
+ orjson==3.10.10
27
+ packaging==24.1
28
+ pandas==2.2.3
29
+ pillow==11.0.0
30
+ psutil==5.9.8
31
+ pydantic==2.9.2
32
+ pydantic_core==2.23.4
33
+ pydub==0.25.1
34
+ Pygments==2.18.0
35
+ python-dateutil==2.9.0.post0
36
+ python-multipart==0.0.12
37
+ pytz==2024.2
38
+ PyYAML==6.0.2
39
+ regex==2024.9.11
40
+ requests==2.32.3
41
+ rich==13.9.3
42
+ ruff==0.7.1
43
+ safehttpx==0.1.1
44
+ safetensors==0.4.5
45
+ semantic-version==2.10.0
46
+ setuptools==75.3.0
47
+ shellingham==1.5.4
48
+ six==1.16.0
49
+ sniffio==1.3.1
50
+ spaces==0.30.4
51
+ starlette==0.41.2
52
+ sympy==1.13.1
53
+ tokenizers==0.20.1
54
+ tomlkit==0.12.0
55
+ torch==2.2.0
56
+ tqdm==4.66.6
57
+ transformers==4.46.1
58
+ typer==0.12.5
59
+ typing_extensions==4.12.2
60
+ tzdata==2024.2
61
+ urllib3==2.2.3
62
+ uvicorn==0.32.0
63
+ websockets==12.0