Somunia committed on
Commit 02af69b
1 Parent(s): 97978f7

Upload folder using huggingface_hub

Files changed (5):
  1. .DS_Store +0 -0
  2. Dockerfile +32 -5
  3. README.md +4 -9
  4. app.py +6 -9
  5. app.py_09_23_24 +62 -0
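
For context, the upload described by the commit message can be reproduced with the huggingface_hub library; a minimal sketch, where the repo_id is a placeholder rather than anything recorded in this commit:

    from huggingface_hub import upload_folder

    # Push the local working directory to a Space in a single commit.
    # repo_id is hypothetical; substitute the actual namespace/Space name.
    upload_folder(
        repo_id="somunia/cpu-casuallm",  # placeholder
        folder_path=".",
        repo_type="space",
        commit_message="Upload folder using huggingface_hub",
    )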
.DS_Store ADDED
Binary file (6.15 kB).
 
Dockerfile CHANGED
@@ -1,12 +1,39 @@
-# Use the base image from your Docker container
-FROM trcoot/cpu-casuallm:latest
+# Use the full Python 3.9 image (if you need specific modules)
+FROM python:3.9.19
+
+RUN useradd -m -u 1000 user
+USER user
+ENV PATH="/home/user/.local/bin:$PATH"
+
+# Working Directory
+WORKDIR /app
+
+COPY --chown=user ./models/ models/
+COPY --chown=user ./app.py app.py
+
+RUN pip install --no-cache-dir torch==2.2.2
+RUN pip install --no-cache-dir packaging
+
+# Copy Dependencies (if you have any)
+COPY --chown=user ./requirements.txt requirements.txt
+
+# Install Dependencies (if you have any)
+RUN pip install --no-cache-dir -r requirements.txt
+RUN pip install -U git+https://github.com/sustcsonglin/flash-linear-attention
+
+# Copy Custom Modules (Adjust paths if needed)
+COPY --chown=user ./causal-conv1d/ causal-conv1d/
+RUN cd /app/causal-conv1d && python setup.py install --user
+
+COPY --chown=user ./mamba/ mamba/
+RUN cd /app/mamba && python setup.py install --user
 
 # Set the environment variable for the Hugging Face cache directory
 ENV HF_HOME=/app/.cache
 
 # Create the cache directory and give the appropriate permissions
 RUN mkdir -p /app/.cache && chmod 777 /app/.cache
-RUN mkdir -p /app/gradio_flagged && chmod 777 /app/gradio_flagged
 
-# Start the application (modify this based on your app's entry point)
-CMD ["python", "app.py"]
+# Print Messages
+# CMD ["bash"]
+CMD ["python", "app.py"]
README.md CHANGED
@@ -1,11 +1,6 @@
 ---
-title: Cpu Casuallm
-emoji: 🌍
-colorFrom: pink
-colorTo: purple
-sdk: docker
-pinned: false
-license: apache-2.0
+title: cpu-casuallm
+app_file: app.py
+sdk: gradio
+sdk_version: 4.42.0
 ---
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py CHANGED
@@ -2,6 +2,7 @@ import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import time
 import gradio as gr
+from gradio import deploy
 
 def generate_prompt(instruction, input=""):
     instruction = instruction.strip().replace('\r\n','\n').replace('\n\n','\n')
@@ -47,7 +48,7 @@ def generate_text(input_text):
     input_ids = tokenizer(prompt, return_tensors="pt").input_ids
 
     generated_text = ""
-    stop_sequence_found = False
+
     for i in range(333):
         output = model.generate(input_ids, max_new_tokens=1, do_sample=True, temperature=1.0, top_p=0.3, top_k=0)
         new_word = tokenizer.decode(output[0][-1:], skip_special_tokens=True)
@@ -55,15 +56,9 @@ def generate_text(input_text):
         print(new_word, end="", flush=True)
         generated_text += new_word
 
-        if new_word == '\n' or new_word == '.':
-            stop_sequence_found = True
-            break
 
         input_ids = output
 
-    if stop_sequence_found:
-        print("\n(Stop sequence found)")
-    print()
     return generated_text
 
 # Create the Gradio interface
@@ -73,11 +68,13 @@ iface = gr.Interface(
     outputs="text",
     title="RWKV Chatbot",
     description="Enter your prompt below:",
-    flagging_dir="/app/gradio_flagged"
+    # flagging_callback=None
+    flagging_dir="gradio_flagged/"
 )
 
 # For local testing:
-# iface.launch()
+# iface.launch(share=True)
+deploy()
 
 
 # Hugging Face Spaces will automatically launch the interface.
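
The updated app.py comments out iface.launch() in favor of gradio's deploy(). For local testing without deploying, a minimal launch sketch (the host and port are assumptions; 7860 is Gradio's default):

    # Local-only run: serve the interface directly instead of deploying.
    # Assumes the `iface` defined in app.py above.
    if __name__ == "__main__":
        iface.launch(server_name="0.0.0.0", server_port=7860)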
app.py_09_23_24 ADDED
@@ -0,0 +1,62 @@
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import time
+
+def generate_prompt(instruction, input=""):
+    instruction = instruction.strip().replace('\r\n','\n').replace('\n\n','\n')
+    input = input.strip().replace('\r\n','\n').replace('\n\n','\n')
+    if input:
+        return f"""Instruction: {instruction}
+
+Input: {input}
+
+Response:"""
+    else:
+        return f"""User: hi
+
+Lover: Hi. I am your assistant and I will provide expert full response in full details. Please feel free to ask any question and I will always answer it.
+
+User: {instruction}
+
+Lover:"""
+
+model_path = "models/rwkv-6-world-1b6/"  # Path to your local model directory
+
+model = AutoModelForCausalLM.from_pretrained(
+    model_path,
+    trust_remote_code=True,
+    use_flash_attention_2=False  # Explicitly disable Flash Attention
+).to(torch.float32)
+
+
+tokenizer = AutoTokenizer.from_pretrained(
+    model_path,
+    bos_token="</s>",
+    eos_token="</s>",
+    unk_token="<unk>",
+    pad_token="<pad>",
+    trust_remote_code=True,
+    padding_side='left',
+    clean_up_tokenization_spaces=False  # Or set to True if you prefer
+)
+
+print(tokenizer.special_tokens_map)
+
+text = "Hi"
+
+prompt = generate_prompt(text)
+
+input_ids = tokenizer(prompt, return_tensors="pt").input_ids
+
+# Generate text word by word with stop sequence
+generated_text = ""
+for i in range(333):  # Generate up to 333 tokens
+    output = model.generate(input_ids, max_new_tokens=1, do_sample=True, temperature=1.0, top_p=0.3, top_k=0)
+    new_word = tokenizer.decode(output[0][-1:], skip_special_tokens=True)
+
+    print(new_word, end="", flush=True)  # Print word-by-word
+    generated_text += new_word
+
+    input_ids = output  # Update input_ids for next iteration
+
+print()  # Add a newline at the end
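
This archived script streams output by calling model.generate with max_new_tokens=1 in a loop and re-feeding the full sequence each step. The same word-by-word printing is available from a single generate call via transformers' TextStreamer; a minimal sketch with the sampling settings above (assumes the model and tokenizer already loaded):

    from transformers import TextStreamer

    # Stream decoded tokens to stdout as they are generated, skipping the prompt.
    streamer = TextStreamer(tokenizer, skip_prompt=True)
    output = model.generate(
        input_ids,
        max_new_tokens=333,
        do_sample=True,
        temperature=1.0,
        top_p=0.3,
        top_k=0,
        streamer=streamer,
    )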