GaborToth2 committed
Commit 3bad752 · Parent: ea5bb32

llama3.1 8b

.gitignore CHANGED
@@ -1,3 +1,5 @@
+ models/
+
  # Byte-compiled / optimized / DLL files
  __pycache__/
  *.py[cod]
app.py CHANGED
@@ -6,8 +6,8 @@ import cohere
  HF_API_KEY = os.getenv("HF_API_KEY")
  COHERE_API_KEY = os.getenv("COHERE_API_KEY") # Get Cohere API key

- models = ["HuggingFaceH4/zephyr-7b-beta", "microsoft/Phi-4-mini-instruct", "meta-llama/Llama-3.2-3B-Instruct"]
- client_hf = InferenceClient(model=models[2], token=HF_API_KEY) # HF Client
+ models = ["HuggingFaceH4/zephyr-7b-beta", "microsoft/Phi-4-mini-instruct", "meta-llama/Llama-3.2-3B-Instruct", "meta-llama/Llama-3.1-8B-Instruct"]
+ client_hf = InferenceClient(model=models[3], token=HF_API_KEY) # HF Client
  client_cohere = cohere.Client(COHERE_API_KEY) # Cohere Client

  def respond(
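
Note (not part of the commit): the body of respond() falls outside this hunk, so the way the newly selected model is queried is not shown here. A minimal sketch of calling the same endpoint through huggingface_hub's InferenceClient, assuming the standard chat_completion API; the user message and max_tokens value are placeholders, not taken from the commit:

import os
from huggingface_hub import InferenceClient

HF_API_KEY = os.getenv("HF_API_KEY")
client_hf = InferenceClient(model="meta-llama/Llama-3.1-8B-Instruct", token=HF_API_KEY)

messages = [
    {"role": "system", "content": "You are a helpful AI assistant."},
    {"role": "user", "content": "Hello!"},  # placeholder user turn
]
# chat_completion returns an OpenAI-style completion object; max_tokens is an assumed value.
reply = client_hf.chat_completion(messages=messages, max_tokens=256)
print(reply.choices[0].message.content)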
local_demos/llama_cpp_demo.py ADDED
@@ -0,0 +1,33 @@
+ import os
+ import requests
+ from llama_cpp import Llama
+
+ # Define model path
+ MODEL_PATH = "models/llama3.2_3B_Q4.gguf"
+ MODEL_URL = "https://huggingface.co/prithivMLmods/Llama-3.2-3B-GGUF/resolve/main/Llama-3.2-3B-GGUF.Q4_K_M.gguf?download=true"
+
+ # Download model if not exists
+ if not os.path.exists(MODEL_PATH):
+     print("Downloading model...")
+     with requests.get(MODEL_URL, stream=True) as r:
+         r.raise_for_status()
+         with open(MODEL_PATH, "wb") as f:
+             for chunk in r.iter_content(chunk_size=8192):
+                 f.write(chunk)
+     print("Download complete!")
+
+
+ # Load model
+ llm = Llama(model_path=MODEL_PATH, n_ctx=4096)
+
+ # Define system and user prompts
+ system_prompt = "You are a helpful AI assistant."
+ user_prompt = input("User: ")
+
+ # Generate response
+ output = llm.create_chat_completion(
+     messages=[{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}]
+ )
+
+ # Print response
+ print("Assistant:", output["choices"][0]["message"]["content"])
local_demos/transformers_demo.py ADDED
@@ -0,0 +1,24 @@
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ import torch
+
+ def generate_response(model, tokenizer, prompt, max_length=50):
+     inputs = tokenizer(prompt, return_tensors="pt")
+     outputs = model.generate(inputs.input_ids, max_length=max_length, num_return_sequences=1)
+     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+     return response
+
+ def main():
+     model_name = "meta-llama/Llama-3.2-3B-Instruct"
+     system_prompt = "You are a helpful assistant."
+
+     tokenizer = AutoTokenizer.from_pretrained(model_name)
+     model = AutoModelForCausalLM.from_pretrained(model_name)
+
+     user_prompt = input("Enter your prompt: ")
+     full_prompt = f"{system_prompt}\n{user_prompt}"
+
+     response = generate_response(model, tokenizer, full_prompt)
+     print("Response:", response)
+
+ if __name__ == "__main__":
+     main()
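
Note (not part of the commit): this demo joins the system and user prompts with a plain newline and caps max_length at 50 tokens including the prompt, so an instruct-tuned checkpoint may answer poorly or get cut off. A minimal sketch of the chat-template variant, reusing the demo's model name and prompts; max_new_tokens here is an assumed value:

from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "meta-llama/Llama-3.2-3B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": input("Enter your prompt: ")},
]
# Render the conversation with the model's chat template instead of "\n"-joining strings.
input_ids = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt")
outputs = model.generate(input_ids, max_new_tokens=50)
# Decode only the newly generated tokens, not the echoed prompt.
print("Response:", tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True))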