Arambh committed
Commit 5bbc1fb · verified · 1 Parent(s): e4a65d9

Update README.md

Files changed (1)
  1. README.md +18 -18
README.md CHANGED
@@ -24,38 +24,38 @@ This model sets a precedent for developing AI tools for other regional languages

## Model description

The snippet below loads the model with the Hugging Face Transformers library and generates Angika text from a prompt:

```python
# Install dependencies first: pip install transformers torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the tokenizer and model from the Hugging Face Hub
model_name = "Arambh/angika-llm-1b"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)


def generate_text(prompt, max_length=100, num_return_sequences=1):
    # Tokenize the input prompt
    inputs = tokenizer(prompt, return_tensors="pt")

    # Generate text
    outputs = model.generate(
        **inputs,
        max_length=max_length,
        num_return_sequences=num_return_sequences,
        no_repeat_ngram_size=2,  # prevents repeated 2-grams
        do_sample=True,  # sampling is required when num_return_sequences > 1
        early_stopping=True,  # only takes effect with beam search
    )

    # Decode and return the generated sequences
    return [tokenizer.decode(output, skip_special_tokens=True) for output in outputs]


if __name__ == "__main__":
    # Angika prompt, roughly: "Old inscriptions are found on these hills"
    prompt = "ये सब पहाड़ी पर पुरानो अभिलेख मिलै छै "
    generated_text = generate_text(prompt, max_length=100)

    for i, text in enumerate(generated_text):
        print(f"Generated Text {i+1}:\n{text}\n")
```
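To run the quickstart on a GPU, the model and the tokenized inputs must live on the same device. A minimal sketch continuing from the snippet above; the device-selection lines are an illustrative addition, not part of the original card:

```python
import torch

# Pick a CUDA device when one is available, otherwise fall back to CPU
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)

# Inputs must be moved to the same device as the model
inputs = tokenizer(prompt, return_tensors="pt").to(device)
outputs = model.generate(**inputs, max_length=100, no_repeat_ngram_size=2)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```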
 
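The same generation can usually be done more compactly with the Transformers `pipeline` API. This is a sketch under the assumption that the checkpoint works with the standard text-generation pipeline, not a documented usage from the card:

```python
from transformers import pipeline

# The pipeline wraps tokenization, generation, and decoding in one call
# (assumes the checkpoint is compatible with the text-generation task)
generator = pipeline("text-generation", model="Arambh/angika-llm-1b")
result = generator(
    "ये सब पहाड़ी पर पुरानो अभिलेख मिलै छै ",
    max_length=100,
    no_repeat_ngram_size=2,
)
print(result[0]["generated_text"])
```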
## Intended uses & limitations