DarkAngel committed on
Commit bf11651 · verified · 1 parent: 9f19b9b

Update app.py

Files changed (1):
  1. app.py +5 -10
app.py CHANGED
@@ -2,10 +2,10 @@ import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
 from peft import PeftModel
 
-# Load the fine-tuned model and tokenizer
-base_model = AutoModelForCausalLM.from_pretrained("unsloth/Meta-Llama-3.1-8B-Instruct") # No bnb-4bit here
+# Use a smaller model to reduce memory usage
+base_model = AutoModelForCausalLM.from_pretrained("meta/llama-2-7b-hf") # Smaller model
 model = PeftModel.from_pretrained(base_model, "DarkAngel/gitallama")
-tokenizer = AutoTokenizer.from_pretrained("unsloth/Meta-Llama-3.1-8B-Instruct")
+tokenizer = AutoTokenizer.from_pretrained("meta/llama-2-7b-hf") # Use the tokenizer for the smaller model
 
 def generate_response(shloka, transliteration):
     """
@@ -18,15 +18,13 @@ def generate_response(shloka, transliteration):
         }
     ]
 
-    # Ensure the model uses CPU instead of GPU
     inputs = tokenizer.apply_chat_template(
         input_message,
         tokenize=True,
-        add_generation_prompt=True, # Enable for generation
+        add_generation_prompt=True,
        return_tensors="pt"
-    ).to("cpu") # Use CPU
+    ).to("cpu") # Ensure CPU usage
 
-    # Generate response
     text_streamer = TextStreamer(tokenizer, skip_prompt=True)
     generated_tokens = model.generate(
         input_ids=inputs,
@@ -39,7 +37,6 @@ def generate_response(shloka, transliteration):
 
     raw_response = tokenizer.decode(generated_tokens[0], skip_special_tokens=True)
 
-    # Format the response
     try:
         sections = raw_response.split("Hindi Meaning:")
         english_meaning = sections[0].strip()
@@ -47,14 +44,12 @@ def generate_response(shloka, transliteration):
         hindi_meaning = hindi_and_word[0].strip()
         word_meaning = hindi_and_word[1].strip()
 
-        # Format response for better readability
         formatted_response = (
             f"English Meaning:\n{english_meaning}\n\n"
             f"Hindi Meaning:\n{hindi_meaning}\n\n"
             f"Word Meaning:\n{word_meaning}"
         )
     except IndexError:
-        # In case the response format is not as expected
         formatted_response = raw_response
 
     return formatted_response
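
The hunks above only change how the base model and tokenizer are loaded and trim comments inside generate_response; the prompt template and the Gradio wiring implied by import gradio as gr are unchanged and not shown. For orientation, below is a minimal smoke-test sketch of the new loading path. It is not part of the commit: the prompt text is invented, the low_cpu_mem_usage and torch_dtype knobs are optional extras in the spirit of the commit's memory-reduction goal, and it assumes both that the "meta/llama-2-7b-hf" id resolves on the Hub (the public checkpoint is published as meta-llama/Llama-2-7b-hf) and that the DarkAngel/gitallama adapter is compatible with whatever base checkpoint is loaded.

# Smoke-test sketch (not part of the commit): load the base checkpoint named in the
# new code together with the LoRA adapter, then run one short generation on CPU.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

BASE_ID = "meta/llama-2-7b-hf"      # id as written in the commit; the public Hub id is meta-llama/Llama-2-7b-hf
ADAPTER_ID = "DarkAngel/gitallama"  # a PEFT adapter only loads cleanly onto the base it was trained from

# Optional memory knobs in the spirit of the commit; drop them if they cause trouble on the target host.
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_ID,
    low_cpu_mem_usage=True,
    torch_dtype=torch.bfloat16,
)
model = PeftModel.from_pretrained(base_model, ADAPTER_ID)
tokenizer = AutoTokenizer.from_pretrained(BASE_ID)

prompt = "Explain this shloka: karmany evadhikaras te ma phaleshu kadachana"  # invented test input
inputs = tokenizer(prompt, return_tensors="pt").to("cpu")

with torch.no_grad():
    generated = model.generate(**inputs, max_new_tokens=64)

print(tokenizer.decode(generated[0], skip_special_tokens=True))

If loading the adapter onto the new base raises shape-mismatch errors, that usually means the adapter was trained against the previously used unsloth/Meta-Llama-3.1-8B-Instruct checkpoint, and the base swap would need to be revisited.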