DarkAngel committed
Commit 91dd789 · verified · 1 Parent(s): 0a0495d

Create app.py

Files changed (1)
  1. app.py +87 -0
app.py ADDED
@@ -0,0 +1,87 @@
+ from huggingface_hub import login
+ import gradio as gr
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
+ from peft import PeftModel
+ 
+ # Authenticate with Hugging Face (uses a cached token or prompts for one)
+ login()
+ 
+ # Load the fine-tuned model and tokenizer
+ # Alternative (requires unsloth): model, tokenizer = FastLanguageModel.from_pretrained("lora_model")
+ base_model = AutoModelForCausalLM.from_pretrained(
+     "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
+     device_map="auto"  # place the 4-bit base model on the available GPU
+ )
+ model = PeftModel.from_pretrained(base_model, "DarkAngel/gitallama")
+ tokenizer = AutoTokenizer.from_pretrained("unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit")
+ 
+ 
+ def generate_response(shloka, transliteration):
+     """
+     Generates the response using the fine-tuned LLaMA model.
+     """
+     input_message = [
+         {
+             "role": "user",
+             "content": f"Shloka: {shloka} Transliteration: {transliteration}"
+         }
+     ]
+     inputs = tokenizer.apply_chat_template(
+         input_message,
+         tokenize=True,
+         add_generation_prompt=True,  # append the assistant turn marker for generation
+         return_tensors="pt"
+     ).to("cuda")  # assumes the model is running on a GPU
+ 
+     # Generate the response, streaming tokens to stdout as they are produced
+     text_streamer = TextStreamer(tokenizer, skip_prompt=True)
+     generated_tokens = model.generate(
+         input_ids=inputs,
+         streamer=text_streamer,
+         max_new_tokens=512,
+         use_cache=True,
+         do_sample=True,  # required for temperature/min_p to take effect
+         temperature=1.5,
+         min_p=0.1
+     )
+ 
+     # Decode only the newly generated tokens, excluding the echoed prompt
+     raw_response = tokenizer.decode(generated_tokens[0][inputs.shape[-1]:], skip_special_tokens=True)
+ 
+     # Format the response, assuming it contains English Meaning, Hindi Meaning,
+     # and Word Meaning in sequence
+     try:
+         sections = raw_response.split("Hindi Meaning:")
+         english_meaning = sections[0].replace("English Meaning:", "").strip()
+         hindi_and_word = sections[1].split("Word Meaning:")
+         hindi_meaning = hindi_and_word[0].strip()
+         word_meaning = hindi_and_word[1].strip()
+ 
+         # Reassemble the sections for better readability
+         formatted_response = (
+             f"English Meaning:\n{english_meaning}\n\n"
+             f"Hindi Meaning:\n{hindi_meaning}\n\n"
+             f"Word Meaning:\n{word_meaning}"
+         )
+     except IndexError:
+         # Fall back to the raw text if the response is not in the expected format
+         formatted_response = raw_response
+ 
+     return formatted_response
+ 
+ 
+ # Gradio interface
+ interface = gr.Interface(
+     fn=generate_response,
+     inputs=[
+         gr.Textbox(label="Enter Shloka", placeholder="Type or paste a Shloka here"),
+         gr.Textbox(label="Enter Transliteration", placeholder="Type or paste the transliteration here")
+     ],
+     outputs=gr.Textbox(label="Generated Response"),
+     title="Bhagavad Gita LLaMA Model",
+     description="Input a Shloka with its transliteration, and this model will provide meanings in English and Hindi along with word meanings."
+ )
+ 
+ # Launch the interface
+ if __name__ == "__main__":
+     interface.launch()
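
Note on authentication: login() with no arguments relies on a cached token or an interactive prompt, neither of which is available when a Space starts non-interactively. A minimal sketch of token-based login, assuming the token is exposed through an HF_TOKEN environment variable (the variable name is an example, e.g. a Space secret, not part of the committed code):

import os
from huggingface_hub import login

# Read the access token from the environment instead of prompting interactively.
# "HF_TOKEN" is an assumed name; set it as a Space secret or export it locally.
hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    login(token=hf_token)

This is only needed if DarkAngel/gitallama or the base checkpoint is private; public repositories load without logging in.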