Spaces:

DarkAngel
/

BhagavadGita-LLama8b

Runtime error

App Files Files Community

DarkAngel committed on Dec 30, 2024

Commit

91dd789

verified ·

1 Parent(s): 0a0495d

Create app.py

Browse files

Files changed (1) hide show

app.py +87 -0

app.py ADDED Viewed

	@@ -0,0 +1,87 @@

+from huggingface_hub import upload_folder, login
+# Authenticate with Hugging Face
+login()
+import gradio as gr
+from unsloth import FastLanguageModel
+from transformers import TextStreamer
+# Load the fine-tuned model and tokenizer
+# model, tokenizer = FastLanguageModel.from_pretrained("lora_model")
+from peft import PeftModel
+from transformers import AutoModelForCausalLM, AutoTokenizer
+base_model = AutoModelForCausalLM.from_pretrained("unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit")
+model = PeftModel.from_pretrained(base_model, "DarkAngel/gitallama")
+tokenizer = AutoTokenizer.from_pretrained("unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit")
+tokenizer = AutoTokenizer.from_pretrained("unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit")
+def generate_response(shloka, transliteration):
+    """
+    Generates the response using the fine-tuned LLaMA model.
+    """
+    input_message = [
+        {
+            "role": "user",
+            "content": f"Shloka: {shloka} Transliteration: {transliteration}"
+        }
+    ]
+    inputs = tokenizer.apply_chat_template(
+        input_message,
+        tokenize=True,
+        add_generation_prompt=True,  # Enable for generation
+        return_tensors="pt"
+    ).to("cuda")  # Assuming the model is running on GPU
+    # Generate response
+    text_streamer = TextStreamer(tokenizer, skip_prompt=True)
+    generated_tokens = model.generate(
+        input_ids=inputs,
+        streamer=text_streamer,
+        max_new_tokens=512,
+        use_cache=True,
+        temperature=1.5,
+        min_p=0.1
+    )
+    raw_response = tokenizer.decode(generated_tokens[0], skip_special_tokens=True)
+    # Format the response
+    # Assuming raw_response contains English Meaning, Hindi Meaning, and Word Meaning in sequence
+    try:
+        sections = raw_response.split("Hindi Meaning:")
+        english_meaning = sections[0].strip()
+        hindi_and_word = sections[1].split("Word Meaning:")
+        hindi_meaning = hindi_and_word[0].strip()
+        word_meaning = hindi_and_word[1].strip()
+        # Format response for better readability
+        formatted_response = (
+            f"English Meaning:\n{english_meaning}\n\n"
+            f"Hindi Meaning:\n{hindi_meaning}\n\n"
+            f"Word Meaning:\n{word_meaning}"
+        )
+    except IndexError:
+        # In case the response format is not as expected
+        formatted_response = raw_response
+    return formatted_response
+# Gradio interface
+interface = gr.Interface(
+    fn=generate_response,
+    inputs=[
+        gr.Textbox(label="Enter Shloka", placeholder="Type or paste a Shloka here"),
+        gr.Textbox(label="Enter Transliteration", placeholder="Type or paste the transliteration here")
+    ],
+    outputs=gr.Textbox(label="Generated Response"),
+    title="Bhagavad Gita LLaMA Model",
+    description="Input a Shloka with its transliteration, and this model will provide meanings in English and Hindi along with word meanings."
+)
+# Launch the interface
+if __name__ == "__main__":
+    interface.launch()