from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
from llama_cpp import Llama
from datasets import load_metric

# 1. Quick text generation with the Transformers pipeline
pipe = pipeline("text-generation", model="varma007ut/Indian_Legal_Assitant")
prompt = "Summarize the key points of the Indian Contract Act, 1872:"
result = pipe(prompt, max_length=200)
print(result[0]['generated_text'])

# 2. Loading the tokenizer and model directly
tokenizer = AutoTokenizer.from_pretrained("varma007ut/Indian_Legal_Assitant")
model = AutoModelForCausalLM.from_pretrained("varma007ut/Indian_Legal_Assitant")
prompt = "What are the fundamental rights in the Indian Constitution?"
inputs = tokenizer(prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_length=200)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

# 3. Running a GGUF build with llama-cpp-python
llm = Llama.from_pretrained(
    repo_id="varma007ut/Indian_Legal_Assitant",
    filename="ggml-model-q4_0.gguf",  # Replace with the actual GGUF filename if different
)
response = llm.create_chat_completion(
    messages=[
        {
            "role": "user",
            "content": "Explain the concept of judicial review in India.",
        }
    ]
)
print(response['choices'][0]['message']['content'])

# 4. Scoring generated text against a reference answer with BLEU
# Note: load_metric is deprecated in recent datasets releases; an evaluate-based
# alternative is sketched after this block.
bleu = load_metric("bleu")
encoded_input = tokenizer(prompt, return_tensors="pt")
generated = model.generate(**encoded_input, max_length=200)
predictions = [tokenizer.decode(generated[0], skip_special_tokens=True).split()]
references = [["Replace this placeholder with a reference answer for the prompt".split()]]
results = bleu.compute(predictions=predictions, references=references)
print(results["bleu"])
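Because datasets.load_metric has been deprecated (and removed in recent datasets releases), here is a minimal sketch of the same BLEU evaluation using the evaluate library instead. The prompt and reference answer are illustrative placeholders rather than part of the model card, and the snippet assumes tokenizer and model have already been loaded as shown above.

import evaluate

# Illustrative prompt/reference pair (placeholders, not from the model card)
eval_prompt = "What are the fundamental rights in the Indian Constitution?"
reference_texts = [["Part III (Articles 12-35) of the Indian Constitution guarantees the fundamental rights."]]

# Generate a prediction with the tokenizer and model loaded earlier
enc = tokenizer(eval_prompt, return_tensors="pt")
gen = model.generate(**enc, max_length=200)
prediction_texts = [tokenizer.decode(gen[0], skip_special_tokens=True)]

# evaluate's BLEU accepts raw strings; references is a list of reference lists
bleu = evaluate.load("bleu")
results = bleu.compute(predictions=prediction_texts, references=reference_texts)
print(results["bleu"])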