GSridhar1982 committed
Commit e7bda41 · verified · 1 parent: 0629758

Update app.py

Files changed (1): app.py (+22 -54)
app.py CHANGED
@@ -1,58 +1,26 @@
- import gradio as gr
- from transformers import pipeline
- from peft import AutoPeftModelForCausalLM
- from transformers import AutoTokenizer

- max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
- dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
- load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.
- #from llama_cpp import Llama
-
- # Load the Llama model
- #llm = Llama.from_pretrained(
- #    repo_id="GSridhar1982/QA_Llama31_Quantized_GGUF",
- #    filename="QA_llama31_unsloth.Q4_K_M.gguf",
- #)
-
- #def generate_response(user_input):
-     # Perform inference
-     # response = llm.create_chat_completion(
-     #     messages=[
-     #         {
-     #             "role": "user",
-     #             "content": user_input
-     #         }
-     #     ]
-     # )
-
-     # Extract the model's reply
-     # model_reply = response['choices'][0]['message']['content']
-     # return model_reply
- def generate_answer(user_input):
-     model = AutoPeftModelForCausalLM.from_pretrained(
-         "GSridhar1982/AIML_QA_Llama31_FineTuned_UsingLora", # YOUR MODEL YOU USED FOR TRAINING
-         load_in_4bit = load_in_4bit,
-     )
-     tokenizer = AutoTokenizer.from_pretrained("GSridhar1982/AIML_QA_Llama31_FineTuned_UsingLora")
-     # Create a text generation pipeline
-     generator = pipeline(task="text-generation", model=model, tokenizer=tokenizer)

-     # Generate predictions on the test dataset
-     # Access the input column of the dataset using the column name
-     predictions = generator(user_input)[0]['generated_text']

-     # Extract the generated text from the pipeline output
-     #predictions = [pred[0]['generated_text'] for pred in predictions]
-     return predictions

- # Create a Gradio interface
- iface = gr.Interface(
-     fn=generate_answer,
-     inputs="textbox",
-     outputs="text",
-     title="AIML Q&A Chatbot",
-     description="Ask questions related to AIML and get answers from the fine-tuned Llama model."
- )
-
- # Launch the app
- iface.launch()

+ import gradio as gr
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+ import nltk
+ nltk.download('punkt')

+ def generate_answer(question):
+     model_name = "anukvma/bart-aiml-question-answer-v2"
+     tokenizer = AutoTokenizer.from_pretrained(model_name)
+     model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
+     inputs = ["Answer this AIML Question: " + question]
+     inputs = tokenizer(inputs, max_length=256, truncation=True, return_tensors="pt")
+     output = model.generate(**inputs, num_beams=8, do_sample=True, min_length=1, max_length=256)
+     decoded_output = tokenizer.batch_decode(output, skip_special_tokens=True)[0]
+     predicted_title = nltk.sent_tokenize(decoded_output.strip())[0]
+     return predicted_title


+ iface = gr.Interface(
+     fn=generate_answer,
+     inputs=[
+         gr.Textbox(lines=5, label="Question")
+     ],
+     outputs=gr.Textbox(label="Answer")
+ )

+ iface.launch(debug=True)
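
For reference, a minimal standalone sketch of the new inference path, mirroring generate_answer above. The model name, prompt prefix, and generation parameters come from the diff; the sample question is illustrative only.

    # Standalone smoke test for the new BART-based answer path (mirrors app.py).
    import nltk
    from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

    nltk.download('punkt')

    model_name = "anukvma/bart-aiml-question-answer-v2"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

    question = "What is overfitting?"  # illustrative question, not from the commit
    inputs = tokenizer(["Answer this AIML Question: " + question],
                       max_length=256, truncation=True, return_tensors="pt")
    output = model.generate(**inputs, num_beams=8, do_sample=True,
                            min_length=1, max_length=256)
    answer = tokenizer.batch_decode(output, skip_special_tokens=True)[0]
    # As in app.py, keep only the first sentence of the decoded output.
    print(nltk.sent_tokenize(answer.strip())[0])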