Create app.py
app.py
ADDED
@@ -0,0 +1,77 @@
+ import gradio as gr
+ import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ import pandas as pd
+
+ # Load the pretrained model and tokenizer
+ model_name = "jrocha/tiny_llama"
+ model = AutoModelForCausalLM.from_pretrained(model_name)
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+ # Load the PubMed data used as context
+ df = pd.read_csv('splitted_df_jo.csv')
+
+ def prepare_context():
+     """Concatenates the PubMed section texts, dropping everything from each
+     entry's "Objective" heading onwards, and caps the combined length."""
+     pubmed_information_column = df['section_text']
+     pubmed_information_cleaned = ""
+     for text in pubmed_information_column.tolist():
+         objective_index = text.find("Objective")
+         if objective_index != -1:
+             cleaned_text = text[:objective_index]
+             pubmed_information_cleaned += cleaned_text
+         else:
+             pubmed_information_cleaned += text
+     max_length = 1000  # Adjust as needed
+     return pubmed_information_cleaned[:max_length]
+
+ def answer_question(question, history=None):
+     """Generates an answer to a question, prefixing the prompt with the
+     PubMed context. `history` is accepted for chat-style callers but unused."""
+     pubmed_information_cleaned = prepare_context()
+
+     # Prepare the input sequence
+     messages = [
+         {
+             "role": "system",
+             "content": "You are a friendly chatbot who responds to questions about cancer. Please be considerate.",
+         },
+         {"role": "user", "content": question},
+     ]
+     prompt_with_pubmed = f"{pubmed_information_cleaned}\n\n"  # Adjust formatting as needed
+     prompt_with_pubmed += tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=False)
+
+     # Generate a response
+     input_ids = tokenizer.encode(prompt_with_pubmed, return_tensors='pt')
+     output = model.generate(input_ids, max_length=600, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
+
+     # Decode the output and return only the assistant's reply
+     generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
+     position_assistant = generated_text.find("<|assistant|>") + len("<|assistant|>")
+     return generated_text[position_assistant:]
+
+ def main():
+     """
+     Initializes a Women Cancer ChatBot interface using Hugging Face models for question answering.
+
+     This app loads a pretrained tokenizer and model from the Hugging Face model hub
+     and creates a Gradio interface for the ChatBot. Users can input questions related to
+     women's cancer topics, and the ChatBot generates answers based on the provided context.
+
+     Returns:
+         None
+
+     Example:
+         >>> main()
+     """
+     iface = gr.Interface(fn=answer_question,
+                          inputs=["text"],
+                          outputs=[gr.Textbox(label="Answer")],
+                          title="Women Cancer ChatBot",
+                          description="How can I help you?",
+                          examples=[
+                              ["What is breast cancer?"],
+                              ["What are treatments for cervical cancer?"]
+                          ])
+
+     return iface.launch(debug=True, share=True)
+
+ if __name__ == "__main__":
+     main()
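Note that the slicing on "<|assistant|>" above assumes the checkpoint's chat template renders that literal tag in the decoded output; if it does not, find() returns -1 and the answer is silently sliced from index 12. A minimal sketch to verify the rendered prompt before trusting the marker, assuming only that jrocha/tiny_llama ships a TinyLlama/Zephyr-style chat template (this snippet is illustrative and not part of the commit):

from transformers import AutoTokenizer

# Illustrative check, not part of app.py: print the rendered prompt so the
# "<|assistant|>" slicing in answer_question() can be verified by eye.
tokenizer = AutoTokenizer.from_pretrained("jrocha/tiny_llama")
messages = [
    {"role": "system", "content": "You are a friendly chatbot."},
    {"role": "user", "content": "What is breast cancer?"},
]
print(tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True))

For a Zephyr-style template the printed prompt ends with a literal <|assistant|> line, which is what the marker search depends on. Running this as a Hugging Face Space would also presumably need a requirements.txt listing gradio, torch, transformers, and pandas alongside app.py.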