jrocha committed on
Commit d3645b8 · verified · 1 Parent(s): 5b3f629

Create app.py

Files changed (1)
  1. app.py +77 -0
app.py ADDED
@@ -0,0 +1,77 @@
+ import gradio as gr
+ import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ import pandas as pd
+
+ # Load the pretrained model and tokenizer
+ model_name = "jrocha/tiny_llama"
+ model = AutoModelForCausalLM.from_pretrained(model_name)
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+ # Load the PubMed data
+ df = pd.read_csv('splitted_df_jo.csv')
+
+ # Build a context string from the PubMed section texts
+ def prepare_context():
+     pubmed_information_column = df['section_text']
+     pubmed_information_cleaned = ""
+     for text in pubmed_information_column.tolist():
+         # Keep only the text before the "Objective" heading, if present
+         objective_index = text.find("Objective")
+         if objective_index != -1:
+             pubmed_information_cleaned += text[:objective_index]
+         else:
+             pubmed_information_cleaned += text
+     max_length = 1000  # Adjust as needed
+     return pubmed_information_cleaned[:max_length]
+
+ # Generate an answer for a user question
+ def answer_question(question, history=None):
+     pubmed_information_cleaned = prepare_context()
+
+     # Prepare the input sequence
+     messages = [
+         {
+             "role": "system",
+             "content": "You are a friendly chatbot who responds to questions about cancer. Please be considerate.",
+         },
+         {"role": "user", "content": question},
+     ]
+     prompt_with_pubmed = f"{pubmed_information_cleaned}\n\n"  # Adjust formatting as needed
+     prompt_with_pubmed += tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+
+     # Generate the response
+     input_ids = tokenizer.encode(prompt_with_pubmed, return_tensors='pt')
+     output = model.generate(input_ids, max_length=600, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
+
+     # Decode and return only the text after the assistant marker
+     generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
+     position_assistant = generated_text.find("<|assistant|>") + len("<|assistant|>")
+     return generated_text[position_assistant:]
+
+ def main():
+     """
+     Launches the Women Cancer ChatBot interface.
+
+     Builds a Gradio interface around ``answer_question``, which uses the
+     tokenizer and model loaded at module level. Users can ask questions about
+     women's cancer topics, and the ChatBot answers them using the PubMed context.
+
+     Returns:
+         None
+     Example:
+         >>> main()
+     """
+     iface = gr.Interface(fn=answer_question,
+                          inputs=["text"],
+                          outputs=[gr.Textbox(label="Answer")],
+                          title="Women Cancer ChatBot",
+                          description="How can I help you?",
+                          examples=[
+                              ["What is breast cancer?"],
+                              ["What are treatments for cervical cancer?"]
+                          ])
+
+     iface.launch(debug=True, share=True)
+
+ main()
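
A quick way to sanity-check the handler before launching the UI is to call answer_question directly, bypassing Gradio. This is a minimal smoke test, assuming jrocha/tiny_llama and splitted_df_jo.csv are available locally; the history argument defaults to None, so it can be omitted:

# Smoke test: query the model without launching the interface.
print(answer_question("What is breast cancer?"))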