Spaces:
Sleeping
Sleeping
updated app.py for v3
Browse files
app.py
CHANGED
@@ -4,6 +4,7 @@ import pandas as pd
|
|
4 |
import transformers
|
5 |
from transformers import GPT2LMHeadModel, GPT2Tokenizer
|
6 |
import torch
|
|
|
7 |
import gradio as gr
|
8 |
|
9 |
|
@@ -22,27 +23,84 @@ tokenizer.add_tokens(["<bot>:"])
|
|
22 |
|
23 |
|
24 |
#Inference function
|
25 |
-
def
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
|
34 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
#Launch with gradio
|
36 |
-
gr.
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
|
|
|
|
|
|
|
|
|
4 |
import transformers
|
5 |
from transformers import GPT2LMHeadModel, GPT2Tokenizer
|
6 |
import torch
|
7 |
+
import re
|
8 |
import gradio as gr
|
9 |
|
10 |
|
|
|
23 |
|
24 |
|
25 |
#Inference function
|
26 |
+
#Inference function
def infer_1(inp, chat_history):
    """Generate a chatbot reply for *inp* and append the exchange to the history.

    Parameters
    ----------
    inp : str
        Raw user message from the gradio textbox.
    chat_history : list
        Gradio chat history (list of (user, bot) pairs); mutated in place.

    Returns
    -------
    tuple
        ('', chat_history) — the empty string clears the input textbox.
    """
    # Wrap the message in the same control tokens the model was trained with.
    inp_1 = "<startofstring>" + inp + "<bot>:"
    inp_tok = tokenizer(inp_1, return_tensors="pt")
    X = inp_tok["input_ids"].to(device)
    a = inp_tok["attention_mask"].to(device)
    output = model.generate(X, attention_mask=a, max_length=100, num_beams=5, top_k=50)
    # FIX: strip the prompt by *token* count instead of slicing the decoded
    # string with len(inp_1).  Tokenizer encode->decode round-trips do not
    # preserve the prompt text byte-for-byte (whitespace around tokens can
    # shift), so character slicing could truncate the reply or leave prompt
    # fragments behind.  generate() returns the input ids followed by the
    # continuation, so everything past X.shape[-1] is the generated part.
    generated_ids = output[0][X.shape[-1]:]
    output = tokenizer.decode(generated_ids)
    # Remove newlines and the trailing <endofstring>/<pad> control tokens in
    # a single pass, then trim stray surrounding whitespace.
    output = re.sub(r'\n|<endofstring>|<pad>', '', output).strip()
    # Append to chat history
    chat_history.append((inp, output))
    return '', chat_history
|
42 |
|
43 |
|
44 |
+
#Ancillary variables

# Markdown banner shown at the top of the app.
project_heading = """## Project Description

This Chatbot is based on a fine-tuned version of 'GPT2-Small'. Primarily, the text from Robert Kiyosaki's book, "Rich Dad Poor Dad," was processed and used in training.

**Note:** This project is built for educational purposes, and it is important to note that the responses generated by the model should not be interpreted literally.
"""

# Clickable example prompts for the gr.Examples widget (one per line for readability).
sample_questions = [
    "Where should I invest in to protect my assets?",
    "What is the result of people working all their lives for someone else?",
    "Why do you want to earn more passive income?",
    "What inspires winners?",
    "What do most people fail to realize in life?",
    "Why do people struggle financially?",
    "What is the name of your developer?",
    "What do poor people do with their eggs?",
]

# HTML write-up of how the training data was collected and processed.
data_processing_story = """
<h2>Data Collection and Processing</h2>

<h3>Step 1: Extracted raw text data from two broad sources</h3>

<p>1.1. Digital copy of 'Rich Dad Poor Dad' by Parsing the book, Link: <a href="https://drive.google.com/file/d/1vTrfwcqI5rMVq7CsVEy38aRwPTjN1s6T/view?usp=sharing">Rich Dad Poor Dad Book</a></p>
<p>1.2. Scraping websites with summary or quotes from the book such as Link: <a href="https://www.goodreads.com/work/quotes/3366043-rich-dad-poor-dad?page=1">Goodreads Quotes</a></p>

<h3>Step 2: Data Cleaning and Preprocessing</h3>

<p>2.1. Removed irregularities from the text data, e.g., additional comments, quotes, special characters, spaces, indentations, etc.</p>
<p>2.2. Created batches of text with 512 words each, ascertaining appropriate text boundaries for the next stage of the pipeline</p>

<h3>Step 3: Extracted data in the form of question-answer to create a conversational dataset using open source models</h3>

<p>3.1. Used FAQ module from QUESTgen.AI's API, which produced question and one-word answer pairs from the given text, Repo Link: <a href="https://github.com/ramsrigouthamg/Questgen.ai">QUESTgen.AI Repository</a></p>
<p>3.2. Resulting answers didn't carry the complete context and were incomplete, thus arising the need for better and short answers</p>
<p>3.3. Used the model 'roberta-base-squad2' by Deepset for extracting meaningful answers, Model Card Link: <a href="https://huggingface.co/deepset/roberta-base-squad2">roberta-base-squad2 Model Card</a></p>

<h2>Processed Dataset:</h2>
"""

# HTML summary of the fine-tuned model's details.
model_details_HTML = """<h2>Model Details</h2>

<p><strong>Base Model Name:</strong> GPT-2 (Small)</p>
<p><strong>Training Process:</strong> Fine-tuned using GPT-2 with 23 epochs, 64 batch size, and 100 input tokens</p>
<p><strong>Source Code:</strong> <a href="https://huggingface.co/spaces/yugamj/AI_text_detector_01/tree/main">GitHub Repository</a></p>
<p><strong>Base Model Card:</strong> <a href="https://huggingface.co/gpt2">GPT-2 Model Card</a></p>
<p><strong>Inference Time:</strong> Average response time is approximately 13 seconds</p>
"""

# Question/answer pairs used for training; first CSV column is the index.
training_data = pd.read_csv('ques_ans_v5.csv', index_col = 0)
|
88 |
+
|
89 |
#Launch with gradio
with gr.Blocks() as demo:
    # Top banner: project description in Markdown.
    gr.Markdown(value = project_heading)

    # Chat widget, user input box, clear button, and example prompts.
    chatbot = gr.Chatbot(label = "Trained on scripts from \"Rich Dad Poor Dad\"")
    msg = gr.Textbox(label = 'Press enter to submit!')
    clear = gr.ClearButton([msg, chatbot])
    gr.Examples(sample_questions, msg)

    # Submitting the textbox runs inference and refreshes both widgets.
    msg.submit(infer_1, [msg, chatbot], [msg, chatbot])

    # Data-processing write-up followed by the training Q/A table.
    gr.HTML(data_processing_story)
    qa_table = training_data[['Question', 'answer_cb']].rename(
        columns = {'Question':'Question', 'answer_cb':'Answer'}
    )
    gr.DataFrame(qa_table, wrap = True)

    # Model details footer.
    gr.HTML(model_details_HTML)

demo.launch(debug=True)
|