Spaces:
Runtime error
Runtime error
Commit
·
95f539b
0
Parent(s):
Duplicate from rohan13/makerlab-bot
Browse filesCo-authored-by: Rohan Marwaha <[email protected]>
- .gitattributes +34 -0
- .gitignore +2 -0
- README.md +13 -0
- __pycache__/app.cpython-39.pyc +0 -0
- __pycache__/main.cpython-39.pyc +0 -0
- __pycache__/utils.cpython-39.pyc +0 -0
- app.py +42 -0
- main.py +27 -0
- open_ai.index +3 -0
- open_ai.pkl +3 -0
- requirements.txt +11 -0
- static/chatbot.js +159 -0
- static/style.css +296 -0
- templates/index.html +35 -0
- utils.py +335 -0
.gitattributes
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
29 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
30 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
31 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
32 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
33 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
env/
|
2 |
+
.idea
|
README.md
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: Makerlab Bot
|
3 |
+
emoji: 📉
|
4 |
+
colorFrom: blue
|
5 |
+
colorTo: blue
|
6 |
+
sdk: gradio
|
7 |
+
sdk_version: 3.23.0
|
8 |
+
app_file: app.py
|
9 |
+
pinned: false
|
10 |
+
duplicated_from: rohan13/makerlab-bot
|
11 |
+
---
|
12 |
+
|
13 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
__pycache__/app.cpython-39.pyc
ADDED
Binary file (921 Bytes). View file
|
|
__pycache__/main.cpython-39.pyc
ADDED
Binary file (822 Bytes). View file
|
|
__pycache__/utils.cpython-39.pyc
ADDED
Binary file (4.14 kB). View file
|
|
app.py
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from flask import Flask, render_template
|
2 |
+
from flask_executor import Executor
|
3 |
+
from flask_socketio import SocketIO, emit
|
4 |
+
from flask_cors import cross_origin, CORS
|
5 |
+
from main import run
|
6 |
+
from gevent import monkey
|
7 |
+
|
8 |
+
# Patch the standard library for gevent before any application objects exist.
# NOTE(review): gevent recommends patching before the other imports run;
# here it still happens after the import block above.
monkey.patch_all(ssl=False)

app = Flask(__name__)
# NOTE(review): hard-coded secret; load it from the environment for real deployments.
app.config['SECRET_KEY'] = 'secret!'
# Must be set before Executor(app): the extension reads its configuration
# while it initialises, so assigning it afterwards has no effect.
app.config['EXECUTOR_MAX_WORKERS'] = 10

socketio = SocketIO(app, cors_allowed_origins="*", async_mode='gevent', logger=True)
cors = CORS(app)

# Executor(app) already calls init_app(app); a second explicit call is not needed.
executor = Executor(app)
|
18 |
+
|
19 |
+
@app.route('/')
def index():
    """Serve the chat widget page (templates/index.html)."""
    return render_template('index.html')
|
22 |
+
|
23 |
+
|
24 |
+
@socketio.on('message')
def handle_message(data):
    """Answer one chat question received over the 'message' Socket.IO event.

    Args:
        data: Event payload; expected to be a dict with a 'question' string.

    Emits a 'response' event whose payload is {'response': <text>} holding
    either the answer produced by ``run`` or a short error message.
    """
    # Imported locally because the module's import block never brings in
    # traceback; without it the except branch below raised NameError.
    import traceback

    question = data.get('question') if isinstance(data, dict) else None
    if not isinstance(question, str) or not question.strip():
        # Malformed payloads previously raised KeyError/TypeError in the handler.
        emit('response', {'response': 'Please enter a question.'})
        return
    print("question: " + question)

    # NOTE(review): executor.futures appears to be filled only by
    # submit_stored(), so with plain submit() this check may never trigger.
    if executor.futures:
        emit('response', {'response': 'Server is busy, please try again later'})
        return

    try:
        future = executor.submit(run, question)
        response = future.result()
        emit('response', {'response': response})
    except Exception:
        # Top-level boundary: log the failure and tell the client to retry.
        traceback.print_exc()
        emit('response', {'response': 'Server is busy. Please try again later.'})
|
40 |
+
|
41 |
+
if __name__ == '__main__':
    # Listen on all interfaces, port 7860.
    socketio.run(app, host="0.0.0.0", port=7860)
|
main.py
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from utils import create_index, get_agent_chain, get_prompt_and_tools, get_search_index
|
2 |
+
from utils import get_custom_agent, get_prompt_and_tools_for_custom_agent
|
3 |
+
question_starters = ['who', 'why', 'what', 'how', 'where', 'when', 'which', 'whom', 'whose']
|
4 |
+
|
5 |
+
def run(question):
    """Answer *question* with the custom agent.

    If the agent fails with a "Could not parse LLM output:" ValueError and the
    question starts with a question word but lacks a trailing '?', the question
    is retried once with '?' appended.

    Args:
        question: The user's question text.

    Returns:
        The agent's answer, or None when the agent raised a ValueError that
        does not qualify for the retry.
    """
    # Called for its side effect of loading the search index; the return
    # value was bound to an unused local before.
    get_search_index()

    # prompt, tools = get_prompt_and_tools()
    # agent_chain = get_agent_chain(prompt, tools)
    prompt, tools = get_prompt_and_tools_for_custom_agent()
    agent_chain = get_custom_agent(prompt, tools)

    result = None
    try:
        result = agent_chain.run(question)
        print(result)
    except ValueError as ve:
        # ve.args may be empty or hold a non-string; normalise before matching.
        message = str(ve.args[0]) if ve.args else ""
        lowered = question.lower()
        retryable = (
            "Could not parse LLM output:" in message
            and lowered.startswith(tuple(question_starters))
            and not lowered.endswith('?')
        )
        if retryable:
            question = question + '?'
            result = agent_chain.run(question)
        else:
            # Keep the original contract (return None) but do not hide the failure.
            print("agent failed: " + message)

    return result
|
open_ai.index
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f5a87eb7dbbfd3245fc8025fb2467723bdcba8cdf308127050de9f8bbdeb21bc
|
3 |
+
size 2838573
|
open_ai.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7c399ec43dab5c11fabcc598d507b88db77a59b019c1b2533f2792199c6a1fcc
|
3 |
+
size 3171039
|
requirements.txt
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
faiss-cpu==1.7.3
|
2 |
+
langchain==0.0.131
|
3 |
+
beautifulsoup4==4.12.0
|
4 |
+
PyPDF2==3.0.1
|
5 |
+
openai==0.27.4
|
6 |
+
flask==2.2.3
|
7 |
+
flask-socketio==5.3.3
|
8 |
+
flask-cors==3.0.10
|
9 |
+
flask-executor==1.0.0
|
10 |
+
gevent==22.10.2
|
11 |
+
gevent-websocket==0.10.1
|
static/chatbot.js
ADDED
@@ -0,0 +1,159 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
$(document).ready(function() {
    // Cached references to the widget's DOM nodes.
    var $chatContainer = $('.chat-container');
    var $chatHeader = $('.chat-header');
    var $chatBody = $('.chat-body');
    var $chatInput = $('.chat-input');
    var $input = $('.chat-input input');
    var $submit = $('.chat_submit');

    $chatBody.children().each(function() {
        $(this).addClass('chat-message');
    });

    const buttonLabels = ["What is Makerlab?", "What is 3D printing?",
        "Who are the founders of Makerlab?", "What are the 3D printing prices at Makerlab?",
        "How can I host a birthday at Makerlab?", "Can I book an appointment at Makerlab?",
        "Tell me about softwares used to create 3D printing designs", "Hi, I am bob. Tell me when Makerlab was founded.",
        "Can I get my custom designs 3D printed at Makerlab?", "Can I host a private event at Makerlab?",
        "Does Makerlab host any workshop?", "When is Makerlab open?", "How can I contact the Makerlab Team?"];

    // Initialize SocketIO connection. io() with no URL connects back to the
    // origin that served the page, so the scheme is no longer hard-coded.
    var socket = io();
    const container = document.getElementById("button-container");

    // Build one sample-query button per label.
    if (container) {
        buttonLabels.forEach(function(label, i) {
            const button = document.createElement("button");
            // textContent: labels are plain text, never markup.
            button.textContent = label;
            button.setAttribute("class", "queries");
            button.setAttribute("id", `button-${i}`);
            button.style.margin = "5px";
            container.appendChild(button);
        });
        scrollButtons();
    }

    // Escape text so it can be placed inside HTML safely.
    function escapeHtml(text) {
        return String(text)
            .replace(/&/g, '&amp;')
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;')
            .replace(/"/g, '&quot;')
            .replace(/'/g, '&#39;');
    }

    // Function to send message to Flask-SocketIO app
    function sendMessage(message) {
        console.log("message: " + message);
        socket.emit('message', {'question': message});
    }

    // Function to display message.
    // message: text (or trusted markup when hasHtml is true)
    // isUser:  true for the user's own bubble, false for the bot's
    // hasHtml: when true, message is inserted as-is; only pass trusted markup
    function displayMessage(message, isUser, hasHtml) {
        var $message = $('<div>').addClass('chat-message round');
        var html;
        if (hasHtml) {
            html = message;
        } else {
            // Escape first so user/LLM text cannot inject markup, then linkify.
            // String() inside escapeHtml also tolerates a null response.
            html = escapeHtml(message)
                .replace(/(https?:\/\/[^\s,]+)/g, '<a href="$1" target="_blank" rel="noopener noreferrer">$1</a>')
                .replace(/(SOURCES:)/, '<br>$1');
        }
        // Declared locally; it used to leak as an implicit global.
        var $messageText = $('<p>').html(html);

        $message.append($messageText);
        $message.addClass(isUser ? 'user' : 'bot');

        // A jQuery object is always truthy, so test for a matched element.
        if ($chatBody.length) {
            $chatBody.append($message);
            $chatBody.animate({scrollTop: $chatBody[0].scrollHeight}, 300);
        } else {
            $chatContainer.append($message);
            $chatContainer.animate({scrollTop: 0}, 300);
        }
    }

    socket.on('response', function(data) {
        console.log("Received response: " + data.response);
        displayMessage(data.response, false);
    });

    // Send message on submit
    $submit.click(function(event) {
        event.preventDefault();
        var message = $input.val().trim();
        console.log("Submit clicked: " + message);
        if (message !== '') {
            displayMessage(message, true);
            sendMessage(message);
            $input.val('');
        }
    });

    // Send message on enter key press
    $input.keydown(function(event) {
        if (event.key === 'Enter' || event.keyCode === 13) {
            event.preventDefault();
            $submit.click();
        }
    });

    // Initial message
    displayMessage('Learn about <a href="https://makerlab.illinois.edu/" target="_blank">Makerlab</a>', false, true);

    // Function to minimize the widget
    function minimizeWidget() {
        $chatContainer.addClass('minimized');
        $chatHeader.hide();
        $chatBody.hide();
        $chatInput.hide();
        $chatContainer.append('<div class="chat-bot-icon"><i class="fa fa-android"></i></div>');
    }

    // Function to maximize the widget
    function maximizeWidget() {
        $chatContainer.removeClass('minimized');
        $chatBody.show();
        $chatHeader.show();
        $chatInput.show();
        $('.chat-bot-icon').remove();
    }

    // Minimize the widget on click of close button
    $chatHeader.find('.chat-close').click(function() {
        minimizeWidget();
    });

    // Maximize the widget on click of chat-bot-icon
    $chatContainer.on('click', '.chat-bot-icon', function() {
        maximizeWidget();
    });

    // Add event listener to each button
    $('.queries').click(function() {
        // Set the value of the input field to the text content of the clicked button
        $('input[type="text"]').val($(this).text());
    });

    // Cycle the sample-query buttons into view once per second; pause while
    // the pointer is over the container.
    function scrollButtons() {
        var container = document.getElementById("button-container");
        if (!container) {
            return;
        }
        var buttons = container.querySelectorAll(".queries");
        if (buttons.length === 0) {
            // Nothing to scroll; avoids indexing an empty list in the timer.
            return;
        }
        var current = 0;
        var scrollInterval = null;

        function startScrolling() {
            if (scrollInterval !== null) {
                return; // never stack a second timer
            }
            scrollInterval = setInterval(function() {
                buttons[current].scrollIntoView({ behavior: "smooth", block: "nearest", inline: "center" });
                current = (current + 1) % buttons.length;
            }, 1000);
        }

        function stopScrolling() {
            clearInterval(scrollInterval);
            scrollInterval = null;
        }

        startScrolling();
        container.addEventListener("mouseenter", stopScrolling);
        container.addEventListener("mouseleave", startScrolling);
    }
});
|
static/style.css
ADDED
@@ -0,0 +1,296 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
.chat-container {
|
2 |
+
position: fixed;
|
3 |
+
bottom: 30px;
|
4 |
+
right: 30px;
|
5 |
+
z-index: 999;
|
6 |
+
background-color: #fff;
|
7 |
+
border-radius: 10px;
|
8 |
+
box-shadow: 0px 0px 20px rgba(0, 0, 0, 0.2);
|
9 |
+
max-width: 400px;
|
10 |
+
min-width: 400px;
|
11 |
+
}
|
12 |
+
|
13 |
+
.round {
|
14 |
+
border-radius: 10px;
|
15 |
+
-webkit-border-radius: 10px;
|
16 |
+
-moz-border-radius: 30px;
|
17 |
+
|
18 |
+
}
|
19 |
+
|
20 |
+
.chat-header {
|
21 |
+
display: flex;
|
22 |
+
align-items: center;
|
23 |
+
justify-content: space-between;
|
24 |
+
padding: 10px;
|
25 |
+
background-color: darkblue;
|
26 |
+
color: #fff;
|
27 |
+
border-top-left-radius: 10px;
|
28 |
+
border-top-right-radius: 10px;
|
29 |
+
}
|
30 |
+
|
31 |
+
.chat-header h4 {
|
32 |
+
margin: 0;
|
33 |
+
}
|
34 |
+
|
35 |
+
.chat-close {
|
36 |
+
cursor: pointer;
|
37 |
+
}
|
38 |
+
|
39 |
+
.chat-body {
|
40 |
+
height: 400px;
|
41 |
+
overflow-y: scroll;
|
42 |
+
padding: 10px;
|
43 |
+
word-wrap: break-word;
|
44 |
+
display:flex;
|
45 |
+
flex-direction: column;
|
46 |
+
}
|
47 |
+
|
48 |
+
.chat-message {
|
49 |
+
margin: 10px;
|
50 |
+
}
|
51 |
+
|
52 |
+
.chat-message p {
|
53 |
+
margin: 0;
|
54 |
+
padding: 10px;
|
55 |
+
font-size: 16px;
|
56 |
+
line-height: 1.4;
|
57 |
+
position: relative;
|
58 |
+
word-wrap: break-word;
|
59 |
+
border-radius: 10px;
|
60 |
+
color: #fff;
|
61 |
+
}
|
62 |
+
|
63 |
+
.chat-message.user {
|
64 |
+
display: flex;
|
65 |
+
align-self: flex-end;
|
66 |
+
justify-content: flex-end;
|
67 |
+
text-align: right;
|
68 |
+
align-items: center;
|
69 |
+
background-color: rgba(0, 0, 139, 0.75);
|
70 |
+
border-top-right-radius: 0px;
|
71 |
+
border-bottom-right-radius: 0px;
|
72 |
+
border-bottom-left-radius: 10px;
|
73 |
+
word-wrap: break-word;
|
74 |
+
}
|
75 |
+
|
76 |
+
|
77 |
+
.chat-message.bot {
|
78 |
+
display: flex;
|
79 |
+
align-self: flex-start;
|
80 |
+
justify-content: flex-start;
|
81 |
+
text-align: left;
|
82 |
+
align-items: center;
|
83 |
+
background-color: rgba(0, 0, 139, 0.75);
|
84 |
+
border-top-left-radius: 0px;
|
85 |
+
border-bottom-right-radius: 10px;
|
86 |
+
border-bottom-left-radius: 0px;
|
87 |
+
word-wrap: break-word;
|
88 |
+
}
|
89 |
+
|
90 |
+
.chat-message.bot p {
|
91 |
+
margin: 0;
|
92 |
+
padding: 10px;
|
93 |
+
font-size: 16px;
|
94 |
+
line-height: 1.4;
|
95 |
+
position: relative;
|
96 |
+
word-wrap: break-word;
|
97 |
+
border-radius: 10px;
|
98 |
+
overflow-wrap: anywhere;
|
99 |
+
}
|
100 |
+
|
101 |
+
.chat-message.user:after {
|
102 |
+
content: "";
|
103 |
+
position: relative;
|
104 |
+
top: 0;
|
105 |
+
right: -15px;
|
106 |
+
width: 0;
|
107 |
+
height: 0;
|
108 |
+
border-top: 15px solid transparent;
|
109 |
+
border-bottom: 15px solid transparent;
|
110 |
+
border-left: 16px solid #00008BBF;
|
111 |
+
border-top-right-radius: 10px;
|
112 |
+
}
|
113 |
+
|
114 |
+
.chat-message.bot:before {
|
115 |
+
content: "";
|
116 |
+
position: relative;
|
117 |
+
top: 0;
|
118 |
+
left: -15px;
|
119 |
+
width: 0;
|
120 |
+
height: 0;
|
121 |
+
border-top: 15px solid transparent;
|
122 |
+
border-bottom: 15px solid transparent;
|
123 |
+
border-right: 15px solid #00008BBF;
|
124 |
+
border-top-left-radius: 10px;
|
125 |
+
}
|
126 |
+
|
127 |
+
|
128 |
+
.chat-input {
|
129 |
+
display: flex;
|
130 |
+
margin-top: 10px;
|
131 |
+
}
|
132 |
+
|
133 |
+
.chat-input input {
|
134 |
+
flex-grow: 1;
|
135 |
+
border: none;
|
136 |
+
border-radius: 5px;
|
137 |
+
padding: 8px 10px;
|
138 |
+
font-size: 16px;
|
139 |
+
margin-right: 10px;
|
140 |
+
box-shadow: 0px 0px 5px rgba(0, 0, 0, 0.1);
|
141 |
+
}
|
142 |
+
|
143 |
+
.chat-input button {
|
144 |
+
background-color: #00008BBF;
|
145 |
+
color: #fff;
|
146 |
+
border: none;
|
147 |
+
border-radius: 5px;
|
148 |
+
padding: 8px 10px;
|
149 |
+
font-size: 16px;
|
150 |
+
cursor: pointer;
|
151 |
+
box-shadow: 0px 0px 5px rgba(0, 0, 0, 0.1);
|
152 |
+
}
|
153 |
+
|
154 |
+
/* CSS for chat-container when minimized */
|
155 |
+
.chat-container.minimized {
|
156 |
+
min-width: 70px;
|
157 |
+
height: 70px;
|
158 |
+
border-radius: 50%;
|
159 |
+
position: fixed;
|
160 |
+
bottom: 10px;
|
161 |
+
right: 10px;
|
162 |
+
z-index: 9999;
|
163 |
+
background-color: #fff;
|
164 |
+
box-shadow: 0px 2px 10px rgba(0, 0, 0, 0.3);
|
165 |
+
cursor: pointer;
|
166 |
+
}
|
167 |
+
|
168 |
+
/* CSS for chat-bot-icon */
|
169 |
+
.chat-bot-icon {
|
170 |
+
font-size: 30px;
|
171 |
+
color: #00008BBF;
|
172 |
+
position: absolute;
|
173 |
+
top: 50%;
|
174 |
+
left: 50%;
|
175 |
+
transform: translate(-50%, -50%);
|
176 |
+
}
|
177 |
+
|
178 |
+
/* CSS for chat-header when not minimized */
|
179 |
+
.chat-header {
|
180 |
+
display: flex;
|
181 |
+
justify-content: space-between;
|
182 |
+
align-items: center;
|
183 |
+
background-color: #6c7ae0;
|
184 |
+
color: #fff;
|
185 |
+
padding: 10px;
|
186 |
+
border-top-left-radius: 5px;
|
187 |
+
border-top-right-radius: 5px;
|
188 |
+
}
|
189 |
+
|
190 |
+
/* CSS for chat-container when not minimized */
|
191 |
+
.chat-container:not(.minimized) {
|
192 |
+
border-radius: 5px;
|
193 |
+
position: fixed;
|
194 |
+
bottom: 10px;
|
195 |
+
right: 10px;
|
196 |
+
z-index: 9999;
|
197 |
+
background-color: #fff;
|
198 |
+
box-shadow: 0px 2px 10px rgba(0, 0, 0, 0.3);
|
199 |
+
}
|
200 |
+
|
201 |
+
/* CSS for chat-bot-icon when chat-container is minimized */
|
202 |
+
.chat-container.minimized .chat-bot-icon {
|
203 |
+
display: block;
|
204 |
+
}
|
205 |
+
|
206 |
+
/* CSS for chat-bot-icon when chat-container is not minimized */
|
207 |
+
.chat-container:not(.minimized) .chat-bot-icon {
|
208 |
+
display: none;
|
209 |
+
}
|
210 |
+
|
211 |
+
.queries {
|
212 |
+
|
213 |
+
padding: 8px 12px;
|
214 |
+
font-size: 16px;
|
215 |
+
font-weight: bold;
|
216 |
+
text-align: center;
|
217 |
+
text-decoration: none;
|
218 |
+
border: 0.5px solid #a5a0a0;
|
219 |
+
border-radius: 20px;
|
220 |
+
color: #000;
|
221 |
+
background-color: #343a404a;
|
222 |
+
cursor: pointer;
|
223 |
+
margin: 5px;
|
224 |
+
}
|
225 |
+
|
226 |
+
.queries:hover {
|
227 |
+
background-color: #343a40ad;
|
228 |
+
}
|
229 |
+
|
230 |
+
.queries:active {
|
231 |
+
background-color: #0053a4;
|
232 |
+
}
|
233 |
+
|
234 |
+
#button-container {
|
235 |
+
display: flex;
|
236 |
+
position: relative;
|
237 |
+
left: 2%;
|
238 |
+
top: 40%;
|
239 |
+
flex-direction: column;
|
240 |
+
justify-content: inherit;
|
241 |
+
align-items: center;
|
242 |
+
width: auto;
|
243 |
+
overflow-y: scroll;
|
244 |
+
max-height: 350px;
|
245 |
+
padding-top: 110%;
|
246 |
+
margin-top: 2%;
|
247 |
+
|
248 |
+
}
|
249 |
+
|
250 |
+
#button-container button {
|
251 |
+
margin-bottom: 10px;
|
252 |
+
}
|
253 |
+
|
254 |
+
/* Heading bar above the sample-query buttons. */
.query-heading {
    display: flex;
    position: relative;
    /* The earlier "width: auto%" was invalid CSS and was overridden by this value. */
    width: 100%;
    background-color: #fff;
    padding: 10px;
    z-index: 1;
    justify-content: inherit;
    border-bottom: 1px solid #2f4f4f5e;
}
|
265 |
+
|
266 |
+
.sample-query {
|
267 |
+
display: flex;
|
268 |
+
position: absolute;
|
269 |
+
left: 30%;
|
270 |
+
top: 10%;
|
271 |
+
flex-direction: column;
|
272 |
+
justify-content: flex-start;
|
273 |
+
align-items: center;
|
274 |
+
width: auto;
|
275 |
+
padding: 10px;
|
276 |
+
border: 1px solid #2f4f4f5e;
|
277 |
+
justify-content: center;
|
278 |
+
border-radius: 10px;
|
279 |
+
max-width: 30%;
|
280 |
+
}
|
281 |
+
|
282 |
+
::-webkit-scrollbar {
|
283 |
+
width: 8px;
|
284 |
+
}
|
285 |
+
|
286 |
+
::-webkit-scrollbar-track {
|
287 |
+
background-color: #f4f4f4;
|
288 |
+
}
|
289 |
+
|
290 |
+
::-webkit-scrollbar-thumb {
|
291 |
+
background-color: #a3bfe9a6;
|
292 |
+
border-radius: 20px;
|
293 |
+
}
|
294 |
+
|
295 |
+
|
296 |
+
|
templates/index.html
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <title>MakerlabX3DPrinting QA</title>
    <meta http-equiv="Content-Security-Policy" content="upgrade-insecure-requests">
    <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0/css/bootstrap.min.css">
    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.7.0/css/font-awesome.min.css">
    <!-- Resolved through url_for, like the script below, so it works under any URL prefix. -->
    <link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}">
</head>
<body>
    <!-- Sample queries; the buttons are created by chatbot.js. -->
    <div class="sample-query">
        <div class="query-heading"><h4>Sample Queries</h4></div>
        <div id="button-container">
        </div>
    </div>
    <div class="chat-container">
        <div class="chat-header">
            <h4>Makerlab Q&amp;A Bot</h4>
            <i class="fa fa-close chat-close"></i>
        </div>
        <div class="chat-bot-icon">
            <i class="fa fa-android"></i> <!-- Replace with your bot icon -->
        </div>
        <div class="chat-body chat-messages round"></div>
        <div class="chat-input">
            <input type="text" placeholder="Type your message">
            <button type="button" class="chat_submit">Send</button>
        </div>
    </div>
    <!--<script src="https://cdnjs.cloudflare.com/ajax/libs/socket.io/4.5.1/socket.io.js" integrity="sha512-sY2t8W1xNQ2yB+1RFXJv+wwhdN7CHX9Z+fhM7JH/3B3q1x7VJBOwKe+zb7VW0EC8XG5M5rjBQd7+47F5fQlhKQ==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>-->
    <script src="https://cdn.socket.io/4.4.1/socket.io.min.js"></script>
    <script src="https://code.jquery.com/jquery-3.6.0.min.js"></script>
    <script src="{{ url_for('static', filename='chatbot.js') }}"></script>
</body>
</html>
|
utils.py
ADDED
@@ -0,0 +1,335 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import pickle
|
3 |
+
import re
|
4 |
+
import time
|
5 |
+
from typing import List, Union
|
6 |
+
from urllib.parse import urlparse, urljoin
|
7 |
+
|
8 |
+
import faiss
|
9 |
+
import requests
|
10 |
+
from PyPDF2 import PdfReader
|
11 |
+
from bs4 import BeautifulSoup
|
12 |
+
from langchain import OpenAI, LLMChain
|
13 |
+
from langchain.agents import ConversationalAgent
|
14 |
+
from langchain.agents import Tool, AgentExecutor, LLMSingleActionAgent, AgentOutputParser
|
15 |
+
from langchain.prompts import BaseChatPromptTemplate
|
16 |
+
from langchain.chains import ConversationalRetrievalChain
|
17 |
+
from langchain.docstore.document import Document
|
18 |
+
from langchain.embeddings import OpenAIEmbeddings
|
19 |
+
from langchain.memory import ConversationBufferWindowMemory
|
20 |
+
from langchain.schema import AgentAction, AgentFinish, HumanMessage
|
21 |
+
from langchain.text_splitter import CharacterTextSplitter
|
22 |
+
from langchain.vectorstores.faiss import FAISS
|
23 |
+
|
24 |
+
# Sources that get embedded: the Makerlab website and a local PDF book.
book_url = 'https://g.co/kgs/2VFC7u'  # recorded as the "source" of every book page
book_file = "Book.pdf"
url = 'https://makerlab.illinois.edu/'

# On-disk cache of the search index.
pickle_file = "open_ai.pkl"
index_file = "open_ai.index"

# NOTE(review): these clients are constructed at import time; presumably they
# need OpenAI credentials to be configured before this module is imported.
gpt_3_5 = OpenAI(model_name='gpt-3.5-turbo', temperature=0)

embeddings = OpenAIEmbeddings()

chat_history = []

memory = ConversationBufferWindowMemory(memory_key="chat_history")

# Assigned by get_search_index().
gpt_3_5_index = None
|
40 |
+
|
41 |
+
class CustomOutputParser(AgentOutputParser):
    """Turn raw LLM text into either a tool call or a final answer."""

    def parse(self, llm_output: str) -> Union[AgentAction, AgentFinish]:
        """Parse one LLM completion.

        Args:
            llm_output: The raw text produced by the model.

        Returns:
            AgentFinish when the text contains "AI:" or "Final Answer:"
            (the output is whatever follows the last occurrence), otherwise
            an AgentAction built from the "Action:" / "Action Input:" pair.

        Raises:
            ValueError: If no action / action input pair can be found.
        """
        # Check if agent replied without using tools
        if "AI:" in llm_output:
            return AgentFinish(
                return_values={"output": llm_output.split("AI:")[-1].strip()},
                log=llm_output,
            )
        # Check if agent should finish
        if "Final Answer:" in llm_output:
            return AgentFinish(
                # Return values is generally always a dictionary with a single `output` key
                # It is not recommended to try anything else at the moment :)
                return_values={"output": llm_output.split("Final Answer:")[-1].strip()},
                log=llm_output,
            )
        # Parse out the action and action input
        regex = r"Action: (.*?)[\n]*Action Input:[\s]*(.*)"
        match = re.search(regex, llm_output, re.DOTALL)
        if not match:
            raise ValueError(f"Could not parse LLM output: `{llm_output}`")
        action = match.group(1).strip()
        action_input = match.group(2)
        # Return the action and action input (surrounding spaces and double quotes removed)
        return AgentAction(tool=action, tool_input=action_input.strip(" ").strip('"'), log=llm_output)
|
65 |
+
|
66 |
+
# Set up a prompt template
|
67 |
+
class CustomPromptTemplate(BaseChatPromptTemplate):
    """Chat prompt that fills in the agent scratchpad and the tool listing."""

    # The template to use
    template: str
    # The list of tools available
    tools: List[Tool]

    def format_messages(self, **kwargs) -> List[HumanMessage]:
        """Render the template into a single human message.

        Args:
            **kwargs: Template variables; must include "intermediate_steps",
                an iterable of (AgentAction, observation) tuples.

        Returns:
            A one-element list holding the formatted HumanMessage. (The
            previous ``-> str`` annotation did not match the returned list.)
        """
        # Get the intermediate steps (AgentAction, Observation tuples)
        # Format them in a particular way
        intermediate_steps = kwargs.pop("intermediate_steps")
        thoughts = "".join(
            f"{action.log}\nObservation: {observation}\nThought: "
            for action, observation in intermediate_steps
        )
        # Set the agent_scratchpad variable to that value
        kwargs["agent_scratchpad"] = thoughts
        # Create a tools variable from the list of tools provided
        kwargs["tools"] = "\n".join(f"{tool.name}: {tool.description}" for tool in self.tools)
        # Create a list of tool names for the tools provided
        kwargs["tool_names"] = ", ".join(tool.name for tool in self.tools)
        formatted = self.template.format(**kwargs)
        return [HumanMessage(content=formatted)]
|
89 |
+
|
90 |
+
def get_search_index():
    """Load the cached search index, building it when no cache exists.

    The index is stored in the module-level ``gpt_3_5_index`` and is now also
    returned, so callers that assign the result no longer receive None.

    Returns:
        The search index loaded from ``pickle_file`` or created by
        ``create_index()``.
    """
    global gpt_3_5_index
    cache_present = (
        os.path.isfile(pickle_file)
        and os.path.isfile(index_file)
        and os.path.getsize(pickle_file) > 0
    )
    if cache_present:
        # Load index from pickle file
        # NOTE(review): pickle.load executes arbitrary code from the file;
        # only load a pickle_file that this application wrote itself.
        with open(pickle_file, "rb") as f:
            search_index = pickle.load(f)
    else:
        search_index = create_index()

    gpt_3_5_index = search_index
    return search_index
|
100 |
+
|
101 |
+
|
102 |
+
def create_index():
    """Build the search index from the sources and cache it on disk.

    Writes the FAISS index to ``index_file`` and pickles the whole search
    index object to ``pickle_file``.

    Returns:
        The newly built search index.
    """
    source_chunks = create_chunk_documents()
    search_index = search_index_from_docs(source_chunks)
    faiss.write_index(search_index.index, index_file)
    # Save index to pickle file
    with open(pickle_file, "wb") as f:
        pickle.dump(search_index, f)
    return search_index
|
110 |
+
|
111 |
+
|
112 |
+
def create_chunk_documents():
    """Fetch all sources and split them into chunks for embedding.

    Empty chunks are dropped. Chunks of 1000 characters or more are cut into
    fixed-width pieces that keep the original metadata.

    Returns:
        A list of Document chunks.
    """
    sources = fetch_data_for_embeddings(url, book_file, book_url)
    # print("sources" + str(len(sources)))

    chunk_size = 800
    splitter = CharacterTextSplitter(separator=" ", chunk_size=chunk_size, chunk_overlap=0)

    # Build a new list instead of removing from / extending the list that is
    # being iterated: that skipped elements and could keep re-appending the
    # same oversized chunk without ever finishing.
    source_chunks = []
    for chunk in splitter.split_documents(sources):
        content = chunk.page_content
        if not content:
            # str(): content may be None, which cannot be concatenated.
            print("removing chunk: " + str(content))
            continue
        print("Size of chunk: " + str(len(content) + len(chunk.metadata)))
        if len(content) >= 1000:
            print("splitting document")
            # Presumably the space-based splitter could not break this text
            # (no usable separator), so cut it by width rather than running
            # the same splitter over it again.
            source_chunks.extend(
                Document(page_content=content[start:start + chunk_size],
                         metadata=dict(chunk.metadata))
                for start in range(0, len(content), chunk_size)
            )
        else:
            source_chunks.append(chunk)
    # print("Chunks: " + str(len(source_chunks)) + "and type " + str(type(source_chunks)))
    return source_chunks
|
130 |
+
|
131 |
+
|
132 |
+
def fetch_data_for_embeddings(url, book_file, book_url):
    """Collect documents from the website and from the book PDF.

    Args:
        url: Index URL of the website to crawl.
        book_file: Path of the PDF file to read.
        book_url: Value recorded as the source of each book page.

    Returns:
        The website documents followed by the book documents.
    """
    sources = get_website_data(url)
    sources.extend(get_document_data(book_file, book_url))
    return sources
|
136 |
+
|
137 |
+
def get_website_data(index_url):
    """Return the content of the pages linked from ``index_url``.

    The hrefs found by ``get_paths`` are resolved and filtered by
    ``get_links``; the surviving links are then handed to
    ``get_content_from_links``.
    """
    return get_content_from_links(
        get_links(index_url, get_paths(index_url)),
        index_url,
    )
|
145 |
+
|
146 |
+
|
147 |
+
def get_content_from_links(links, index_url):
    """Download each on-site link and wrap its text in a ``Document``.

    Args:
        links: Iterable of absolute URL strings; duplicates are fetched once.
        index_url: Only links that start with this prefix are fetched.

    Returns:
        A list of ``Document`` objects whose ``page_content`` is the text
        extracted from the page and whose metadata stores the link under
        ``"source"``. Pages answering with an error status are left out.

    Raises:
        requests.RequestException: If a request fails or times out.
    """
    content_list = []
    # Sorted so the crawl order, and therefore the built index, is
    # reproducible; plain set iteration order varies between runs.
    for link in sorted(set(links)):
        if not link.startswith(index_url):
            continue

        # A timeout keeps one unresponsive page from hanging the index build.
        response = requests.get(link, timeout=30)
        # Pause between requests to avoid hammering the site.
        time.sleep(1)

        if not response.ok:
            # Do not feed error pages (404, 500, ...) into the index.
            print("skipping " + link + " (HTTP " + str(response.status_code) + ")")
            continue

        soup = BeautifulSoup(response.content, "html.parser")
        content = soup.get_text(separator="\n")
        content_list.append(Document(page_content=content, metadata={"source": link}))
    return content_list
|
165 |
+
|
166 |
+
|
167 |
+
def get_paths(index_url):
    """Return the distinct ``href`` values of the anchors on the index page.

    Args:
        index_url: URL of the page to download and scan for links.

    Returns:
        A set with the ``href`` attribute of every ``<a>`` tag that has one.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    # A timeout keeps an unresponsive server from blocking forever.
    response = requests.get(index_url, timeout=30)
    soup = BeautifulSoup(response.content, "html.parser")
    return {anchor.get('href') for anchor in soup.find_all('a', href=True)}
|
172 |
+
|
173 |
+
|
174 |
+
def get_links(index_url, paths):
    """Resolve ``paths`` against ``index_url`` and keep the crawlable ones.

    Args:
        index_url: Base URL used to resolve relative paths.
        paths: Iterable of href strings.

    Returns:
        A list of absolute URLs, in input order, whose scheme is ``http`` or
        ``https`` and whose host does not contain ``"squarespace"``.
    """
    def _is_crawlable(candidate):
        parts = urlparse(candidate)
        return parts.scheme in ("http", "https") and "squarespace" not in parts.netloc

    resolved = (urljoin(index_url, path) for path in paths)
    return [candidate for candidate in resolved if _is_crawlable(candidate)]
|
182 |
+
|
183 |
+
|
184 |
+
def get_document_data(book_file, book_url):
    """Read a PDF and return one ``Document`` per page.

    Args:
        book_file: Path of the PDF file, opened in binary mode.
        book_url: Value stored under ``"source"`` in every page's metadata.

    Returns:
        A list of ``Document`` objects in page order, each holding the text
        extracted from one page.
    """
    with open(book_file, 'rb') as pdf_stream:
        reader = PdfReader(pdf_stream)
        # Text is extracted while the file is still open.
        return [
            Document(page_content=page.extract_text(), metadata={"source": book_url})
            for page in reader.pages
        ]
|
195 |
+
|
196 |
+
def search_index_from_docs(source_chunks):
    """Create a FAISS search index from chunk documents.

    Args:
        source_chunks: Documents providing ``page_content`` and ``metadata``.

    Returns:
        The object returned by ``FAISS.from_texts`` for the chunk texts, the
        module-level ``embeddings`` and the matching metadata.
    """
    chunk_texts = [doc.page_content for doc in source_chunks]
    chunk_metadata = [doc.metadata for doc in source_chunks]
    return FAISS.from_texts(chunk_texts, embeddings, metadatas=chunk_metadata)
|
201 |
+
|
202 |
+
|
203 |
+
def get_qa_chain(gpt_3_5_index):
    """Create a conversational retrieval chain on top of the given index.

    Args:
        gpt_3_5_index: Search index; its ``as_retriever()`` supplies the
            chain's retriever.

    Returns:
        A ``ConversationalRetrievalChain`` built from the module-level
        ``gpt_3_5`` model that also returns its source documents.
    """
    print("index: " + str(gpt_3_5_index))
    chain_options = {
        "chain_type": "stuff",
        "get_chat_history": get_chat_history,
        "retriever": gpt_3_5_index.as_retriever(),
        "return_source_documents": True,
        "verbose": True,
    }
    return ConversationalRetrievalChain.from_llm(gpt_3_5, **chain_options)
|
208 |
+
|
209 |
+
def get_chat_history(inputs) -> str:
    """Render ``(human, ai)`` pairs as newline-separated transcript text.

    Args:
        inputs: Iterable of two-item ``(human, ai)`` pairs.

    Returns:
        One ``Human:...`` line followed by one ``AI:...`` line per pair, all
        joined with newlines; an empty string when there are no pairs.
    """
    return "\n".join(f"Human:{human}\nAI:{ai}" for human, ai in inputs)
|
214 |
+
|
215 |
+
|
216 |
+
def generate_answer(question) -> str:
    """Answer ``question`` with the retrieval chain and list the sources used.

    Args:
        question: The user's question.

    Returns:
        The chain's answer followed by ``"\\nSOURCES: "`` and the distinct
        sources of the retrieved documents, in retrieval order.
    """
    global chat_history

    gpt_3_5_chain = get_qa_chain(gpt_3_5_index)
    result = gpt_3_5_chain(
        {"question": question, "chat_history": chat_history, "vectordbkwargs": {"search_distance": 0.8}})
    print("REsult: " + str(result))

    # NOTE(review): the history is replaced, not appended to, so only the
    # latest exchange reaches the next call -- confirm this is intended.
    chat_history = [(question, result["answer"])]

    # dict.fromkeys removes duplicates while keeping first-seen order, so the
    # SOURCES list is stable across runs (a set has arbitrary order).
    unique_sources = dict.fromkeys(
        document.metadata['source'] for document in result['source_documents']
    )
    return result['answer'] + '\nSOURCES: ' + ',\n'.join(unique_sources)
|
231 |
+
|
232 |
+
|
233 |
+
def get_agent_chain(prompt, tools):
    """Assemble an agent executor around a ``ConversationalAgent``.

    Args:
        prompt: Prompt given to the agent's ``LLMChain``.
        tools: Tools made available to both the agent and the executor.

    Returns:
        An ``AgentExecutor`` that uses the module-level ``gpt_3_5`` model and
        ``memory``.
    """
    conversational_agent = ConversationalAgent(
        llm_chain=LLMChain(llm=gpt_3_5, prompt=prompt),
        tools=tools,
        verbose=True,
    )
    return AgentExecutor.from_agent_and_tools(
        agent=conversational_agent,
        tools=tools,
        verbose=True,
        memory=memory,
        intermediate_steps=True,
    )
|
241 |
+
|
242 |
+
|
243 |
+
def get_prompt_and_tools():
    """Build the prompt and the tool list for the conversational agent.

    Returns:
        A ``(prompt, tools)`` tuple: the prompt produced by
        ``ConversationalAgent.create_prompt`` and the tools from ``get_tools()``.
    """
    # The prompt text is sent to the model verbatim; every character matters.
    preamble = """Have a conversation with a human, answering the following questions as best you can.
Always try to use Vectorstore first.
Your name is Makerlab Bot because you are a personal assistant of Makerlab. You have access to the following tools:"""
    closing = """Begin! If you use any tool, ALWAYS return a "SOURCES" part in your answer"

{chat_history}
Question: {input}
{agent_scratchpad}
SOURCES:"""

    tools = get_tools()
    prompt = ConversationalAgent.create_prompt(
        tools,
        prefix=preamble,
        suffix=closing,
        input_variables=["input", "chat_history", "agent_scratchpad"],
    )
    return prompt, tools
|
263 |
+
|
264 |
+
|
265 |
+
def get_tools():
    """Return the tools offered to the agent.

    Returns:
        A one-element list containing the "Vectorstore" tool, which calls
        ``generate_answer`` and returns its output directly.
    """
    vectorstore_tool = Tool(
        name="Vectorstore",
        func=generate_answer,
        description="useful for when you need to answer questions about the Makerlab or 3D Printing.",
        return_direct=True,
    )
    return [vectorstore_tool]
|
274 |
+
|
275 |
+
def get_custom_agent(prompt, tools):
    """Assemble an agent executor around an ``LLMSingleActionAgent``.

    Args:
        prompt: Prompt given to the agent's ``LLMChain``.
        tools: Tools the agent may call; their names form the allowed list.

    Returns:
        An ``AgentExecutor`` that uses the module-level ``gpt_3_5`` model and
        ``memory``, with output parsed by ``CustomOutputParser``.
    """
    single_action_agent = LLMSingleActionAgent(
        llm_chain=LLMChain(llm=gpt_3_5, prompt=prompt),
        output_parser=CustomOutputParser(),
        # Generation stops at this marker.
        stop=["\nObservation:"],
        allowed_tools=[tool.name for tool in tools],
    )
    return AgentExecutor.from_agent_and_tools(
        agent=single_action_agent,
        tools=tools,
        verbose=True,
        memory=memory,
        intermediate_steps=True,
    )
|
290 |
+
|
291 |
+
def get_prompt_and_tools_for_custom_agent():
    """Build the prompt and the tool list for the custom single-action agent.

    Returns:
        A ``(prompt, tools)`` tuple: a ``CustomPromptTemplate`` wrapping the
        template below and the tools from ``get_tools()``.
    """
    tools = get_tools()

    # The template text is sent to the model verbatim; every character matters.
    template = """
Have a conversation with a human, answering the following questions as best you can.
Always try to use Vectorstore first.
Your name is Makerlab Bot because you are a personal assistant of Makerlab. You have access to the following tools:

{tools}

To answer for the new input, use the following format:

New Input: the input question you must answer
Thought: Do I need to use a tool? Yes
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question. SOURCES: the sources referred to find the final answer


When you have a response to say to the Human and DO NOT need to use a tool:
1. DO NOT return "SOURCES" if you did not use any tool.
2. You MUST use this format:
```
Thought: Do I need to use a tool? No
AI: [your response here]
```

Begin! Remember to speak as a personal assistant when giving your final answer.
ALWAYS return a "SOURCES" part in your answer, if you used any tool.

Previous conversation history:
{chat_history}
New input: {input}
{agent_scratchpad}
SOURCES:"""

    prompt = CustomPromptTemplate(
        template=template,
        tools=tools,
        # `agent_scratchpad`, `tools` and `tool_names` are generated
        # dynamically, so they are not declared here; `intermediate_steps`
        # is declared because it is needed.
        input_variables=["input", "intermediate_steps", "chat_history"],
    )
    return prompt, tools
|