yusiqo committed on
Commit
842d7ef
·
verified ·
1 Parent(s): 09938da

Rename chatbot.py to app

Browse files
Files changed (1) hide show
  1. chatbot.py → app +142 -141
chatbot.py → app RENAMED
@@ -1,141 +1,142 @@
1
- import argparse
2
- import tensorflow as tf
3
- import model
4
- from dataset import get_dataset, preprocess_sentence
5
-
6
-
7
- def inference(hparams, chatbot, tokenizer, sentence):
8
- sentence = preprocess_sentence(sentence)
9
-
10
- sentence = tf.expand_dims(
11
- hparams.start_token + tokenizer.encode(sentence) + hparams.end_token, axis=0
12
- )
13
-
14
- output = tf.expand_dims(hparams.start_token, 0)
15
-
16
- for _ in range(hparams.max_length):
17
- predictions = chatbot(inputs=[sentence, output], training=False)
18
-
19
- predictions = predictions[:, -1:, :]
20
- predicted_id = tf.cast(tf.argmax(predictions, axis=-1), tf.int32)
21
-
22
- if tf.equal(predicted_id, hparams.end_token[0]):
23
- break
24
-
25
- output = tf.concat([output, predicted_id], axis=-1)
26
-
27
- return tf.squeeze(output, axis=0)
28
-
29
-
30
- def predict(hparams, chatbot, tokenizer, sentence):
31
- prediction = inference(hparams, chatbot, tokenizer, sentence)
32
- predicted_sentence = tokenizer.decode(
33
- [i for i in prediction if i < tokenizer.vocab_size]
34
- )
35
- return predicted_sentence
36
-
37
- def read_file(file_path):
38
- with open(file_path, 'r', encoding='utf-8') as file:
39
- lines = file.readlines()
40
- return lines
41
-
42
- def append_to_file(file_path, line):
43
- with open(file_path, 'a', encoding='utf-8') as file:
44
- file.write(f"{line}\n")
45
-
46
- def get_last_ids(lines_file, conversations_file):
47
- lines = read_file(lines_file)
48
- conversations = read_file(conversations_file)
49
-
50
- last_line = lines[-1]
51
- last_conversation = conversations[-1]
52
-
53
- last_line_id = int(last_line.split(" +++$+++ ")[0][1:])
54
- last_user_id = int(last_conversation.split(" +++$+++ ")[1][1:])
55
- last_movie_id = int(last_conversation.split(" +++$+++ ")[2][1:])
56
-
57
- return last_line_id, last_user_id, last_movie_id
58
-
59
- def update_data_files(user_input, bot_response, lines_file='data/lines.txt', conversations_file='data/conversations.txt'):
60
- last_line_id, last_user_id, last_movie_id = get_last_ids(lines_file, conversations_file)
61
-
62
- new_line_id = f"L{last_line_id + 1}"
63
- new_bot_line_id = f"L{last_line_id + 2}"
64
- new_user_id = f"u{last_user_id + 1}"
65
- new_bot_user_id = f"u{last_user_id + 2}"
66
- new_movie_id = f"m{last_movie_id + 1}"
67
-
68
- append_to_file(lines_file, f"{new_line_id} +++$+++ {new_user_id} +++$+++ {new_movie_id} +++$+++ Ben +++$+++ {user_input}")
69
- append_to_file(lines_file, f"{new_bot_line_id} +++$+++ {new_bot_user_id} +++$+++ {new_movie_id} +++$+++ Bot +++$+++ {bot_response}")
70
-
71
- new_conversation = f"{new_user_id} +++$+++ {new_bot_user_id} +++$+++ {new_movie_id} +++$+++ ['{new_line_id}', '{new_bot_line_id}']"
72
- append_to_file(conversations_file, new_conversation)
73
-
74
- def get_feedback():
75
- feedback = input("Bu cevap yardımcı oldu mu? (Evet/Hayır): ").lower()
76
- return feedback == "Evet"
77
-
78
- def chat(hparams, chatbot, tokenizer):
79
- print("\nCHATBOT")
80
-
81
- for _ in range(5):
82
- sentence = input("Sen: ")
83
- output = predict(hparams, chatbot, tokenizer, sentence)
84
- print(f"\nBOT: {output}")
85
-
86
-
87
- user_input = sentence
88
- bot_response = output
89
-
90
- feedback = get_feedback()
91
-
92
- if feedback:
93
- update_data_files(user_input, bot_response)
94
- else:
95
- pass
96
-
97
-
98
- def main(hparams):
99
-
100
- _, token = get_dataset(hparams)
101
-
102
- tf.keras.backend.clear_session()
103
- chatbot = tf.keras.models.load_model(
104
- hparams.save_model,
105
- custom_objects={
106
- "PositionalEncoding": model.PositionalEncoding,
107
- "MultiHeadAttention": model.MultiHeadAttention,
108
- },
109
- compile=False,
110
- )
111
-
112
-
113
- chat(hparams, chatbot, token)
114
-
115
-
116
- if __name__ == "__main__":
117
-
118
- parser = argparse.ArgumentParser()
119
- parser.add_argument(
120
- "--save_model", default="model.h5", type=str, help="path save the model"
121
- )
122
- parser.add_argument(
123
- "--max_samples",
124
- default=25000,
125
- type=int,
126
- help="maximum number of conversation pairs to use",
127
- )
128
- parser.add_argument(
129
- "--max_length", default=40, type=int, help="maximum sentence length"
130
- )
131
- parser.add_argument("--batch_size", default=64, type=int)
132
- parser.add_argument("--num_layers", default=2, type=int)
133
- parser.add_argument("--num_units", default=512, type=int)
134
- parser.add_argument("--d_model", default=256, type=int)
135
- parser.add_argument("--num_heads", default=8, type=int)
136
- parser.add_argument("--dropout", default=0.1, type=float)
137
- parser.add_argument("--activation", default="relu", type=str)
138
- parser.add_argument("--epochs", default=80, type=int)
139
-
140
- main(parser.parse_args())
141
-
 
 
1
+ import argparse
2
+ import streamlit as st
3
+ import tensorflow as tf
4
+ import model
5
+ from dataset import get_dataset, preprocess_sentence
6
+
7
+
8
def inference(hparams, chatbot, tokenizer, sentence):
    """Greedily decode a reply to ``sentence``, one token id per step.

    The sentence is passed through ``preprocess_sentence``, encoded with
    ``tokenizer`` and wrapped in ``hparams.start_token`` and
    ``hparams.end_token``. The model is then called repeatedly; at each step
    the arg-max id of the last output position is appended to the reply,
    until that id equals ``hparams.end_token[0]`` or ``hparams.max_length``
    steps have been taken.

    Returns:
        The decoded ids with the leading batch axis squeezed away. The
        sequence still begins with the start token.
    """
    cleaned = preprocess_sentence(sentence)
    wrapped_ids = hparams.start_token + tokenizer.encode(cleaned) + hparams.end_token
    encoder_input = tf.expand_dims(wrapped_ids, axis=0)

    # The reply is seeded with the start token only and grows by one id per step.
    decoded = tf.expand_dims(hparams.start_token, 0)

    for _step in range(hparams.max_length):
        step_output = chatbot(inputs=[encoder_input, decoded], training=False)

        # Only the newest position matters for choosing the next id.
        newest = step_output[:, -1:, :]
        next_id = tf.cast(tf.argmax(newest, axis=-1), tf.int32)

        if tf.equal(next_id, hparams.end_token[0]):
            break

        decoded = tf.concat([decoded, next_id], axis=-1)

    return tf.squeeze(decoded, axis=0)
29
+
30
+
31
def predict(hparams, chatbot, tokenizer, sentence):
    """Return the chatbot's reply to ``sentence`` as decoded text.

    Runs ``inference`` and decodes the resulting ids with ``tokenizer``.
    """
    token_ids = inference(hparams, chatbot, tokenizer, sentence)
    # Ids at or above the tokenizer's vocabulary size are dropped before decoding.
    in_vocab = [token for token in token_ids if token < tokenizer.vocab_size]
    return tokenizer.decode(in_vocab)
37
+
38
def read_file(file_path):
    """Return every line of the UTF-8 text file at ``file_path`` as a list.

    Line endings are kept, exactly as ``readlines`` yields them.
    """
    with open(file_path, mode='r', encoding='utf-8') as handle:
        return handle.readlines()
42
+
43
def append_to_file(file_path, line):
    """Append ``line`` followed by a newline to the UTF-8 file at ``file_path``.

    The file is opened in append mode, so existing content is left in place.
    """
    record = "{}\n".format(line)
    with open(file_path, mode='a', encoding='utf-8') as handle:
        handle.write(record)
46
+
47
def get_last_ids(lines_file, conversations_file):
    """Read the numeric ids carried by the final record of each data file.

    Records are split on the " +++$+++ " separator. The line id is the first
    field of the last line in ``lines_file``; the user id and movie id are
    the second and third fields of the last line in ``conversations_file``.
    The first character of each id is dropped and the remainder is parsed as
    an integer.

    Returns:
        A ``(last_line_id, last_user_id, last_movie_id)`` tuple of ints.
    """
    separator = " +++$+++ "

    all_lines = read_file(lines_file)
    all_conversations = read_file(conversations_file)

    line_fields = all_lines[-1].split(separator)
    conversation_fields = all_conversations[-1].split(separator)

    # Strip the leading letter of each id before converting it to a number.
    line_number = int(line_fields[0][1:])
    user_number = int(conversation_fields[1][1:])
    movie_number = int(conversation_fields[2][1:])

    return line_number, user_number, movie_number
59
+
60
def update_data_files(user_input, bot_response, lines_file='data/lines.txt', conversations_file='data/conversations.txt'):
    """Append one user/bot exchange to the line and conversation data files.

    New ids continue from the values returned by ``get_last_ids``: the user
    line and bot line get the next two line ids, the user and bot get the
    next two user ids, and the exchange gets the next movie id. Two records
    are appended to ``lines_file`` (speakers "Ben" and "Bot") and one record
    linking them is appended to ``conversations_file``.

    Args:
        user_input: Text the user entered.
        bot_response: Text the chatbot answered with.
        lines_file: Path of the per-line data file.
        conversations_file: Path of the conversation data file.
    """
    last_line_id, last_user_id, last_movie_id = get_last_ids(lines_file, conversations_file)

    # Both files hold exactly one record per line and get_last_ids parses the
    # final line, so a line break inside the text (e.g. typed into a
    # multi-line input box) would split a record and break the next id
    # lookup. Flatten any line breaks to single spaces before writing.
    user_text = " ".join(str(user_input).splitlines())
    bot_text = " ".join(str(bot_response).splitlines())

    new_line_id = f"L{last_line_id + 1}"
    new_bot_line_id = f"L{last_line_id + 2}"
    new_user_id = f"u{last_user_id + 1}"
    new_bot_user_id = f"u{last_user_id + 2}"
    new_movie_id = f"m{last_movie_id + 1}"

    append_to_file(lines_file, f"{new_line_id} +++$+++ {new_user_id} +++$+++ {new_movie_id} +++$+++ Ben +++$+++ {user_text}")
    append_to_file(lines_file, f"{new_bot_line_id} +++$+++ {new_bot_user_id} +++$+++ {new_movie_id} +++$+++ Bot +++$+++ {bot_text}")

    new_conversation = f"{new_user_id} +++$+++ {new_bot_user_id} +++$+++ {new_movie_id} +++$+++ ['{new_line_id}', '{new_bot_line_id}']"
    append_to_file(conversations_file, new_conversation)
74
+
75
def get_feedback():
    """Ask on the console whether the reply helped.

    Returns:
        True when the answer is "evet" in any letter case, ignoring
        surrounding whitespace; False for anything else.
    """
    answer = input("Bu cevap yardımcı oldu mu? (Evet/Hayır): ")
    # The answer used to be lower-cased and then compared with the
    # capitalised "Evet", which could never match; compare against the
    # lower-case form instead.
    return answer.strip().lower() == "evet"
78
+
79
def chat(hparams, chatbot, tokenizer):
    """Render five chat turns on the Streamlit page and record liked replies.

    For every turn a text area collects the user's sentence, the reply from
    ``predict`` is displayed, and the exchange is appended to the data files
    when ``get_feedback`` returns True.

    Args:
        hparams: Hyper-parameters forwarded to ``predict``.
        chatbot: Loaded model forwarded to ``predict``.
        tokenizer: Tokenizer forwarded to ``predict``.
    """
    print("\nCHATBOT")

    for turn in range(5):
        # Widgets created with identical arguments and no key collide in
        # Streamlit, so each turn's text area gets its own explicit key.
        sentence = st.text_area("Sen: ", key=f"chat_input_{turn}")

        # An untouched text area yields an empty string; there is nothing to
        # answer or store for it.
        if not sentence or not sentence.strip():
            continue

        output = predict(hparams, chatbot, tokenizer, sentence)
        # The reply is a plain sentence, not serialized JSON, so show it as
        # text rather than through st.json.
        st.text(output)

        # NOTE(review): get_feedback reads the answer with input() from the
        # console, which looks unsuitable inside a Streamlit app -- confirm
        # and consider replacing it with a widget.
        if get_feedback():
            update_data_files(sentence, output)
97
+
98
+
99
def main(hparams):
    """Load the saved chatbot model and start the chat session.

    Args:
        hparams: Parsed command-line options; ``save_model`` is the path the
            model is loaded from, and the whole object is forwarded to
            ``get_dataset`` and ``chat``.
    """
    # Only the second value returned by get_dataset is used; it is handed to
    # chat as the tokenizer.
    _unused, tokenizer = get_dataset(hparams)

    tf.keras.backend.clear_session()

    # The saved model references these project-defined layers by name.
    custom_layers = {
        "PositionalEncoding": model.PositionalEncoding,
        "MultiHeadAttention": model.MultiHeadAttention,
    }
    chatbot = tf.keras.models.load_model(
        hparams.save_model, custom_objects=custom_layers, compile=False
    )

    chat(hparams, chatbot, tokenizer)
115
+
116
+
117
if __name__ == "__main__":
    # Build the command-line interface and hand the parsed options to main().
    parser = argparse.ArgumentParser()

    parser.add_argument("--save_model", type=str, default="model.h5", help="path save the model")
    parser.add_argument(
        "--max_samples", type=int, default=25000, help="maximum number of conversation pairs to use"
    )
    parser.add_argument("--max_length", type=int, default=40, help="maximum sentence length")

    # Options without help text, declared in their original order.
    for flag, kind, default in (
        ("--batch_size", int, 64),
        ("--num_layers", int, 2),
        ("--num_units", int, 512),
        ("--d_model", int, 256),
        ("--num_heads", int, 8),
        ("--dropout", float, 0.1),
        ("--activation", str, "relu"),
        ("--epochs", int, 80),
    ):
        parser.add_argument(flag, default=default, type=kind)

    main(parser.parse_args())
142
+