|
import tensorflow as tf |
|
from tensorflow import keras |
|
import pandas as pd |
|
import numpy as np |
|
# Directory (with trailing slash) that holds the training CSV files.
data_location = "data/"

# One-time startup notice shown to the user before anything else happens.
print(
    "keep in mind: the ai will periodicaly relearn from your chat and its own data, "
    "please dont close the ai's console window while it relearns, "
    "as the file could get corrupted, and uterly unusable, "
    "forcing you to retrain it from scratch, "
    "sincirely: the data-finder team(cross)"
)
|
|
|
|
|
def build_model(input_shape):
    """Build and compile a small fully connected binary classifier.

    Args:
        input_shape: Shape tuple of a single input sample (no batch
            dimension); forwarded to the first ``Dense`` layer.

    Returns:
        A compiled ``keras.Sequential`` model made of two 64-unit ReLU
        layers followed by one sigmoid unit, using the Adam optimizer,
        binary cross-entropy loss and the accuracy metric.
    """
    network = keras.Sequential()
    network.add(keras.layers.Dense(64, activation='relu', input_shape=input_shape))
    network.add(keras.layers.Dense(64, activation='relu'))
    network.add(keras.layers.Dense(1, activation='sigmoid'))
    network.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return network
|
|
|
|
|
def load_data(train_file):
    """Load training texts and labels from a CSV file.

    The file must provide a ``content`` column (input text) and an
    ``expected_output`` column (label). ``content`` is always read as text,
    and rows where either cell is empty are dropped, so every returned text
    is a ``str`` and every returned label is present.

    Args:
        train_file: Path (or any other source accepted by
            ``pandas.read_csv``) of the CSV file.

    Returns:
        A tuple ``(X, y)`` of row-aligned NumPy arrays: ``X`` holds the
        texts as Python strings, ``y`` holds the labels.

    Raises:
        KeyError: If one of the required columns is missing.
    """
    # Force text parsing: a purely numeric message such as "123" must stay a
    # string, otherwise it cannot be tokenized later.
    df = pd.read_csv(train_file, dtype={'content': str})
    # Blank cells are read as NaN; such rows carry no usable sample.
    df = df.dropna(subset=['content', 'expected_output'])
    print(df)
    X = df['content'].to_numpy(dtype=object)
    y = df['expected_output'].to_numpy()
    return X, y
|
|
|
|
|
def preprocess_data(X_train, y_train, X_test, y_test, max_features=20000, maxlen=100):
    """Convert raw texts into fixed-length integer sequences.

    A tokenizer limited to ``max_features`` words is fitted on the training
    texts only and then applied to both splits; every resulting sequence is
    passed through ``pad_sequences`` with ``maxlen``. The labels are
    converted to NumPy arrays.

    Args:
        X_train: Iterable of training texts.
        y_train: Training labels.
        X_test: Iterable of test texts.
        y_test: Test labels.
        max_features: ``num_words`` limit given to the tokenizer.
        maxlen: Sequence length given to ``pad_sequences``.

    Returns:
        The tuple ``(X_train, y_train, X_test, y_test)`` after conversion.
    """
    vocab = keras.preprocessing.text.Tokenizer(num_words=max_features)
    vocab.fit_on_texts(list(X_train))

    def encode(texts):
        # Map words to integer ids, then bring every sequence to `maxlen`.
        sequences = vocab.texts_to_sequences(texts)
        return keras.preprocessing.sequence.pad_sequences(sequences, maxlen=maxlen)

    train_matrix = encode(X_train)
    test_matrix = encode(X_test)
    return train_matrix, np.array(y_train), test_matrix, np.array(y_test)
|
|
|
|
|
def train(model, X_train, y_train, X_test, y_test, epochs=10, batch_size=32):
    """Fit ``model`` on the training split, validating on the test split.

    Args:
        model: Object exposing a Keras-style ``fit`` method.
        X_train: Training inputs.
        y_train: Training labels.
        X_test: Validation inputs.
        y_test: Validation labels.
        epochs: Number of passes over the training data.
        batch_size: Number of samples per gradient update.

    Returns:
        Whatever ``model.fit`` returns (for Keras models, the training
        history), so callers can inspect the loss/metric curves instead of
        having the result thrown away.
    """
    return model.fit(
        X_train,
        y_train,
        validation_data=(X_test, y_test),
        epochs=epochs,
        batch_size=batch_size,
    )
|
|
|
|
|
def generate_response(model, tokenizer, input_text, maxlen=100):
    """Classify one user message and return the matching canned reply.

    Args:
        model: Trained model exposing a Keras-style ``predict`` method whose
            output is indexable as ``output[0][0]``.
        tokenizer: Fitted tokenizer exposing ``texts_to_sequences``; it must
            use the same vocabulary the model was trained with.
        input_text: The user's message.
        maxlen: Padded sequence length; must equal the length used when the
            training data was preprocessed (default 100).

    Returns:
        ``"Yes, I am Chihiro Fujisaki."`` when the predicted score is at
        least 0.5, otherwise the clarification prompt.
    """
    sequences = tokenizer.texts_to_sequences([input_text])
    padded = keras.preprocessing.sequence.pad_sequences(sequences, maxlen=maxlen)
    # verbose=0 keeps Keras' per-call progress bar out of the chat transcript.
    score = model.predict(padded, verbose=0)[0][0]
    if score >= 0.5:
        return "Yes, I am Chihiro Fujisaki."
    return "I'm not sure what you're asking. Can you please clarify?"
|
|
|
|
|
def run():
    """Train the classifier on the initial data set, then chat on the console.

    Loads ``initial_data.csv`` from ``data_location``, makes a random 80/20
    train/test split, trains the model and finally answers console input
    until the user ends the session with Ctrl-C or end-of-input.

    Raises:
        ValueError: If the data set has fewer than two rows and therefore
            cannot be split.
    """
    train_file = f"{data_location}initial_data.csv"
    X, y = load_data(train_file)
    X = np.asarray(X)
    y = np.asarray(y)
    if len(X) < 2:
        raise ValueError(f"{train_file} needs at least two rows to be split")

    # Shuffled 80/20 split done with NumPy; the file never imported a
    # train_test_split helper, so calling one raised NameError.
    order = np.random.permutation(len(X))
    n_test = max(1, len(X) // 5)
    test_idx, train_idx = order[:n_test], order[n_test:]
    X_train, X_test = X[train_idx], X[test_idx]
    y_train, y_test = y[train_idx], y[test_idx]

    # The chat tokenizer must produce the same word ids the model was trained
    # on. preprocess_data fits its tokenizer on the raw training texts with a
    # 20000-word limit, so fit this one on exactly the same texts (not on the
    # whole data set) before they are replaced by padded sequences.
    tokenizer = keras.preprocessing.text.Tokenizer(num_words=20000)
    tokenizer.fit_on_texts(list(X_train))

    X_train, y_train, X_test, y_test = preprocess_data(X_train, y_train, X_test, y_test)
    model = build_model(input_shape=(X_train.shape[1],))
    train(model, X_train, y_train, X_test, y_test)

    while True:
        try:
            input_text = input("You: ")
        except (EOFError, KeyboardInterrupt):
            # Leave the chat loop quietly instead of dying with a traceback.
            print()
            break
        output_text = generate_response(model, tokenizer, input_text)
        print(f"AI: {output_text}")
|
|
|
# Start training and the chat session only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    run()
|
|