cross-hedgehog committed on
Commit
f47464c
·
1 Parent(s): 59ad416

Create main.py

Browse files
Files changed (1) hide show
  1. main.py +70 -0
main.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tensorflow as tf
2
+ from tensorflow import keras
3
+ import pandas as pd
4
+ import numpy as np
5
# Directory prefix (including the trailing slash) under which the CSV data lives.
data_location = "data/"

# One-time notice shown to the user as soon as the module is loaded.
print(
    "keep in mind: the ai will periodicaly relearn from your chat and its own data, "
    "please dont close the ai's console window while it relearns, "
    "as the file could get corrupted, and uterly unusable, "
    "forcing you to retrain it from scratch, "
    "sincirely: the data-finder team(cross)"
)
7
+
8
+ # Define the structure of the neural network
9
def build_model(input_shape):
    """Assemble and compile the binary classifier.

    The network is a plain stack of fully connected layers: two 64-unit ReLU
    layers followed by a single sigmoid output unit. It is compiled with the
    Adam optimizer, binary cross-entropy loss and an accuracy metric.

    Args:
        input_shape: Shape tuple of one input sample (without the batch axis),
            forwarded to the first ``Dense`` layer.

    Returns:
        The compiled ``keras.Sequential`` model.
    """
    network = keras.Sequential()
    network.add(keras.layers.Dense(64, activation='relu', input_shape=input_shape))
    network.add(keras.layers.Dense(64, activation='relu'))
    network.add(keras.layers.Dense(1, activation='sigmoid'))

    network.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return network
19
+
20
+ # Load the training data
21
def load_data(train_file):
    """Load training texts and labels from a CSV file.

    Args:
        train_file: Path or file-like object accepted by ``pandas.read_csv``.
            The file must contain ``content`` and ``expected_output`` columns.

    Returns:
        A ``(X, y)`` tuple: ``X`` is an object array holding the ``content``
        strings and ``y`` holds the matching ``expected_output`` values. Rows
        in which either field is empty are left out.

    Raises:
        KeyError: If ``content`` or ``expected_output`` is not a column of
            the file.
    """
    required = ['content', 'expected_output']

    # Force ``content`` to text: without this pandas parses numeric-looking
    # messages as numbers, which the text tokenizer cannot process.
    df = pd.read_csv(train_file, dtype={'content': str})
    print(df)

    missing = [column for column in required if column not in df.columns]
    if missing:
        raise KeyError(f"{train_file}: missing required column(s) {missing}")

    # Empty cells are read as NaN; such rows cannot be tokenized or used as
    # labels, so drop them instead of failing later during training.
    df = df.dropna(subset=required)

    X = df['content'].to_numpy(dtype=object)
    y = df['expected_output'].values
    return X, y
27
+
28
+ # Convert text data to padded integer sequences using a tokenizer
29
def preprocess_data(X_train, y_train, X_test, y_test, max_features=20000, maxlen=100):
    """Turn raw texts into fixed-length integer sequences.

    A tokenizer is fitted on the training texts only and then applied to both
    splits; each resulting sequence is padded/truncated to ``maxlen`` entries.

    Args:
        X_train: Iterable of training texts.
        y_train: Training labels.
        X_test: Iterable of test texts.
        y_test: Test labels.
        max_features: ``num_words`` limit handed to the tokenizer.
        maxlen: Length every sequence is padded or truncated to.

    Returns:
        ``(X_train, y_train, X_test, y_test)`` where the ``X`` entries are the
        padded sequences and the ``y`` entries are NumPy arrays.
    """
    pad = keras.preprocessing.sequence.pad_sequences

    vocab = keras.preprocessing.text.Tokenizer(num_words=max_features)
    vocab.fit_on_texts(list(X_train))

    train_padded = pad(vocab.texts_to_sequences(X_train), maxlen=maxlen)
    test_padded = pad(vocab.texts_to_sequences(X_test), maxlen=maxlen)

    return train_padded, np.array(y_train), test_padded, np.array(y_test)
39
+
40
+ # Train the neural network on the training data
41
def train(model, X_train, y_train, X_test, y_test, epochs=10, batch_size=32):
    """Fit ``model`` on the training split, validating on the test split.

    Args:
        model: Object exposing a Keras-style ``fit`` method.
        X_train: Training inputs.
        y_train: Training labels.
        X_test: Inputs used as validation data.
        y_test: Labels used as validation data.
        epochs: Number of passes over the training data.
        batch_size: Number of samples per gradient update.

    Returns:
        None. The result of ``fit`` is discarded.
    """
    fit_options = {
        "validation_data": (X_test, y_test),
        "epochs": epochs,
        "batch_size": batch_size,
    }
    model.fit(X_train, y_train, **fit_options)
43
+
44
+ # Generate a response to user input using the trained model
45
def generate_response(model, tokenizer, input_text, maxlen=100):
    """Produce the bot's reply to one line of user input.

    The text is tokenized, padded to ``maxlen`` and scored by ``model``. A
    score of at least 0.5 selects the affirmative reply; anything lower
    selects the clarification request.

    Args:
        model: Trained model exposing a Keras-style ``predict`` method.
        tokenizer: Fitted tokenizer exposing ``texts_to_sequences``. It should
            be the vocabulary the model was trained with.
        input_text: The user's message.
        maxlen: Length the token sequence is padded or truncated to. It must
            match the length used when the model was trained (default 100,
            the same default as ``preprocess_data``).

    Returns:
        The reply string.
    """
    sequences = tokenizer.texts_to_sequences([input_text])
    input_vector = keras.preprocessing.sequence.pad_sequences(sequences, maxlen=maxlen)

    # verbose=0 keeps Keras' per-call progress bar out of the chat transcript.
    output_vector = model.predict(input_vector, verbose=0)

    if output_vector[0][0] >= 0.5:
        return "Yes, I am Chihiro Fujisaki."
    return "I'm not sure what you're asking. Can you please clarify?"
53
+
54
+ # Main function to run the AI
55
def _split_train_test(X, y, test_size=0.2):
    """Shuffle ``X`` and ``y`` in unison and split them into train/test parts.

    Args:
        X: Sequence of samples.
        y: Sequence of labels, aligned with ``X``.
        test_size: Fraction of the samples placed in the test part; the test
            count is rounded up.

    Returns:
        ``(X_train, X_test, y_train, y_test)`` as NumPy arrays.

    Raises:
        ValueError: If either part would end up empty.
    """
    X = np.asarray(X)
    y = np.asarray(y)
    n_samples = len(X)
    n_test = int(np.ceil(n_samples * test_size))
    n_train = n_samples - n_test
    if n_train < 1 or n_test < 1:
        raise ValueError(
            f"cannot split {n_samples} sample(s) with test_size={test_size}: "
            "both the train and the test part need at least one sample"
        )
    order = np.random.permutation(n_samples)
    train_idx, test_idx = order[:n_train], order[n_train:]
    return X[train_idx], X[test_idx], y[train_idx], y[test_idx]


def run():
    """Train the model on the initial data set, then chat on the console.

    Loads ``initial_data.csv`` from ``data_location``, holds out 20% of the
    rows for validation, trains a fresh model and then answers user input in
    a loop until the input stream ends or the user interrupts the prompt.
    """
    train_file = f"{data_location}initial_data.csv"
    X, y = load_data(train_file)

    # The split is done with NumPy: the file never imports a
    # ``train_test_split`` helper, so calling one would raise NameError.
    train_texts, test_texts, y_train, y_test = _split_train_test(X, y, test_size=0.2)
    X_train, y_train, X_test, y_test = preprocess_data(train_texts, y_train, test_texts, y_test)

    model = build_model(input_shape=(X_train.shape[1],))
    train(model, X_train, y_train, X_test, y_test)

    # The chat tokenizer must map words to the same indices the model saw
    # during training, so fit it on the training texts only (exactly what
    # preprocess_data does) rather than on the full data set.
    tokenizer = keras.preprocessing.text.Tokenizer(num_words=20000)
    tokenizer.fit_on_texts(list(train_texts))

    while True:
        try:
            input_text = input("You: ")
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl-C at the prompt: leave the chat cleanly.
            print()
            break
        output_text = generate_response(model, tokenizer, input_text)
        print(f"AI: {output_text}")
68
+
69
# Start training and the chat loop only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    run()