razerblade072611 committed
Commit 82f5e81 · Parent(s): 6c8a6af

Update README.md

README.md CHANGED
Removed (previous revision, a single inline script; many of its deleted lines are truncated in this view): imports of os, sys, numpy and spacy; a print of nltk.__version__; NLTK resource downloads; a spaCy NER demo that processed "Example text to process" and collected PERSON/ORG/GPE entities; loading an API key from './API_KEY.env'; pyttsx3 voice listing and a hard-coded Windows registry voice token (TTS_MS_EN-GB_HAZEL_11.0, Microsoft Hazel Desktop - English (Great Britain)); a CommonModule(model, name, param1, param2) whose generation call used do_sample=True, min_length=10, max_length=50, num_return_sequences=2 and temperature=0.3; speak/listen helpers built on recognizer.recognize_google; a "bye" command that saved memory to 'C:\Users\withe\PycharmProjects\no hope\Chat_Bot_Main\save_memory.json' and broke the loop; a MemoryModule that lemmatized tokens and ranked memory entries by vectorized np.dot similarity above 0.5; sentiment helpers returning 'negative'/'neutral'; an exit_handler(common_module), a check_memory_file(file_path) helper, and a main block instantiating CommonModule(model, "Chat bot1", 'value1', 'value2') and registering the exit handler with atexit.

Added (new revision): the script reorganized into the three labeled modules below.
MAIN_SCRIPT_MODULE (common_module)

```python
import atexit
import json
import os

import nltk
import pyttsx3
import spacy
import speech_recognition as sr
import torch
from nltk.sentiment import SentimentIntensityAnalyzer
from transformers import GPTNeoForCausalLM, AutoTokenizer

from memory_module import MemoryModule
from sentiment_module import SentimentAnalysisModule

# Download necessary NLTK resources before first use;
# vader_lexicon backs SentimentIntensityAnalyzer below
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('stopwords')
nltk.download('vader_lexicon')

# Get the current directory
current_directory = os.getcwd()

# Get a list of files and directories in the current directory
file_list = os.listdir(current_directory)

# Print the list
for file_name in file_list:
    print(file_name)

sia = SentimentIntensityAnalyzer()
sentence = "This is a positive sentence."
sentiment = sia.polarity_scores(sentence)

# Access sentiment scores
compound_score = sentiment['compound']
positive_score = sentiment['pos']
negative_score = sentiment['neg']

model_directory = "EleutherAI/gpt-neo-125m"
```
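For reference, `polarity_scores` returns a dict with `neg`, `neu`, `pos` and `compound` fields; printing it whole makes the three lookups above concrete (output values are illustrative):

```python
# VADER returns 'neg'/'neu'/'pos' proportions plus a normalized 'compound' score in [-1, 1].
print(sia.polarity_scores("This is a positive sentence."))
# e.g. {'neg': 0.0, 'neu': 0.5..., 'pos': 0.4..., 'compound': 0.6...}
```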
```python
# Check if GPU is available and set the device accordingly
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
if torch.cuda.is_available():
    current_device = torch.cuda.current_device()
    print(f"Using GPU: {torch.cuda.get_device_name(current_device)}")
else:
    print("No GPU available, using CPU.")

# Initialize the speech engine
speech_engine = pyttsx3.init()

# Get the list of available voices
voices = speech_engine.getProperty('voices')
for voice in voices:
    print(voice.id, voice.name)

# ... (voice-selection `if voice_id:` block, diff lines 59-72, unchanged and not shown) ...
else:
    print("Desired voice not found.")
```
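The diff elides the voice-selection block itself; it carries over from the previous revision, which targeted Microsoft Hazel. A minimal sketch of what such a block typically looks like — hypothetical, not the author's exact code:

```python
# Hypothetical reconstruction of the elided selection step; the real block may differ.
voice_id = None
for voice in speech_engine.getProperty('voices'):
    if 'Hazel' in voice.name:  # 'Hazel' assumed from the previous revision's registry token
        voice_id = voice.id
        break

if voice_id:
    speech_engine.setProperty('voice', voice_id)
else:
    print("Desired voice not found.")
```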
```python
# Load the spaCy English model
nlp = spacy.load('en_core_web_sm')

# Update the CommonModule instantiation
load_memory_file = "load_memory.json"
save_memory_file = "save_memory.json"
```
```python
class CommonModule:
    def __init__(self, model, name, param1, param2, load_memory_file, save_memory_file):
        # Initialize the instance variables using the provided arguments
        self.memory = []  # Initialize memory as a list
        self.name = name
        self.param1 = param1
        self.param2 = param2
        # Note: the `model` argument is unused; the global model_directory is loaded instead
        self.model = GPTNeoForCausalLM.from_pretrained(model_directory)

        self.tokenizer = AutoTokenizer.from_pretrained(model_directory)
        self.tokenizer.add_special_tokens({'pad_token': '[PAD]'})
        self.gpt3_model = GPTNeoForCausalLM.from_pretrained(model_directory)
        self.gpt3_model.to(device)  # Move model to the device (GPU or CPU)

        # Note: these hard-coded absolute paths override the load_memory_file /
        # save_memory_file arguments accepted above
        self.load_memory_file = "C:\\Users\\withe\\PycharmProjects\\no hope2\\Chat_Bot4\\load_memory.json"
        self.save_memory_file = "C:\\Users\\withe\\PycharmProjects\\no hope2\\Chat_Bot4\\save_memory.json"
        self.memory_module = MemoryModule(self.load_memory_file, self.save_memory_file)

        self.sentiment_module = SentimentAnalysisModule()
        self.speech_engine = speech_engine  # Assign the initialized speech engine

        self.max_sequence_length = 200  # Decrease the value for faster response
        self.num_beams = 4  # Reduce the value for faster response
        self.no_repeat_ngram_size = 2
        self.temperature = 0.3
        self.response_cache = {}  # Cache for storing frequently occurring responses

        # Initialize speech recognition
        self.recognizer = sr.Recognizer()

    def reset_conversation(self):
        self.memory_module.reset_memory()

    # ... (retrieve_cached_response, diff lines 118-121, unchanged and not shown) ...
            return self.response_cache.get(entity)
        return None
```
```python
    def generate_gpt2_response(self, input_text, conversation_history):
        # Prepare the conversation history for GPT-2 input format
        gpt2_input = ""
        if len(conversation_history) > 0:
            gpt2_input = "USER: " + conversation_history[-1] + "\n"   # the user's query
            gpt2_input += "BOT: " + conversation_history[-2] + "\n"   # the bot's previous response

            # Append the rest of the conversation history in reverse order
            for i in range(len(conversation_history) - 3, -1, -2):
                gpt2_input += "USER: " + conversation_history[i] + "\n"
                gpt2_input += "BOT: " + conversation_history[i - 1] + "\n"
        # NOTE: process_input appends (user input, bot response) in that order, so
        # conversation_history[-1] is actually the bot's last reply; the USER/BOT
        # labels above end up swapped relative to that convention.

        # Append the current user input
        gpt2_input += "USER: " + input_text + "\n"

        # Tokenize the input text
        input_ids = self.tokenizer.encode(gpt2_input, return_tensors='pt')

        # Generate a response with the model (self.model stays on the CPU; only
        # self.gpt3_model was moved to `device`)
        with torch.no_grad():
            output = self.model.generate(input_ids, max_length=100, num_return_sequences=1)

        # Decode only the newly generated tokens
        generated_text = self.tokenizer.decode(output[:, input_ids.shape[-1]:][0], skip_special_tokens=True)

        # Extract the last line (the bot's response)
        response = generated_text.strip().split("\n")[-1]
        return response
```
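A minimal run-through of the method, assuming the model weights are already downloaded and using `common_module` as constructed further below (history values hypothetical):

```python
# Hypothetical invocation illustrating the prompt layout described above.
history = ["what's the weather?", "I can't check the weather yet."]  # user turn, then bot turn
print(common_module.generate_gpt2_response("tell me a joke", history))
# Prompt assembled for the model:
#   USER: I can't check the weather yet.
#   BOT: what's the weather?
#   USER: tell me a joke
```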
```python
    def process_input(self, input_text, conversation_history):
        named_entities = list(self.memory_module.get_named_entities())
        for entity in named_entities:
            if entity in input_text:
                response = self.generate_gpt2_response(input_text, conversation_history)
                self.memory_module.add_to_memory(response)
                return response

        if '?' in input_text:
            return "You're making me angry, you wouldn't like me when I'm angry."

        # Check if the input is a greeting
        greetings = ['hello', 'hi', 'hey', 'hola']
        for greeting in greetings:
            if greeting in input_text.lower():
                return "Hello! How can I assist you today?"

        # Check if the input is a statement about the model
        if self.name.lower() in input_text.lower():
            return "Yes, I am {}. How can I assist you today?".format(self.name)

        # Check if the input is a statement about the creator
        if 'creator' in input_text.lower():
            return "I was created by {}.".format(self.param1)

        # Check if the input is a sentiment analysis request
        if 'sentiment' in input_text.lower():
            sentiment = self.sentiment_module.analyze_sentiment(input_text)
            if sentiment == 'positive':
                return "The sentiment of the text is positive."
            elif sentiment == 'negative':
                return "The sentiment of the text is negative."
            else:
                return "The sentiment of the text is neutral."

        # Retrieve a cached response if available
        cached_response = self.retrieve_cached_response(input_text)
        if cached_response:
            return cached_response

        # Generate a response using GPT-2
        response = self.generate_gpt2_response(input_text, conversation_history)

        # Update the conversation history and cache the response
        conversation_history.append(input_text)
        conversation_history.append(response)
        self.response_cache[input_text] = response

        # Update memory with the generated response
        self.memory_module.add_to_memory(response)

        return response
```
```python
common_module = CommonModule(model_directory, "Chatbot", "John Doe", "Jane Smith",
                             load_memory_file, save_memory_file)


def text_to_speech(text):
    common_module.speech_engine.say(text)
    common_module.speech_engine.runAndWait()


def exit_handler():
    # Note: reset_conversation() also rewrites save_memory.json with an empty list
    common_module.reset_conversation()


atexit.register(exit_handler)

recognizer = sr.Recognizer()

while True:
    # sr.Microphone requires the PyAudio package to be installed
    with sr.Microphone() as source:
        print("Listening...")
        audio = recognizer.listen(source)

    try:
        user_input = recognizer.recognize_google(audio)
        print("User:", user_input)
    except sr.UnknownValueError:
        print("Sorry, I could not understand your speech.")
        continue
    except sr.RequestError:
        print("Sorry, the speech recognition service is currently unavailable.")
        continue

    # A fresh list is passed on every turn, so no conversation history carries over
    response = common_module.process_input(user_input, [])
    print("Bot:", response)
    text_to_speech(response)
```
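If cross-turn context is wanted, a single persistent list can be threaded through instead; a sketch under that assumption, not the README's code:

```python
# Sketch: reuse one history list so generate_gpt2_response sees prior turns.
conversation_history = []
for turn in ["hello", "tell me about Paris"]:
    reply = common_module.process_input(turn, conversation_history)
    print("Bot:", reply)
```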
MEMORY_MODULE

```python
import json
import spacy

# Load the spaCy English model
nlp = spacy.load('en_core_web_sm')


class MemoryModule:
    def __init__(self, load_file, save_file):
        self.memory = []
        self.load_file = load_file
        self.save_file = save_file
        self.load_memory()

    def add_to_memory(self, statement):
        self.memory.append(statement)
        self.save_memory()

    def reset_memory(self):
        self.memory = []
        self.save_memory()

    def save_memory(self):
        with open(self.save_file, 'w') as file:
            json.dump(self.memory, file)

    def load_memory(self):
        try:
            with open(self.load_file, 'r') as file:
                loaded_memory = json.load(file)
                if isinstance(loaded_memory, list):
                    self.memory = loaded_memory
                else:
                    print("Loaded memory is not a list. Starting with an empty memory.")
        except FileNotFoundError:
            print("Load memory file not found. Starting with an empty memory.")

    def get_named_entities(self):
        named_entities = set()
        for statement in self.memory:
            doc = nlp(statement)
            for entity in doc.ents:
                if entity.label_:
                    named_entities.add(entity.text)
        return named_entities


memory_module = MemoryModule(
    r"C:\Users\withe\PycharmProjects\no hope2\Chat_Bot4\load_memory.json",
    r"C:\Users\withe\PycharmProjects\no hope2\Chat_Bot4\save_memory.json"
)
```
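A quick round-trip of the class with illustrative file names (hypothetical paths, not the README's):

```python
# Hypothetical round-trip; file names are illustrative.
mm = MemoryModule("demo_load.json", "demo_save.json")
mm.add_to_memory("Alice moved to Paris.")  # persisted to demo_save.json immediately
print(mm.get_named_entities())             # typically {'Alice', 'Paris'} under en_core_web_sm
```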
SENTIMENT_MODULE

```python
from nltk.sentiment import SentimentIntensityAnalyzer  # needed when this lives in sentiment_module.py


class SentimentAnalysisModule:
    def __init__(self):
        self.sia = SentimentIntensityAnalyzer()

    def analyze_sentiment(self, text):
        sentiment = self.sia.polarity_scores(text)
        compound_score = sentiment['compound']
        if compound_score >= 0.05:
            return 'positive'
        elif compound_score <= -0.05:
            return 'negative'
        else:
            return 'neutral'
```
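A short check of the ±0.05 compound thresholds above (illustrative inputs; exact scores depend on the VADER lexicon):

```python
# Illustrative thresholds check.
sam = SentimentAnalysisModule()
print(sam.analyze_sentiment("I love this!"))      # 'positive'
print(sam.analyze_sentiment("I hate this."))      # 'negative'
print(sam.analyze_sentiment("The sky is blue."))  # usually 'neutral'
```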