"""Streamlit app that generates a horoscope from a star sign or custom seed text.

A Keras next-word model (``horoscopeModel.h5``) extends the seed text one word
at a time, and a punctuation-restoration model adds punctuation to the result
before it is displayed.
"""
import pickle as pkl
import string
import time

import numpy as np
import pandas as pd
#import re
#import torch
#import tensorflow as tf
#import matplotlib.pyplot as plt
import streamlit as st
from wordcloud import WordCloud, STOPWORDS
from deepmultilingualpunctuation import PunctuationModel
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import load_model
from tensorflow.python.keras.saving.hdf5_format import save_attributes_to_hdf5_group


@st.cache(allow_output_mutation=True)
def load_horoscope_model():
    """Load the next-word Keras model from ``horoscopeModel.h5`` (cached by Streamlit)."""
    return load_model('horoscopeModel.h5')


model = load_horoscope_model()


@st.cache(allow_output_mutation=True)
def load_punc_model():
    """Build the punctuation-restoration model (cached by Streamlit)."""
    return PunctuationModel()


punctuation_model = load_punc_model()


@st.cache
def load_get_word():
    """Load the index -> word lookup from ``get_word.pkl``.

    NOTE: pickle executes arbitrary code on load; this file must be a trusted
    artifact shipped with the app, never user-supplied.
    """
    # `with` guarantees the handle is closed even if unpickling fails.
    with open('get_word.pkl', "rb") as fileo:
        return pkl.load(fileo)


get_word = load_get_word()


@st.cache
def load_tokenizer():
    """Load the fitted tokenizer from ``horoscope_tokenizer.pkl``.

    NOTE: pickle executes arbitrary code on load; this file must be a trusted
    artifact shipped with the app, never user-supplied.
    """
    with open('horoscope_tokenizer.pkl', "rb") as fileo:
        return pkl.load(fileo)


tokenizer = load_tokenizer()

# Reference only: earlier, uncached loading code kept for provenance.
# with st.spinner("Loading the cosmos..."):
#     # #load models
#     # punctuation_model = PunctuationModel()
#     # # open the model file
#     # model=load_model('horoscopeModel.h5')
#     # # model.summary()
#     # open the get_word file
#     fileo = open('get_word.pkl' , "rb")
#     # loading data
#     get_word = pkl.load(fileo)
#     # open the horoscope_tokenizer file
#     fileo = open('horoscope_tokenizer.pkl' , "rb")
#     # loading data
#     tokenizer = pkl.load(fileo)
# #load data
# url = 'https://raw.githubusercontent.com/nicsusuki/horoscope-streamlit-app/main/horoscopes.csv'
# data = pd.read_csv(url,
#                    error_bad_lines=False,
#                    sep = "|", header = None,
#                    names = ["text", "date", "sign"], index_col = 0)

st.title("Horoscope Generator")

# Spelling fixed from 'Sagitarius': the selected sign is shown to the user and
# is also the seed text fed to the tokenizer.
query = st.selectbox(
    'What is your sign?',
    ('Aries', 'Taurus', 'Gemini', 'Cancer', 'Leo', 'Virgo', 'Libra',
     'Scorpio', 'Sagittarius', 'Capricorn', 'Aquarius', 'Pisces',
     'Generate my own'))

if query == 'Generate my own':
    query = st.text_input("Type horoscope seed text here")

search_button = st.button('Search the cosmos!')

# Reference only: how the tokenizer / n-gram training data were produced.
# words = ""
# stopwords = set(STOPWORDS)
# for review in data.text.values:
#     text = str(review)
#     text = text.split()
#     words += " ".join([(i.lower() + " ") for i in text])
# #cleaning function - lowercase, remove punc
# def clean_text(text):
#     words = str(text).split()
#     words = [i.lower() + " " for i in words]
#     words = " ".join(words)
#     words = words.translate(words.maketrans('', '', string.punctuation))
#     return words
# data['text'] = data['text'].apply(clean_text)
# #tokenize the data
vocab_size = 15000
# max_length = 50
# oov_tok = ""
# tokenizer = Tokenizer(num_words = vocab_size, oov_token = oov_tok)
# tokenizer.fit_on_texts(data.text.values)
# word_index = tokenizer.word_index
# get_word = {v: k for k, v in word_index.items()}
# #create n-grams
# sequences = tokenizer.texts_to_sequences(data.text.values[::100])
# n_gram_sequences = []
# for sequence in sequences:
#     for i,j in enumerate(sequence):
#         if i < (len(sequence) - 10):
#             s = sequence[i:i + 10]
#             for k, l in enumerate(s):
#                 n_gram_sequences.append(s[:k + 1])
# np.array(n_gram_sequences).shape
# n_gram_sequences = np.array(n_gram_sequences)
max_len = 10  #max([len(i) for i in n_gram_sequences]) ##max len = 10

#predict horoscopes
avg_length = 44  #int(len(words.split())/len(data)) ## average length of horoscope 44

# Token indices that never map to a real word: 0 is the value pad_sequences
# fills with, 1 is the tokenizer's out-of-vocabulary token.
_RESERVED_INDICES = (0, 1)


def _sample_word_index(probabilities):
    """Sample one token index from a 1-D probability vector.

    The vector is cast to float64 and renormalised first: float32 softmax
    output frequently fails ``np.random.choice``'s "probabilities do not sum
    to 1" check. If a reserved index (padding / OOV) is drawn, the most likely
    non-reserved index is returned instead.
    """
    probs = np.asarray(probabilities, dtype=np.float64)
    probs = probs / probs.sum()  # new array, so the caller's data is untouched
    choice = int(np.random.choice(probs.size, p=probs))
    if choice in _RESERVED_INDICES:
        probs[list(_RESERVED_INDICES)] = 0
        choice = int(np.argmax(probs))
    return choice


#takes seed text and generates horoscopes using closest matching words
#uses random choice element to change horoscopes returned
#@st.cache
def write_horoscope(seed_text):
    """Extend ``seed_text`` with up to ``avg_length`` model-generated words.

    Each step tokenizes the text generated so far, pads/truncates it to
    ``max_len - 1`` tokens, asks the model for next-word probabilities and
    samples a word from them, so repeated calls give different horoscopes.

    Args:
        seed_text: Text to start from (a star sign or free-form text).

    Returns:
        The seed text followed by the generated words, space-separated.
    """
    for _ in range(avg_length):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_len - 1, padding='pre')
        # verbose=0: avoid printing a progress bar for every generated word.
        pred_probs = model.predict(token_list, verbose=0)
        predicted = _sample_word_index(pred_probs[0])
        output_word = get_word.get(predicted)
        if output_word is None:
            # Index without a vocabulary entry: skip rather than crash.
            continue
        seed_text += " " + output_word
    return seed_text


if search_button:
    if not query or not query.strip():
        # "Generate my own" with nothing typed: there is no seed to extend.
        st.warning("Please type some seed text first.")
    else:
        st.markdown("**Searching the cosmos for your horoscope:** " + query)
        with st.spinner("Consulting the oracle..."):
            time.sleep(2)
            horoscope_text = write_horoscope(query)
            horoscope = punctuation_model.restore_punctuation(horoscope_text)
        st.success(horoscope)