Spaces:
Sleeping
Sleeping
import numpy as np | |
import pandas as pd | |
import string | |
import time | |
#import re | |
#import torch | |
#import tensorflow as tf | |
#import matplotlib.pyplot as plt | |
import pickle as pkl | |
import streamlit as st | |
from wordcloud import WordCloud, STOPWORDS | |
from deepmultilingualpunctuation import PunctuationModel | |
from tensorflow.keras.preprocessing.text import Tokenizer | |
from tensorflow.keras.preprocessing.sequence import pad_sequences | |
from tensorflow.keras.models import load_model | |
from tensorflow.python.keras.saving.hdf5_format import save_attributes_to_hdf5_group | |
def load_horoscope_model():
    """Load the trained horoscope model from ``horoscopeModel.h5``.

    The path is relative, so the file is looked up in the current working
    directory.

    Returns:
        Whatever ``load_model`` returns for that file.
    """
    # NOTE(review): this load runs every time the script is executed; if
    # Streamlit re-runs the script per interaction, a resource cache would
    # avoid repeated loads -- confirm against the installed Streamlit version.
    return load_model('horoscopeModel.h5')


# Module-level model instance; write_horoscope() calls model.predict() on it.
model = load_horoscope_model()
def load_punc_model():
    """Create the punctuation model with its default configuration.

    Returns:
        A new ``PunctuationModel`` instance.
    """
    # NOTE(review): constructed on every script execution; consider caching
    # if Streamlit re-runs the script per interaction -- confirm.
    return PunctuationModel()


# Module-level instance; its restore_punctuation() is applied to the
# generated text before it is displayed.
punctuation_model = load_punc_model()
def load_get_word():
    """Load the pickled ``get_word`` lookup from ``get_word.pkl``.

    The path is relative, so the file is read from the current working
    directory.

    Returns:
        The unpickled object. write_horoscope() indexes it with a predicted
        token id to obtain the word to append.

    Raises:
        FileNotFoundError: If ``get_word.pkl`` does not exist.
    """
    # NOTE: unpickling can execute arbitrary code; only load a trusted file.
    # The context manager closes the handle even if unpickling raises,
    # instead of leaving it open until garbage collection.
    with open('get_word.pkl', 'rb') as pickle_file:
        return pkl.load(pickle_file)
get_word = load_get_word() | |
def load_tokenizer():
    """Load the pickled tokenizer from ``horoscope_tokenizer.pkl``.

    The path is relative, so the file is read from the current working
    directory.

    Returns:
        The unpickled object. write_horoscope() calls its
        ``texts_to_sequences`` method on the running text.

    Raises:
        FileNotFoundError: If ``horoscope_tokenizer.pkl`` does not exist.
    """
    # NOTE: unpickling can execute arbitrary code; only load a trusted file.
    # The context manager closes the handle even if unpickling raises,
    # instead of leaving it open until garbage collection.
    with open('horoscope_tokenizer.pkl', 'rb') as pickle_file:
        return pkl.load(pickle_file)
tokenizer = load_tokenizer() | |
# with st.spinner("Loading the cosmos..."): | |
# # #load models | |
# # punctuation_model = PunctuationModel() | |
# # # open the model file | |
# # model=load_model('horoscopeModel.h5') | |
# # # model.summary() | |
# # open the get_word file | |
# fileo = open('get_word.pkl' , "rb") | |
# # loading data | |
# get_word = pkl.load(fileo) | |
# # open the horoscope_tokenizer file | |
# fileo = open('horoscope_tokenizer.pkl' , "rb") | |
# # loading data | |
# tokenizer = pkl.load(fileo) | |
# #load data | |
# url = 'https://raw.githubusercontent.com/nicsusuki/horoscope-streamlit-app/main/horoscopes.csv' | |
# data = pd.read_csv(url, | |
# error_bad_lines=False, | |
# sep = "|", header = None, | |
# names = ["text", "date", "sign"], index_col = 0) | |
st.title("Horoscope Generator")

# The selected sign is passed straight to write_horoscope() as seed text, so
# each name must be spelled the way it appears in ordinary text
# ("Sagittarius", not "Sagitarius") for the tokenizer to recognise it.
query = st.selectbox(
    'What is your sign?',
    (
        'Aries', 'Taurus', 'Gemini', 'Cancer', 'Leo', 'Virgo', 'Libra',
        'Scorpio', 'Sagittarius', 'Capricorn', 'Aquarius', 'Pisces',
        'Generate my own',
    ),
)

# The last option replaces the sign with free-form seed text from the user.
if query == 'Generate my own':
    query = st.text_input("Type horoscope seed text here")

search_button = st.button('Search the cosmos!')
# words = "" | |
# stopwords = set(STOPWORDS) | |
# for review in data.text.values: | |
# text = str(review) | |
# text = text.split() | |
# words += " ".join([(i.lower() + " ") for i in text]) | |
# #cleaning function - lowercase, remove punc | |
# def clean_text(text): | |
# words = str(text).split() | |
# words = [i.lower() + " " for i in words] | |
# words = " ".join(words) | |
# words = words.translate(words.maketrans('', '', string.punctuation)) | |
# return words | |
# data['text'] = data['text'].apply(clean_text) | |
# #tokenize the data | |
# Vocabulary size: the `num_words` cap used by the tokenizer setup that is
# kept in the commented-out code below.
vocab_size = 15_000
# max_length = 50 | |
# oov_tok = "<OOV>" | |
# tokenizer = Tokenizer(num_words = vocab_size, oov_token = oov_tok) | |
# tokenizer.fit_on_texts(data.text.values) | |
# word_index = tokenizer.word_index | |
# get_word = {v: k for k, v in word_index.items()} | |
# #create n-grams | |
# sequences = tokenizer.texts_to_sequences(data.text.values[::100]) | |
# n_gram_sequences = [] | |
# for sequence in sequences: | |
# for i,j in enumerate(sequence): | |
# if i < (len(sequence) - 10): | |
# s = sequence[i:i + 10] | |
# for k, l in enumerate(s): | |
# n_gram_sequences.append(s[:k + 1]) | |
# np.array(n_gram_sequences).shape | |
# n_gram_sequences = np.array(n_gram_sequences) | |
# Longest n-gram sequence length, precomputed as 10; it was originally
# derived as max([len(i) for i in n_gram_sequences]).
max_len = 10

# predict horoscopes

# Number of words generated per horoscope: the average horoscope length,
# precomputed as 44; it was originally int(len(words.split()) / len(data)).
avg_length = 44
#takes seed text and generates horoscopes using closest matching words | |
#uses random choice element to change horoscopes returned | |
#@st.cache | |
def write_horoscope(seed_text):
    """Extend ``seed_text`` with ``avg_length`` words sampled from the model.

    On each step the running text is tokenized, left-padded/truncated to
    ``max_len - 1`` token ids and passed to ``model.predict``. The next token
    id is drawn at random from the predicted distribution, so repeated calls
    with the same seed produce different horoscopes. The matching word from
    ``get_word`` is then appended.

    Args:
        seed_text: Text to start from (a sign name or free-form user text).

    Returns:
        The seed text followed by the generated words, separated by single
        spaces. Punctuation is not restored here.
    """
    # Ids that never map to a real word: 0 is the padding id used by
    # pad_sequences, 1 is the tokenizer's out-of-vocabulary token.
    pad_id, oov_id = 0, 1

    for _ in range(avg_length):
        token_ids = tokenizer.texts_to_sequences([seed_text])[0]
        padded = pad_sequences([token_ids], maxlen=max_len - 1, padding='pre')

        # Work in float64 and renormalise so that rounding in the model's
        # output cannot trip np.random.choice's "probabilities do not sum
        # to 1" check.
        probs = np.asarray(model.predict(padded, verbose=0)[0], dtype=np.float64)
        probs = probs / probs.sum()

        # Sample an integer id sized to the model's actual output rather
        # than a float drawn from a hard-coded linspace.
        predicted = int(np.random.choice(probs.size, p=probs))

        # If padding or OOV was drawn, fall back to the most likely real word.
        if predicted in (pad_id, oov_id):
            probs[[pad_id, oov_id]] = 0.0
            predicted = int(np.argmax(probs))

        seed_text += " " + get_word[predicted]
    return seed_text
# Runs only on the script execution triggered by pressing the search button.
if search_button:
    heading = "**Searching the cosmos for your horoscope:** " + query
    st.markdown(heading)
    with st.spinner("Consulting the oracle..."):
        # Fixed pause before generation starts (presumably so the spinner
        # stays visible for a moment -- confirm intent).
        time.sleep(2)
        # Generate the raw word sequence, then restore its punctuation.
        horoscope = punctuation_model.restore_punctuation(write_horoscope(query))
    st.success(horoscope)