Delete app.py
Browse files
app.py
DELETED
@@ -1,71 +0,0 @@
|
|
1 |
-
import numpy as np
|
2 |
-
import pandas as pd
|
3 |
-
import requests
|
4 |
-
from bs4 import BeautifulSoup
|
5 |
-
|
6 |
-
# --- Data collection ---
# Download "The Verdict" from Wikisource and keep the first 83 <p>
# paragraphs as the training corpus.
URL = "https://en.wikisource.org/wiki/The_Verdict"
response = requests.get(URL)
soup = BeautifulSoup(response.content, "html.parser")
text = [paragraph.text for paragraph in soup.find_all("p")]
text = text[:83]
|
15 |
-
|
16 |
-
from tensorflow.keras.preprocessing.text import Tokenizer

# --- Tokenization and n-gram sequence construction ---
# Fit the tokenizer on the scraped paragraphs; out-of-vocabulary words
# map to the '<nothing>' token.
tokenizer = Tokenizer(oov_token='<nothing>')
tokenizer.fit_on_texts(text)

# For every sentence, emit all prefixes of length >= 2:
# [w1, w2], [w1, w2, w3], ... — the last id of each prefix serves as
# the next-word training target.
input_sequences = []
for sentence in text:
    ids = tokenizer.texts_to_sequences([sentence])[0]
    input_sequences.extend(ids[:end + 1] for end in range(1, len(ids)))

# Longest n-gram; used below to pad all sequences to a common length.
max_len = max(len(seq) for seq in input_sequences)
|
28 |
-
|
29 |
-
from keras.preprocessing.sequence import pad_sequences

# Left-pad every n-gram to max_len so the final column always holds the
# target word id.
padded_input_sequences = pad_sequences(input_sequences, maxlen=max_len, padding='pre')

# Inputs are everything but the last id; the target is the final id.
X = padded_input_sequences[:, :max_len - 1]
# BUG FIX: use [:, -1] (shape (n,)) instead of [:, -1:] (shape (n, 1)).
# to_categorical on an (n, 1) array yields (n, 1, num_classes), which
# does not match the model's (n, num_classes) softmax output at fit time.
y = padded_input_sequences[:, -1]

from tensorflow.keras.utils import to_categorical  # one-hot encode targets
# NOTE(review): 1100 is assumed to be vocab size + 1 — confirm against
# len(tokenizer.word_index) + 1 for this corpus.
y = to_categorical(y, num_classes=1100)
|
37 |
-
|
38 |
-
from keras.layers import Embedding, Dense, LSTM
from keras.models import Sequential

# Next-word model: embedding -> single LSTM -> softmax over the vocabulary.
# NOTE(review): 1100 presumably equals vocab size + 1 and 230 equals
# max_len - 1 — confirm against the data-preparation section above.
model = Sequential([
    Embedding(1100, 50, input_length=230),
    LSTM(250),
    Dense(1100, activation='softmax'),
])
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()
model.fit(X, y, epochs=45)
|
48 |
-
|
49 |
-
def prediction(t='', l=1):
    """Generate `l` words continuing the seed string `t`.

    Greedily takes the argmax next word from the trained model `l`
    times, appending each predicted word to the running text.

    Parameters
    ----------
    t : str
        Seed text to continue.
    l : int
        Number of words to generate.

    Returns
    -------
    str
        The seed text with the generated words appended.
    """
    text = t
    # Build the id -> word reverse lookup once per call instead of
    # scanning word_index for every generated word (was O(vocab) per
    # step with no early break).
    index_to_word = {index: word for word, index in tokenizer.word_index.items()}
    for _ in range(l):
        token_text = tokenizer.texts_to_sequences([text])
        padded_token_text = pad_sequences(token_text, maxlen=230, padding='pre')
        pos = np.argmax(model.predict(padded_token_text))
        word = index_to_word.get(int(pos))
        # pos == 0 is the padding index and has no word; skip silently,
        # matching the original behavior.
        if word is not None:
            text = text + " " + word
    return text
|
60 |
-
|
61 |
-
import gradio as gr

# --- Gradio UI: seed-text box + word-count slider feeding prediction() ---
query_box = gr.Textbox(lines=2, label='Query', placeholder='Enter Here', value="")
count_slider = gr.Slider(1, 100, step=1, label="How many Words to generate?", value=1)

demo = gr.Interface(
    fn=prediction,
    inputs=[query_box, count_slider],
    outputs=gr.Text(lines=7),
    title="The Verdict",
    examples=[['It had always been'], ['I found the couple at'], ['She glanced out almost']],
    allow_flagging='never',
    theme=gr.themes.Base(),
)

demo.launch(share=True)
|
71 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|