satvikjain commited on
Commit
c5a7a6c
·
verified ·
1 Parent(s): 7d113ea

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -71
app.py DELETED
@@ -1,71 +0,0 @@
1
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

# Training corpus: Edith Wharton's "The Verdict" hosted on Wikisource.
URL = "https://en.wikisource.org/wiki/The_Verdict"

# Fail fast on network stalls and HTTP errors instead of silently
# tokenizing an error page.
page = requests.get(URL, timeout=30)
page.raise_for_status()

soup = BeautifulSoup(page.content, "html.parser")

# Keep only the first paragraphs that form the story body; later <p>
# elements are Wikisource footer boilerplate.
# NOTE(review): 83 was chosen empirically — confirm against the live page.
NUM_STORY_PARAGRAPHS = 83
text = [p.text for p in soup.find_all("p")][:NUM_STORY_PARAGRAPHS]
16
from tensorflow.keras.preprocessing.text import Tokenizer

# Word-level tokenizer fit on the scraped paragraphs; words unseen at
# fit time map to the out-of-vocabulary token '<nothing>'.
tokenizer = Tokenizer(oov_token='<nothing>')
tokenizer.fit_on_texts(text)

# Build every n-gram prefix of every paragraph: for a token sequence
# [t0, t1, t2, ...] emit [t0, t1], [t0, t1, t2], ... — each prefix's
# last token is the next-word target during training.
input_sequences = [
    tokens[: cut + 1]
    for tokens in (tokenizer.texts_to_sequences([paragraph])[0] for paragraph in text)
    for cut in range(1, len(tokens))
]

max_len = max(len(seq) for seq in input_sequences)

from keras.preprocessing.sequence import pad_sequences

# Left-pad every prefix to a uniform length so they stack into one matrix.
padded_input_sequences = pad_sequences(input_sequences, maxlen=max_len, padding='pre')
32
# Inputs are every token of a padded prefix except the last; the target
# is that last token (the "next word").
X = padded_input_sequences[:, :-1]
y = padded_input_sequences[:, -1]

from tensorflow.keras.utils import to_categorical  # one-hot encoding

# Derive the vocabulary size from the fitted tokenizer instead of the
# hard-coded 1100; +1 because Keras reserves index 0 for padding.
vocab_size = len(tokenizer.word_index) + 1

# One-hot targets for categorical cross-entropy.
y = to_categorical(y, num_classes=vocab_size)

from keras.layers import Embedding, Dense, LSTM
from keras.models import Sequential

# Next-word prediction model: embed token ids, summarize the sequence
# with an LSTM, then predict a distribution over the full vocabulary.
model = Sequential()
# input_length must match X's width (max_len - 1), not a magic 230 that
# silently drifts out of sync when the corpus changes.
model.add(Embedding(vocab_size, 50, input_length=max_len - 1))
model.add(LSTM(250))
model.add(Dense(vocab_size, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()
model.fit(X, y, epochs=45)
49
def prediction(t='', l=1):
    """Generate `l` words continuing the seed text `t`.

    Each step tokenizes the running text, left-pads it to the model's
    training width, takes the argmax of the model's next-word
    distribution, and appends the corresponding word.

    Args:
        t: seed text to continue.
        l: number of words to generate.

    Returns:
        The seed text followed by the generated words.
    """
    text = t
    for _ in range(l):
        token_text = tokenizer.texts_to_sequences([text])
        # Pad to the width the model was trained on (max_len - 1) rather
        # than a hard-coded 230 that can disagree with the training data.
        padded_token_text = pad_sequences(token_text, maxlen=max_len - 1, padding='pre')
        pos = int(np.argmax(model.predict(padded_token_text)))
        # O(1) reverse lookup via index_word instead of scanning the
        # entire word_index dict on every generated word.
        word = tokenizer.index_word.get(pos)
        if word is None:
            # Index 0 (padding) has no word; further iterations would
            # just repeat the same prediction, so stop early.
            break
        text = text + " " + word
    return text
61
import gradio as gr

# UI components: a seed-text box and a word-count slider feeding
# `prediction`, with a multi-line text area for the generated output.
query_box = gr.Textbox(lines=2, label='Query', placeholder='Enter Here', value="")
word_slider = gr.Slider(1, 100, step=1, label="How many Words to generate?", value=1)
output_area = gr.Text(lines=7)

demo = gr.Interface(
    fn=prediction,
    inputs=[query_box, word_slider],
    outputs=output_area,
    title="The Verdict",
    examples=[['It had always been'], ['I found the couple at'], ['She glanced out almost']],
    allow_flagging='never',
    theme=gr.themes.Base(),
)

demo.launch(share=True)