imseldrith committed on
Commit
1f46a45
·
1 Parent(s): b5bf547

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -0
app.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, request, render_template
2
+ import requests
3
+ import re
4
+ import nltk
5
+ from googletrans import Translator
6
+
7
# Fetch the NLTK "punkt" resource when the module is imported; presumably
# this is the data nltk.word_tokenize needs further down in this file.
nltk.download("punkt")

# Flask application instance that the route decorator below registers on.
app = Flask(__name__)
10
+
11
def translate_text(text, src='en', dest='en'):
    """Translate *text* from *src* to *dest* and return the translated string.

    A new googletrans ``Translator`` bound to ``translate.google.com`` is
    created for every call.

    Args:
        text: The text to translate.
        src: Source language code handed to the translator.
        dest: Destination language code handed to the translator.

    Returns:
        The ``text`` attribute of the translator's result.
    """
    client = Translator(service_urls=['translate.google.com'])
    result = client.translate(text, src=src, dest=dest)
    return result.text
14
+
15
def text_processing(text, language='en'):
    """Encode *text* as a list of integer word ids.

    Non-English input is translated to English first. Then everything except
    ASCII letters and whitespace is removed, the remainder is tokenized with
    ``nltk.word_tokenize``, and each token is replaced by the index of its
    word in the vocabulary built from those tokens.

    Args:
        text: Raw input text.
        language: ``'en'`` skips translation; any other value triggers it.

    Returns:
        A list with one integer per token. Ids are assigned in order of first
        appearance, so the same input always produces the same encoding.
    """
    if language != 'en':
        # Translate the raw text: stripping down to ASCII letters first would
        # delete accented and non-Latin characters before the translator
        # could read them. The source language is auto-detected rather than
        # left at translate_text's default of 'en', which also means an
        # invalid code in `language` cannot break the call.
        text = translate_text(text, src='auto', dest='en')

    # remove special characters and numbers
    cleaned = re.sub(r'[^a-zA-Z\s]', '', text)

    # tokenize the text into words (once, after any translation)
    tokens = nltk.word_tokenize(cleaned)

    # dict.fromkeys de-duplicates while keeping first-seen order; iterating a
    # set of strings can yield a different order on every interpreter run,
    # which made the encoding unstable.
    word_to_int = {word: i for i, word in enumerate(dict.fromkeys(tokens))}

    # encode the words as integers
    return [word_to_int[word] for word in tokens]
37
+
38
@app.route("/", methods=['GET', 'POST'])
def index():
    """Render the form on GET; on POST encode the submitted URL or file.

    A POST may carry either a ``url`` form field, whose target is downloaded,
    or an uploaded ``file``. The resulting text is passed to
    ``text_processing`` and the encoding is rendered through ``index.html``
    as ``encoded_text``.

    Returns:
        The rendered ``index.html``. Responses to a POST without usable
        input, or whose URL cannot be fetched, carry HTTP status 400.
    """
    if request.method != 'POST':
        return render_template('index.html')

    # An empty "url" field must not shadow an uploaded file, so test the
    # value rather than mere presence of the key.
    url = request.form.get('url', '').strip()
    upload = request.files.get('file')

    if url:
        # NOTE(review): the server fetches a caller-supplied URL, which is an
        # SSRF risk if this app can reach internal hosts -- consider
        # restricting the allowed schemes/hosts.
        try:
            # The timeout keeps an unresponsive host from hanging the worker.
            response = requests.get(url, timeout=10)
            response.raise_for_status()
        except requests.RequestException:
            # index.html is not visible here; `error` has no effect unless
            # the template chooses to render it.
            return render_template(
                'index.html', error='Could not fetch the given URL.'
            ), 400
        text = response.text
        # The page language is unknown. Pass a non-'en' marker so the
        # translation step runs; previously the translated page text itself
        # was stored here, which cost an extra translation request and was
        # never a language code.
        language = 'auto'
    elif upload is not None:
        # Undecodable bytes are replaced instead of aborting the request.
        text = upload.read().decode('utf-8', errors='replace')
        language = 'en'
    else:
        # Neither input was supplied; previously this fell through to an
        # unbound `text` and a 500 response.
        return render_template(
            'index.html', error='Provide a URL or upload a file.'
        ), 400

    encoded_text = text_processing(text, language)
    return render_template('index.html', encoded_text=encoded_text)
56
+
57
# Start Flask's built-in server only when this file is executed as a script;
# it binds every network interface on port 7860.
if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)