Spaces:
Build error
Build error
Commit
·
1f46a45
1
Parent(s):
b5bf547
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from flask import Flask, request, render_template
|
2 |
+
import requests
|
3 |
+
import re
|
4 |
+
import nltk
|
5 |
+
from googletrans import Translator
|
6 |
+
|
7 |
+
# Make sure the "punkt" tokenizer models used by nltk.word_tokenize are
# available. Looking locally first avoids contacting the NLTK download index
# on every start-up when the data is already installed.
# NOTE(review): recent NLTK releases may look for "punkt_tab" instead of
# "punkt" -- confirm against the NLTK version this app is deployed with.
try:
    nltk.data.find("tokenizers/punkt")
except LookupError:
    nltk.download("punkt")

# WSGI application object; the route below is registered on it.
app = Flask(__name__)
|
10 |
+
|
11 |
+
def translate_text(text, src='en', dest='en'):
    """Translate *text* from language *src* to language *dest*.

    A new googletrans ``Translator`` bound to ``translate.google.com`` is
    created for each call.

    Args:
        text: The text to translate.
        src: Source language code passed to the translator.
        dest: Destination language code passed to the translator.

    Returns:
        The ``text`` attribute of the translation result.
    """
    client = Translator(service_urls=['translate.google.com'])
    result = client.translate(text, dest=dest, src=src)
    return result.text
|
14 |
+
|
15 |
+
def text_processing(text, language='en'):
    """Encode *text* as a list of integer word ids.

    Non-English input is translated to English first. The result is then
    reduced to ASCII letters and whitespace, tokenized with NLTK, and every
    token is replaced by an integer id.

    Args:
        text: Raw input text.
        language: Language code of *text*; any value other than ``'en'``
            triggers a translation to English.

    Returns:
        A list of ints, one per token. Equal tokens share an id, and ids are
        assigned in order of first appearance.
    """
    # Translate BEFORE stripping characters: the ASCII-only filter below would
    # otherwise erase accented and non-Latin letters, leaving nothing
    # meaningful to translate.
    if language != 'en':
        try:
            text = translate_text(text, src=language, dest='en')
        except ValueError:
            # NOTE(review): googletrans appears to raise ValueError for a
            # source code it does not recognise -- confirm. Fall back to
            # auto-detection so an unusual `language` value is not fatal.
            text = translate_text(text, src='auto', dest='en')

    # remove special characters and numbers
    processed_text = re.sub(r'[^a-zA-Z\s]', '', text)

    # tokenize the text into words
    tokens = nltk.word_tokenize(processed_text)

    # dict.fromkeys keeps first-seen order, so the word -> id mapping is the
    # same on every run (iteration order of a set of strings is not).
    word_to_int = {word: i for i, word in enumerate(dict.fromkeys(tokens))}

    # encode the words as integers
    return [word_to_int[word] for word in tokens]
|
37 |
+
|
38 |
+
@app.route("/", methods=['GET', 'POST'])
def index():
    """Render the input form and, on POST, the integer-encoded text.

    A POST may carry either a ``url`` form field (the page is fetched and its
    language detected) or an uploaded ``file`` (decoded as UTF-8 and treated
    as English). The encoded result is handed to ``index.html`` as
    ``encoded_text``. A POST with neither input gets the plain form back with
    HTTP status 400.
    """
    if request.method != 'POST':
        return render_template('index.html')

    if 'url' in request.form:
        url = request.form['url']
        # NOTE(security): this fetches a user-supplied URL from the server
        # (SSRF risk); consider restricting schemes/hosts before deploying.
        # The timeout keeps an unresponsive host from blocking the worker
        # indefinitely.
        response = requests.get(url, timeout=10)
        text = response.text
        # text_processing expects a language *code*; the previous code passed
        # the translated text itself here. Ask the translator to detect it.
        detector = Translator(service_urls=['translate.google.com'])
        language = detector.detect(text).lang
    elif 'file' in request.files:
        upload = request.files['file']
        text = upload.read().decode('utf-8')
        language = 'en'
    else:
        # Neither input was supplied: there is nothing to encode, so answer
        # with the empty form instead of failing on an unbound `text`.
        return render_template('index.html'), 400

    encoded_text = text_processing(text, language)

    return render_template('index.html', encoded_text=encoded_text)
|
56 |
+
|
57 |
+
if __name__ == "__main__":
    # Script entry point: serve on every network interface at port 7860.
    app.run(port=7860, host="0.0.0.0")
|