ambreshrc committed on
Commit
846f7ec
·
1 Parent(s): af7099a

Delete streamlit_app.py

Browse files
Files changed (1) hide show
  1. streamlit_app.py +0 -128
streamlit_app.py DELETED
@@ -1,128 +0,0 @@
1
- import streamlit as st
2
- from io import BytesIO
3
- # import gradio as gr
4
- # Def_04 Docx file to translated_Docx file
5
- #from transformers import MarianMTModel, MarianTokenizer
6
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
7
- import nltk
8
- from nltk.tokenize import sent_tokenize
9
- from nltk.tokenize import LineTokenizer
10
- nltk.download('punkt')
11
- import math
12
- import torch
13
- from docx import Document
14
- from time import sleep
15
- from stqdm import stqdm
16
-
17
- import docx
18
def getText(filename):
    """Return the text of a .docx document as one newline-joined string.

    ``filename`` is handed unchanged to ``docx.Document``. The text of each
    entry in the document's ``paragraphs`` is collected in order and the
    pieces are joined with ``'\\n'``.
    """
    document = docx.Document(filename)
    return '\n'.join(paragraph.text for paragraph in document.paragraphs)
24
-
25
-
26
-
27
-
28
- # mname = 'Helsinki-NLP/opus-mt-en-hi'
29
- # tokenizer = MarianTokenizer.from_pretrained(mname)
30
- # model = MarianMTModel.from_pretrained(mname)
31
- # model.to(device)
32
-
33
- #@st.cache
34
def btTranslator(docxfile):
    """Back-translate a .docx file through a chain of translation models.

    The document text is read with ``getText`` and then passed through three
    Helsinki-NLP models in sequence (en->ru, ru->fr, fr->en); the output of
    each stage is the input of the next.

    Args:
        docxfile: Whatever ``getText`` accepts; it is forwarded unchanged.

    Returns:
        A new ``Document`` containing a single paragraph whose text is the
        final translation, with the translated lines joined by newlines.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # (model name, Punkt language of the text that model receives).
    # Sentence splitting must follow the language of the current text,
    # which changes after every stage of the chain.
    stages = [
        ("Helsinki-NLP/opus-mt-en-ru", "english"),
        ("Helsinki-NLP/opus-mt-ru-fr", "russian"),
        ("Helsinki-NLP/opus-mt-fr-en", "french"),
    ]
    batch_size = 64
    line_tokenizer = LineTokenizer()

    text = getText(docxfile)

    for model_name, source_language in stqdm(stages):
        # st.spinner is a context manager; it only displays while entered.
        with st.spinner('Wait for it...'):
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
            model.to(device)

            translated_paragraphs = []
            # LineTokenizer yields one item per non-blank line of the text.
            for paragraph in stqdm(line_tokenizer.tokenize(text)):
                sentences = sent_tokenize(paragraph, language=source_language)
                translated = []
                for start in range(0, len(sentences), batch_size):
                    sent_batch = sentences[start:start + batch_size]
                    model_inputs = tokenizer(
                        sent_batch,
                        return_tensors="pt",
                        padding=True,
                        truncation=True,
                        max_length=500,
                    ).to(device)
                    with torch.no_grad():
                        generated = model.generate(**model_inputs)
                    translated.extend(
                        tokenizer.batch_decode(generated, skip_special_tokens=True)
                    )
                translated_paragraphs.append(" ".join(translated))

            # Feed this stage's output into the next model in the chain.
            text = "\n".join(translated_paragraphs)

    files = Document()
    files.add_paragraph(text)
    return files
94
-
95
####################################################################
# NOTE: the former module-level trial call, which passed a Hugging Face
# web URL to btTranslator, has been removed. That argument ends up in
# docx.Document, which is documented to take a path or file-like object
# rather than a URL, and the call ran on every script rerun before any of
# the UI below was drawn.
####################################################################

st.title('Translator App')
st.markdown("Translate from Docx file")
st.subheader("File Upload")

datas = st.file_uploader("Original File")
name = st.text_input('Enter New File Name: ')

if st.button(label='Translate'):
    if datas is None:
        # Nothing uploaded yet: do not hand None to the translator.
        st.warning('Upload File and Start the process')
    else:
        binary_output = BytesIO()
        # The spinner is only shown while its context is entered.
        with st.spinner('Waiting...'):
            btTranslator(datas).save(binary_output)
        st.success("Translated")

        # Offer the download only once there is translated content to serve.
        st.download_button(
            label='Download Translated File',
            file_name=(f"{name}_Translated.docx"),
            data=binary_output.getvalue(),
        )
119
- #files.save(f"{name}_Translated.docx")
120
- #else:
121
- # st.text('Upload File and Start the process')
122
-
123
-
124
- #f4=binary_output(f3)
125
-
126
- #st.sidebar.download_button(label='Download Translated File',file_name='Translated.docx', data=binary_output.getvalue())
127
- # st.text_area(label="",value=btTranslator(datas),height=100)
128
- # Footer