Spaces:
Sleeping
Sleeping
add app.py, requirements.txt
Browse files — add app.py, requirements.txt.
Mar 13, 2024
- app.py +31 -0
- requirements.txt +12 -0
app.py
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr # Imports the Gradio library, which is used to create user interfaces for machine learning models.
|
2 |
+
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM # Imports the AutoTokenizer and AutoModelForSeq2SeqLM classes from the Transformers library, which will be used to tokenize and translate text.
|
3 |
+
|
4 |
+
tokenizer = AutoTokenizer.from_pretrained("t5-small") # Instantiates an AutoTokenizer object using the pre-trained T5-small model. The tokenizer is used to convert input text into a sequence of numerical values that can be used as input to the T5 model.
|
5 |
+
model = AutoModelForSeq2SeqLM.from_pretrained("t5-small") # Instantiates an AutoModelForSeq2SeqLM object using the pre-trained T5-small model. This is the model that will be used to generate translations from input text.
|
6 |
+
|
7 |
+
def translate_text(text):
|
8 |
+
inputs = tokenizer.encode("translate English to French: " + text, return_tensors="pt") # Uses the tokenizer to encode the input text as a sequence of numerical values that the T5 model can process. The text is prepended with the string "translate English to French: ", which is required by the T5 model to know which language to translate from and to. The return_tensors argument is set to "pt" to return a PyTorch tensor.
|
9 |
+
outputs = model.generate(inputs, max_length=128, num_beams=4, early_stopping=True) # Uses the T5 model to generate a translation for the input text. The generate method takes the encoded input text as input and returns a tensor containing the translated text. The max_length argument specifies the maximum length of the generated text, num_beams specifies the number of beams to use during decoding, and early_stopping specifies whether to stop generating output as soon as the model predicts an end-of-sentence token.
|
10 |
+
translated_text = tokenizer.decode(outputs[0], skip_special_tokens=True) # Uses the tokenizer to convert the tensor of translated text back into a string. The skip_special_tokens argument specifies whether to remove special tokens like padding and end-of-sentence tokens from the decoded text.
|
11 |
+
return translated_text
|
12 |
+
|
13 |
+
output_1 = gr.Textbox(label="Speech to Text")
|
14 |
+
output_2 = gr.Textbox(label="Speech Translation")
|
15 |
+
|
16 |
+
# Creates a Gradio interface that loads the pre-trained Facebook Wav2Vec2 model for speech recognition. The input source is set to the user's microphone, and the output is set to output_1. The interface is given the title "Speech-to-text".
|
17 |
+
generator = gr.Interface.load("huggingface/facebook/wav2vec2-base-960h",
|
18 |
+
inputs="microphone",
|
19 |
+
outputs=output_1,
|
20 |
+
title="Speech-to-text",
|
21 |
+
)
|
22 |
+
# Creates a Gradio interface that uses the translate_text function defined earlier to translate English speech to French text. The input to the interface is set to output_1, which is the speech-to-text transcription
|
23 |
+
translator = gr.Interface(fn=translate_text,
|
24 |
+
inputs=output_1,
|
25 |
+
outputs=output_2,
|
26 |
+
title="English to French Translator",
|
27 |
+
description="Translate English speech to French text using the T5-small model.",
|
28 |
+
)
|
29 |
+
|
30 |
+
gr.Series(generator, translator).launch(debug=True)
|
31 |
+
|
requirements.txt
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit==0.84.1
|
2 |
+
Pillow
|
3 |
+
jax[cpu]
|
4 |
+
flax
|
5 |
+
transformers
|
6 |
+
torch
|
7 |
+
torchvision
|
8 |
+
torchaudio
|
9 |
+
huggingface_hub
gradio  # imported by app.py but missing from the original requirements list
|
10 |
+
googletrans==4.0.0-rc1
|
11 |
+
protobuf==3.20
|
12 |
+
|