Spaces:
Running
Running
File size: 4,025 Bytes
2c4ef8c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 |
from googletrans import Translator
import spacy
import gradio as gr
# Load the small English spaCy pipeline used by keep_nouns_verbs.
# Try the already-installed model first and only fall back to a download when
# spaCy reports it as missing (spacy.load raises OSError in that case), so a
# restart with the model present does not re-download it every time.
try:
    nlp = spacy.load('en_core_web_sm')
except OSError:
    spacy.cli.download("en_core_web_sm")
    nlp = spacy.load('en_core_web_sm')

# Shared googletrans client used by the translation option of the UI.
translator = Translator()
def Sentencechunker(sentence):
    """Return the growing word prefixes of ``sentence`` joined by " | ".

    For "a b c" the result is "a | a b | a b c".

    Words are separated on any run of whitespace (``str.split()`` without an
    argument), the same way the sibling chunkers split their input. Splitting
    on a single space character produced empty "words" for leading, trailing
    or repeated spaces, which leaked into the output as blank or padded chunks.

    Args:
        sentence: Text to chunk.

    Returns:
        A string with one chunk per word, each chunk holding the sentence up
        to and including that word. Empty or whitespace-only input gives "".
    """
    words = sentence.split()
    prefixes = [" ".join(words[:end]) for end in range(1, len(words) + 1)]
    return " | ".join(prefixes)
def ReverseSentenceChunker(sentence):
    """Chunk ``sentence`` after reversing its word order.

    The words (split on whitespace) are put in reverse order, re-joined with
    single spaces and handed to Sentencechunker, whose result is returned.
    """
    words = sentence.split()
    words.reverse()
    return Sentencechunker(" ".join(words))
def three_words_chunk(sentence, size=3):
    """Return every run of ``size`` consecutive words, joined by " | ".

    With the default ``size`` of 3, "a b c d" becomes "a b c | b c d".

    Args:
        sentence: Text to chunk; words are separated on whitespace.
        size: Number of words per chunk. Defaults to 3, the width that was
            previously hard-coded.

    Returns:
        The overlapping word windows joined by " | ". A sentence with fewer
        than ``size`` words has no complete window and gives "".

    Raises:
        ValueError: If ``size`` is smaller than 1.
    """
    if size < 1:
        raise ValueError("size must be a positive integer")
    words = sentence.split()
    # The last window starts at len(words) - size; a negative bound simply
    # yields an empty range for short sentences.
    windows = (words[start:start + size] for start in range(len(words) - size + 1))
    return " | ".join(" ".join(window) for window in windows)
def keep_nouns_verbs(sentence):
    """Return only the noun, verb and punctuation tokens of ``sentence``.

    The text is run through the module-level spaCy pipeline ``nlp``; tokens
    whose ``pos_`` tag is NOUN, VERB or PUNCT are kept in their original order
    and joined with single spaces.
    """
    wanted_tags = ['NOUN', 'VERB', 'PUNCT']
    kept = [token.text for token in nlp(sentence) if token.pos_ in wanted_tags]
    return " ".join(kept)
def unique_word_count(text="", state=None):
    """Count whitespace-separated words and rank them by frequency.

    Args:
        text: Text whose words are counted.
        state: Optional dict of earlier counts. When given it is updated in
            place, so counts accumulate across calls; when None a fresh dict
            is used.

    Returns:
        A one-element tuple whose only item is a list of ``(word, count)``
        pairs sorted by descending count (ties keep the dict's insertion
        order). The tuple wrapper is the existing return shape and is kept
        as is.
    """
    tally = {} if state is None else state
    for token in text.split():
        tally[token] = tally.get(token, 0) + 1
    ranked = list(tally.items())
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    return (ranked,)
"""
sentence = "Please help me create a sentence chunker"
sentencechunks = Sentencechunker(sentence)
reversed_chunks = ReverseSentenceChunker(sentence)
TWchunks = three_words_chunk(sentence)
nouns_verbs = keep_nouns_verbs(sentence)
"""
# Translate from English to French
langdest = gr.Dropdown(choices=["af", "de", "es", "ko", "ja", "zh-cn"], label="Choose Language", value="de")
"""
def VarTrans(text, langdest):
translated = translator.translate(text, dest=langdest)
SCtranslated = translator.translate(sentencechunks, dest=langdest)
RCtranslated = translator.translate(reversed_chunks, dest=langdest)
TWCtranslated = translator.translate(TWchunks, dest=langdest)
return translated, SCtranslated, RCtranslated, TWCtranslated
"""
ChunkModeDrop = gr.Dropdown(choices=["Chunks", "Reverse", "Three Word Chunks"], label="Choose Chunk Type")
def FrontRevSentChunk(Chunkmode, Translate, Text, langdest):
    """Chunk ``Text`` in the selected mode and optionally append a translation.

    Args:
        Chunkmode: "Chunks", "Reverse" or "Three Word Chunks"; any other value
            produces an empty chunk string.
        Translate: When truthy, the chunk string is translated and the
            translation is appended on a new line.
        Text: The text to chunk.
        langdest: Destination language code handed to the translator.

    Returns:
        The chunk string, followed by "\\n" and the translated chunk string
        when ``Translate`` is truthy.
    """
    if Chunkmode == "Chunks":
        chunked = Sentencechunker(Text)
    elif Chunkmode == "Reverse":
        chunked = ReverseSentenceChunker(Text)
    elif Chunkmode == "Three Word Chunks":
        chunked = three_words_chunk(Text)
    else:
        chunked = ""

    if not Translate:
        return chunked

    translation = translator.translate(chunked, dest=langdest)
    return chunked + "\n" + translation.text
"""
print(translated.text)
print(sentencechunks)
print(SCtranslated.text)
print(reversed_chunks)
print(RCtranslated.text)
print(TWchunks)
print(TWCtranslated.text)
print(nouns_verbs)
"""
def Wordchunker(word):
    """Return all leading slices of ``word``, shortest first.

    "abc" gives ["a", "ab", "abc"]; an empty input gives an empty list.
    """
    return [word[:end] for end in range(1, len(word) + 1)]
# Import-time demo of Wordchunker: prints two blank lines followed by the
# prefixes of a sample word.
word = "please"
wordchunks = Wordchunker(word)
# A "\n" argument plus the "\n" separator reproduces the blank lines that a
# separate print("\n") call emits before the list.
print("\n", wordchunks, sep="\n")
#random_chunk_display(TWCtranslated.text)
# Assemble the page: a notes banner, the chunk/translate tool, the
# noun/verb filter, a placeholder and the word counter, then start the app.
with gr.Blocks() as lliface:
    # The second paragraph originally opened with a malformed "<>" tag although
    # it is closed with "</p>"; it now opens with "<p>" so the markup is valid.
    gr.HTML("<p> Still Undercontruction </p> <p> Arrows app json creator for easy knowledge graphing and spacy POS graph? </p> <p> https://huggingface.co/spaces/RASMUS/Whisper-youtube-crosslingual-subtitles, https://huggingface.co/spaces/vumichien/whisper-speaker-diarization, Maybe duplicate these, private them and then load into spaces? --> Whisper space for youtube, Clip Interrogator, load here and all my random functions esp. text to HTML </p>")
    # Inputs map positionally to FrontRevSentChunk(Chunkmode, Translate, Text, langdest).
    gr.Interface(fn=FrontRevSentChunk, inputs=[ChunkModeDrop, "checkbox", "text", langdest], outputs="text")
    gr.Interface(fn=keep_nouns_verbs, inputs=["text"], outputs="text", title="Noun and Verbs only (Plus punctuation")
    gr.HTML("Add a codepen pen page here")
    # Only the text input is wired up, so unique_word_count runs with state=None.
    gr.Interface(fn=unique_word_count, inputs="text", outputs="text", title="Wordcounter")

lliface.launch()