# Spaces:
# Runtime error
# Runtime error
class LangChainChunker:
    """Split a piece of text into chunks with LangChain's CharacterTextSplitter."""

    def __init__(self, text):
        """Store the text that ``chunker`` will split.

        Args:
            text: The full text to split (presumably a video transcript;
                verify against callers).
        """
        self.text = text

    def chunker(self, size=1000, overlap=0):
        """Split ``self.text`` on single spaces into chunks.

        Args:
            size: Target chunk length, forwarded as ``chunk_size``.
            overlap: Overlap between consecutive chunks, forwarded as
                ``chunk_overlap``. Defaults to 0. The previous hard-coded
                value was the float 0.9, which should act as zero overlap
                when compared against integer lengths, so the default
                keeps that behavior while using the integer type the
                splitter expects.

        Returns:
            The result of ``CharacterTextSplitter.split_text`` on the
            stored text.
        """
        # Kept as a function-local import, as in the original, so that
        # langchain is only required when chunking is actually requested.
        from langchain.text_splitter import CharacterTextSplitter

        # TODO: attach the duration of the video to each chunk,
        # i.e. return [[chunk, duration], ...] (not implemented yet).
        text_splitter = CharacterTextSplitter(
            separator=" ",
            chunk_size=size,
            chunk_overlap=overlap,
        )
        return text_splitter.split_text(self.text)

    def __sizeof__(self) -> int:
        """Return the number of characters in the stored text.

        NOTE(review): ``__sizeof__`` is the hook behind ``sys.getsizeof``,
        which conventionally reports a memory footprint in bytes. This
        override reports a character count instead; it is kept for
        backward compatibility with existing callers.
        """
        return len(self.text)
def getSubsText(video_id="", getGenerated=False):
    """Return a YouTube video's transcript as one space-separated string.

    The transcripts available for ``video_id`` are listed, the last one
    in the listing is fetched, and its text is formatted with
    ``TextFormatter`` with newlines replaced by spaces.

    Args:
        video_id: The YouTube video ID whose transcript is wanted.
        getGenerated: Currently has no effect; selection of
            auto-generated transcripts is not implemented yet.

    Returns:
        The transcript text on a single line. If the listing yields no
        transcripts, an empty string is passed to the formatter and its
        result is returned.
    """
    from youtube_transcript_api import YouTubeTranscriptApi as ytapi
    from youtube_transcript_api.formatters import TextFormatter

    transcripts = ytapi.list_transcripts(video_id)

    if getGenerated:
        # TODO: implement getGenerated
        pass

    # The original fetched every listed transcript and kept only the last
    # result. Pick the last transcript first and fetch it once, which
    # yields the same text without the redundant network requests.
    chosen = None
    for transcript in transcripts:
        chosen = transcript

    data = chosen.fetch() if chosen is not None else ""
    return TextFormatter().format_transcript(data).replace("\n", " ")