Spaces:
Sleeping
Sleeping
Commit
·
9ae8e28
1
Parent(s):
51e2c63
adding Char length vector
Browse files
app.py
CHANGED
@@ -48,10 +48,10 @@ def process_documents(documents,data_chunk=1500,chunk_overlap=100):
|
|
48 |
texts = text_splitter.split_documents(documents)
|
49 |
return texts
|
50 |
|
51 |
-
def process_youtube_link(link, document_name="youtube-content"):
|
52 |
try:
|
53 |
metadata = {"source": f"{document_name}.txt"}
|
54 |
-
return [Document(page_content=get_text_from_youtube_link(video_link=link), metadata=metadata)]
|
55 |
except Exception as err:
|
56 |
logger.error(f'Error in reading document. {err}')
|
57 |
|
@@ -72,7 +72,7 @@ def create_prompt():
|
|
72 |
|
73 |
def youtube_chat(youtube_link,API_key,llm='HuggingFace',temperature=0.1,max_tokens=1096,char_length=1500):
|
74 |
|
75 |
-
document = process_youtube_link(link=youtube_link)
|
76 |
print("docuemt:",document)
|
77 |
embedding_model = SentenceTransformerEmbeddings(model_name='thenlper/gte-base',model_kwargs={"device": DEVICE})
|
78 |
texts = process_documents(documents=document)
|
|
|
48 |
texts = text_splitter.split_documents(documents)
|
49 |
return texts
|
50 |
|
51 |
+
def process_youtube_link(link, document_name="youtube-content",char_length=1000):
|
52 |
try:
|
53 |
metadata = {"source": f"{document_name}.txt"}
|
54 |
+
return [Document(page_content=get_text_from_youtube_link(video_link=link,max_video_length=char_length), metadata=metadata)]
|
55 |
except Exception as err:
|
56 |
logger.error(f'Error in reading document. {err}')
|
57 |
|
|
|
72 |
|
73 |
def youtube_chat(youtube_link,API_key,llm='HuggingFace',temperature=0.1,max_tokens=1096,char_length=1500):
|
74 |
|
75 |
+
document = process_youtube_link(link=youtube_link,char_length=char_length)
|
76 |
print("docuemt:",document)
|
77 |
embedding_model = SentenceTransformerEmbeddings(model_name='thenlper/gte-base',model_kwargs={"device": DEVICE})
|
78 |
texts = process_documents(documents=document)
|