File size: 775 Bytes
2b94668 1f47244 68a7260 1f47244 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 |
import gradio as gr
from langchain.document_loaders import UnstructuredURLLoader
from langchain.text_splitter import CharacterTextSplitter
def scrap(urls):
    """Load a web page and split its content into overlapping chunks.

    Args:
        urls: A single URL string (despite the plural name). It is wrapped
            in a one-element list before being passed to
            ``UnstructuredURLLoader``.

    Returns:
        The list returned by ``CharacterTextSplitter.split_documents`` for
        the loaded page, or an empty list when ``urls`` is empty, ``None``
        or whitespace-only.
    """
    # The value comes from a free-text input, so drop surrounding whitespace
    # and skip the loader entirely when nothing usable was entered.
    url = urls.strip() if isinstance(urls, str) else urls
    if not url:
        return []

    loader = UnstructuredURLLoader(urls=[url])
    data = loader.load()

    # Text Splitter: break on newlines into chunks of up to 1000 characters,
    # with 200 characters of overlap between neighbouring chunks.
    text_splitter = CharacterTextSplitter(
        separator='\n',
        chunk_size=1000,
        chunk_overlap=200,
    )
    docs = text_splitter.split_documents(data)
    return docs
# Gradio UI: a single text input (the URL) wired to `scrap`, with the result
# shown in a single text output.
iface = gr.Interface(
    fn=scrap,
    inputs="text",
    outputs=['text'],
    title='WebScrap',
    description="Get content of the website from given website URL",
)

# Start the app; inline=False asks Gradio not to embed the UI inline.
iface.launch(inline=False)