agentharbor's picture
Update app.py
db576dc verified
raw
history blame
1.38 kB
import gradio as gr
import getpass
import os
# Mirror the `hf_token` environment variable into HUGGINGFACEHUB_API_TOKEN
# when the latter has not been set explicitly.
if "HUGGINGFACEHUB_API_TOKEN" not in os.environ:
    _hf_token = os.getenv('hf_token')
    # os.environ only accepts str values: assigning None (variable unset)
    # would raise TypeError at startup, so copy only when a value exists.
    if _hf_token is not None:
        os.environ["HUGGINGFACEHUB_API_TOKEN"] = _hf_token
# Load the source text file; `documents` is what gets split into chunks below.
from langchain.document_loaders import TextLoader
loader = TextLoader('./Agentville Academy.txt')
documents = loader.load()
import textwrap
def wrap_text_preserve_newlines(text: str, width: int = 110) -> str:
    """Wrap *text* to *width* columns while keeping its existing line breaks.

    Each newline-separated line is wrapped on its own, so line breaks already
    in *text* are kept and blank lines stay blank.

    Args:
        text: The text to wrap.
        width: Maximum length of each output line.

    Returns:
        The wrapped text, with lines joined by newline characters.
    """
    # Wrapping line by line (rather than the whole string at once) is what
    # keeps textwrap from folding the original newlines into spaces.
    return '\n'.join(
        textwrap.fill(line, width=width) for line in text.split('\n')
    )
# Text Splitter
# Split the loaded documents into chunks (chunk_size=1000, no overlap between chunks).
from langchain.text_splitter import CharacterTextSplitter
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(documents)
# Embeddings
# No model is named here, so HuggingFaceEmbeddings uses whatever its library default is.
from langchain.embeddings import HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings()
# Vectorstore: https://python.langchain.com/en/latest/modules/indexes/vectorstores.html
# Build a FAISS index over the chunks; `db` is the store that get_answer queries.
from langchain.vectorstores import FAISS
db = FAISS.from_documents(docs, embeddings)
def get_answer(query):
    """Return the indexed passage most similar to *query*, wrapped for display.

    Args:
        query: Free-text question entered in the UI.

    Returns:
        The text of the best-matching chunk from the vector store, wrapped by
        ``wrap_text_preserve_newlines``, or a short notice when the search
        returns no results.
    """
    # Only the top hit is shown, so request a single result from the store.
    matches = db.similarity_search(query, k=1)
    if not matches:
        # Guard against an IndexError surfacing in the UI on an empty result.
        return "No matching passage found."
    return wrap_text_preserve_newlines(str(matches[0].page_content))
# Gradio UI: a single text input is passed to get_answer and its return value is shown as text.
demo = gr.Interface(fn=get_answer, inputs="text", outputs="text")
# Start the app as soon as this file is executed.
demo.launch()