hf llm
backend.py CHANGED (+4 -9)
@@ -63,13 +63,13 @@ os.environ['TOKENIZERS_PARALLELISM'] = 'false'
 llm = HuggingFaceLLM(
     context_window=4096,
     max_new_tokens=256,
-    generate_kwargs={"temperature": 0.1, "do_sample":
+    generate_kwargs={"temperature": 0.1, "do_sample": True},
     system_prompt=system_prompt,
     tokenizer_name="meta-llama/Llama-2-7b-chat-hf",
     model_name="meta-llama/Llama-2-7b-chat-hf",
     device_map="auto",
     # loading model in 8bit for reducing memory
-    model_kwargs={"torch_dtype": torch.float16
+    model_kwargs={"torch_dtype": torch.float16 }
 )

 embed_model= HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
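This hunk enables sampling ("do_sample": True) so that the low temperature=0.1 actually takes effect (with greedy decoding, Hugging Face generation silently ignores temperature), and it closes the model_kwargs dict. The Settings registration that would tell LlamaIndex to use these models is commented out further down; a minimal sketch of that wiring, assuming a recent llama-index release with the llama_index.core namespace (not code from this Space):

    from llama_index.core import Settings

    # Register the models configured above as the LlamaIndex defaults.
    Settings.llm = llm                  # the HuggingFaceLLM built above
    Settings.embed_model = embed_model  # the all-mpnet-base-v2 embedder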
@@ -87,18 +87,13 @@ nodes = SentenceSplitter(chunk_size=512, chunk_overlap=20, paragraph_separator="
 # Build the vector store index from the nodes
 index = VectorStoreIndex(nodes, show_progress = True)

-
-
-#
 # what models will be used by LlamaIndex:
 #Settings.embed_model = InstructorEmbedding(model_name="hkunlp/instructor-base")
 #Settings.embed_model = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
 #Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
-
 #Settings.llm = GemmaLLMInterface()


-
 documents_paths = {
     'blockchain': 'data/blockchainprova.txt',
     'metaverse': 'data/metaverseprova.txt',
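VectorStoreIndex(nodes, show_progress=True) builds the retrieval index directly from pre-split nodes; the index is then normally queried through a query engine. A minimal usage sketch, with an illustrative similarity_top_k value that does not come from backend.py:

    # Hypothetical usage of the index built above; similarity_top_k is illustrative.
    query_engine = index.as_query_engine(similarity_top_k=3)
    response = query_engine.query("Che cos'è la blockchain?")  # the Space answers in Italian
    print(response)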
@@ -122,7 +117,7 @@ ISTR = "In italiano, chiedi molto brevemente se la domanda si riferisce agli 'Os
 ############################---------------------------------

 # Get the parser
-parser = SentenceSplitter.from_defaults(
+"""parser = SentenceSplitter.from_defaults(
     chunk_size=256, chunk_overlap=64, paragraph_separator="\n\n"
 )
 def build_index(path: str):
@@ -136,7 +131,7 @@ def build_index(path: str):
     #storage_context = StorageContext.from_defaults()
     #index.storage_context.persist(persist_dir=PERSIST_DIR)

-    return index
+    return index"""



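The last two hunks disable the old parser/build_index pair by wrapping it in a bare triple-quoted string. That works (the string literal is evaluated and discarded), but it leaves parser and build_index undefined, so any surviving call sites would now raise NameError. For reference, a hedged reconstruction of the disabled helper, assuming a standard load-split-index pipeline; only fragments of its body are visible in the diff:

    from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
    from llama_index.core.node_parser import SentenceSplitter

    # Reconstruction sketch; the chunk settings are the ones from the disabled block.
    parser = SentenceSplitter.from_defaults(
        chunk_size=256, chunk_overlap=64, paragraph_separator="\n\n"
    )

    def build_index(path: str):
        # Load one source file, split it into nodes, and index the nodes.
        documents = SimpleDirectoryReader(input_files=[path]).load_data()
        nodes = parser.get_nodes_from_documents(documents)
        index = VectorStoreIndex(nodes, show_progress=True)
        # Persistence was already commented out in the original:
        # index.storage_context.persist(persist_dir=PERSIST_DIR)
        return index

Per the documents_paths mapping above, it would be called as, e.g., build_index(documents_paths['blockchain']) (hypothetical usage).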