Hackoor committed on
Commit
07358c8
·
1 Parent(s): 657964c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -65,7 +65,7 @@ def create_conversational_chain(vector_store):
65
  #model_type="llama", config={'max_new_tokens': 500, 'temperature': 0.01})
66
  llm = Replicate(
67
  streaming = True,
68
- model = "NousResearch/Llama-2-7b-hf",
69
  callbacks=[StreamingStdOutCallbackHandler()],
70
  input = {"temperature": 0.01, "max_length" :500,"top_p":1})
71
  memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
@@ -105,7 +105,7 @@ def main():
105
  text.extend(loader.load())
106
  os.remove(temp_file_path)
107
 
108
- text_splitter = CharacterTextSplitter(separator="\n", chunk_size=1000, chunk_overlap=100, length_function=len)
109
  text_chunks = text_splitter.split_documents(text)
110
 
111
  # Create embeddings
@@ -113,7 +113,7 @@ def main():
113
  model_kwargs={'device': 'cpu'})
114
 
115
  # Create vector store
116
- vector_store = FAISS.from_documents(text_chunks, embedding=embeddings)
117
 
118
  # Create the chain object
119
  chain = create_conversational_chain(vector_store)
 
65
  #model_type="llama", config={'max_new_tokens': 500, 'temperature': 0.01})
66
  llm = Replicate(
67
  streaming = True,
68
+ model = AutoModelForCausalLM.from_pretrained("NousResearch/Llama-2-7b-hf", device_map='auto',torch_dtype=torch.float16,load_in_4bit=True, token=True ),
69
  callbacks=[StreamingStdOutCallbackHandler()],
70
  input = {"temperature": 0.01, "max_length" :500,"top_p":1})
71
  memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
 
105
  text.extend(loader.load())
106
  os.remove(temp_file_path)
107
 
108
+ text_splitter = CharacterTextSplitter(separator="\n", chunk_size=1500, chunk_overlap=300, length_function=len)
109
  text_chunks = text_splitter.split_documents(text)
110
 
111
  # Create embeddings
 
113
  model_kwargs={'device': 'cpu'})
114
 
115
  # Create vector store
116
+ vector_store = FAISS.from_documents(text_chunks,embeddings)
117
 
118
  # Create the chain object
119
  chain = create_conversational_chain(vector_store)