Update app.py
app.py CHANGED
@@ -8,13 +8,7 @@ import json
 import pandas as pd
 from llama_cpp import Llama
 from langchain_community.llms import LlamaCpp
-
-from transformers import AutoTokenizer, GenerationConfig #, AutoModelForCausalLM
-#from transformers import AutoModelForCausalLM, AutoModel
-from transformers import TextIteratorStreamer
 from threading import Thread
-from ctransformers import AutoModelForCausalLM, AutoConfig, Config #, AutoTokenizer
-
 from huggingface_hub import Repository, upload_file
 import os
 
@@ -50,27 +44,6 @@ If you don't know the answer, just say "I do not know." Don't make up an answer.
 llm_model = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
 # TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF and tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf not working, TinyLlama/TinyLlama-1.1B-Chat-v0.6, andrijdavid/TinyLlama-1.1B-Chat-v1.0-GGUF"
 
-#tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
-#initiate model and tokenizer
-
-#generation_config = AutoConfig.from_pretrained(
-#    "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
-#    max_new_tokens= 300,
-#    do_sample=True,
-#    stream = streamer,
-#    top_p=0.95,
-#    temperature=0.4,
-#    stream = True
-#    eos_token_id=terminators
-#)
-# send additional parameters to model for generation
-#terminators = [
-#    tokenizer.eos_token_id, # End-of-Sequence Token that indicates where the model should consider the text sequence to be complete
-#    tokenizer.convert_tokens_to_ids("<|eot_id|>") # Converts a token strings in a single/ sequence of integer id using the vocabulary
-#]
-# indicates the end of a sequence
-
-#model = llama_cpp.Llama(model_path = tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf AutoModelForCausalLM.from_pretrained(llm_model, model_file = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf", model_type="llama", gpu_layers=0, config = generation_config)
 model = Llama(
     model_path="./tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
     # chat_format="llama-2",
@@ -82,6 +55,7 @@ model = Llama(
     # callback_manager=callback_manager,
     # verbose=True, # Verbose is required to pass to the callback manager
 )
+#initiate model and tokenizer
 
 def search(query: str, k: int = 2 ):
     """a function that embeds a new query and returns the most probable results"""
@@ -136,15 +110,11 @@ def talk(prompt, history):
     # formatted_prompt_with_history = formatted_prompt_with_history[:600] # to avoid memory issue
     # print(formatted_prompt_with_history)
     messages = [{"role":"system","content":SYS_PROMPT},{"role":"user","content":formatted_prompt}]
-    # messages = "role":"system","content":SYS_PROMPT},{"role":"user","content":formatted_prompt}]
-    print(messages)
     # binding the system context and new prompt for LLM
     # the chat template structure should be based on text generation model format
-    print("check6")
 
     # indicates the end of a sequence
     stream = model.create_chat_completion(messages = messages, max_tokens=1000, stop=["</s>"], stream=False)
-    # stream = model(prompt = messages, max_tokens=1000, stop=["</s>"],echo=True, stream=False)
     print(f"{stream}")
     print("check 7")
     print(stream['choices'][0]['message']['content'])
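
For reference, a minimal standalone sketch of the llama-cpp-python path that app.py now relies on: loading the local GGUF file with Llama and calling create_chat_completion with the same max_tokens, stop, and stream=False arguments shown in the diff. The system/user messages and the n_ctx value are illustrative placeholders, not taken from the repository.

from llama_cpp import Llama

# Sketch only: loads the same local GGUF file referenced in the diff.
llm = Llama(
    model_path="./tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
    n_ctx=2048,      # assumed context size; not shown in the diff
    verbose=False,
)

# Placeholder messages; app.py builds these from SYS_PROMPT and the formatted prompt.
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Summarise what retrieval-augmented generation is."},
]

# Non-streaming chat completion, matching stream=False in the diff; the return
# value follows the OpenAI-style schema used by llama-cpp-python.
result = llm.create_chat_completion(
    messages=messages,
    max_tokens=1000,
    stop=["</s>"],
    stream=False,
)
print(result["choices"][0]["message"]["content"])

Because stream=False, the whole completion comes back as a single dict; switching to stream=True would instead yield incremental chunks that have to be iterated and concatenated.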