xavierbarbier committed
Commit a8fbe43 · verified · 1 Parent(s): a80badd

Update app.py

Files changed (1)
  1. app.py +20 -4
app.py CHANGED
@@ -24,11 +24,21 @@ Mistral does not support system prompt symbol (such as ```<<SYS>>```) now, input
 [Model From TheBloke/Mistral-7B-Instruct-v0.1-GGUF](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF)
 [Mistral-instruct-v0.1 System prompt](https://docs.mistral.ai/usage/guardrailing)
 """
-
+"""
 model_path = "models"
 model_name = "SmolLM-1.7B-Instruct.Q2_K.gguf"
 
 hf_hub_download(repo_id="mradermacher/SmolLM-1.7B-Instruct-GGUF", filename=model_name, local_dir=model_path, local_dir_use_symlinks=False)
+"""
+
+
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+
+model_name = "croissantllm/CroissantLLMBase"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")
 
 print("Start the model init process")
 model = model = GPT4All(model_name, model_path, allow_download = False, device="cpu")
@@ -100,10 +110,16 @@ def qa(question):
     Given the context information and not prior knowledge, answer the query.
     Query: {question}
     Answer:
-    """
+    """
+    """
     max_new_tokens = 2048
-    outputs = model.generate(prompt=prompt, temp=0.5, top_k = 40, top_p = 1, max_tokens = max_new_tokens)
-    return outputs
+    outputs = model.generate(prompt=prompt, temp=0.5, top_k = 40, top_p = 1, max_tokens = max_new_tokens)"""
+
+    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+    tokens = model.generate(**inputs, max_length=100, do_sample=True, top_p=0.95, top_k=60, temperature=0.3)
+
+
+    return tokenizer.decode(tokens[0])
 
 
 with gr.Blocks() as demo:
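For reference, the net model-loading change written out as plain code: the commit disables the GGUF download by wrapping it in a bare string literal and loads CroissantLLM through transformers instead. This is a minimal sketch restating the added lines (the multi-line call formatting and the comments are mine, not the commit's); note that the commit leaves the later `model = model = GPT4All(model_name, model_path, ...)` line intact, which would overwrite the transformers model and now receives a Hub repo id rather than a GGUF filename.

```python
# Sketch of the new loading path, assuming the GPT4All branch is removed
# rather than disabled with string literals.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "croissantllm/CroissantLLMBase"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,  # half precision to cut memory use
    device_map="auto",          # requires accelerate; places weights on CPU if no GPU
)
```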
 
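The second hunk applies the same string-literal trick to the old GPT4All `generate` call, incidentally swallowing the `max_new_tokens = 2048` assignment into the dead string, and decodes the full output sequence. Below is a hedged sketch of what `qa` reduces to, assuming the prompt template shown in the context lines. Two details are my assumptions, not the commit's code: slicing off the prompt tokens before decoding (the commit decodes `tokens[0]` whole, so the answer arrives with the prompt echoed in front), and `max_new_tokens` in place of the commit's `max_length=100`, which caps prompt and completion together.

```python
def qa(question: str) -> str:
    # Prompt template from the app; retrieval of context is elided here.
    prompt = f"""Given the context information and not prior knowledge, answer the query.
    Query: {question}
    Answer:
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    tokens = model.generate(
        **inputs,
        max_new_tokens=256,  # assumption: cap only the completion, not prompt + completion
        do_sample=True,
        top_p=0.95,
        top_k=60,
        temperature=0.3,
    )
    # Decode only the newly generated tokens so the prompt is not echoed back.
    new_tokens = tokens[0][inputs["input_ids"].shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)
```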