Spaces:

slasiyal
/

coderinstruct

Sleeping

App Files Files Community

suraj committed on Jun 12, 2024

Commit

ff0a367

1 Parent(s): 48975c6

bugfix

Browse files

Files changed (5) hide show

.gitattributes +1 -0
.gitignore +2 -0
__init__.py +0 -65
app.py +130 -404
requirements.txt +1 -1

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ env/*
2	+ env/pyvenv.cfg

__init__.py DELETED Viewed

@@ -1,65 +0,0 @@
-import os
-from langchain.document_loaders import (
-    CSVLoader,
-    EverNoteLoader,
-    PDFMinerLoader,
-    TextLoader,
-    UnstructuredEPubLoader,
-    UnstructuredHTMLLoader,
-    UnstructuredMarkdownLoader,
-    UnstructuredODTLoader,
-    UnstructuredPowerPointLoader,
-    UnstructuredWordDocumentLoader,
-)
-FAVICON_PATH: str = 'https://modishcard.com/app/assets/icons/ModishCard_Logo6-02.svg'
-SYSTEM_PROMPT: str = "You are Saiga, a Englis-speaking automated assistant. You talk to people and help them."
-SYSTEM_TOKEN: int = 1788
-USER_TOKEN: int = 1404
-BOT_TOKEN: int = 9225
-LINEBREAK_TOKEN: int = 13
-ROLE_TOKENS: dict = {
-    "user": USER_TOKEN,
-    "bot": BOT_TOKEN,
-    "system": SYSTEM_TOKEN
-}
-LOADER_MAPPING: dict = {
-    ".csv": (CSVLoader, {}),
-    ".doc": (UnstructuredWordDocumentLoader, {}),
-    ".docx": (UnstructuredWordDocumentLoader, {}),
-    ".enex": (EverNoteLoader, {}),
-    ".epub": (UnstructuredEPubLoader, {}),
-    ".html": (UnstructuredHTMLLoader, {}),
-    ".md": (UnstructuredMarkdownLoader, {}),
-    ".odt": (UnstructuredODTLoader, {}),
-    ".pdf": (PDFMinerLoader, {}),
-    ".ppt": (UnstructuredPowerPointLoader, {}),
-    ".pptx": (UnstructuredPowerPointLoader, {}),
-    ".txt": (TextLoader, {"encoding": "utf8"}),
-}
-DICT_REPO_AND_MODELS: dict = {
-    "https://huggingface.co/MaziyarPanahi/Qwen2-1.5B-Instruct-GGUF/resolve/main/Qwen2-1.5B-Instruct.Q8_0.gguf":
-        "MaziyarPanahi/Qwen2-1.5B-Instruct.Q8_0.gguf",
-}
-EMBEDDER_NAME: str = "sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
-MAX_NEW_TOKENS: int = 1500
-ABS_PATH = os.path.dirname(os.path.abspath(__file__))
-MODELS_DIR = os.path.join(ABS_PATH, "../models")
-AUTH_FILE = os.path.join(ABS_PATH, "auth.csv")
-BLOCK_CSS = """
-#buttons button {
-    min-width: min(120px,100%);
-}
-"""

app.py CHANGED Viewed

@@ -1,412 +1,138 @@
-import tempfile
-import itertools
 import gradio as gr
-from __init__ import *
 from llama_cpp import Llama
-from chromadb.config import Settings
-from typing import List, Optional, Union
-from langchain.vectorstores import Chroma
-from langchain.docstore.document import Document
-from huggingface_hub.file_download import http_get
-from langchain.embeddings import HuggingFaceEmbeddings
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-class LocalChatGPT:
-    def __init__(self):
-        self.llama_model: Optional[Llama] = None
-        self.embeddings: HuggingFaceEmbeddings = self.initialize_app()
-    def initialize_app(self) -> HuggingFaceEmbeddings:
-        """
-        Load all models from the list
-        :return:
-        """
-        os.makedirs(MODELS_DIR, exist_ok=True)
-        model_url, model_name = list(DICT_REPO_AND_MODELS.items())[0]
-        final_model_path = os.path.join(MODELS_DIR, model_name)
-        os.makedirs("/".join(final_model_path.split("/")[:-1]), exist_ok=True)
-        if not os.path.exists(final_model_path):
-            with open(final_model_path, "wb") as f:
-                http_get(model_url, f)
-        self.llama_model = Llama(
-            model_path=final_model_path,
-            n_ctx=2000,
-            n_parts=1,
-        )
-        return HuggingFaceEmbeddings(model_name=EMBEDDER_NAME, cache_folder=MODELS_DIR)
-    def load_model(self, model_name):
-        """
-        :param model_name:
-        :return:
-        """
-        final_model_path = os.path.join(MODELS_DIR, model_name)
-        os.makedirs("/".join(final_model_path.split("/")[:-1]), exist_ok=True)
-        if not os.path.exists(final_model_path):
-            with open(final_model_path, "wb") as f:
-                if model_url := [i for i in DICT_REPO_AND_MODELS if DICT_REPO_AND_MODELS[i] == model_name]:
-                    http_get(model_url[0], f)
-        self.llama_model = Llama(
-            model_path=final_model_path,
-            n_ctx=2000,
-            n_parts=1,
-        )
-        return model_name
-    @staticmethod
-    def load_single_document(file_path: str) -> Document:
-        """
-        Upload one document.
-        :param file_path:
-        :return:
-        """
-        ext: str = "." + file_path.rsplit(".", 1)[-1]
-        assert ext in LOADER_MAPPING
-        loader_class, loader_args = LOADER_MAPPING[ext]
-        loader = loader_class(file_path, **loader_args)
-        return loader.load()[0]
-    @staticmethod
-    def get_message_tokens(model: Llama, role: str, content: str) -> list:
-        """
-        :param model:
-        :param role:
-        :param content:
-        :return:
-        """
-        message_tokens: list = model.tokenize(content.encode("utf-8"))
-        message_tokens.insert(1, ROLE_TOKENS[role])
-        message_tokens.insert(2, LINEBREAK_TOKEN)
-        message_tokens.append(model.token_eos())
-        return message_tokens
-    def get_system_tokens(self, model: Llama) -> list:
-        """
-        :param model:
-        :return:
-        """
-        system_message: dict = {"role": "system", "content": SYSTEM_PROMPT}
-        return self.get_message_tokens(model, **system_message)
-    @staticmethod
-    def upload_files(files: List[tempfile.TemporaryFile]) -> List[str]:
-        """
-        :param files:
-        :return:
-        """
-        return [f.name for f in files]
-    @staticmethod
-    def process_text(text: str) -> Optional[str]:
-        """
-        :param text:
-        :return:
-        """
-        lines: list = text.split("\n")
-        lines = [line for line in lines if len(line.strip()) > 2]
-        text = "\n".join(lines).strip()
-        return None if len(text) < 10 else text
-    @staticmethod
-    def update_text_db(
-        db: Optional[Chroma],
-        fixed_documents: List[Document],
-        ids: List[str]
-    ) -> Union[Optional[Chroma], str]:
-        if db:
-            data: dict = db.get()
-            files_db = {dict_data['source'].split('/')[-1] for dict_data in data["metadatas"]}
-            files_load = {dict_data.metadata["source"].split('/')[-1] for dict_data in fixed_documents}
-            if files_load == files_db:
-                # db.delete([item for item in data['ids'] if item not in ids])
-                # db.update_documents(ids, fixed_documents)
-                db.delete(data['ids'])
-                db.add_texts(
-                    texts=[doc.page_content for doc in fixed_documents],
-                    metadatas=[doc.metadata for doc in fixed_documents],
-                    ids=ids
-                )
-                file_warning = f"Uploaded {len(fixed_documents)} fragments! You can ask questions"
-                return db, file_warning
-    def build_index(
-        self,
-        file_paths: List[str],
-        db: Optional[Chroma],
-        chunk_size: int,
-        chunk_overlap: int
-    ):
-        """
-        :param file_paths:
-        :param db:
-        :param chunk_size:
-        :param chunk_overlap:
-        :return:
-        """
-        documents: List[Document] = [self.load_single_document(path) for path in file_paths]
-        text_splitter: RecursiveCharacterTextSplitter = RecursiveCharacterTextSplitter(
-            chunk_size=chunk_size, chunk_overlap=chunk_overlap
-        )
-        documents = text_splitter.split_documents(documents)
-        fixed_documents: List[Document] = []
-        for doc in documents:
-            doc.page_content = self.process_text(doc.page_content)
-            if not doc.page_content:
-                continue
-            fixed_documents.append(doc)
-        ids: List[str] = [
-            f"{path.split('/')[-1].replace('.txt', '')}{i}"
-            for path, i in itertools.product(file_paths, range(1, len(fixed_documents) + 1))
-        ]
-        self.update_text_db(db, fixed_documents, ids)
-        db = Chroma.from_documents(
-            documents=fixed_documents,
-            embedding=self.embeddings,
-            ids=ids,
-            client_settings=Settings(
-                anonymized_telemetry=False,
-                persist_directory="db"
-            )
-        )
-        file_warning = f"Uploaded {len(fixed_documents)} fragments! You can ask questions."
-        return db, file_warning
-    @staticmethod
-    def user(message, history):
-        new_history = history + [[message, None]]
-        return "", new_history
-    @staticmethod
-    def regenerate_response(history):
-        """
-        :param history:
-        :return:
-        """
-        return "", history
-    @staticmethod
-    def retrieve(history, db: Optional[Chroma], retrieved_docs):
-        """
-        :param history:
-        :param db:
-        :param retrieved_docs:
-        :return:
-        """
-        if db:
-            last_user_message = history[-1][0]
-            try:
-                docs = db.similarity_search(last_user_message, k=4)
-                # retriever = db.as_retriever(search_kwargs={"k": k_documents})
-                # docs = retriever.get_relevant_documents(last_user_message)
-            except RuntimeError:
-                docs = db.similarity_search(last_user_message, k=1)
-                # retriever = db.as_retriever(search_kwargs={"k": 1})
-                # docs = retriever.get_relevant_documents(last_user_message)
-            source_docs = set()
-            for doc in docs:
-                for content in doc.metadata.values():
-                    source_docs.add(content.split("/")[-1])
-            retrieved_docs = "\n\n".join([doc.page_content for doc in docs])
-            retrieved_docs = f"A document- {''.join(list(source_docs))}.\n\n{retrieved_docs}"
-        return retrieved_docs
-    def bot(self, history, retrieved_docs):
-        """
-        :param history:
-        :param retrieved_docs:
-        :return:
-        """
-        if not history:
-            return
-        tokens = self.get_system_tokens(self.llama_model)[:]
-        tokens.append(LINEBREAK_TOKEN)
-        for user_message, bot_message in history[:-1]:
-            message_tokens = self.get_message_tokens(model=self.llama_model, role="user", content=user_message)
-            tokens.extend(message_tokens)
-        last_user_message = history[-1][0]
-        if retrieved_docs:
-            last_user_message = f"Context: {retrieved_docs}\n\nUsing context, answer the question:" \
-                                f"{last_user_message}"
-        message_tokens = self.get_message_tokens(model=self.llama_model, role="user", content=last_user_message)
-        tokens.extend(message_tokens)
-        role_tokens = [self.llama_model.token_bos(), BOT_TOKEN, LINEBREAK_TOKEN]
-        tokens.extend(role_tokens)
-        generator = self.llama_model.generate(
-            tokens,
-            top_k=30,
-            top_p=0.9,
-            temp=0.1
-        )
-        partial_text = ""
-        for i, token in enumerate(generator):
-            if token == self.llama_model.token_eos() or (MAX_NEW_TOKENS is not None and i >= MAX_NEW_TOKENS):
-                break
-            partial_text += self.llama_model.detokenize([token]).decode("utf-8", "ignore")
-            history[-1][1] = partial_text
-            yield history
-    def run(self):
-        """
-        :return:
-        """
-        with gr.Blocks(theme=gr.themes.Soft(), css=BLOCK_CSS) as demo:
-            db: Optional[Chroma] = gr.State(None)
-            favicon = f'<img src="{FAVICON_PATH}" width="48px" style="display: inline">'
             gr.Markdown(
-                f"""<h1><center>{favicon} GPT-based text assistant</center></h1>"""
-            )
-            with gr.Row(elem_id="model_selector_row"):
-                models: list = list(DICT_REPO_AND_MODELS.values())
-                model_selector = gr.Dropdown(
-                    choices=models,
-                    value=models[0] if models else "",
-                    interactive=True,
-                    show_label=False,
-                    container=False,
-                )
-            with gr.Row():
-                with gr.Column(scale=5):
-                    chatbot = gr.Chatbot(label="Dialogue", height=400)
-                with gr.Column(min_width=200, scale=4):
-                    retrieved_docs = gr.Textbox(
-                        label="Extracted fragments",
-                        placeholder="Will appear after asking questions",
-                        interactive=False
-                    )
-            with gr.Row():
-                with gr.Column(scale=20):
-                    msg = gr.Textbox(
-                        label="send a message",
-                        show_label=False,
-                        placeholder="send a message",
-                        container=False
-                    )
-                with gr.Column(scale=3, min_width=100):
-                    submit = gr.Button("📤 Send", variant="primary")
-            with gr.Row():
-                # gr.Button(value="👍  Понравилось")
-                # gr.Button(value="👎  Не понравилось")
-                stop = gr.Button(value="⛔ Stop")
-                regenerate = gr.Button(value="🔄  Repeat")
-                clear = gr.Button(value="🗑️  Clear")
-            # # Upload files
-            # file_output.upload(
-            #     fn=self.upload_files,
-            #     inputs=[file_output],
-            #     outputs=[file_paths],
-            #     queue=True,
-            # ).success(
-            #     fn=self.build_index,
-            #     inputs=[file_paths, db, chunk_size, chunk_overlap],
-            #     outputs=[db, file_warning],
-            #     queue=True
-            # )
-            model_selector.change(
-                fn=self.load_model,
-                inputs=[model_selector],
-                outputs=[model_selector]
-            )
-            # Pressing Enter
-            submit_event = msg.submit(
-                fn=self.user,
-                inputs=[msg, chatbot],
-                outputs=[msg, chatbot],
-                queue=False,
-            ).success(
-                fn=self.retrieve,
-                inputs=[chatbot, db, retrieved_docs],
-                outputs=[retrieved_docs],
-                queue=True,
-            ).success(
-                fn=self.bot,
-                inputs=[chatbot, retrieved_docs],
-                outputs=chatbot,
-                queue=True,
-            )
-            # Pressing the button
-            submit_click_event = submit.click(
-                fn=self.user,
-                inputs=[msg, chatbot],
-                outputs=[msg, chatbot],
-                queue=False,
-            ).success(
-                fn=self.retrieve,
-                inputs=[chatbot, db, retrieved_docs],
-                outputs=[retrieved_docs],
-                queue=True,
-            ).success(
-                fn=self.bot,
-                inputs=[chatbot, retrieved_docs],
-                outputs=chatbot,
-                queue=True,
-            )
-            # Stop generation
-            stop.click(
-                fn=None,
-                inputs=None,
-                outputs=None,
-                cancels=[submit_event, submit_click_event],
-                queue=False,
-            )
-            # Regenerate
-            regenerate.click(
-                fn=self.regenerate_response,
-                inputs=[chatbot],
-                outputs=[msg, chatbot],
-                queue=False,
-            ).success(
-                fn=self.retrieve,
-                inputs=[chatbot, db, retrieved_docs],
-                outputs=[retrieved_docs],
-                queue=True,
-            ).success(
-                fn=self.bot,
-                inputs=[chatbot, retrieved_docs],
-                outputs=chatbot,
-                queue=True,
-            )
-            # Clear history
-            clear.click(lambda: None, None, chatbot, queue=False)
-        demo.queue(max_size=128, default_concurrency_limit=10, api_open=False)
-        demo.launch(server_name="0.0.0.0", max_threads=200)
 if __name__ == "__main__":
-    local_chat_gpt = LocalChatGPT()
-    local_chat_gpt.run()

 import gradio as gr
+import os
 from llama_cpp import Llama
+import datetime
+from huggingface_hub import hf_hub_download
+# MODEL SETTINGS (some of these values are also shown in the UI info panel)
+# Running transcript of prompts and generations; combine() appends to it.
+convHistory = ''
+# Fetch the model file from the Hugging Face Hub. The repo and filename can be
+# overridden with the REPO_ID and MODEL_FILE environment variables.
+modelfile = hf_hub_download(
+        repo_id=os.environ.get("REPO_ID", "slasiyal/deepseek-coder-1.3b-instruct.gguf"),
+        filename=os.environ.get("MODEL_FILE", "deepseek-coder-1.3b-instruct.gguf"),
+    )
+# Shown in the info panel only; generation uses the "Repetition Penalty" slider value.
+repetitionpenalty = 1.15
+# Passed to Llama as n_ctx and used to cap the maximum-output-length slider.
+contextlength=4096
+# File that writehistory() appends to.
+logfile = 'logs.txt'
+print("loading model...")
+stt = datetime.datetime.now()  # start of the model-load timing
+# NOTE(review): no GPU-offload argument is passed below, so the library default applies - confirm this is intended.
+llm = Llama(
+  model_path=modelfile,  # Path returned by hf_hub_download above
+  n_ctx=contextlength,  # The max sequence length to use - note that longer sequence lengths require much more resources
+  #n_threads=2,            # The number of CPU threads to use, tailor to your system and the resulting performance
+)
+dt = datetime.datetime.now() - stt
+print(f"Model loaded in {dt}")
+def writehistory(text):
+    """Append ``text`` followed by a newline to the file named by ``logfile``.
+
+    :param text: the log entry to record.
+    :return: None
+    """
+    # UTF-8 is requested explicitly because prompts and model output may
+    # contain characters the platform default encoding cannot represent.
+    # The ``with`` block closes the file, so no explicit close() is needed.
+    with open(logfile, 'a', encoding='utf-8') as f:
+        f.write(text)
+        f.write('\n')
+# Reference list of theme constructors. This bare string literal is never
+# assigned or used, so it has no effect at runtime.
+"""
+gr.themes.Base()
+gr.themes.Default()
+gr.themes.Glass()
+gr.themes.Monochrome()
+gr.themes.Soft()
+"""
+def combine(a, b, c, d,e,f):
+    """Stream a completion for the user prompt and report timing/token stats.
+
+    Generator used as the Gradio click handler. Every yield is the 5-tuple
+    ``(generation, delta, prompt_tokens, answer_tokens, total_tokens)`` where
+    ``generation`` is the text produced so far, ``delta`` the elapsed time and
+    the last three items are display strings. After streaming finishes the
+    exchange is appended to the log file and to the global ``convHistory``.
+
+    :param a: system prompt text; accepted for the UI wiring but not inserted
+        into the prompt.
+    :param b: user prompt text.
+    :param c: sampling temperature.
+    :param d: maximum number of new tokens to generate.
+    :param e: top-p value.
+    :param f: repetition penalty.
+    """
+    global convHistory
+    temperature = c
+    max_new_tokens = d
+    repeat_penalty = f
+    top_p = e
+    prompt = f"<|user|>\n{b}<|endoftext|>\n<|assistant|>"
+    start = datetime.datetime.now()
+    generation = ""
+    delta = ""
+    # The prompt does not change while streaming, so it is tokenized once
+    # instead of on every chunk.
+    prompt_token_count = len(llm.tokenize(bytes(prompt, encoding='utf-8')))
+    prompt_tokens = f"Prompt Tokens: {prompt_token_count}"
+    answer_tokens = ''
+    total_tokens = ''
+    for chunk in llm(prompt,
+                max_tokens=max_new_tokens,
+                stop=["</s>"],   # Example stop token - not necessarily correct for this specific model! Please check before using.
+                temperature=temperature,
+                repeat_penalty=repeat_penalty,
+                top_p=top_p,
+                echo=False,
+                stream=True):
+        generation += chunk["choices"][0]["text"]
+        answer_token_count = len(llm.tokenize(bytes(generation, encoding='utf-8')))
+        answer_tokens = f"Out Tkns: {answer_token_count}"
+        total_tokens = f"Total Tkns: {prompt_token_count + answer_token_count}"
+        delta = datetime.datetime.now() - start
+        yield generation, delta, prompt_tokens, answer_tokens, total_tokens
+    timestamp = datetime.datetime.now()
+    # Log the repetition penalty that was actually used for this request.
+    logger = f"""time: {timestamp}\n Temp: {temperature} - MaxNewTokens: {max_new_tokens} - RepPenalty: {repeat_penalty} \nPROMPT: \n{prompt}\nStableZephyr3B: {generation}\nGenerated in {delta}\nPromptTokens: {prompt_tokens}   Output Tokens: {answer_tokens}  Total Tokens: {total_tokens}\n\n---\n\n"""
+    writehistory(logger)
+    convHistory = convHistory + prompt + "\n" + generation + "\n"
+    print(convHistory)
+    # In a generator this value travels on StopIteration; the last yield above
+    # has already delivered the same final state.
+    return generation, delta, prompt_tokens, answer_tokens, total_tokens
+# MAIN GRADIO INTERFACE
+with gr.Blocks(theme='Medguy/base2') as demo:   #theme=gr.themes.Glass()  #theme='remilia/Ghostly'
+    #TITLE SECTION
+    with gr.Row(variant='compact'):
+            with gr.Column(scale=12):
+                # The <h1> heading is closed with a matching </h1> tag.
+                gr.HTML("<center>"
+                + "<h3>Prompt Engineering Playground!</h3>"
+                + "<h1>🐦 StableLM-Zephyr-3B - 4K context window</h1></center>")
+            gr.Image(value='https://github.com/fabiomatricardi/GradioStudies/raw/main/20231205/logo-banner-StableZephyr.jpg', height=95, show_label = False,
+                     show_download_button = False, container = False)
+    # INTERACTIVE INFOGRAPHIC SECTION
+    with gr.Row():
+        with gr.Column(min_width=80):
+            gentime = gr.Textbox(value="", placeholder="Generation Time:", min_width=50, show_label=False)
+        with gr.Column(min_width=80):
+            prompttokens = gr.Textbox(value="", placeholder="Prompt Tkn:", min_width=50, show_label=False)
+        with gr.Column(min_width=80):
+            outputokens = gr.Textbox(value="", placeholder="Output Tkn:", min_width=50, show_label=False)
+        with gr.Column(min_width=80):
+            totaltokens = gr.Textbox(value="", placeholder="Total Tokens:", min_width=50, show_label=False)
+    # PLAYGROUND INTERFACE SECTION
+    with gr.Row():
+        with gr.Column(scale=1):
            gr.Markdown(
+            f"""
+            ### Tunning Parameters""")
+            temp = gr.Slider(label="Temperature",minimum=0.0, maximum=1.0, step=0.01, value=0.42)
+            top_p = gr.Slider(label="Top_P",minimum=0.0, maximum=1.0, step=0.01, value=0.8)
+            repPen = gr.Slider(label="Repetition Penalty",minimum=0.0, maximum=4.0, step=0.01, value=1.2)
+            max_len = gr.Slider(label="Maximum output lenght", minimum=10,maximum=(contextlength-500),step=2, value=900)
+            gr.Markdown(
+            """
+            Fill the System Prompt and User Prompt
+            And then click the Button below
+            """)
+            btn = gr.Button(value="🐦 Generate", variant='primary')
+            # NOTE(review): the engine and model names below are static text, and the
+            # repetition penalty shown is the module constant rather than the slider
+            # value; the model itself is loaded through llama_cpp - confirm the labels.
+            gr.Markdown(
+            f"""
+            - **Prompt Template**: OpenChat 🐦
+            - **Repetition Penalty**: {repetitionpenalty}
+            - **Context Lenght**: {contextlength} tokens
+            - **LLM Engine**: CTransformers
+            - **Model**: 🐦 StarlingLM-7b
+            - **Log File**: {logfile}
+            """)
+        with gr.Column(scale=4):
+            txt = gr.Textbox(label="System Prompt", value = "", placeholder = "This models does not have any System prompt...",lines=1, interactive = False)
+            txt_2 = gr.Textbox(label="User Prompt", lines=6)
+            txt_3 = gr.Textbox(value="", label="Output", lines = 13, show_copy_button=True)
+            # The input order must match combine()'s positional parameters (a..f).
+            btn.click(combine, inputs=[txt, txt_2,temp,max_len,top_p,repPen], outputs=[txt_3,gentime,prompttokens,outputokens,totaltokens])
 if __name__ == "__main__":
+    demo.launch(inbrowser=True)

requirements.txt CHANGED Viewed

@@ -1,4 +1,4 @@
-llama-cpp-python==0.2.18
 langchain==0.0.331
 huggingface-hub==0.17.3
 chromadb==0.4.18

+llama-cpp-python
 langchain==0.0.331
 huggingface-hub==0.17.3
 chromadb==0.4.18