Update helper.py
helper.py
CHANGED
@@ -1,74 +1,82 @@
 import os
 import gradio as gr
-from typing import Callable
+from typing import Callable, Generator
 import base64
 from openai import OpenAI

-
-
-def get_fn(model_path: str, **model_kwargs):
+def get_fn(model_name: str, **model_kwargs) -> Callable:
     """Create a chat function with the specified model."""

-    # …
+    # Instantiate an OpenAI client for a custom endpoint
     try:
-        OPENAI_API_KEY = "-"
         client = OpenAI(
-            …
-            …
+            base_url="http://192.222.58.60:8000/v1",
+            api_key="tela",
         )
-        …
     except Exception as e:
-        print(f"The …
-        …
+        print(f"The API or base URL were not defined: {str(e)}")
+        raise e  # It's better to raise the exception to prevent the app from running without a client

     def predict(
         message: str,
-        history,
+        history: list,
         system_prompt: str,
         temperature: float,
         max_tokens: int,
         top_k: int,
         repetition_penalty: float,
         top_p: float
-    ):
+    ) -> Generator[str, None, None]:
         try:
-            # …
-            …
+            # Initialize the messages list with the system prompt
+            messages = [
+                {"role": "system", "content": system_prompt}
+            ]
+
+            # Append the conversation history
             for user_msg, assistant_msg in history:
-                …
-                …
-                …
+                messages.append({"role": "user", "content": user_msg})
+                if assistant_msg:
+                    messages.append({"role": "assistant", "content": assistant_msg})
+
+            # Append the latest user message
+            messages.append({"role": "user", "content": message})
+
+            # Call the OpenAI API with the formatted messages
             response = client.chat.completions.create(
-                model=model_name,
+                model=model_name,
                 messages=messages,
                 temperature=temperature,
                 max_tokens=max_tokens,
                 top_k=top_k,
                 repetition_penalty=repetition_penalty,
-                …
+                top_p=top_p,
                 stream=True,
-                response_format…
+                # Ensure response_format is set correctly; typically it's a string like 'text'
+                response_format="text",
             )
-            …
+
             response_text = ""
+            # Iterate over the streaming response
             for chunk in response:
-                …
-                …
-                …
-                …
-                …
-                …
-                …
+                if 'choices' in chunk and len(chunk['choices']) > 0:
+                    delta = chunk['choices'][0].get('delta', {})
+                    content = delta.get('content', '')
+                    if content:
+                        response_text += content
+                        yield response_text.strip()
+
             if not response_text.strip():
                 yield "I apologize, but I was unable to generate a response. Please try again."
-            …
+
         except Exception as e:
             print(f"Error during generation: {str(e)}")
             yield f"An error occurred: {str(e)}"
-        …
+
     return predict


+
 def get_image_base64(url: str, ext: str):
     with open(url, "rb") as image_file:
         encoded_string = base64.b64encode(image_file.read()).decode('utf-8')
@@ -101,7 +109,7 @@ def handle_user_msg(message: str):
         raise NotImplementedError


-def get_interface_args(pipeline):
+def get_interface_args(pipeline: str):
     if pipeline == "chat":
         inputs = None
         outputs = None
@@ -115,47 +123,71 @@ def get_interface_args(pipeline):
                     messages.append({"role": "assistant", "content": assistant_msg})
                 else:
                     files = user_msg
-            if …
-                message = {"text":message, "files":files}
-            elif …
-                if message …
+            if isinstance(message, str) and files is not None:
+                message = {"text": message, "files": files}
+            elif isinstance(message, dict) and files is not None:
+                if not message.get("files"):
                     message["files"] = files
             messages.append({"role": "user", "content": handle_user_msg(message)})
             return {"messages": messages}

-        postprocess = lambda x: x
+        postprocess = lambda x: x  # No additional postprocessing needed
+
     else:
-        # Add other pipeline types when they …
+        # Add other pipeline types when they are needed
         raise ValueError(f"Unsupported pipeline type: {pipeline}")
     return inputs, outputs, preprocess, postprocess


-def …
-    # Determine the pipeline type based on the model name
-    # For simplicity, assuming all models are chat models at the moment
-    return "chat"
-
-
-
-def registry(name: str = None, **kwargs):
+def registry(name: str = None, **kwargs) -> gr.ChatInterface:
     """Create a Gradio Interface with similar styling and parameters."""

-    …
-    …
+    # Retrieve preprocess and postprocess functions
+    _, _, preprocess, postprocess = get_interface_args("chat")
+
+    # Get the predict function
+    predict_fn = get_fn(model_path=name, **kwargs)
+
+    # Define a wrapper function that integrates preprocessing and postprocessing
+    def wrapper(message, history, system_prompt, temperature, max_tokens, top_k, repetition_penalty, top_p):
+        # Preprocess the inputs
+        preprocessed = preprocess(message, history)
+
+        # Extract the preprocessed messages
+        messages = preprocessed["messages"]
+
+        # Call the predict function and generate the response
+        response_generator = predict_fn(
+            messages=messages,
+            temperature=temperature,
+            max_tokens=max_tokens,
+            top_k=top_k,
+            repetition_penalty=repetition_penalty,
+            top_p=top_p
+        )
+
+        # Collect the generated response
+        response = ""
+        for partial_response in response_generator:
+            response = partial_response  # Gradio will handle streaming
+            yield response
+
+    # Create the Gradio ChatInterface with the wrapper function
     interface = gr.ChatInterface(
-        fn=…
+        fn=wrapper,
         additional_inputs_accordion=gr.Accordion("⚙️ Parameters", open=False),
         additional_inputs=[
             gr.Textbox(
-                "You are a helpful AI assistant.",
+                value="You are a helpful AI assistant.",
                 label="System prompt"
             ),
-            gr.Slider(0, 1, 0.7, label="Temperature"),
-            gr.Slider(128, 4096, 1024, label="Max new tokens"),
-            gr.Slider(1, 80, 40, label="Top K sampling"),
-            gr.Slider(0, 2, 1.1, label="Repetition penalty"),
-            gr.Slider(0, 1, 0.95, label="Top P sampling"),
+            gr.Slider(0.0, 1.0, value=0.7, label="Temperature"),
+            gr.Slider(128, 4096, value=1024, label="Max new tokens"),
+            gr.Slider(1, 80, value=40, step=1, label="Top K sampling"),
+            gr.Slider(0.0, 2.0, value=1.1, label="Repetition penalty"),
+            gr.Slider(0.0, 1.0, value=0.95, label="Top P sampling"),
         ],
+        # Optionally, you can customize other ChatInterface parameters here
    )

-    return interface
+    return interface
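
Note on the streaming and sampling parameters used above: with the 1.x openai Python client (which `from openai import OpenAI` implies), streamed chunks are objects rather than dictionaries, so they are normally read with attribute access (`chunk.choices[0].delta.content`), and non-standard options such as `top_k` and `repetition_penalty` are usually forwarded to an OpenAI-compatible server (for example vLLM) through `extra_body` rather than as top-level keyword arguments. A minimal sketch under those assumptions, with a placeholder model name, not the committed implementation:

    from openai import OpenAI

    # Same custom endpoint as in the diff; "my-model" is a placeholder model name.
    client = OpenAI(base_url="http://192.222.58.60:8000/v1", api_key="tela")

    def stream_reply(messages: list) -> str:
        """Stream a chat completion and return the accumulated text."""
        stream = client.chat.completions.create(
            model="my-model",
            messages=messages,
            temperature=0.7,
            max_tokens=1024,
            top_p=0.95,
            stream=True,
            # Extra sampling options go through extra_body so the client
            # accepts them; the server must support these fields.
            extra_body={"top_k": 40, "repetition_penalty": 1.1},
        )
        text = ""
        for chunk in stream:
            # Streamed chunks expose their payload as attributes, not dict keys.
            if chunk.choices and chunk.choices[0].delta.content:
                text += chunk.choices[0].delta.content
        return text

Called with an ordinary messages list (for example [{"role": "user", "content": "Hello"}]), this returns the full reply once the stream completes.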
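
For context, a Space typically imports this helper from its app.py and launches the returned interface. A hypothetical entry point (the file layout and model name are illustrative, not part of this commit) might look like the sketch below; note that, as committed, registry calls get_fn(model_path=name, ...) while get_fn now declares model_name, so those two names would need to agree for the call to succeed.

    # app.py — hypothetical usage of the helper shown above
    from helper import registry

    # "my-model" is a placeholder served by the configured endpoint.
    demo = registry(name="my-model")

    if __name__ == "__main__":
        demo.launch()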