Spaces:

dineth554
/

webenginenovav2

Sleeping

App Files Files Community

dineth554 committed on Jun 4, 2024

Commit

70c3062

verified ·

1 Parent(s): 6f48855

Update app.py

Browse files

Files changed (1) hide show

app.py +99 -96

app.py CHANGED Viewed

@@ -57,115 +57,118 @@ hf_hub_download(
 )
 # Function to respond to user messages
-def respond(message, history, system_message, temperature, top_p, top_k, repeat_penalty):
-    model = "mixtral-8x7b-instruct-v0.1.Q5_K_M.gguf"
-    max_tokens = 3000
-    chat_template = get_messages_formatter_type(model)
-    llm = Llama(
-        model_path=f"models/{model}",
-        flash_attn=True,
-        n_gpu_layers=81,
-        n_batch=1024,
-        n_ctx=get_context_by_model(model),
-    )
-    provider = LlamaCppPythonProvider(llm)
-    logging.info(f"Loaded chat examples: {chat_template}")
-    search_tool = WebSearchTool(
-        llm_provider=provider,
-        message_formatter_type=chat_template,
-        max_tokens_search_results=12000,
-        max_tokens_per_summary=2048,
-    )
-    web_search_agent = LlamaCppAgent(
-        provider,
-        system_prompt=web_search_system_prompt,
-        predefined_messages_formatter_type=chat_template,
-        debug_output=True,
-    )
-    answer_agent = LlamaCppAgent(
-        provider,
-        system_prompt=research_system_prompt,
-        predefined_messages_formatter_type=chat_template,
-        debug_output=True,
-    )
-    settings = provider.get_provider_default_settings()
-    settings.stream = False
-    settings.temperature = temperature
-    settings.top_k = top_k
-    settings.top_p = top_p
-    settings.max_tokens = max_tokens
-    settings.repeat_penalty = repeat_penalty
-    output_settings = LlmStructuredOutputSettings.from_functions(
-        [search_tool.get_tool()]
-    )
-    messages = BasicChatHistory()
-    for msn in history:
-        user = {"role": Roles.user, "content": msn[0]}
-        assistant = {"role": Roles.assistant, "content": msn[1]}
-        messages.add_message(user)
-        messages.add_message(assistant)
-    result = web_search_agent.get_chat_response(
-        message,
-        llm_sampling_settings=settings,
-        structured_output_settings=output_settings,
-        add_message_to_chat_history=False,
-        add_response_to_chat_history=False,
-        print_output=False,
-    )
-    outputs = ""
-    settings.stream = True
-    response_text = answer_agent.get_chat_response(
-        f"Write a detailed and complete research document that fulfills the following user request: '{message}', based on the information from the web below.\n\n" +
-        result[0]["return_value"],
-        role=Roles.tool,
-        llm_sampling_settings=settings,
-        chat_history=messages,
-        returns_streaming_generator=True,
-        print_output=False,
-    )
-    for text in response_text:
-        outputs += text
         yield outputs
-    output_settings = LlmStructuredOutputSettings.from_pydantic_models(
-        [CitingSources], LlmStructuredOutputType.object_instance
-    )
-    citing_sources = answer_agent.get_chat_response(
-        "Cite the sources you used in your response.",
-        role=Roles.tool,
-        llm_sampling_settings=settings,
-        chat_history=messages,
-        returns_streaming_generator=False,
-        structured_output_settings=output_settings,
-        print_output=False,
-    )
-    outputs += "\n\nSources:\n"
-    outputs += "\n".join(citing_sources.sources)
-    yield outputs
 st.title("Novav2 Web Engine")
 message = st.text_input("Enter your message:")
 history = st.session_state.get("history", [])
-system_message = st.text_area("System message", value=web_search_system_prompt)
 temperature = st.slider("Temperature", min_value=0.1, max_value=1.0, value=0.45, step=0.1)
 top_p = st.slider("Top-p", min_value=0.1, max_value=1.0, value=0.95, step=0.05)
 top_k = st.slider("Top-k", min_value=0, max_value=100, value=40, step=1)
 repeat_penalty = st.slider("Repetition penalty", min_value=0.0, max_value=2.0, value=1.1, step=0.1)
 if st.button("Send"):
-    response_generator = respond(message, history, system_message, temperature, top_p, top_k, repeat_penalty)
     for response in response_generator:
         st.write(response)
         history.append((message, response))

 )
 # Function to respond to user messages
+def respond(message, history, temperature, top_p, top_k, repeat_penalty):
+    try:
+        model = "mixtral-8x7b-instruct-v0.1.Q5_K_M.gguf"
+        max_tokens = 3000
+        chat_template = get_messages_formatter_type(model)
+        llm = Llama(
+            model_path=f"models/{model}",
+            flash_attn=True,
+            n_gpu_layers=81,
+            n_batch=1024,
+            n_ctx=get_context_by_model(model),
+        )
+        provider = LlamaCppPythonProvider(llm)
+        logging.info(f"Loaded chat examples: {chat_template}")
+        search_tool = WebSearchTool(
+            llm_provider=provider,
+            message_formatter_type=chat_template,
+            max_tokens_search_results=12000,
+            max_tokens_per_summary=2048,
+        )
+        web_search_agent = LlamaCppAgent(
+            provider,
+            system_prompt=web_search_system_prompt,
+            predefined_messages_formatter_type=chat_template,
+            debug_output=True,
+        )
+        answer_agent = LlamaCppAgent(
+            provider,
+            system_prompt=research_system_prompt,
+            predefined_messages_formatter_type=chat_template,
+            debug_output=True,
+        )
+        settings = provider.get_provider_default_settings()
+        settings.stream = False
+        settings.temperature = temperature
+        settings.top_k = top_k
+        settings.top_p = top_p
+        settings.max_tokens = max_tokens
+        settings.repeat_penalty = repeat_penalty
+        output_settings = LlmStructuredOutputSettings.from_functions(
+            [search_tool.get_tool()]
+        )
+        messages = BasicChatHistory()
+        for msn in history:
+            user = {"role": Roles.user, "content": msn[0]}
+            assistant = {"role": Roles.assistant, "content": msn[1]}
+            messages.add_message(user)
+            messages.add_message(assistant)
+        result = web_search_agent.get_chat_response(
+            message,
+            llm_sampling_settings=settings,
+            structured_output_settings=output_settings,
+            add_message_to_chat_history=False,
+            add_response_to_chat_history=False,
+            print_output=False,
+        )
+        outputs = ""
+        settings.stream = True
+        response_text = answer_agent.get_chat_response(
+            f"Write a detailed and complete research document that fulfills the following user request: '{message}', based on the information from the web below.\n\n" +
+            result[0]["return_value"],
+            role=Roles.tool,
+            llm_sampling_settings=settings,
+            chat_history=messages,
+            returns_streaming_generator=True,
+            print_output=False,
+        )
+        for text in response_text:
+            outputs += text
+            yield outputs
+        output_settings = LlmStructuredOutputSettings.from_pydantic_models(
+            [CitingSources], LlmStructuredOutputType.object_instance
+        )
+        citing_sources = answer_agent.get_chat_response(
+            "Cite the sources you used in your response.",
+            role=Roles.tool,
+            llm_sampling_settings=settings,
+            chat_history=messages,
+            returns_streaming_generator=False,
+            structured_output_settings=output_settings,
+            print_output=False,
+        )
+        outputs += "\n\nSources:\n"
+        outputs += "\n".join(citing_sources.sources)
         yield outputs
+    except Exception as e:
+        st.error(f"An error occurred: {e}")
 st.title("Novav2 Web Engine")
 message = st.text_input("Enter your message:")
 history = st.session_state.get("history", [])
 temperature = st.slider("Temperature", min_value=0.1, max_value=1.0, value=0.45, step=0.1)
 top_p = st.slider("Top-p", min_value=0.1, max_value=1.0, value=0.95, step=0.05)
 top_k = st.slider("Top-k", min_value=0, max_value=100, value=40, step=1)
 repeat_penalty = st.slider("Repetition penalty", min_value=0.0, max_value=2.0, value=1.1, step=0.1)
 if st.button("Send"):
+    response_generator = respond(message, history, temperature, top_p, top_k, repeat_penalty)
     for response in response_generator:
         st.write(response)
         history.append((message, response))