dineth554 committed on
Commit 70c3062 · verified · 1 Parent(s): 6f48855

Update app.py

Files changed (1): app.py +99 -96
app.py CHANGED
@@ -57,115 +57,118 @@ hf_hub_download(
 )
 
 # Function to respond to user messages
-def respond(message, history, system_message, temperature, top_p, top_k, repeat_penalty):
-    model = "mixtral-8x7b-instruct-v0.1.Q5_K_M.gguf"
-    max_tokens = 3000
-    chat_template = get_messages_formatter_type(model)
-    llm = Llama(
-        model_path=f"models/{model}",
-        flash_attn=True,
-        n_gpu_layers=81,
-        n_batch=1024,
-        n_ctx=get_context_by_model(model),
-    )
-    provider = LlamaCppPythonProvider(llm)
-    logging.info(f"Loaded chat examples: {chat_template}")
-    search_tool = WebSearchTool(
-        llm_provider=provider,
-        message_formatter_type=chat_template,
-        max_tokens_search_results=12000,
-        max_tokens_per_summary=2048,
-    )
-
-    web_search_agent = LlamaCppAgent(
-        provider,
-        system_prompt=web_search_system_prompt,
-        predefined_messages_formatter_type=chat_template,
-        debug_output=True,
-    )
-
-    answer_agent = LlamaCppAgent(
-        provider,
-        system_prompt=research_system_prompt,
-        predefined_messages_formatter_type=chat_template,
-        debug_output=True,
-    )
-
-    settings = provider.get_provider_default_settings()
-    settings.stream = False
-    settings.temperature = temperature
-    settings.top_k = top_k
-    settings.top_p = top_p
-    settings.max_tokens = max_tokens
-    settings.repeat_penalty = repeat_penalty
-
-    output_settings = LlmStructuredOutputSettings.from_functions(
-        [search_tool.get_tool()]
-    )
-
-    messages = BasicChatHistory()
-
-    for msn in history:
-        user = {"role": Roles.user, "content": msn[0]}
-        assistant = {"role": Roles.assistant, "content": msn[1]}
-        messages.add_message(user)
-        messages.add_message(assistant)
-
-    result = web_search_agent.get_chat_response(
-        message,
-        llm_sampling_settings=settings,
-        structured_output_settings=output_settings,
-        add_message_to_chat_history=False,
-        add_response_to_chat_history=False,
-        print_output=False,
-    )
-
-    outputs = ""
-
-    settings.stream = True
-    response_text = answer_agent.get_chat_response(
-        f"Write a detailed and complete research document that fulfills the following user request: '{message}', based on the information from the web below.\n\n" +
-        result[0]["return_value"],
-        role=Roles.tool,
-        llm_sampling_settings=settings,
-        chat_history=messages,
-        returns_streaming_generator=True,
-        print_output=False,
-    )
-
-    for text in response_text:
-        outputs += text
-        yield outputs
-
-    output_settings = LlmStructuredOutputSettings.from_pydantic_models(
-        [CitingSources], LlmStructuredOutputType.object_instance
-    )
-
-    citing_sources = answer_agent.get_chat_response(
-        "Cite the sources you used in your response.",
-        role=Roles.tool,
-        llm_sampling_settings=settings,
-        chat_history=messages,
-        returns_streaming_generator=False,
-        structured_output_settings=output_settings,
-        print_output=False,
-    )
-    outputs += "\n\nSources:\n"
-    outputs += "\n".join(citing_sources.sources)
-    yield outputs
+def respond(message, history, temperature, top_p, top_k, repeat_penalty):
+    try:
+        model = "mixtral-8x7b-instruct-v0.1.Q5_K_M.gguf"
+        max_tokens = 3000
+        chat_template = get_messages_formatter_type(model)
+        llm = Llama(
+            model_path=f"models/{model}",
+            flash_attn=True,
+            n_gpu_layers=81,
+            n_batch=1024,
+            n_ctx=get_context_by_model(model),
+        )
+        provider = LlamaCppPythonProvider(llm)
+        logging.info(f"Loaded chat examples: {chat_template}")
+        search_tool = WebSearchTool(
+            llm_provider=provider,
+            message_formatter_type=chat_template,
+            max_tokens_search_results=12000,
+            max_tokens_per_summary=2048,
+        )
+
+        web_search_agent = LlamaCppAgent(
+            provider,
+            system_prompt=web_search_system_prompt,
+            predefined_messages_formatter_type=chat_template,
+            debug_output=True,
+        )
+
+        answer_agent = LlamaCppAgent(
+            provider,
+            system_prompt=research_system_prompt,
+            predefined_messages_formatter_type=chat_template,
+            debug_output=True,
+        )
+
+        settings = provider.get_provider_default_settings()
+        settings.stream = False
+        settings.temperature = temperature
+        settings.top_k = top_k
+        settings.top_p = top_p
+        settings.max_tokens = max_tokens
+        settings.repeat_penalty = repeat_penalty
+
+        output_settings = LlmStructuredOutputSettings.from_functions(
+            [search_tool.get_tool()]
+        )
+
+        messages = BasicChatHistory()
+
+        for msn in history:
+            user = {"role": Roles.user, "content": msn[0]}
+            assistant = {"role": Roles.assistant, "content": msn[1]}
+            messages.add_message(user)
+            messages.add_message(assistant)
+
+        result = web_search_agent.get_chat_response(
+            message,
+            llm_sampling_settings=settings,
+            structured_output_settings=output_settings,
+            add_message_to_chat_history=False,
+            add_response_to_chat_history=False,
+            print_output=False,
+        )
+
+        outputs = ""
+
+        settings.stream = True
+        response_text = answer_agent.get_chat_response(
+            f"Write a detailed and complete research document that fulfills the following user request: '{message}', based on the information from the web below.\n\n" +
+            result[0]["return_value"],
+            role=Roles.tool,
+            llm_sampling_settings=settings,
+            chat_history=messages,
+            returns_streaming_generator=True,
+            print_output=False,
+        )
+
+        for text in response_text:
+            outputs += text
+            yield outputs
+
+        output_settings = LlmStructuredOutputSettings.from_pydantic_models(
+            [CitingSources], LlmStructuredOutputType.object_instance
+        )
+
+        citing_sources = answer_agent.get_chat_response(
+            "Cite the sources you used in your response.",
+            role=Roles.tool,
+            llm_sampling_settings=settings,
+            chat_history=messages,
+            returns_streaming_generator=False,
+            structured_output_settings=output_settings,
+            print_output=False,
+        )
+        outputs += "\n\nSources:\n"
+        outputs += "\n".join(citing_sources.sources)
+        yield outputs
+
+    except Exception as e:
+        st.error(f"An error occurred: {e}")
 
 st.title("Novav2 Web Engine")
 
 message = st.text_input("Enter your message:")
 history = st.session_state.get("history", [])
-system_message = st.text_area("System message", value=web_search_system_prompt)
 temperature = st.slider("Temperature", min_value=0.1, max_value=1.0, value=0.45, step=0.1)
 top_p = st.slider("Top-p", min_value=0.1, max_value=1.0, value=0.95, step=0.05)
 top_k = st.slider("Top-k", min_value=0, max_value=100, value=40, step=1)
 repeat_penalty = st.slider("Repetition penalty", min_value=0.0, max_value=2.0, value=1.1, step=0.1)
 
 if st.button("Send"):
-    response_generator = respond(message, history, system_message, temperature, top_p, top_k, repeat_penalty)
+    response_generator = respond(message, history, temperature, top_p, top_k, repeat_penalty)
     for response in response_generator:
        st.write(response)
        history.append((message, response))
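
A side note on the Streamlit wiring at the end of this hunk: the script reads history from st.session_state but never writes the updated list back, and it appends (message, response) once per streamed partial, so the stored history would both reset on the next rerun and accumulate duplicates within a turn. Below is a minimal sketch of a send handler that keeps one final pair per turn, reusing the message widget and the respond() generator from the app above; the explicit session-state write-back and the final-response capture are my assumptions, not part of this commit.

import streamlit as st

# Streamlit re-runs the whole script on every interaction, so anything
# that must survive a rerun has to live in st.session_state.
if "history" not in st.session_state:
    st.session_state["history"] = []
history = st.session_state["history"]

if st.button("Send"):
    final_response = None
    # respond() yields the accumulated text so far, so the last value
    # yielded is the complete answer.
    for final_response in respond(message, history, temperature, top_p, top_k, repeat_penalty):
        st.write(final_response)
    if final_response is not None:
        # Store one (user, assistant) pair per turn, then write back
        # so the next rerun sees the updated history (assumption).
        history.append((message, final_response))
        st.session_state["history"] = history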