jchauhan committed
Commit 6f58cbf · 1 Parent(s): 8f9f170

Initial Commit
Dockerfile ADDED
@@ -0,0 +1,13 @@
+ FROM python:3.10.13
+
+ WORKDIR /app
+
+ COPY requirements.txt requirements.txt
+ RUN pip install -r requirements.txt
+
+ COPY . .
+
+ ENV GR_SERVER_NAME=0.0.0.0
+ EXPOSE 7860
+
+ CMD ["python", "app.py"]
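
A quick local sanity check of this image (illustrative commands, not part of the commit): docker build -t llm-chat-app . followed by docker run -p 7860:7860 -e OPENAI_API_KEY=<your key> llm-chat-app. The GR_SERVER_NAME=0.0.0.0 default set above binds Gradio to all interfaces, so the UI is reachable from outside the container on port 7860.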
README.md CHANGED
@@ -1,4 +1,3 @@
- ---
  title: Demo Chat Gpt
  emoji: 💻
  colorFrom: indigo
@@ -8,6 +7,3 @@ sdk_version: 4.44.0
  app_file: app.py
  pinned: false
  license: apache-2.0
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,33 @@
+ import os
+ import gradio as gr
+ import instrumentation
+
+ instrumentation.init("llm-chat-app")
+
+ from langchain.globals import set_debug
+ from langchain_core.prompts import ChatPromptTemplate
+ from langchain_core.output_parsers import StrOutputParser
+ from langchain_openai import ChatOpenAI
+
+ set_debug(True)
+ import logging
+
+ logging.basicConfig(level=logging.DEBUG)
+ llm = ChatOpenAI(temperature=0.5, max_tokens=100, model="gpt-3.5-turbo")
+
+ output_parser = StrOutputParser()
+ prompt = ChatPromptTemplate.from_messages([
+     ("system", "You are Responsible AI assistant to the user. "),
+     ("user", "{input}")
+ ])
+
+ def handle_message(message, _history):
+     chain = prompt | llm | output_parser
+     return chain.invoke({"input": message})
+
+ server_name = os.environ.get("GR_SERVER_NAME", "127.0.0.1")
+ server_port = os.environ.get("GR_SERVER_PORT", "7860")
+
+ options = {}
+
+ gr.ChatInterface(handle_message, **options).launch(server_name=server_name, server_port=int(server_port))
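
The handler above is a plain LCEL pipeline: prompt | llm | output_parser renders the two-message prompt, calls the model, and unwraps the reply to a string. A minimal sketch of the same chain invoked outside Gradio (assumes OPENAI_API_KEY is set; all names mirror app.py):

    from langchain_core.prompts import ChatPromptTemplate
    from langchain_core.output_parsers import StrOutputParser
    from langchain_openai import ChatOpenAI

    llm = ChatOpenAI(temperature=0.5, max_tokens=100, model="gpt-3.5-turbo")
    prompt = ChatPromptTemplate.from_messages([
        ("system", "You are Responsible AI assistant to the user. "),
        ("user", "{input}"),
    ])
    chain = prompt | llm | StrOutputParser()
    print(chain.invoke({"input": "hello"}))  # prints the model's plain-text reply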
docker-compose.yml ADDED
@@ -0,0 +1,12 @@
+ version: '3.3'
+ services:
+   llm-chat-app:
+     build: ./
+     ports:
+       - 7860:7860
+     environment:
+       - OPENAI_BASE_URL=${OPENAI_BASE_URL}
+       - OPENAI_API_KEY=${OPENAI_API_KEY}
+       - TRACELOOP_BASE_URL=${TRACELOOP_BASE_URL}
+       - TRACELOOP_API_KEY=${TRACELOOP_API_KEY}
+
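
With OPENAI_API_KEY (and, if tracing is wanted, TRACELOOP_BASE_URL and TRACELOOP_API_KEY) exported in the shell, docker compose up --build builds the image from the Dockerfile above and serves the chat UI on http://localhost:7860.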
instrumentation.py ADDED
@@ -0,0 +1,5 @@
+ from traceloop.sdk import Traceloop
+
+ # TRACELOOP_BASE_URL=https://ingestor:8080
+ def init(app_name: str):
+     Traceloop.init(app_name, disable_batch=True)
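
Note the ordering in app.py: instrumentation.init() runs before the LangChain imports so the Traceloop SDK can instrument those libraries as they load. disable_batch=True exports each span immediately rather than batching, which suits a short-lived demo; the collector endpoint and key come from the TRACELOOP_BASE_URL and TRACELOOP_API_KEY environment variables wired up in docker-compose.yml.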
llm_chat_app/__init__.py ADDED
File without changes
poetry.lock ADDED
The diff for this file is too large to render. See raw diff
 
pyproject.toml ADDED
@@ -0,0 +1,17 @@
+ [tool.poetry]
+ name = "llm-chat-app"
+ version = "0.1.0"
+ description = ""
+ authors = ["jchauhan <[email protected]>"]
+ readme = "README.md"
+
+ [tool.poetry.dependencies]
+ python = "^3.11"
+ gradio = "^4.43.0"
+ datasets = "^2.21.0"
+ traceloop-sdk = "^0.30.0"
+
+
+ [build-system]
+ requires = ["poetry-core"]
+ build-backend = "poetry.core.masonry.api"
requirements.txt ADDED
@@ -0,0 +1,128 @@
+ aiofiles==23.2.1
+ aiohttp==3.9.3
+ aiosignal==1.3.1
+ altair==5.2.0
+ annotated-types==0.6.0
+ anyio==4.2.0
+ async-timeout==4.0.3
+ attrs==23.2.0
+ backoff==2.2.1
+ certifi==2024.2.2
+ charset-normalizer==3.3.2
+ click==8.1.7
+ colorama==0.4.6
+ contourpy==1.2.0
+ cycler==0.12.1
+ dataclasses-json==0.6.4
+ Deprecated==1.2.14
+ distro==1.9.0
+ exceptiongroup==1.2.0
+ fastapi==0.109.2
+ ffmpy==0.3.1
+ filelock==3.13.1
+ fonttools==4.48.1
+ frozenlist==1.4.1
+ fsspec==2024.2.0
+ googleapis-common-protos==1.62.0
+ gradio==4.17.0
+ gradio_client==0.9.0
+ grpcio==1.60.1
+ h11==0.14.0
+ httpcore==1.0.2
+ httpx==0.26.0
+ huggingface-hub==0.20.3
+ idna==3.6
+ importlib-metadata==6.11.0
+ importlib-resources==6.1.1
+ inflection==0.5.1
+ Jinja2==3.1.3
+ jsonpatch==1.33
+ jsonpointer==2.4
+ jsonschema==4.21.1
+ jsonschema-specifications==2023.12.1
+ kiwisolver==1.4.5
+ langchain==0.1.6
+ langchain-community==0.0.19
+ langchain-core==0.1.22
+ langchain-openai==0.0.5
+ langsmith==0.0.87
+ markdown-it-py==3.0.0
+ MarkupSafe==2.1.5
+ marshmallow==3.20.2
+ matplotlib==3.8.2
+ mdurl==0.1.2
+ monotonic==1.6
+ multidict==6.0.5
+ mypy-extensions==1.0.0
+ numpy==1.26.4
+ openai==1.12.0
+ opentelemetry-api==1.22.0
+ opentelemetry-exporter-otlp-proto-common==1.22.0
+ opentelemetry-exporter-otlp-proto-grpc==1.22.0
+ opentelemetry-exporter-otlp-proto-http==1.22.0
+ opentelemetry-instrumentation==0.43b0
+ opentelemetry-instrumentation-anthropic==0.10.4
+ opentelemetry-instrumentation-bedrock==0.10.4
+ opentelemetry-instrumentation-chromadb==0.10.4
+ opentelemetry-instrumentation-cohere==0.10.4
+ opentelemetry-instrumentation-dbapi==0.43b0
+ opentelemetry-instrumentation-haystack==0.10.4
+ opentelemetry-instrumentation-langchain==0.10.4
+ opentelemetry-instrumentation-llamaindex==0.10.4
+ opentelemetry-instrumentation-openai==0.10.4
+ opentelemetry-instrumentation-pinecone==0.10.4
+ opentelemetry-instrumentation-pymysql==0.43b0
+ opentelemetry-instrumentation-replicate==0.10.4
+ opentelemetry-instrumentation-requests==0.43b0
+ opentelemetry-instrumentation-transformers==0.10.4
+ opentelemetry-instrumentation-urllib3==0.43b0
+ opentelemetry-instrumentation-vertexai==0.10.4
+ opentelemetry-instrumentation-watsonx==0.10.4
+ opentelemetry-proto==1.22.0
+ opentelemetry-sdk==1.22.0
+ opentelemetry-semantic-conventions==0.43b0
+ opentelemetry-semantic-conventions-ai==0.0.19
+ opentelemetry-util-http==0.43b0
+ orjson==3.9.13
+ packaging==23.2
+ pandas==2.2.0
+ pillow==10.2.0
+ posthog==3.4.0
+ protobuf==4.25.2
+ pydantic==2.6.1
+ pydantic_core==2.16.2
+ pydub==0.25.1
+ Pygments==2.17.2
+ pyparsing==3.1.1
+ python-dateutil==2.8.2
+ python-multipart==0.0.7
+ pytz==2024.1
+ PyYAML==6.0.1
+ referencing==0.33.0
+ regex==2023.12.25
+ requests==2.31.0
+ rich==13.7.0
+ rpds-py==0.17.1
+ ruff==0.2.1
+ semantic-version==2.10.0
+ shellingham==1.5.4
+ six==1.16.0
+ sniffio==1.3.0
+ SQLAlchemy==2.0.25
+ starlette==0.36.3
+ tenacity==8.2.3
+ tiktoken==0.5.2
+ tomlkit==0.12.0
+ toolz==0.12.1
+ tqdm==4.66.1
+ traceloop-sdk==0.10.4
+ typer==0.9.0
+ typing-inspect==0.9.0
+ typing_extensions==4.9.0
+ tzdata==2023.4
+ urllib3==2.2.0
+ uvicorn==0.27.0.post1
+ websockets==11.0.3
+ wrapt==1.16.0
+ yarl==1.9.4
+ zipp==3.17.0
tests/__init__.py ADDED
File without changes
tests/scripts/send_attack_prompts_2chatapp.py ADDED
@@ -0,0 +1,83 @@
+ import sys
+ import os
+ import random
+ from time import sleep
+ from gradio_client import Client
+
+ current_dir = os.path.dirname(os.path.abspath(__file__))
+ sys.path.append(os.path.dirname(current_dir))
+
+ from datasets import load_dataset
+
+ def send_message(client, message):
+     _apps = ["fintechgpt", "healthgpt", "mydoc", "knowledge-centre", "assistantgpt"]
+
+     # Compute exponential weights for service names
+     factor = 2  # Control the steepness of the exponential decrease
+     app_weights = [factor ** -i for i in range(len(_apps))]
+
+     # Select a service_name randomly based on the exponential weights
+     service_name = random.choices(_apps, weights=app_weights, k=1)[0]
+
+     # Define providers and corresponding models with weights
+     providers_models = {
+         "Meta AI": (["LLaMA 65B", "LLaMA 33B"], 10),
+         "Mistral": (["Mistral 7B"], 8),
+         "Anthropic": (["Claude 3.5 Sonnet"], 15),
+         "OpenAI": (["GPT-3", "GPT-3.5", "GPT-4", "GPT-4o"], 25),
+         "Google": (["Gemini Ultra", "Gemini Pro", "Gemini Nano", "Lamda", "Palm"], 20),
+         "Databricks": (["Dolly"], 5),
+         "IBM AI": (["Watson NLP"], 5),
+         "Azure AI": (["Azure OpenAI", "Custom GPT-3.5"], 7),
+         "Snowflake": (["Snowflake GPT"], 3),
+         "Krutrim": (["Krutrim LLM"], 2),
+         "Baidu": (["Ernie 4.0"], 10),
+         "Stability AI": (["StableLM 3B", "StableLM 7B"], 4),
+         "Microsoft": (["Orca", "Phi-1"], 8)
+     }
+
+     # Extract providers and their weights
+     providers = list(providers_models.keys())
+     provider_weights = [providers_models[provider][1] for provider in providers]
+
+     # Randomly select a provider based on weights
+     selected_provider = random.choices(providers, weights=provider_weights, k=1)[0]
+
+     # Randomly select a model from the selected provider
+     selected_model = random.choice(providers_models[selected_provider][0])
+
+     # Note: service_name, selected_provider, and selected_model are computed
+     # but never sent; the Gradio /chat endpoint only receives the message.
+     result = client.predict(
+         message,  # str in 'Message' Textbox component
+         api_name="/chat"
+     )
+     print(f"Sending message: {message} \n\t Result {result}")
+
+
+ def main():
+     prompt_injection_dataset = load_dataset("deepset/prompt-injections")
+     toxicity_prompts = load_dataset("allenai/real-toxicity-prompts")
+
+     chat_app_url = os.environ.get('CHAT_APP_URL', "http://localhost:7860/")
+     client = Client(chat_app_url)
+
+     while True:
+         # Collect messages
+         prompt_injection_sample = prompt_injection_dataset["train"].shuffle(seed=42).select(range(10))
+         toxicity_sample = toxicity_prompts["train"].shuffle(seed=42).select(range(10))
+         plain_messages = ["this is test conversation" for _ in range(10)]
+
+         # Combine all messages into a single list
+         all_messages = [msg["text"] for msg in prompt_injection_sample] + \
+                        [msg["prompt"]["text"] for msg in toxicity_sample] + \
+                        plain_messages
+
+         # Shuffle the combined list to mix message types
+         random.shuffle(all_messages)
+
+         # Send each message
+         for message in all_messages:
+             send_message(client, message)
+             sleep(random.uniform(0.5, 2))  # Random sleep between 0.5 and 2 seconds
+
+ if __name__ == "__main__":
+     main()
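
With the app already up, CHAT_APP_URL=http://localhost:7860/ python tests/scripts/send_attack_prompts_2chatapp.py streams a shuffled mix of prompt-injection, toxicity, and benign messages at the /chat endpoint until interrupted. As a worked example of the exponential weighting: factor = 2 yields app_weights = [1, 0.5, 0.25, 0.125, 0.0625], so fintechgpt is chosen about 16 times as often as assistantgpt.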
tests/scripts/simulate_attack_prompts.py ADDED
@@ -0,0 +1,286 @@
+ import json
+ import random
+ import time
+ import os
+ import requests
+ from datasets import load_dataset
+ from time import sleep
+
+
+ TEMPLATE_JSON = json.loads("""
+ {
+   "resource_spans": [{
+     "scope_spans": [{
+       "spans": [{
+         "trace_id": "NQ01459b3A+aAHE+JwGWNQ==",
+         "end_time_unix_nano": "1725721375827041000",
+         "span_id": "0PCGpTowmVo=",
+         "kind": "SPAN_KIND_CLIENT",
+         "name": "openai.chat",
+         "start_time_unix_nano": "1725721375188928425",
+         "attributes": [
+           {"value": {"string_value": "chat"}, "key": "llm.request.type"},
+           {"value": {"string_value": "OpenAI"}, "key": "llm.vendor"},
+           {"value": {"string_value": "gpt-3.5-turbo"}, "key": "llm.request.model"},
+           {"value": {"int_value": "100"}, "key": "llm.request.max_tokens"},
+           {"value": {"double_value": 0.5}, "key": "llm.temperature"},
+           {"value": {"string_value": "None"}, "key": "llm.headers"},
+           {"value": {"string_value": "system"}, "key": "llm.prompts.0.role"},
+           {"value": {"string_value": "You are Responsible AI assistant to the user. "}, "key": "llm.prompts.0.content"},
+           {"value": {"string_value": "user"}, "key": "llm.prompts.1.role"},
+           {"value": {"string_value": "hello this my test message"}, "key": "llm.prompts.1.content"},
+           {"value": {"string_value": "gpt-3.5-turbo-0125"}, "key": "llm.response.model"},
+           {"value": {"int_value": "35"}, "key": "llm.usage.total_tokens"},
+           {"value": {"int_value": "9"}, "key": "llm.usage.completion_tokens"},
+           {"value": {"int_value": "26"}, "key": "llm.usage.prompt_tokens"},
+           {"value": {"string_value": "stop"}, "key": "llm.completions.0.finish_reason"},
+           {"value": {"string_value": "assistant"}, "key": "llm.completions.0.role"},
+           {"value": {"string_value": "Hello! How can I assist you today?"}, "key": "llm.completions.0.content"}
+         ],
+         "status": {}
+       }],
+       "scope": {
+         "name": "opentelemetry.instrumentation.openai.v1",
+         "version": "0.10.4"
+       }
+     }],
+     "resource": {
+       "attributes": [
+         {"value": {"string_value": "llm-chat-app"}, "key": "service.name"}
+       ]
+     }
+   }]
+ }
+ """)
+
+
+ def generate_random_id(size):
+     # Base64-looking identifier; it only has to resemble a trace/span id.
+     return ''.join(random.choices('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=', k=size))
+
+
+ def generate_json_from_template(template, provider, model, service_name, message):
+     # Generate random trace_id and span_id
+     trace_id = generate_random_id(22)
+     span_id = generate_random_id(12)
+
+     # Get current time in nanoseconds
+     current_time_ns = int(time.time() * 1e9)
+
+     # Update trace_id, span_id, and times. This mutates the shared template
+     # in place, which is safe here because every call overwrites the same fields.
+     span = template['resource_spans'][0]['scope_spans'][0]['spans'][0]
+     span['trace_id'] = trace_id
+     span['span_id'] = span_id
+     span['start_time_unix_nano'] = str(current_time_ns)
+     span['end_time_unix_nano'] = str(current_time_ns + random.randint(100000000, 1000000000))  # Random duration
+
+     # Update provider, model, and user message in the span attributes
+     for attr in span['attributes']:
+         if attr['key'] == 'llm.vendor':
+             attr['value']['string_value'] = provider
+         elif attr['key'] == 'llm.request.model':
+             attr['value']['string_value'] = model
+         elif attr['key'] == 'llm.prompts.1.content':  # Update user message content
+             attr['value']['string_value'] = message
+
+     # Update service_name in the resource attributes
+     for attr in template['resource_spans'][0]['resource']['attributes']:
+         if attr['key'] == 'service.name':
+             attr['value']['string_value'] = service_name
+
+     # Return the modified JSON
+     return json.dumps(template)
+
+
+ def send_json_to_remote(json_data):
+     # Get environment variables
+     base_url = os.getenv('TRACELOOP_BASE_URL')
+     api_key = os.getenv('TRACELOOP_API_KEY')
+
+     if not base_url or not api_key:
+         raise EnvironmentError("TRACELOOP_BASE_URL or TRACELOOP_API_KEY is not set in environment variables.")
+
+     # Set the headers and URL
+     url = f"{base_url}/v1/traces"
+     headers = {
+         'Content-Type': 'application/json',
+         'Authorization': f"Bearer {api_key}"
+     }
+
+     # Send the POST request
+     response = requests.post(url, headers=headers, data=json_data)
+
+     # Check the response status
+     if response.status_code == 200:
+         print("Data successfully sent!")
+     else:
+         print(f"Failed to send data. Status Code: {response.status_code}, Response: {response.text}")
+     return (response.status_code, response.text)
+
+
+ def send_message(message):
+     _apps = ["fintechgpt", "healthgpt", "mydoc", "knowledge-centre", "assistantgpt"]
+
+     # Compute exponential weights for service names
+     factor = 2  # Control the steepness of the exponential decrease
+     app_weights = [factor ** -i for i in range(len(_apps))]
+
+     # Select a service_name randomly based on the exponential weights
+     service_name = random.choices(_apps, weights=app_weights, k=1)[0]
+
+     # Define providers and corresponding models with weights
+     providers_models = {
+         "Meta AI": (["LLaMA 65B", "LLaMA 33B"], 10),
+         "Mistral": (["Mistral 7B"], 8),
+         "Anthropic": (["Claude 3.5 Sonnet"], 15),
+         "OpenAI": (["GPT-3", "GPT-3.5", "GPT-4", "GPT-4o"], 25),
+         "Google": (["Gemini Ultra", "Gemini Pro", "Gemini Nano", "Lamda", "Palm"], 20),
+         "Databricks": (["Dolly"], 5),
+         "IBM AI": (["Watson NLP"], 5),
+         "Azure AI": (["Azure OpenAI", "Custom GPT-3.5"], 7),
+         "Snowflake": (["Snowflake GPT"], 3),
+         "Krutrim": (["Krutrim LLM"], 2),
+         "Baidu": (["Ernie 4.0"], 10),
+         "Stability AI": (["StableLM 3B", "StableLM 7B"], 4),
+         "Microsoft": (["Orca", "Phi-1"], 8)
+     }
+
+     # Extract providers and their weights
+     providers = list(providers_models.keys())
+     provider_weights = [providers_models[provider][1] for provider in providers]
+
+     # Randomly select a provider based on weights
+     selected_provider = random.choices(providers, weights=provider_weights, k=1)[0]
+
+     # Randomly select a model from the selected provider
+     selected_model = random.choice(providers_models[selected_provider][0])
+
+     # Generate the JSON with the selected provider, model, and service_name
+     output_json = generate_json_from_template(TEMPLATE_JSON, selected_provider, selected_model, service_name, message)
+
+     # Send the JSON to the remote server
+     return send_json_to_remote(output_json)
+
+
+ def main():
+     prompt_injection_dataset = load_dataset("deepset/prompt-injections")
+     toxicity_prompts = load_dataset("allenai/real-toxicity-prompts")
+
+     for _ in range(2):
+         # Collect messages
+         prompt_injection_sample = prompt_injection_dataset["train"].shuffle(seed=42).select(range(10))
+         toxicity_sample = toxicity_prompts["train"].shuffle(seed=42).select(range(10))
+         plain_messages = ["this is test conversation" for _ in range(10)]
+
+         # Combine all messages into a single list
+         all_messages = [msg["text"] for msg in prompt_injection_sample] + \
+                        [msg["prompt"]["text"] for msg in toxicity_sample] + \
+                        plain_messages
+
+         # Shuffle the combined list to mix message types
+         random.shuffle(all_messages)
+
+         # Send each message
+         for message in all_messages:
+             print(f"Sending Message {message}")
+             send_message(message)
+             sleep(random.uniform(2, 4))  # Random sleep between 2 and 4 seconds
+
+ if __name__ == "__main__":
+     main()
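
Unlike send_attack_prompts_2chatapp.py, this simulator bypasses the chat app entirely: it rewrites the captured OTLP/JSON span template and POSTs it straight to ${TRACELOOP_BASE_URL}/v1/traces with a bearer token, so only TRACELOOP_BASE_URL and TRACELOOP_API_KEY need to be set; no OpenAI key or running app is required.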