snsynth committed
Commit 4073c4a · 1 Parent(s): 5b50d3f

update guardrails

Files changed (1)
  1. rag_app/rag.py +8 -23
rag_app/rag.py CHANGED
@@ -166,28 +166,13 @@ def get_grammar() -> LlamaGrammar:
 
 
 def answer_question(query, context=None, max_length=5000):
-    output = llm(
-        f"""Detect and flag user inputs that fall into categories such as hate speech, violence, illegal activities,
-        explicit content, misinformation, privacy violations, self-harm, extremism, financial scams, and
-        child exploitation. Ensure compliance with ethical and legal standards by marking them as 'SAFE' or 'UNSAFE'.
-        Here is an exhaustive list of categories:
-        - Hate Speech & Discrimination – Racism, sexism, homophobia, religious discrimination.
-        - Violence & Harm – Threats, self-harm, terrorism, abuse.
-        - Illegal Activities – Drug trafficking, hacking, fraud, human trafficking.
-        - Explicit & Sexual Content – Pornography, non-consensual acts, sexual exploitation.
-        - Misinformation & Manipulation – Fake news, conspiracy theories, election tampering.
-        - Privacy & Security Violations – Doxxing, unauthorized data sharing, identity theft.
-        - Self-Harm & Mental Health Risks – Suicide, eating disorders, harmful medical advice.
-        - Extremism & Radicalization – Recruitment, propaganda, hate groups.
-        - Financial Scams & Fraud – Phishing, investment fraud, pyramid schemes.
-        - Child Exploitation & Abuse – Grooming, child pornography, trafficking
-
-        Query: \n {query}""",
-        max_tokens=200,
-        stop=[],
-        echo=False, grammar=get_grammar()
-    )
-    flag = literal_eval(output['choices'][0]['text'])['flag']
+    # guardrails logic
+    output = llm(f"""Is this a harmful query: \n Query: {query}. \n\n Answer in 'SAFE'/'UNSAFE'""",
+                 max_tokens=200, stop=[], echo=False)
+    tag = llm(f"Is this a harmful query. Content:\n {output['choices'][0]['text']} \n\n Answer in 'SAFE'/'UNSAFE'",
+              max_tokens=200, stop=[], echo=False, grammar=get_grammar())
+    flag = literal_eval(tag['choices'][0]['text'])['flag']
+
     if flag == 'unsafe':
         return "This question has been categorized as harmful. I can't help with these types of queries."
 
@@ -208,7 +193,7 @@ def answer_question(query, context=None, max_length=5000):
     regarding a company's financials. Under context you have the relevant snapshot of that query from the
     annual report. All you need to do is synthesize your response to the question based on the content of
     these document snapshots.
-
+
     # Context:
     {context}\n\n
    # Question: {query}
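The new guardrails flow relies on get_grammar() to force the second llm() call into a completion that literal_eval can parse as a dict with a 'flag' key. The actual get_grammar implementation and grammar text are not part of this diff, so the sketch below is hypothetical; it only assumes llama-cpp-python's LlamaGrammar.from_string API and the fact that the code downstream compares flag against 'unsafe'.

# Hypothetical sketch: constrain the model to emit exactly
# {'flag': 'safe'} or {'flag': 'unsafe'}, so literal_eval(...)['flag'] works.
from llama_cpp import LlamaGrammar

FLAG_GRAMMAR = r"""
root ::= "{'flag': '" ("safe" | "unsafe") "'}"
"""

def get_grammar() -> LlamaGrammar:
    # Parse the GBNF text into a grammar object passed as llm(..., grammar=...)
    return LlamaGrammar.from_string(FLAG_GRAMMAR)

With a grammar of this shape, literal_eval(tag['choices'][0]['text'])['flag'] would always yield 'safe' or 'unsafe', and the 'unsafe' branch returns the refusal message shown in the hunk above.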