anmol11p committed on
Commit
b57a279
·
verified ·
1 Parent(s): 6471ae8

fix token problem

Browse files
Files changed (1) hide show
  1. src/compliance_lib.py +52 -47
src/compliance_lib.py CHANGED
@@ -1,40 +1,37 @@
1
  import re
2
- from huggingface_hub import InferenceClient
3
  import os
4
  import requests as req
5
  from bs4 import BeautifulSoup
6
- import streamlit as st
7
- from dotenv import load_dotenv
8
- load_dotenv()
9
- RULES={
10
- "GDPR":[
11
- ("Lawful basis documented", r"lawful\s+basis"),
12
  ("Data-subject rights process", r"right\s+to\s+access|erasure"),
13
  ("72-hour breach notice plan", r"72\s*hour"),
14
  ],
15
- "EU_AI_ACT":[
16
- ("High-risk AI DPIA", r"risk\s+assessment"),
17
  ("Training data governance", r"data\s+governance"),
18
  ],
19
- "ISO_27001":[
20
  ("Annex A control list", r"annex\s*a"),
21
  ("Statement of Applicability", r"statement\s+of\s+applicability"),
22
  ]
23
- }
 
24
 
25
- def run_check(text,framework):
26
- # print(text,framework) #array from me aata hai framework
27
- results={}
28
  for fw in framework:
29
- results[fw]=[] #store particular fw data
30
- # one work as label & one work as pattern e.g==>label: Training data governance pattern: data\s+governance
31
  for label, pattern in RULES[fw]:
32
- match = re.search(pattern, text, re.I) # re.I = re.IGNORECASE
33
- results[fw].append((label, bool(match)))
34
  return results
35
 
36
 
37
-
38
  AI_REPORT_PROMPT = """
39
  You are an expert compliance consultant with deep experience in GDPR, the EU AI Act, ISO 27001, and related global data‑privacy and security standards. You have just received a concise checklist summary showing, for each framework, how many controls passed and which specific items failed.
40
 
@@ -75,32 +72,39 @@ Generate the report as markdown.
75
 
76
  HF_MODEL = "mistralai/Mixtral-8x7B-Instruct-v0.1"
77
 
78
-
79
-
80
-
81
- def generate_report(prompt,max_tokens=600):
82
- token = os.getenv("HF_TOKEN")
83
- if not token:
84
- raise EnvironmentError("token is not found in env issue")
85
-
86
- client = InferenceClient(
87
- provider="together",
88
- api_key=token,
89
- )
90
- try:
91
- response = client.chat.completions.create(
92
- model=HF_MODEL,
93
- messages=[ {
94
- "role": "user",
95
- "content": prompt
96
- }]
97
-
 
 
 
 
 
 
 
 
 
98
  )
99
-
100
- return response.choices[0].message.content
101
- except Exception as e:
102
-
103
- return "Error: Failed to generate report."
104
 
105
 
106
  def fetchText(url):
@@ -113,8 +117,9 @@ def fetchText(url):
113
  text = main_content.get_text(separator='\n', strip=True)
114
  else:
115
  text = soup.body.get_text(separator='\n', strip=True)
116
-
117
- return text.strip(), None # No error
118
  except Exception as e:
119
  return "", f"Error fetching URL: {e}"
120
- __all__=["RULES","run_check","AI_REPORT_PROMPT","generate_report","fetchText"]
 
 
 
1
  import re
 
2
  import os
3
  import requests as req
4
  from bs4 import BeautifulSoup
5
+ from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
6
+ import torch
7
+
8
# Compliance checklist used by the keyword scan.
# Maps a framework name to a list of (label, regex) pairs; a control counts as
# "passed" when its regex is found anywhere in the scanned text. Patterns are
# applied case-insensitively by the caller.
RULES = {
    "GDPR": [
        ("Lawful basis documented", r"lawful\s+basis"),
        ("Data-subject rights process", r"right\s+to\s+access|erasure"),
        # Accept "72 hour", "72hour" and the hyphenated "72-hour" spelling.
        ("72-hour breach notice plan", r"72[\s-]*hour"),
    ],
    "EU_AI_ACT": [
        ("High-risk AI DPIA", r"risk\s+assessment"),
        ("Training data governance", r"data\s+governance"),
    ],
    "ISO_27001": [
        # Trailing \b keeps "annex amendments" / "annex applies" from matching.
        ("Annex A control list", r"annex\s*a\b"),
        ("Statement of Applicability", r"statement\s+of\s+applicability"),
    ],
}
23
+
24
 
25
def run_check(text, framework, rules=None):
    """Scan *text* for the checklist patterns of the selected frameworks.

    Args:
        text: Document text to search.
        framework: Iterable of framework names to evaluate. A single name
            given as a plain string is treated as a one-element selection
            instead of being iterated character by character.
        rules: Optional mapping of framework name to a list of
            ``(label, regex)`` pairs. Defaults to the module-level ``RULES``.

    Returns:
        A dict mapping each requested framework name to a list of
        ``(label, passed)`` tuples, in rule order, where ``passed`` is True
        when the rule's regex matches anywhere in *text* (case-insensitive).

    Raises:
        KeyError: If a requested framework is not present in the rule table.
    """
    if rules is None:
        rules = RULES
    if isinstance(framework, str):
        # Iterating a str would look up each character as a framework name.
        framework = [framework]

    return {
        fw: [
            (label, bool(re.search(pattern, text, re.I)))
            for label, pattern in rules[fw]
        ]
        for fw in framework
    }
33
 
34
 
 
35
  AI_REPORT_PROMPT = """
36
  You are an expert compliance consultant with deep experience in GDPR, the EU AI Act, ISO 27001, and related global data‑privacy and security standards. You have just received a concise checklist summary showing, for each framework, how many controls passed and which specific items failed.
37
 
 
72
 
73
  HF_MODEL = "mistralai/Mixtral-8x7B-Instruct-v0.1"
74
 
75
def load_pipeline():
    """Build the local text-generation pipeline for ``HF_MODEL``.

    Loads the tokenizer first, then the causal language model in float16
    with ``device_map="auto"``, and wraps both in a transformers
    ``"text-generation"`` pipeline.

    Returns:
        The pipeline object returned by ``transformers.pipeline``.
    """
    model_tokenizer = AutoTokenizer.from_pretrained(HF_MODEL)

    model_options = {
        "torch_dtype": torch.float16,
        "device_map": "auto",
        # NOTE(review): trust_remote_code allows code shipped in the model
        # repository to run locally - confirm it is actually required here.
        "trust_remote_code": True,
    }
    causal_lm = AutoModelForCausalLM.from_pretrained(HF_MODEL, **model_options)

    return pipeline(
        "text-generation",
        model=causal_lm,
        tokenizer=model_tokenizer,
        device_map="auto",
    )


# Built once when this module is imported, so importing the module triggers
# the tokenizer and model load above.
generator = load_pipeline()
93
+
94
+
95
def generate_report(prompt, max_tokens=600):
    """Generate report text for *prompt* with the module-level ``generator``.

    Args:
        prompt: Text passed to the generation pipeline as-is.
        max_tokens: Forwarded to the pipeline as ``max_new_tokens``.

    Returns:
        The ``"generated_text"`` field of the first pipeline result, or the
        string ``"Error: <exception>"`` if anything raises during generation.
    """
    sampling_options = {
        "max_new_tokens": max_tokens,
        "do_sample": True,
        "temperature": 0.7,
        "top_p": 0.95,
        "return_full_text": False,
    }
    try:
        outputs = generator(prompt, **sampling_options)
        first_candidate = outputs[0]
        return first_candidate["generated_text"]
    except Exception as exc:
        # Best-effort: hand the failure back as text instead of raising.
        return f"Error: {exc}"
 
 
108
 
109
 
110
  def fetchText(url):
 
117
  text = main_content.get_text(separator='\n', strip=True)
118
  else:
119
  text = soup.body.get_text(separator='\n', strip=True)
120
+ return text.strip(), None
 
121
  except Exception as e:
122
  return "", f"Error fetching URL: {e}"
123
+
124
+ # Exported functions
125
+ __all__ = ["RULES", "run_check", "AI_REPORT_PROMPT", "generate_report", "fetchText"]