ppsingh committed on
Commit
5620e1f
1 Parent(s): 71aaf00

Create utils.py

Browse files
Files changed (1) hide show
  1. auditqa/utils.py +72 -0
auditqa/utils.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import logging
3
+ import json
4
+ from langchain.schema import (
5
+ HumanMessage,
6
+ SystemMessage,
7
+ )
8
+
9
def save_logs(scheduler, JSON_DATASET_PATH, logs) -> None:
    """Append one interaction record to the JSON-lines log file.

    Every interaction with the app saves the log of question and answer;
    this is used to get usage statistics of the app and to evaluate model
    performance.

    Args:
        scheduler: object exposing a ``lock`` context manager that guards
            access to the log file.
        JSON_DATASET_PATH: path-like object with an ``open(mode)`` method
            (e.g. ``pathlib.Path``) pointing at the log file.
        logs: JSON-serializable record to append.

    Raises:
        TypeError: if ``logs`` is not JSON-serializable. Nothing is written
            to the file in that case.
    """
    # Serialize up front: json.dump() streams chunks straight into the file,
    # so a non-serializable value would leave a truncated line behind and
    # corrupt the JSON-lines log. Doing it here also keeps the lock short.
    record = json.dumps(logs)

    with scheduler.lock, JSON_DATASET_PATH.open("a") as f:
        # Single write so the record and its newline land together.
        f.write(record + "\n")
    logging.info("logging done")
18
+
19
def get_message_template(type, SYSTEM_PROMPT, USER_PROMPT):
    """Build the system + user message pair in the shape selected by ``type``.

    Args:
        type: ``'NVIDIA'`` yields a list of ``{"role", "content"}`` dicts;
            ``'DEDICATED'`` yields a list of ``SystemMessage`` /
            ``HumanMessage`` objects. The comparison is exact (case-sensitive).
        SYSTEM_PROMPT: content of the system message.
        USER_PROMPT: content of the user message.

    Returns:
        The two-element message list, or ``None`` for any other ``type``.
    """
    if type == 'NVIDIA':
        return [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": USER_PROMPT},
        ]

    if type == 'DEDICATED':
        system_msg = SystemMessage(content=SYSTEM_PROMPT)
        human_msg = HumanMessage(content=USER_PROMPT)
        return [system_msg, human_msg]

    # Unrecognized type: callers receive None rather than an exception.
    return None
31
+
32
+
33
def make_html_source(source, i):
    """Render one retrieved document as an HTML card for the "source" side tab.

    Args:
        source: object with a ``metadata`` mapping (keys ``'filename'`` and
            ``'page'`` are read) and a ``page_content`` string.
        i: index of the document; used in the card's ``id`` (``doc{i}``) and
            in its heading.

    Returns:
        HTML string for the card. The text content and the filename are
        HTML-escaped so characters such as ``<``, ``&`` or ``"`` coming from
        the document cannot break or inject markup.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    meta = source.metadata
    # escape() also escapes quotes by default, which makes `name` safe inside
    # the href attribute below as well as in element text.
    name = escape(str(meta['filename']))
    page = int(meta['page'])
    content = escape(source.page_content.strip())

    card = f"""
    <div class="card" id="doc{i}">
        <div class="card-content">
            <h2>Doc {i} - {name} - Page {page}</h2>
            <p>{content}</p>
        </div>
        <div class="card-footer">
            <span>{name}</span>
            <a href="{name}#page={page}" target="_blank" class="pdf-link">
                <span role="img" aria-label="Open PDF">🔗</span>
            </a>
        </div>
    </div>
    """

    return card
57
+
58
+
59
def parse_output_llm_with_sources(output):
    """Replace ``[Doc X]`` style references in LLM output with HTML anchors.

    A reference is a bracketed, comma-separated list such as ``[Doc 1]`` or
    ``[Doc 1, Doc 2]``. Each document number becomes a superscript link that
    targets the element with id ``doc<number>``.

    Only bracketed references are rewritten. Ordinary text is passed through
    untouched even when it begins with "Doc" (e.g. "Documents show ..."),
    which a ``startswith("Doc")`` check on split fragments would wrongly
    convert into a link.

    Args:
        output: raw text produced by the LLM.

    Returns:
        The text with every reference replaced by its anchor markup.
    """
    reference_pattern = r'\[(Doc\s?\d+(?:,\s?Doc\s?\d+)*)\]'

    def _render_reference(match):
        # The captured group looks like "Doc 1, Doc 2"; only the numbers are
        # needed to build the links.
        doc_numbers = re.findall(r'\d+', match.group(1))
        return "".join(
            f"""<a href="#doc{number}" class="a-doc-ref" target="_self"><span class='doc-ref'><sup>{number}</sup></span></a>"""
            for number in doc_numbers
        )

    return re.sub(reference_pattern, _render_reference, output)