cogcorp committed
Commit d4bc051 · 0 Parent(s):

Duplicate from cogcorp/askexpert

Files changed (7):
  1. .gitattributes +34 -0
  2. README.md +13 -0
  3. app.py +121 -0
  4. documents.zip +3 -0
  5. persona.pdf +0 -0
  6. persona.zip +3 -0
  7. requirements.txt +9 -0
.gitattributes ADDED
@@ -0,0 +1,34 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,13 @@
+ ---
+ title: Askexpert
+ emoji: 👁
+ colorFrom: blue
+ colorTo: red
+ sdk: gradio
+ sdk_version: 3.29.0
+ app_file: app.py
+ pinned: false
+ duplicated_from: cogcorp/askexpert
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,121 @@
+ import gradio as gr
+ from PyPDF2 import PdfReader
+ import zipfile
+ import os
+ import io
+ import nltk
+ import openai
+ import time
+ import subprocess
+ import sys
+ from sentence_transformers import SentenceTransformer, util
+
+ # Install required libraries from requirements.txt at runtime
+ subprocess.check_call([sys.executable, "-m", "pip", "install", "-r", "requirements.txt"])
+
+ def install(package):
+     subprocess.check_call([sys.executable, "-m", "pip", "install", package])
+
+ # Install PyTorch, transformers, and sentence-transformers
+ install("torch")
+ install("transformers")
+ install("sentence-transformers")
+
+ # Download the required NLTK data package
+ nltk.download('punkt')
+
+ # Read the OpenAI API key from the 'OpenAPI' environment variable
+ openai.api_key = os.getenv('OpenAPI')
+
+ # Cache of extracted PDF text, keyed by the file object
+ cache = {}
+
+ # Load the pre-trained sentence-embedding model
+ model = SentenceTransformer('all-MiniLM-L6-v2')
+
+ def create_persona(text):
+     max_retries = 5
+     for attempt in range(max_retries):
+         try:
+             response = openai.ChatCompletion.create(
+                 model="gpt-3.5-turbo",
+                 messages=[
+                     {"role": "system", "content": "You are an expert at summarizing content to provide a factual persona."},
+                     {"role": "user", "content": f"Create a persona based on the following. Cognitive Corp provides workplace optimization via a cognitive workplace, which can be defined as an environment where cutting-edge technology, such as artificial intelligence and machine learning, is leveraged. Also use this text: {text}"},
+                 ]
+             )
+             return response['choices'][0]['message']['content']
+         except Exception as e:
+             if attempt < max_retries - 1:  # if it's not the last attempt
+                 time.sleep(1)  # wait for 1 second before retrying
+                 continue
+             else:
+                 return str(e)  # return the exception message after the last attempt
+
+ def call_openai_api(persona, user_prompt, additional_facts):
+     max_retries = 5
+     for attempt in range(max_retries):
+         try:
+             response = openai.ChatCompletion.create(
+                 model="gpt-3.5-turbo",
+                 messages=[
+                     {"role": "system", "content": f"You are {persona}"},
+                     {"role": "user", "content": f"""Ignore all previous instructions. As {persona},
+ you are James Waddell; you wrote this paper: optimizing the workplace through technology.
+ Here are some additional facts: {additional_facts}. Now, provide short factual answers as James, focusing on the additional facts if provided: {user_prompt}"""},
+                 ]
+             )
+             return response['choices'][0]['message']['content']
+         except Exception as e:
+             if attempt < max_retries - 1:  # if it's not the last attempt
+                 time.sleep(1)  # wait for 1 second before retrying
+                 continue
+             else:
+                 return str(e)  # return the exception message after the last attempt
+
+ def extract_persona_from_pdf(pdf_file):
+     with open(pdf_file, 'rb') as f:
+         pdf = PdfReader(f)
+         aggregated_text = ''
+         for page in pdf.pages:
+             aggregated_text += page.extract_text()
+     return create_persona(aggregated_text)
+
+ # Extract the persona from the persona.pdf file
+ persona = extract_persona_from_pdf('persona.pdf')
+
+ def pdf_to_text(pdf_file_io, user_prompt, persona):
+     aggregated_text = ''
+     pdf = PdfReader(pdf_file_io)
+     for page in pdf.pages:
+         aggregated_text += page.extract_text()
+     cache[pdf_file_io] = aggregated_text
+
+     query_embedding = model.encode(user_prompt, convert_to_tensor=True)
+     text_embedding = model.encode(aggregated_text, convert_to_tensor=True)
+     cosine_scores = util.pytorch_cos_sim(query_embedding, text_embedding)
+
+     if cosine_scores[0][0] > 0.5:
+         additional_facts = "Direct answer from author's knowledge: " + user_prompt
+     else:
+         additional_facts = "No additional information to add."
+
+     answer = call_openai_api(persona, user_prompt, additional_facts)
+     return answer
+
+ def ask_expert(user_prompt):
+     with zipfile.ZipFile("documents.zip", 'r') as z:
+         for filename in z.namelist():
+             if filename.endswith('.pdf'):
+                 pdf_file_data = z.read(filename)
+                 pdf_file_io = io.BytesIO(pdf_file_data)
+                 result = pdf_to_text(pdf_file_io, user_prompt, persona)
+     return result
+
+ iface = gr.Interface(
+     fn=ask_expert,
+     inputs=gr.inputs.Textbox(lines=1, placeholder="Enter a question or prompt for the Author", label="User Prompt"),
+     outputs=gr.outputs.Textbox(label="Cognitive Agent Response")
+ )
+ iface.launch()
+
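For context on the retrieval check above: `pdf_to_text` embeds the user prompt and the entire extracted PDF text, then gates the "additional facts" on a single cosine-similarity score. Below is a minimal standalone sketch of that check, assuming the same `all-MiniLM-L6-v2` model and 0.5 threshold used in app.py; the prompt and document strings are hypothetical stand-ins for the real PDF contents.

```python
from sentence_transformers import SentenceTransformer, util

# Same pre-trained sentence-embedding model loaded by app.py
model = SentenceTransformer('all-MiniLM-L6-v2')

# Hypothetical prompt and document text (stand-ins for the PDF contents)
user_prompt = "How does a cognitive workplace use machine learning?"
document_text = "A cognitive workplace applies AI and machine learning to optimize the office."

# Encode both strings and compare them with cosine similarity
query_embedding = model.encode(user_prompt, convert_to_tensor=True)
text_embedding = model.encode(document_text, convert_to_tensor=True)
score = util.pytorch_cos_sim(query_embedding, text_embedding)[0][0].item()

# app.py treats a score above 0.5 as "the document answers this directly"
if score > 0.5:
    print(f"Relevant (score={score:.2f}): pass the prompt through as an additional fact")
else:
    print(f"Not relevant (score={score:.2f}): no additional information to add")
```

Because the whole document is embedded as one chunk, longer PDFs tend to pull the score down; chunking the text before scoring would be the usual refinement, whereas the app above keeps a single comparison per document.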
documents.zip ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d8cc19cef8472fb9b6967032b7dcdc18f643de1847436709f105bea6c60dfd2a
+ size 2664989
persona.pdf ADDED
Binary file (85 kB).
 
persona.zip ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cffb423c87bc2f24036e7b254d1699ed9f8ae9b8179fc980ec441d7d1d189cac
+ size 645110
requirements.txt ADDED
@@ -0,0 +1,9 @@
+ PyPDF2
+ numpy
+ nltk
+ scikit-learn
+ tensorflow>=2.0.0
+ tensorflow-hub
+ openai
+ gradio
+ sentence-transformers