File size: 6,614 Bytes
30d349c 17d7a6f 30d349c 0d707b6 30d349c 7a9af26 1f0edc8 30d349c 17d7a6f 30d349c 17d7a6f 30d349c 17d7a6f 30d349c 1f0edc8 30d349c 17d7a6f 30d349c 17d7a6f 30d349c d3f8557 30d349c 4fcd3a3 30d349c 23d19f4 a57d622 4fcd3a3 23d19f4 dc091b9 23d19f4 30d349c 23d19f4 dc091b9 23d19f4 d3f8557 b326e89 23d19f4 dc091b9 23d19f4 d3f8557 a57d622 1f0edc8 4fcd3a3 1f0edc8 30d349c d3f8557 7a9af26 1f0edc8 7a9af26 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 |
import json
from typing import Optional

import requests
class VectaraQuery():
    """Small client that queries a Vectara corpus and summarizes the hits.

    The client posts a query to the streaming query endpoint, concatenates
    the text of every returned result, and then posts that text to the
    summary endpoint. All public methods return plain Python values; the
    two network methods always return a string (either the answer or a
    user-facing fallback message).
    """

    DEFAULT_PROMPT_NAME = "vectara-experimental-summary-ext-2023-12-11-large"
    QUERY_ENDPOINT = "https://api.vectara.io/v1/stream-query"
    # NOTE(review): this endpoint is taken verbatim from the original code;
    # confirm against the Vectara API reference that it exists.
    SUMMARY_ENDPOINT = "https://api.vectara.io/v1/stream-summary"
    # Matches the "grpc-timeout" header sent with every request, so the
    # client does not wait longer than the server is asked to work.
    REQUEST_TIMEOUT_SECONDS = 60

    _ERROR_MESSAGE = "Sorry, something went wrong. Please try again later."
    _NO_RESULT_MESSAGE = "No relevant information found."
    _SYSTEM_PROMPT = (
        "You are an assistant that provides information about drink names "
        "based on a given corpus. "
        "Format the response in the following way:\n"
        "Reason: <reason why the name cannot be used>\n"
        "Alternative: <alternative name>\n"
        "Notes: <additional notes>\n\n"
        "Example:\n"
        "Reason: The name 'Vodka Sunrise' cannot be used because it is "
        "trademarked.\n"
        "Alternative: Use 'Morning Delight' instead.\n"
        "Notes: Ensure the drink contains vodka to match the alternative name."
    )

    def __init__(self, api_key: str, customer_id: str, corpus_id: str,
                 prompt_name: Optional[str] = None):
        """Store the credentials and summarizer settings.

        Args:
            api_key: Value sent in the ``x-api-key`` header.
            customer_id: Value sent in the ``customer-id`` header and in the
                corpus key.
            corpus_id: Identifier of the corpus to search.
            prompt_name: Summarizer prompt name; falls back to
                ``DEFAULT_PROMPT_NAME`` when omitted or empty.
        """
        self.customer_id = customer_id
        self.corpus_id = corpus_id
        self.api_key = api_key
        self.prompt_name = prompt_name or self.DEFAULT_PROMPT_NAME
        # NOTE(review): nothing in this class ever assigns a conversation id
        # after construction, so every request is sent with None here.
        self.conv_id = None

    @classmethod
    def _build_prompt(cls, user_text: str) -> str:
        """Return the chat prompt as a JSON array of role/content messages.

        Serializing with ``json.dumps`` (instead of pasting the text into a
        hand-written JSON template) guarantees that quotes, backslashes and
        newlines in ``user_text`` are escaped correctly.
        """
        # NOTE(review): presumably the service applies its own templating to
        # promptText; special template characters in user_text are passed
        # through unchanged - confirm whether they need neutralizing.
        messages = [
            {"role": "system", "content": cls._SYSTEM_PROMPT},
            {"role": "user", "content": user_text},
        ]
        return json.dumps(messages, indent=2)

    def _summary_config(self, user_text: str) -> dict:
        """Return the summarizer settings shared by both request bodies."""
        return {
            'responseLang': 'eng',
            'maxSummarizedResults': 1,
            'summarizerPromptName': self.prompt_name,
            'promptText': self._build_prompt(user_text),
            'chat': {
                'store': True,
                'conversationId': self.conv_id
            },
        }

    def get_body(self, user_response: str) -> dict:
        """Build the request body for the streaming query endpoint.

        Args:
            user_response: The raw text typed by the user. It is sent
                unmodified as the search query and embedded (JSON-escaped)
                in the summarizer prompt.

        Returns:
            A JSON-serializable dict with a single entry under ``'query'``.
        """
        corpora_key_list = [{
            'customer_id': self.customer_id,
            'corpus_id': self.corpus_id,
            'lexical_interpolation_config': {'lambda': 0.025},
        }]
        return {
            'query': [
                {
                    # The search text must be the user's original words; any
                    # escaping belongs only to the prompt serialization.
                    'query': user_response,
                    'start': 0,
                    'numResults': 10,
                    'corpusKey': corpora_key_list,
                    'context_config': {
                        'sentences_before': 2,
                        'sentences_after': 2,
                        'start_tag': "%START_SNIPPET%",
                        'end_tag': "%END_SNIPPET%",
                    },
                    'summary': [self._summary_config(user_response)],
                }
            ]
        }

    def get_headers(self) -> dict:
        """Return the HTTP headers sent with every request."""
        return {
            "Content-Type": "application/json",
            "Accept": "application/json",
            "customer-id": self.customer_id,
            "x-api-key": self.api_key,
            "grpc-timeout": "60S"
        }

    @staticmethod
    def _iter_json_chunks(response):
        """Yield each non-empty line of a streamed response parsed as JSON.

        Lines that are not valid UTF-8 JSON are reported and skipped so one
        bad chunk does not abort the whole stream.
        """
        for line in response.iter_lines():
            if not line:  # filter out keep-alive new lines
                continue
            try:
                yield json.loads(line.decode('utf-8'))
            except (UnicodeDecodeError, json.JSONDecodeError) as exc:
                print(f"Skipping malformed data chunk: {exc}")

    def _post(self, endpoint: str, body: dict):
        """POST ``body`` as JSON to ``endpoint`` and return the streamed response."""
        return requests.post(
            endpoint,
            data=json.dumps(body),
            verify=True,
            headers=self.get_headers(),
            stream=True,
            # Without a timeout a stalled connection would block forever.
            timeout=self.REQUEST_TIMEOUT_SECONDS,
        )

    def submit_query(self, query_str: str) -> str:
        """Run a query and return a summary of the matching text.

        Args:
            query_str: The user's question.

        Returns:
            The summary text on success, ``"No relevant information found."``
            when the stream contained no result text, or a generic apology
            string when the HTTP request fails.
        """
        body = self.get_body(query_str)
        snippets = []
        try:
            # The context manager releases the streamed connection even when
            # we return early or the stream breaks mid-way.
            with self._post(self.QUERY_ENDPOINT, body) as response:
                if response.status_code != 200:
                    print(f"Query failed with code {response.status_code}, reason {response.reason}, text {response.text}")
                    return self._ERROR_MESSAGE
                for data in self._iter_json_chunks(response):
                    print(f"Received data chunk: {json.dumps(data, indent=2)}")  # Debugging line
                    if 'result' not in data:
                        print("No 'result' in data")
                        continue
                    res = data['result']
                    if 'responseSet' not in res:
                        print("No 'responseSet' in result")
                        continue
                    response_set = res['responseSet']
                    if not response_set:
                        continue
                    for result in response_set.get('response', []):
                        if 'text' not in result:
                            print("No 'text' in result")
                            continue
                        text = result['text']
                        print(f"Processing text: {text}")  # Debugging line
                        snippets.append(text)
        except requests.RequestException as exc:
            # Network-level failures get the same friendly message as a
            # non-200 status instead of surfacing a traceback to the user.
            print(f"Query failed with error: {exc}")
            return self._ERROR_MESSAGE

        accumulated_text = "".join(snippets)
        if accumulated_text:
            return self.summarize_text(accumulated_text)
        return self._NO_RESULT_MESSAGE

    def summarize_text(self, text) -> str:
        """Send ``text`` to the summary endpoint and return the summary.

        Args:
            text: The text to summarize; it is also embedded (JSON-escaped)
                as the user message of the prompt.

        Returns:
            The ``text`` field of the first streamed chunk that carries a
            ``'summary'`` object, ``"No relevant information found."`` when
            no chunk does, or a generic apology string when the HTTP request
            fails.
        """
        body = {
            'text': text,
            'summary': self._summary_config(text),
        }
        try:
            with self._post(self.SUMMARY_ENDPOINT, body) as response:
                if response.status_code != 200:
                    print(f"Summary query failed with code {response.status_code}, reason {response.reason}, text {response.text}")
                    return self._ERROR_MESSAGE
                for data in self._iter_json_chunks(response):
                    print(f"Received summary data chunk: {json.dumps(data, indent=2)}")  # Debugging line
                    summary = data.get('summary') if isinstance(data, dict) else None
                    if isinstance(summary, dict) and 'text' in summary:
                        return summary['text']
        except requests.RequestException as exc:
            print(f"Summary query failed with error: {exc}")
            return self._ERROR_MESSAGE
        return self._NO_RESULT_MESSAGE
|