Spaces:
Sleeping
Sleeping
File size: 4,587 Bytes
30d349c 17d7a6f 30d349c 0d707b6 30d349c 7a9af26 30d349c 17d7a6f 30d349c 17d7a6f 30d349c 17d7a6f 30d349c 17d7a6f 30d349c 17d7a6f 30d349c 17d7a6f 30d349c d3f8557 30d349c 4fcd3a3 30d349c 23d19f4 a57d622 4fcd3a3 23d19f4 dc091b9 23d19f4 30d349c 23d19f4 dc091b9 23d19f4 d3f8557 b326e89 23d19f4 dc091b9 23d19f4 d3f8557 a57d622 4fcd3a3 7bbdc33 dc091b9 7bbdc33 77fa41e d3f8557 30d349c d3f8557 7a9af26 7bbdc33 7a9af26 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 |
import requests
import json
class VectaraQuery():
    """Small client that sends a search query to Vectara's v1 streaming endpoint.

    The first search hit that carries text is reformatted into a
    ``Reason / Alternative / Notes`` string by looking for those keywords
    in the hit's text.
    """

    def __init__(self, api_key: str, customer_id: str, corpus_id: str, prompt_name: str = None):
        """Store the credentials and the corpus to query.

        Args:
            api_key: Sent as the ``x-api-key`` request header.
            customer_id: Sent as the ``customer-id`` header and in the corpus key.
            corpus_id: Identifier of the corpus placed in the corpus key.
            prompt_name: Optional prompt name; a built-in default is used when
                it is ``None`` or empty. It is only stored on the instance --
                no method of this class puts it into the request.
        """
        self.customer_id = customer_id
        self.corpus_id = corpus_id
        self.api_key = api_key
        self.prompt_name = prompt_name or "vectara-experimental-summary-ext-2023-12-11-large"
        # Initialised to None and not read by any method in this class.
        self.conv_id = None

    def get_body(self, user_response: str):
        """Build the request payload for a single query.

        Args:
            user_response: The user's query text. It is passed through
                verbatim; JSON escaping is left to the serializer used by
                the caller, so quotes must NOT be pre-escaped here
                (doing so would send literal backslashes to the API).

        Returns:
            A dict with a one-element ``'query'`` list describing the search.
        """
        corpora_key_list = [{
            'customer_id': self.customer_id,
            'corpus_id': self.corpus_id,
            'lexical_interpolation_config': {'lambda': 0.025},
        }]

        return {
            'query': [
                {
                    'query': user_response,
                    'start': 0,
                    'numResults': 10,
                    'corpusKey': corpora_key_list,
                    'context_config': {
                        'sentences_before': 2,
                        'sentences_after': 2,
                        'start_tag': "%START_SNIPPET%",
                        'end_tag': "%END_SNIPPET%",
                    }
                }
            ]
        }

    def get_headers(self):
        """Return the HTTP headers sent with every query request."""
        return {
            "Content-Type": "application/json",
            "Accept": "application/json",
            "customer-id": self.customer_id,
            "x-api-key": self.api_key,
            "grpc-timeout": "60S"
        }

    def submit_query(self, query_str: str):
        """Send ``query_str`` to the streaming endpoint and format the first hit.

        Args:
            query_str: The user's query text.

        Returns:
            The formatted ``Reason/Alternative/Notes`` string built from the
            first result that has text; a fixed apology string when the HTTP
            status is not 200; or ``"No relevant information found."`` when
            the stream ends without a usable result.
        """
        endpoint = "https://api.vectara.io/v1/stream-query"
        body = self.get_body(query_str)

        # The context manager closes the streamed response on every exit
        # path, including the early returns below.
        with requests.post(endpoint, data=json.dumps(body), verify=True,
                           headers=self.get_headers(), stream=True) as response:
            if response.status_code != 200:
                print(f"Query failed with code {response.status_code}, reason {response.reason}, text {response.text}")
                return "Sorry, something went wrong. Please try again later."

            for line in response.iter_lines():
                if not line:  # filter out keep-alive new lines
                    continue
                try:
                    # ValueError covers both bad UTF-8 and malformed JSON.
                    data = json.loads(line.decode('utf-8'))
                except ValueError:
                    print("Skipping chunk that is not valid JSON")
                    continue
                print(f"Received data chunk: {json.dumps(data, indent=2)}")  # Debugging line

                formatted = self._format_chunk(data)
                if formatted is not None:
                    return formatted

        return "No relevant information found."

    def _format_chunk(self, data):
        """Format the first text-bearing result of one decoded stream chunk.

        Returns the formatted string, or ``None`` when the chunk has no
        usable result.
        """
        if 'result' not in data:
            print("No 'result' in data")
            return None
        res = data['result']
        if 'responseSet' not in res:
            print("No 'responseSet' in result")
            return None
        response_set = res['responseSet']
        if not response_set:
            return None

        # A response set without a 'response' list is treated as empty.
        for result in response_set.get('response') or []:
            if 'text' not in result:
                print("No 'text' in result")
                continue
            text = result['text']
            print(f"Processing text: {text}")  # Debugging line

            # Keyword-based extraction of the three expected sections.
            reason = self.extract_text(text, "Reason:", "Alternative:")
            alternative = self.extract_text(text, "Alternative:", "Notes:")
            notes = self.extract_text(text, "Notes:", "")

            formatted = f"Reason: {reason}\nAlternative: {alternative}\nNotes: {notes}"
            print(f"Generated response: {formatted}")  # Debugging line
            return formatted
        return None

    def extract_text(self, text, start_keyword, end_keyword):
        """Return the stripped text between ``start_keyword`` and ``end_keyword``.

        Args:
            text: The string to search.
            start_keyword: Marker whose first occurrence opens the section.
            end_keyword: Marker that closes the section. When it is empty, or
                does not occur after the start marker, the section runs to
                the end of ``text``.

        Returns:
            The section with surrounding whitespace removed, or
            ``"Not available"`` when ``start_keyword`` is not found.
        """
        start_idx = text.find(start_keyword)
        if start_idx == -1:
            return "Not available"
        start_idx += len(start_keyword)

        end_idx = text.find(end_keyword, start_idx) if end_keyword else -1
        if end_idx == -1:
            end_idx = len(text)
        return text[start_idx:end_idx].strip()
|