import requests
import json
import re


class VectaraQuery:
    """Minimal client for Vectara's v1 streaming query API."""

    def __init__(self, api_key: str, customer_id: str, corpus_id: str, prompt_name: str = None):
        self.customer_id = customer_id
        self.corpus_id = corpus_id
        self.api_key = api_key
        self.prompt_name = prompt_name if prompt_name else "vectara-experimental-summary-ext-2023-12-11-large"
        self.conv_id = None
    def get_body(self, user_response: str):
        """Build the JSON body for a Vectara query request."""
        corpora_key_list = [{
            'customer_id': self.customer_id,
            'corpus_id': self.corpus_id,
            'lexical_interpolation_config': {'lambda': 0.025}
        }]

        user_response = user_response.replace('"', '\\"')  # Escape double quotes

        # Chat-style prompt template. Note: it is constructed here but is not
        # currently attached to the request body below.
        prompt = f'''
        [
            {{
                "role": "system",
                "content": "You are an assistant that provides information about drink names based on a given corpus."
            }},
            {{
                "role": "user",
                "content": "{user_response}"
            }}
        ]
        '''

        return {
            'query': [
                {
                    'query': user_response,
                    'start': 0,
                    'numResults': 10,
                    'corpusKey': corpora_key_list,
                    'context_config': {
                        'sentences_before': 2,
                        'sentences_after': 2,
                        'start_tag': "%START_SNIPPET%",
                        'end_tag': "%END_SNIPPET%",
                    }
                }
            ]
        }
    def get_headers(self):
        """Build the authentication and content headers for a Vectara request."""
        return {
            "Content-Type": "application/json",
            "Accept": "application/json",
            "customer-id": self.customer_id,
            "x-api-key": self.api_key,
            "grpc-timeout": "60S"
        }
    def submit_query(self, query_str: str):
        """Submit a query to the streaming endpoint and return a formatted answer."""
        endpoint = "https://api.vectara.io/v1/stream-query"
        body = self.get_body(query_str)

        response = requests.post(endpoint, data=json.dumps(body), verify=True,
                                 headers=self.get_headers(), stream=True)
        if response.status_code != 200:
            print(f"Query failed with code {response.status_code}, reason {response.reason}, text {response.text}")
            return "Sorry, something went wrong. Please try again later."

        for line in response.iter_lines():
            if not line:  # filter out keep-alive new lines
                continue
            data = json.loads(line.decode('utf-8'))
            if 'result' not in data:
                continue
            res = data['result']
            if 'responseSet' not in res:
                continue
            response_set = res['responseSet']
            if not response_set:
                continue
            for result in response_set:
                if 'text' not in result:
                    continue
                text = result['text']

                # Extract the relevant fields from the snippet text
                reason_match = re.search(r"Reason Why it Can't be Used: (.*?)\n", text)
                alternative_match = re.search(r"Alternative: (.*?)\n", text)
                notes_match = re.search(r"Notes: (.*?)\n", text)

                reason = reason_match.group(1) if reason_match else "Not available"
                alternative = alternative_match.group(1) if alternative_match else "Not available"
                notes = notes_match.group(1) if notes_match else "Not available"

                # Return the first snippet's extracted fields as the answer
                return f"Reason: {reason}\nAlternative: {alternative}\nNotes: {notes}"

        return "No relevant information found."
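

# Usage sketch: the credential values and the query string below are placeholders
# for illustration only; replace them with your own Vectara customer ID, corpus ID,
# API key, and question before running.
if __name__ == "__main__":
    vq = VectaraQuery(
        api_key="YOUR_VECTARA_API_KEY",   # placeholder
        customer_id="YOUR_CUSTOMER_ID",   # placeholder
        corpus_id="YOUR_CORPUS_ID",       # placeholder
    )
    answer = vq.submit_query("Can this drink ingredient be used?")  # illustrative query
    print(answer)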