Spaces:
Sleeping
Sleeping
File size: 4,890 Bytes
30d349c 17d7a6f 30d349c 0d707b6 30d349c 17d7a6f 30d349c 17d7a6f 30d349c 17d7a6f 30d349c 17d7a6f 30d349c 17d7a6f 30d349c 17d7a6f 30d349c 17d7a6f 30d349c d3f8557 30d349c 4fcd3a3 30d349c 23d19f4 a57d622 4fcd3a3 23d19f4 dc091b9 23d19f4 30d349c 23d19f4 dc091b9 23d19f4 d3f8557 b326e89 23d19f4 dc091b9 23d19f4 d3f8557 a57d622 4fcd3a3 77fa41e dc091b9 23d19f4 dc091b9 4fcd3a3 30d349c d3f8557 77fa41e d3f8557 30d349c d3f8557 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 |
import requests
import json
import re
class VectaraQuery:
    """Small client that runs a streamed search against one Vectara corpus.

    ``submit_query`` posts the search request, reads the streamed JSON
    chunks, and condenses the first result that carries a ``text`` field into
    a three-line ``Reason / Alternative / Notes`` answer.
    """

    _DEFAULT_PROMPT_NAME = "vectara-experimental-summary-ext-2023-12-11-large"
    _ENDPOINT = "https://api.vectara.io/v1/stream-query"
    # (connect, read) timeouts in seconds. Without a timeout ``requests``
    # waits forever on a stalled connection; the read value matches the
    # "60S" sent in the ``grpc-timeout`` header.
    _REQUEST_TIMEOUT = (10, 60)
    _ERROR_MESSAGE = "Sorry, something went wrong. Please try again later."
    _NO_RESULT_MESSAGE = "No relevant information found."
    _MISSING_FIELD = "Not available"

    # Output label -> patterns tried in order; the first pattern that matches
    # supplies the value (capture group 1). Compiled once at class creation
    # instead of on every processed result.
    _FIELD_PATTERNS = (
        ("Reason", (
            re.compile(r"Reason Why it Can't be Used:\s*(.*?)(?:\n|$)", re.DOTALL),
            re.compile(r"DISCUSSION\s*-\s*(.*?)(?=\n|\r\n)", re.DOTALL),
        )),
        ("Alternative", (
            re.compile(r"Alternative:\s*(.*?)(?:\n|$)", re.DOTALL),
            re.compile(r"Alternative\s*:\s*(.*?)(?=\n|\r\n)", re.DOTALL),
        )),
        ("Notes", (
            re.compile(r"Notes:\s*(.*?)(?:\n|$)", re.DOTALL),
            re.compile(r"Notes\s*:\s*(.*?)(?=\n|\r\n)", re.DOTALL),
        )),
    )

    def __init__(self, api_key: str, customer_id: str, corpus_id: str, prompt_name: str = None):
        """Store the credentials and corpus identifiers used by every request.

        Args:
            api_key: Value sent in the ``x-api-key`` header.
            customer_id: Value sent in the ``customer-id`` header and in the
                corpus key of the request body.
            corpus_id: Corpus to search.
            prompt_name: Optional prompt name; a built-in default is used when
                it is ``None`` or empty. It is only stored on the instance --
                the request body built by ``get_body`` does not reference it.
        """
        self.customer_id = customer_id
        self.corpus_id = corpus_id
        self.api_key = api_key
        self.prompt_name = prompt_name or self._DEFAULT_PROMPT_NAME
        self.conv_id = None

    def get_body(self, user_response: str) -> dict:
        """Build the JSON-serializable request body for a search query.

        Args:
            user_response: The user's query text. It is passed through
                unchanged: ``json.dumps`` escapes quotes when the body is
                serialized, so escaping here would put literal backslashes
                into the query that is actually searched.

        Returns:
            A dict with a single entry under ``'query'`` describing the
            search (query text, paging, corpus key and snippet context).
        """
        corpora_key_list = [{
            'customer_id': self.customer_id,
            'corpus_id': self.corpus_id,
            'lexical_interpolation_config': {'lambda': 0.025},
        }]
        return {
            'query': [
                {
                    'query': user_response,
                    'start': 0,
                    'numResults': 10,
                    'corpusKey': corpora_key_list,
                    'context_config': {
                        'sentences_before': 2,
                        'sentences_after': 2,
                        'start_tag': "%START_SNIPPET%",
                        'end_tag': "%END_SNIPPET%",
                    },
                }
            ]
        }

    def get_headers(self) -> dict:
        """Return the HTTP headers sent with every query request."""
        return {
            "Content-Type": "application/json",
            "Accept": "application/json",
            # requests rejects non-string header values, so coerce defensively.
            "customer-id": str(self.customer_id),
            "x-api-key": self.api_key,
            "grpc-timeout": "60S",
        }

    def submit_query(self, query_str: str) -> str:
        """Send ``query_str`` to the stream-query endpoint and return an answer.

        Args:
            query_str: The user's query text.

        Returns:
            The formatted ``Reason / Alternative / Notes`` answer built from
            the first streamed result that has a ``text`` field; a generic
            apology string when the request fails (non-200 status or a
            network/timeout error); or a "no relevant information" string
            when the stream ends without any usable result.
        """
        body = self.get_body(query_str)
        try:
            # The context manager releases the streamed connection on every
            # exit path, including the early returns below.
            with requests.post(
                self._ENDPOINT,
                data=json.dumps(body),
                verify=True,
                headers=self.get_headers(),
                stream=True,
                timeout=self._REQUEST_TIMEOUT,
            ) as response:
                if response.status_code != 200:
                    print(f"Query failed with code {response.status_code}, reason {response.reason}, text {response.text}")
                    return self._ERROR_MESSAGE
                for line in response.iter_lines():
                    if not line:  # filter out keep-alive new lines
                        continue
                    answer = self._answer_from_chunk(line)
                    if answer is not None:
                        return answer
        except requests.RequestException as exc:
            # Connection errors, timeouts and broken streams are reported the
            # same way as a non-200 status instead of crashing the caller.
            print(f"Query failed with exception: {exc}")
            return self._ERROR_MESSAGE
        return self._NO_RESULT_MESSAGE

    @classmethod
    def _answer_from_chunk(cls, line: bytes):
        """Return the formatted answer found in one streamed line, or ``None``.

        ``None`` means the chunk is not valid JSON or holds no result with a
        ``text`` field, so the caller should move on to the next chunk.
        """
        try:
            data = json.loads(line.decode('utf-8'))
        except ValueError:  # covers both bad UTF-8 and bad JSON
            print("Skipping chunk that is not valid JSON")
            return None
        print(f"Received data chunk: {json.dumps(data, indent=2)}")  # Debugging line

        if not isinstance(data, dict) or 'result' not in data:
            print("No 'result' in data")
            return None
        res = data['result']
        if not isinstance(res, dict) or 'responseSet' not in res:
            print("No 'responseSet' in result")
            return None
        response_set = res['responseSet']
        if not isinstance(response_set, dict):
            return None

        for result in response_set.get('response') or []:
            if not isinstance(result, dict) or 'text' not in result:
                print("No 'text' in result")
                continue
            text = result['text']
            print(f"Processing text: {text}")  # Debugging line
            answer = cls._format_answer(text)
            print(f"Generated response: {answer}")  # Debugging line
            return answer
        return None

    @classmethod
    def _format_answer(cls, text: str) -> str:
        """Extract the reason, alternative and notes fields from ``text``.

        Each field falls back to "Not available" when none of its patterns
        match.
        """
        parts = []
        for label, patterns in cls._FIELD_PATTERNS:
            value = cls._MISSING_FIELD
            for pattern in patterns:
                match = pattern.search(text)
                if match:
                    value = match.group(1).strip()
                    break
            parts.append(f"{label}: {value}")
        return "\n".join(parts)
|