|
import copy |
|
|
|
import json |
|
import requests |
|
from requests.auth import HTTPBasicAuth |
|
from models.law_component import LawComponent |
|
|
|
# Template for the Elasticsearch search body. It is a bool query with four
# "should" clauses: a `match` and a `match_phrase` on each of the `text` and
# `chapterTitle` fields. Every clause carries a `None` placeholder under
# "query", which ESRetriever.retrieve replaces with the user's text on a deep
# copy, so this template itself is never mutated.
# Clause order is: match/text, match/chapterTitle, match_phrase/text,
# match_phrase/chapterTitle.
base_query = {
    "query": {
        "bool": {
            "should": [
                {match_type: {field_name: {"query": None, "boost": 1.0}}}
                for match_type in ("match", "match_phrase")
                for field_name in ("text", "chapterTitle")
            ],
            "minimum_should_match": 1,
        }
    }
}
|
|
|
class ESRetriever:
    """Retrieve law components from an Elasticsearch index over its REST API.

    The retriever fills the module-level ``base_query`` template with the
    user's text, sends it to ``<es_host>/<es_index_name>/_search`` and turns
    every hit into a ``LawComponent``.
    """

    def __init__(self, es_host, es_index_name, es_username="", es_password="",
                 timeout=30.0):
        """Store the connection settings and prepare optional basic auth.

        Args:
            es_host: Base URL of the Elasticsearch server, with or without a
                trailing slash.
            es_index_name: Name of the index to search.
            es_username: User name for HTTP basic auth; empty disables auth.
            es_password: Password for HTTP basic auth; empty disables auth.
            timeout: Value passed as ``timeout`` to ``requests.get``. Pass
                ``None`` to wait indefinitely (the previous behaviour).
        """
        self.es_host = es_host
        self.es_index_name = es_index_name
        self.es_username = es_username
        self.es_password = es_password
        self.timeout = timeout

        # Authenticate only when BOTH credentials are present. Truthiness
        # (rather than `!= ""`) also treats None as "no credential".
        if es_username and es_password:
            self.auth = HTTPBasicAuth(es_username, es_password)
        else:
            self.auth = None

    @property
    def search_url(self):
        """Return the ``_search`` endpoint URL for the configured index.

        Slashes at the host/index boundary are normalised, so the URL is
        well-formed whether or not ``es_host`` ends with ``/``.
        """
        host = self.es_host.rstrip("/")
        index = self.es_index_name.strip("/")
        return "{}/{}/_search".format(host, index)

    def retrieve(self, query_text: str):
        """Search the index for ``query_text``.

        Args:
            query_text: Free text placed into every clause of the query
                template.

        Returns:
            A list of ``LawComponent`` objects, one per hit and in the order
            Elasticsearch returned them, or ``None`` when the server answers
            with an error status (``response.ok`` is false).

        Raises:
            requests.exceptions.RequestException: On connection problems or
                when the configured timeout expires.
        """
        # Work on a deep copy so the shared template keeps its placeholders.
        query = copy.deepcopy(base_query)

        # Every "should" clause has the shape
        #   {match_type: {field: {"query": ..., "boost": ...}}}
        # so fill the text into each one without relying on clause positions.
        for clause in query["query"]["bool"]["should"]:
            for field_map in clause.values():
                for field_options in field_map.values():
                    field_options["query"] = query_text

        response = requests.get(
            self.search_url,
            headers={"Content-Type": "application/json"},
            data=json.dumps(query),
            auth=self.auth,
            # Without a timeout a stalled server would block the caller forever.
            timeout=self.timeout,
        )

        if not response.ok:
            # Preserve the original contract: failures are signalled by None.
            return None

        retrieval_results = []
        for hit in response.json()["hits"]["hits"]:
            source = hit["_source"]
            lc = LawComponent.from_uri(source["uri"])
            lc.set_text(source["text"])
            retrieval_results.append(lc)
        return retrieval_results
|
|
|
|
|
|