import boto3
import json
import os
import streamlit as st
import base64
import re
import requests
#import utilities.re_ranker as re_ranker
import utilities.invoke_models as invoke_models
#import langchain

headers = {"Content-Type": "application/json"}
host = "https://search-opensearchservi-shjckef2t7wo-iyv6rajdgxg6jas25aupuxev6i.us-west-2.es.amazonaws.com/"
parent_dirname = "/".join((os.path.dirname(__file__)).split("/")[0:-1])


def query_(awsauth, inputs, session_id, search_types):
    print("using index: " + st.session_state.input_index)
    question = inputs['query']

    ####### MULTIMODAL SEARCH: retrieve the closest image for the question ########
    k = 1
    embedding = invoke_models.invoke_model_mm(question, "none")
    query_mm = {
        "size": k,
        "_source": {
            "exclude": [
                "processed_element_embedding_bedrock-multimodal",
                "processed_element_embedding_sparse",
                "image_encoding",
                "processed_element_embedding"
            ]
        },
        "query": {
            "knn": {
                "processed_element_embedding_bedrock-multimodal": {
                    "vector": embedding,
                    "k": k
                }
            }
        }
    }
    path = st.session_state.input_index + "_mm/_search"
    url = host + path
    r = requests.get(url, auth=awsauth, json=query_mm, headers=headers)
    response_mm = json.loads(r.text)
    hits = response_mm['hits']['hits']

    context = []
    context_tables = []
    images = []
    for hit in hits:
        images.append({'file': hit['_source']['image'], 'caption': hit['_source']['processed_element']})

    ####### SEARCH ########
    num_queries = len(search_types)
    weights = []
    searches = ['Keyword', 'Vector', 'NeuralSparse']
    equal_weight = int(100 / num_queries) / 100

    s_pipeline_payload = {}
    s_pipeline_path = "_search/pipeline/rag-search-pipeline"

    if(st.session_state.input_is_rerank):
        s_pipeline_payload["response_processors"] = [
            {
                "rerank": {
                    "ml_opensearch": {
                        "model_id": "deBS3pYB5VHEj-qVuPHT"
                    },
                    "context": {
                        "document_fields": ["processed_element"]
                    }
                }
            }
        ]

    if(num_queries > 1):
        # Distribute scoring weights equally across the selected sub-queries;
        # the last weight absorbs any rounding remainder so the weights sum to 1.
        for index, search in enumerate(search_types):
            if(index != (num_queries - 1)):
                weight = equal_weight
            else:
                weight = 1 - sum(weights)
            weights.append(weight)
        s_pipeline_payload["phase_results_processors"] = [
            {
                "normalization-processor": {
                    "normalization": {
                        "technique": "min_max"
                    },
                    "combination": {
                        "technique": "arithmetic_mean",
                        "parameters": {
                            "weights": weights
                        }
                    }
                }
            }
        ]

    SIZE = 5
    hybrid_payload = {
        "_source": {
            "exclude": [
                "processed_element_embedding",
                "processed_element_embedding_sparse"
            ]
        },
        "query": {
            "hybrid": {
                # Populated below with up to three sub-queries:
                # 1. keyword query, 2. vector search query, 3. sparse query
                "queries": []
            }
        },
        "size": SIZE,
    }
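    # Illustrative note (a worked example, not executed): if search_types is
    # ['Keyword Search', 'Vector Search', 'Sparse Search'], then num_queries = 3,
    # equal_weight = int(100 / 3) / 100 = 0.33, and weights ends up as
    # [0.33, 0.33, 0.34]. The normalization processor min-max-normalizes each
    # sub-query's scores and combines them as a weighted arithmetic mean once
    # the hybrid "queries" list below has been filled in.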
    if('Keyword Search' in search_types):
        keyword_payload = {
            "match": {
                "processed_element": {
                    "query": question
                }
            }
        }
        hybrid_payload["query"]["hybrid"]["queries"].append(keyword_payload)

    if('Vector Search' in search_types):
        embedding = invoke_models.invoke_model(question)
        vector_payload = {
            "knn": {
                "processed_element_embedding": {
                    "vector": embedding,
                    "k": 2
                }
            }
        }
        hybrid_payload["query"]["hybrid"]["queries"].append(vector_payload)

    if('Sparse Search' in search_types):
        sparse_payload = {
            "neural_sparse": {
                "processed_element_embedding_sparse": {
                    "query_text": question,
                    "model_id": "fkol-ZMBTp0efWqBcO2P"
                }
            }
        }
        hybrid_payload["query"]["hybrid"]["queries"].append(sparse_payload)

    # path2 = "_plugins/_ml/models/srrJ-owBQhe1aB-khx2n/_predict"
    # url2 = host + path2
    # payload2 = {
    #     "parameters": {
    #         "inputs": question
    #     }
    # }
    # r2 = requests.post(url2, auth=awsauth, json=payload2, headers=headers)
    # sparse_ = json.loads(r2.text)
    # query_sparse = sparse_["inference_results"][0]["output"][0]["dataAsMap"]["response"][0]

    hits = []
    if(num_queries > 1):
        # Create/update the search pipeline, then route the search through it.
        s_pipeline_url = host + s_pipeline_path
        r = requests.put(s_pipeline_url, auth=awsauth, json=s_pipeline_payload, headers=headers)
        path = st.session_state.input_index + "/_search?search_pipeline=rag-search-pipeline"
    else:
        if(st.session_state.input_is_rerank):
            path = st.session_state.input_index + "/_search?search_pipeline=rerank_pipeline_rag"
        else:
            path = st.session_state.input_index + "/_search"
    url = host + path

    if(len(hybrid_payload["query"]["hybrid"]["queries"]) == 1):
        # A single sub-query does not need the hybrid clause; unwrap it.
        single_query = hybrid_payload["query"]["hybrid"]["queries"][0]
        del hybrid_payload["query"]["hybrid"]
        hybrid_payload["query"] = single_query
        if(st.session_state.input_is_rerank):
            hybrid_payload["ext"] = {"rerank": {"query_context": {"query_text": question}}}
        r = requests.get(url, auth=awsauth, json=hybrid_payload, headers=headers)
        response_ = json.loads(r.text)
        print(response_)
        hits = response_['hits']['hits']
    else:
        if(st.session_state.input_is_rerank):
            hybrid_payload["ext"] = {"rerank": {"query_context": {"query_text": question}}}
        r = requests.get(url, auth=awsauth, json=hybrid_payload, headers=headers)
        response_ = json.loads(r.text)
        hits = response_['hits']['hits']

    ##### GET reference tables separately, like the *_mm index search for images ######
    # def lazy_get_table():
    #     table_ref = []
    #     any_table_exists = False
    #     for fname in os.listdir(parent_dirname + "/split_pdf_csv"):
    #         if fname.startswith(st.session_state.input_index):
    #             any_table_exists = True
    #             break
    #     if(any_table_exists):
    #         #################### Basic Match query #################
    #         # payload_tables = {
    #         #     "query": {
    #         #         "bool": {
    #         #             "must": {"match": {
    #         #                 "processed_element": question
    #         #             }},
    #         #             "filter": {"term": {"raw_element_type": "table"}}
    #         #         }}}
    #         #################### Neural Sparse query #################
    #         payload_tables = {"query": {"neural_sparse": {
    #             "processed_element_embedding_sparse": {
    #                 "query_text": question,
    #                 "model_id": "fkol-ZMBTp0efWqBcO2P"
    #             }
    #         }}}
    #         r_ = requests.get(url, auth=awsauth, json=payload_tables, headers=headers)
    #         r_tables = json.loads(r_.text)
    #         for res_ in r_tables['hits']['hits']:
    #             if(res_["_source"]['raw_element_type'] == 'table'):
    #                 table_ref.append({'name': res_["_source"]['table'], 'text': res_["_source"]['processed_element']})
    #             if(len(table_ref) == 2):
    #                 break
    #     return table_ref

    ########################### LLM Generation ########################
    prompt_template = """The following is a friendly conversation between a human and an AI.
The AI is talkative and provides lots of specific details from its context.

{context}

Instruction: Based on the above documents, provide a detailed answer for, {question}.
Answer "don't know" if not present in the context.

Solution:"""
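    # An illustrative rendering (not executed) of the final string sent to the
    # LLM further below, assuming question = "What was Q3 revenue?" and one
    # retrieved passage as context:
    #
    #   Human: The following is a friendly conversation between a human and an AI.
    #   The AI is talkative and provides lots of specific details from its context.
    #
    #   <retrieved passage>
    #
    #   Instruction: Based on the above documents, provide a detailed answer for,
    #   What was Q3 revenue?.
    #   Answer "don't know" if not present in the context.
    #
    #   Solution:
    #
    #   Assistant: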
    idx = 0
    images_2 = []
    is_table_in_result = False
    df = []
    # Route the top hits by element type: tables are analyzed with an LLM,
    # images are captioned with a multimodal LLM, and plain text is used as-is.
    for hit in hits[0:5]:
        if(hit["_source"]["raw_element_type"] == 'table'):
            #print("Need to analyse table")
            is_table_in_result = True
            table_res = invoke_models.read_from_table(hit["_source"]["table"], question)
            df.append({'name': hit["_source"]["table"], 'text': hit["_source"]["processed_element"]})
            context_tables.append(table_res + "\n\n" + hit["_source"]["processed_element"])
        else:
            if(hit["_source"]["image"] != "None"):
                with open(parent_dirname + '/figures/' + st.session_state.input_index + "/" + hit["_source"]["raw_element_type"].split("_")[1].replace(".jpg", "") + "-resized.jpg", "rb") as read_img:
                    input_encoded = base64.b64encode(read_img.read()).decode("utf8")
                context.append(invoke_models.generate_image_captions_llm(input_encoded, question))
            else:
                context.append(hit["_source"]["processed_element"])
            if(hit["_source"]["image"] != "None"):
                images_2.append({'file': hit["_source"]["image"], 'caption': hit["_source"]["processed_element"]})
        idx = idx + 1

    # if(is_table_in_result == False):
    #     df = lazy_get_table()
    #     print("forcefully selected top 2 tables")
    #     print(df)
    #     for pos, table in enumerate(df):
    #         table_res = invoke_models.read_from_table(table['name'], question)
    #         context_tables.append(table_res)  # +"\n\n"+table['text']

    # Note: only the first context element is interpolated into the prompt;
    # the full list is still returned to the caller as 'source'.
    total_context = context_tables + context
    llm_prompt = prompt_template.format(context=total_context[0], question=question)
    output = invoke_models.invoke_llm_model(
        "\n\nHuman: {input}\n\nAssistant:".format(input=llm_prompt), False)
    if(len(images_2) == 0):
        images_2 = images
    return {'text': output, 'source': total_context, 'image': images_2, 'table': df}
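
# A minimal usage sketch (illustrative only; the credentials, region, and
# Streamlit session state are assumptions, and query_ is normally called from
# the app's chat page with a SigV4 auth object built via requests_aws4auth):
#
#   from requests_aws4auth import AWS4Auth
#   import boto3
#
#   credentials = boto3.Session().get_credentials()
#   awsauth = AWS4Auth(credentials.access_key, credentials.secret_key,
#                      "us-west-2", "es", session_token=credentials.token)
#   result = query_(awsauth, {'query': "What does figure 3 show?"},
#                   session_id="demo-session",
#                   search_types=['Keyword Search', 'Vector Search'])
#   print(result['text'])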