"""Gradio app for multilingual contract analysis: NLLB-200 translation,
CUAD key-clause extraction, red-flag highlighting, semantic clause search,
question generation for templates, and template filling from free text."""

from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import torch
from mosestokenizer import MosesSentenceSplitter
from indicnlp.tokenize import sentence_tokenize
import docx
from docx import Document

import os
import time
import json
import re
from torch.utils.data import DataLoader, SequentialSampler

from transformers import (
    AutoConfig,
    AutoModelForQuestionAnswering,
    squad_convert_examples_to_features
)

from transformers.data.processors.squad import SquadResult, SquadExample
from transformers.data.metrics.squad_metrics import compute_predictions_logits


# One-time setup: fetch the CUAD dataset and the fine-tuned RoBERTa-base
# extraction model released with it.
os.system('git clone https://github.com/TheAtticusProject/cuad.git')
os.system('mv cuad cuad-training')
os.system('unzip cuad-training/data.zip -d cuad-data/')
os.system('mkdir cuad-models')
os.system('curl https://zenodo.org/record/4599830/files/roberta-base.zip?download=1 --output cuad-models/roberta-base.zip')
os.system('unzip cuad-models/roberta-base.zip -d cuad-models/')



trans_tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
trans_model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
trans_model = trans_model.to(device)



# NLLB-200 language codes for the supported languages.
lang_dict = {
    'english': 'eng_Latn',
    'assamese': 'asm_Beng',
    'awadhi': 'awa_Deva',
    'bengali': 'ben_Beng',
    'bhojpuri': 'bho_Deva',
    'gujarati': 'guj_Gujr',
    'hindi': 'hin_Deva',
    'kannada': 'kan_Knda',
    'kashmiri': 'kas_Deva',
    'maithili': 'mai_Deva',
    'malayalam': 'mal_Mlym',
    'marathi': 'mar_Deva',
    'odia': 'ory_Orya',
    'punjabi': 'pan_Guru',
    'sanskrit': 'san_Deva',
    'sindhi': 'snd_Arab',
    'tamil': 'tam_Taml',
    'telugu': 'tel_Telu',
    'urdu': 'urd_Arab'
}

def translate_sentence(article, target):
    inputs = trans_tokenizer(article.replace("\"", ""), return_tensors="pt").to(device)

    # Force the decoder to start with the target-language code so NLLB
    # generates in the requested language.
    translated_tokens = trans_model.generate(
        **inputs, forced_bos_token_id=trans_tokenizer.lang_code_to_id[lang_dict[target]], max_length=100)

    return trans_tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
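
# Quick usage sketch (commented out so module import stays side-effect free):
#   translate_sentence("This agreement is effective immediately.", "hindi")
#   -> the Hindi rendering of the sentence as a plain string.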



INDIC_DICT = {"assamese" :"as",   'bengali' : 'bn', 'gujarati' :	'gu', 
    'hindi' : 'hi',
    'kannada' :	'kn',
    'malayalam' :	'ml',
    'marathi' :	'mr',
    'odia' :	'or',
    'punjabi' :	'pa',
    'tamil' :	'ta' ,
    'telugu' :	'te'}

def split_sentences(paragraph, language):
    if language in INDIC_DICT:
        return sentence_tokenize.sentence_split(paragraph, lang=INDIC_DICT[language])
    elif language == 'english':
        # Callers pass full language names, so compare against 'english'
        # (the original 'en' comparison never matched).
        with MosesSentenceSplitter('en') as splitter:
            return splitter([paragraph])
    else:
        # Fall back to a naive split for languages without a dedicated splitter.
        return paragraph.split(".")
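
# e.g. split_sentences("This is one. This is two.", "english")
#   -> ['This is one.', 'This is two.']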

def translate_paragraph(paragraph, source, target):
    if source == target:
        return paragraph
    # Short paragraphs go through in one shot; longer ones are split into
    # sentences so each stays within the generation length limit.
    if len(paragraph.split()) < 100:
        return translate_sentence(paragraph, target)
    sentences = split_sentences(paragraph, source)
    return " ".join(translate_sentence(each, target) for each in sentences)

def docx_replace(doc, data):
    """Replace paragraph text in a python-docx Document, preserving run styles.

    `data` is a list of single-entry dicts mapping original paragraph text to
    its replacement.
    """
    paragraphs = list(doc.paragraphs)
    for t in doc.tables:
        for row in t.rows:
            for cell in row.cells:
                for paragraph in cell.paragraphs:
                    paragraphs.append(paragraph)
    
    for each in data:
        key_name = list(each.keys())[0]
        val = list(each.values())[0]
        for p in paragraphs:
            if key_name in p.text:
                inline = p.runs
                # Replace strings and retain the same style. The text to be
                # replaced can be split over several runs, so search through,
                # identify which runs need to have text replaced, then
                # replace the text in those identified.
                started = False
                key_index = 0
                # found_runs is a list of (inline index, index of match, length of match)
                found_runs = list()
                found_all = False
                replace_done = False
                for i in range(len(inline)):

                    # case 1: found in single run so short circuit the replace
                    if key_name in inline[i].text and not started:
                        found_runs.append((i, inline[i].text.find(key_name), len(key_name)))
                        text = inline[i].text.replace(key_name, str(val))
                        inline[i].text = text
                        replace_done = True
                        found_all = True
                        break

                    if key_name[key_index] not in inline[i].text and not started:
                        # keep looking ...
                        continue

                    # case 2: search for partial text, find first run
                    if key_name[key_index] in inline[i].text and inline[i].text[-1] in key_name and not started:
                        # check sequence
                        start_index = inline[i].text.find(key_name[key_index])
                        check_length = len(inline[i].text)
                        for text_index in range(start_index, check_length):
                            if inline[i].text[text_index] != key_name[key_index]:
                                # no match so must be false positive
                                break
                        if key_index == 0:
                            started = True
                        chars_found = check_length - start_index
                        key_index += chars_found
                        found_runs.append((i, start_index, chars_found))
                        if key_index != len(key_name):
                            continue
                        else:
                            # found all chars in key_name
                            found_all = True
                            break

                    # case 3: search for partial text, find subsequent run
                    if key_name[key_index] in inline[i].text and started and not found_all:
                        # check sequence
                        chars_found = 0
                        check_length = len(inline[i].text)
                        for text_index in range(0, check_length):
                            if inline[i].text[text_index] == key_name[key_index]:
                                key_index += 1
                                chars_found += 1
                            else:
                                break
                        # no match so must be end
                        found_runs.append((i, 0, chars_found))
                        if key_index == len(key_name):
                            found_all = True
                            break

                if found_all and not replace_done:
                    for i, item in enumerate(found_runs):
                        index, start, length = [t for t in item]
                        if i == 0:
                            text = inline[index].text.replace(inline[index].text[start:start + length], str(val))
                            inline[index].text = text
                        else:
                            text = inline[index].text.replace(inline[index].text[start:start + length], '')
                            inline[index].text = text
                break
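
# docx_replace takes a list of single-entry dicts; hypothetical example:
#   docx_replace(doc, [{"Party A shall pay rent.": "Party B shall pay rent."}])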

# Cache of already-translated documents, keyed by the first paragraph's text.
input_output_trans = {}


def translate_fill(document_name, output_file, src, trg):
    print("translate doc")

    # Serve a cached translation if this document was translated before.
    doc = docx.Document(document_name)
    if doc.paragraphs[0].text in input_output_trans:
        lang_doc_dict = input_output_trans[doc.paragraphs[0].text]
        if trg in lang_doc_dict:
            time.sleep(2)
            return lang_doc_dict[trg]

    template_document = Document(document_name)

    # Collect {original text: translated text} pairs for every non-empty
    # paragraph, both in the body and inside tables.
    variables = []
    for paragraph in template_document.paragraphs:
        if paragraph.text.strip() != "":
            variables.append({paragraph.text: translate_paragraph(paragraph.text, src, trg)})

    for t in template_document.tables:
        for row in t.rows:
            for cell in row.cells:
                for paragraph in cell.paragraphs:
                    if paragraph.text.strip() != "":
                        variables.append({paragraph.text: translate_paragraph(paragraph.text, src, trg)})

    docx_replace(template_document, variables)
    template_document.save(output_file)
    return output_file
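
# e.g. (commented out; hypothetical filenames):
#   translate_fill("contract.docx", "contract_hindi.docx", "english", "hindi")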



def translate_txt(document_name, output_file, src, trg):
      print("translate text")
      with open(document_name) as fp:
          lines = [line.rstrip() for line in fp.readlines()]

      with open(output_file, 'w') as f:
          for line in lines:
              if line != "":
                  f.write(translate_paragraph(line, src, trg) + "\n")
              else:
                  f.write("\n")

      return output_file

# Load the CUAD fine-tuned RoBERTa-base extraction model downloaded above.
info_model_path = 'cuad-models/roberta-base/'
info_config_class, info_model_class, info_tokenizer_class = (
        AutoConfig, AutoModelForQuestionAnswering, AutoTokenizer)
info_config = info_config_class.from_pretrained(info_model_path)
info_tokenizer = info_tokenizer_class.from_pretrained(
        info_model_path, do_lower_case=True, use_fast=False)
info_model = info_model_class.from_pretrained(info_model_path, config=info_config)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
info_model.to(device)

def run_prediction(question_texts, context_text):
    # Hyperparameters for feature conversion and answer decoding.
    max_seq_length = 512
    doc_stride = 256
    n_best_size = 1
    max_query_length = 64
    max_answer_length = 512
    do_lower_case = False
    null_score_diff_threshold = 0.0

    def to_list(tensor):
        return tensor.detach().cpu().tolist()

    examples = []

    for i, question_text in enumerate(question_texts):
        example = SquadExample(
            qas_id=str(i),
            question_text=question_text,
            context_text=context_text,
            answer_text=None,
            start_position_character=None,
            title="Predict",
            answers=None,
        )

        examples.append(example)

    features, dataset = squad_convert_examples_to_features(
        examples=examples,
        tokenizer=info_tokenizer,
        max_seq_length=max_seq_length,
        doc_stride=doc_stride,
        max_query_length=max_query_length,
        is_training=False,
        return_dataset="pt",
        threads=1,
    )
    
    eval_sampler = SequentialSampler(dataset)
    eval_dataloader = DataLoader(dataset, sampler=eval_sampler, batch_size=10)
       
    all_results = []

    for batch in eval_dataloader:
        info_model.eval()
        batch = tuple(t.to(device) for t in batch)

        with torch.no_grad():
            inputs = {
                "input_ids": batch[0],
                "attention_mask": batch[1],
                "token_type_ids": batch[2],
            }

            example_indices = batch[3]

            outputs = info_model(**inputs)

            for i, example_index in enumerate(example_indices):
                eval_feature = features[example_index.item()]
                unique_id = int(eval_feature.unique_id)

                output = [to_list(output[i]) for output in outputs.to_tuple()]

                start_logits, end_logits = output
                result = SquadResult(unique_id, start_logits, end_logits)
                all_results.append(result)

    final_predictions = compute_predictions_logits(
        all_examples=examples,
        all_features=features,
        all_results=all_results,
        n_best_size=n_best_size,
        max_answer_length=max_answer_length,
        do_lower_case=do_lower_case,
        output_prediction_file=None,
        output_nbest_file=None,
        output_null_log_odds_file=None,
        verbose_logging=False,
        version_2_with_negative=True,
        null_score_diff_threshold=null_score_diff_threshold,
        tokenizer=info_tokenizer
    )

    return final_predictions 
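
# Example (commented out; requires the downloaded CUAD model):
#   preds = run_prediction(["Highlight the parts related to the governing law."],
#                          "This Agreement shall be governed by the laws of India.")
#   `preds` is an OrderedDict mapping question ids ('0', '1', ...) to the
#   predicted answer spans (empty string when no answer is found).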


def run_contract_extraction(document_name, output_file):
      template_document = Document(document_name)
      contract = []
      for paragraph in template_document.paragraphs:
            if paragraph.text.strip() != '':
              contract.append(paragraph.text)

      contract = "\n".join(contract)

      # Ask every CUAD category question against the full contract text.
      with open('./cuad-data/CUADv1.json') as json_file:
        data = json.load(json_file)

      questions = [qa['question'] for qa in data['data'][0]['paragraphs'][0]['qas']]

      predictions = run_prediction(questions, contract)

      with open(output_file, 'w') as f:
          count = 1
          for p in predictions:
              if predictions[p] != '':
                f.write("Question " + str(count) + ": " + questions[int(p)] +
                        "\nPredicted Answer: " + predictions[p] + "\n\n")
                count += 1

      return output_file

# Cache of already-analyzed contracts, keyed by the first paragraph's text.
input_output_key = {}

def run_key_clause(document_name, output_name, source_language):
  doc = docx.Document(document_name)
  if doc.paragraphs[0].text in input_output_key:
      time.sleep(2)
      return input_output_key[doc.paragraphs[0].text]

  # Non-English contracts are translated to English, analyzed there, and the
  # resulting report is translated back.
  if source_language != 'english':
     translation_output = translate_fill(document_name, "info_translation.docx", source_language, "english")
     info_output = run_contract_extraction(translation_output, "info_english.txt")
     final_info = translate_txt(info_output, output_name, "english", source_language)
  else:
     final_info = run_contract_extraction(document_name, output_name)

  return final_info
  
  
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from docx import Document

# T5 model fine-tuned for answer-aware question generation.
# (AutoModelForSeq2SeqLM replaces the deprecated AutoModelWithLMHead.)
qg_tokenizer = AutoTokenizer.from_pretrained("mrm8488/t5-base-finetuned-question-generation-ap")
qg_model = AutoModelForSeq2SeqLM.from_pretrained("mrm8488/t5-base-finetuned-question-generation-ap")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
qg_model.to(device)

def get_question(answer, context, max_length=64):
  input_text = "answer: %s  context: %s </s>" % (answer, context)
  features = qg_tokenizer([input_text], return_tensors='pt').to(device)

  output = qg_model.generate(input_ids=features['input_ids'], 
               attention_mask=features['attention_mask'],
               max_length=max_length)

  return qg_tokenizer.decode(output[0])
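
# Example (commented out): asking for a question whose answer is "Delhi" in
# the given context. The raw decode keeps the '<pad> question:' prefix, which
# run_fill_questions strips.
#   get_question("Delhi", "The property is located in Delhi.")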


def run_fill_questions(document_name, output_file, questions_file, delimiter):
      print("QGenerations")
      prev_para = ''
      count = 0
      variables = []
      questions = []

      doc = Document(document_name)

      for paragraph in doc.paragraphs:
              if(paragraph.text.strip()==''):
                    continue
              if(paragraph.text.count(delimiter)>0):
                      var_count = paragraph.text.count(delimiter)
                      format_str = paragraph.text.replace(delimiter, '{}')
                      new_string = format_str.format(*('id'+str(i) for i in range(count,count+var_count)))
                      
                      answers = ['id'+str(i) for i in range(count,count+var_count)]
                      
                      # Short lines lack context for question generation,
                      # so prepend the previous paragraph.
                      if len(new_string.split()) < 10:
                          context = prev_para + " " + new_string
                      else:
                          context = new_string

                          
                      for answer in answers:
                                question_string = get_question(answer, context).replace('<pad> question:','').replace('</s>','').strip()
                                question = "{{"+question_string+"}}"
                                questions.append(question_string)
                                new_string = new_string.replace(answer, question)
                      
                      count += var_count
                      variables.append({paragraph.text : new_string })

              prev_para = paragraph.text
      
      with open(questions_file, 'w') as f:
          count = 1
          for p in questions:
                f.write("Question "+str(count)+": "+ p +"\n")
                count += 1

      
      docx_replace(doc, variables)
      doc.save(output_file)
      return output_file, questions_file
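
# Sketch of the transformation (hypothetical template text): with delimiter
# "____", a paragraph "The rent is ____ per month." becomes
# "The rent is {{<generated question>}} per month.", and the generated
# question is also written to `questions_file`.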


def extract_questions(document_name, output_file):
  questions = []
  doc = Document(document_name)

  for paragraph in doc.paragraphs:
        if paragraph.text.strip() == '':
            continue
        q = re.findall(r'\{\{(.*?)\}\}', paragraph.text.strip())
        questions.extend(q)

  with open(output_file, 'w') as f:
          count = 1
          for p in questions:
                f.write("Question " + str(count) + ": " + p + "\n")
                count += 1

  return output_file

# Cache of already-processed templates, keyed by the first paragraph's text.
input_output_qg = {}


def run_generate_questions(document_name, output_file, questions_file, delimiter, source_language):
  doc = docx.Document(document_name)
  if doc.paragraphs[0].text in input_output_qg:
      qg_output = input_output_qg[doc.paragraphs[0].text]
      q_output = extract_questions(qg_output, questions_file)
      time.sleep(2)
      return qg_output, q_output
  if source_language != 'english':
      # Translate to English, generate questions there, then translate both
      # outputs back to the source language.
      translation_output = translate_fill(document_name, "qg_translation.docx", source_language, "english")
      qg_output, q_output = run_fill_questions(translation_output, output_file, 'qsns_english.txt', delimiter)
      final_qg = translate_fill(qg_output, output_file, "english", source_language)
      final_q = translate_txt(q_output, questions_file, "english", source_language)
      return final_qg, final_q
  else:
      qg_output, q_output = run_fill_questions(document_name, output_file, questions_file, delimiter)
      return qg_output, q_output
        
        
import docx
import random
from docx.shared import RGBColor
import time 
import re

# Cache of already-highlighted documents, keyed by the first paragraph's text.
input_output_red = {}

def run_redflags(filename, output_file):
    print("Red flags")
    doc = docx.Document(filename)
    if doc.paragraphs[0].text in input_output_red:
        return input_output_red[doc.paragraphs[0].text]
    else:
        # Demo heuristic: colour roughly 20% of the longer paragraphs red.
        for para in doc.paragraphs:
            inline = para.runs
            colour = (len(para.text.split()) > 10) and random.random() > 0.8
            if colour:
              for i in range(len(inline)):
                  inline[i].font.color.rgb = RGBColor(255, 0, 0)

        time.sleep(2)
        doc.save(output_file)
        return output_file

     
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from docx import Document
from collections import Counter

# T5 model fine-tuned for abstractive question answering over a context.
# (AutoModelForSeq2SeqLM replaces the deprecated AutoModelWithLMHead.)
rc_tokenizer = AutoTokenizer.from_pretrained("tuner007/t5_abs_qa")
rc_model = AutoModelForSeq2SeqLM.from_pretrained("tuner007/t5_abs_qa")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
rc_model = rc_model.to(device)

def get_answer(question, context):
      input_text = "context: %s <question for context: %s </s>" % (context,question)
      features = rc_tokenizer([input_text], return_tensors='pt')
      out = rc_model.generate(input_ids=features['input_ids'].to(device), attention_mask=features['attention_mask'].to(device))
      return rc_tokenizer.decode(out[0])
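
# Example (commented out; hypothetical strings):
#   get_answer("Who is the lessor?", "The lessor is Mr. Sharma.")
#   -> the decoded answer text, or 'No answer available in context' when the
#      model cannot find one.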

def extract_questions_for_info(document_name):
  # Collect the {{question}} placeholders from every non-empty paragraph.
  questions = []
  doc = Document(document_name)

  for paragraph in doc.paragraphs:
        if paragraph.text.strip() == '':
            continue
        q = re.findall(r'\{\{(.*?)\}\}', paragraph.text.strip())
        questions.extend(q)
  return questions


def extract_info(questions, context):
        variables = []
        unanswered = []
        max_length = 512  # maximum length of a feature (question and context)
        doc_stride = 256

        for question in questions:
                # Long contexts are split into overlapping windows; each
                # window is answered separately and the most common answer
                # wins.
                tokenized_example = rc_tokenizer(
                    str(question),
                    str(context.replace('\'', '').replace('"', "")),
                    max_length=max_length,
                    truncation="only_second",
                    return_overflowing_tokens=True,
                    stride=doc_stride)
                answers = []
                for x in tokenized_example["input_ids"]:
                      decoded = rc_tokenizer.decode(x)
                      q, c = decoded.split("</s>")[0], decoded.split("</s>")[1]
                      answers.append(get_answer(q, c).replace('<pad>', '').replace('</s>', '').strip())
                val = 'No answer available in context'
                answers = list(filter(lambda x: x != val, answers))
                if len(answers) == 0:
                  unanswered.append(question)
                else:
                  fre_list = Counter(answers)
                  answer = fre_list.most_common(1)[0][0]
                  variables.append({"{{" + question + "}}": answer})
        return variables, unanswered
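
# Example (commented out; hypothetical strings, output depends on the model):
#   extract_info(["What is the notice period?"], "The notice period is 30 days.")
#   -> roughly ([{'{{What is the notice period?}}': '30 days'}], [])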

# Cache of already-filled documents, keyed by the first paragraph's text.
input_output_exin = {}

def run_extract_info(document_name, context, output_file, source_language):
  print("Extract")
  doc = docx.Document(document_name)

  if doc.paragraphs[0].text in input_output_exin:
      exin_output = input_output_exin[doc.paragraphs[0].text]
      exin_unanswered = extract_questions_for_info(exin_output)
      time.sleep(2)
      return exin_output, exin_unanswered

  if source_language != 'english':
      # Work in English: extract the questions from the translated template,
      # translate the context, answer, then translate the results back.
      translation_output = translate_fill(document_name, "exin_translation.docx", source_language, "english")
      questions = extract_questions_for_info(translation_output)
      context = translate_paragraph(context, source_language, "english")

      variables, unanswered = extract_info(questions, context)
      template_document = Document(document_name)
      docx_replace(template_document, variables)
      template_document.save("exin_modified.docx")

      final_exin = translate_fill("exin_modified.docx", output_file, "english", source_language)
      unans_exin = [translate_paragraph(each, "english", source_language) for each in unanswered]
      return final_exin, unans_exin

  questions = extract_questions_for_info(document_name)
  variables, unanswered = extract_info(questions, context)
  template_document = Document(document_name)
  docx_replace(template_document, variables)
  template_document.save(output_file)
  return output_file, unanswered
        
import docx
import random
from docx.shared import RGBColor
import time 
import re
from docx import Document

from docx.enum.text import WD_COLOR_INDEX

from transformers import AutoTokenizer, AutoModel
import torch
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np


# IndicBERT encoder used to embed sentences for semantic clause search.
similar_tokenizer = AutoTokenizer.from_pretrained('ai4bharat/indic-bert')
similar_model = AutoModel.from_pretrained('ai4bharat/indic-bert')
similar_model.eval()

def obtain_rep(documents):
      # Encode each sentence with IndicBERT and keep its pooled
      # representation; returns a (num_sentences, hidden_size) tensor.
      mean_pooled = []
      with torch.no_grad():
          for sentence in documents:
              tokens = similar_tokenizer.encode_plus(sentence, max_length=128,
                                                truncation=True, padding='max_length',
                                                return_tensors='pt')
              outputs = similar_model(**tokens)
              mean_pooled.append(outputs.pooler_output)

      return torch.stack(mean_pooled).squeeze(1)

def similarity(documents, clauses):
      # Cosine similarity of every clause embedding against every sentence
      # embedding; keep each sentence's best score across clauses.
      clauses = clauses.detach().numpy()
      documents = documents.detach().numpy()
      sim = cosine_similarity(clauses, documents)
      max_sim = np.max(sim, axis=0)
      return max_sim
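
# Shape note: with C clauses and S sentences, `sim` is (C, S) and the max
# over axis 0 leaves one best-match score per sentence.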

def fill_yellow(filename, output_file, highlighted_paras):
     doc = docx.Document(filename)
     for each in highlighted_paras:
          for para in doc.paragraphs:
                  if each in para.text:
                      for run in para.runs:
                          run.font.highlight_color = WD_COLOR_INDEX.YELLOW
                      break
     doc.save(output_file)
     return output_file
      

def get_similar_clauses(filename, output_file, clauses, source_language):
      paras = []
      template_document = Document(filename)
      contract = []
      for paragraph in template_document.paragraphs:
            if paragraph.text.strip() != '':
              contract.append(paragraph.text)

      # Split paragraphs into sentences and drop fragments of 5 words or fewer.
      sentence_batch = []
      for paragraph in contract:
        sentence_batch.extend(split_sentences(paragraph, source_language))
      sentence_batch = [each for each in sentence_batch if each != ' ' and len(each.split()) > 5]

      # Rank sentences by their best similarity to any query clause and keep
      # the top 10% (at least 3).
      doc_rep = obtain_rep(sentence_batch)
      clause_rep = obtain_rep(clauses)
      k = similarity(doc_rep, clause_rep)
      pick_top = max(int(0.1 * len(sentence_batch)), 3)
      ind = k.argsort()[-pick_top:][::-1]
      for each_idx in ind:
          paras.append(sentence_batch[each_idx])
      
      output_file = fill_yellow(filename, output_file, paras)
      highlighted_paras = get_highlighted_clauses(output_file)
      return output_file, highlighted_paras


# Cache of already-searched documents and their clause sets.
input_output_similar = {}

def get_highlighted_clauses(filename):
   doc = docx.Document(filename)
   para_highlighted = []
   for para in doc.paragraphs:
            inline = para.runs
            colour = False
            for i in range(len(inline)):
                if inline[i].font.highlight_color == WD_COLOR_INDEX.YELLOW :
                  colour = True
                  break
            if colour:
              para_highlighted.append(para.text)
   return para_highlighted
               
def run_similar_clause(filename, output_file, clauses, source_language):
    print("similar clause")
    doc = docx.Document(filename)
    for doc_input in list(input_output_similar.keys()):
        if doc.paragraphs[0].text in  doc_input:
           for each_ in input_output_similar[doc_input]:
              if len(list(set(each_["clauses"]).intersection(set(clauses))))>0 :
                 output_file =  each_["file"]
           time.sleep(3)
           highlighted_paras = get_highlighted_clauses(output_file)
           return output_file, highlighted_paras
    output_file, highlighted_paras = get_similar_clauses(filename, output_file,clauses, source_language)
    return output_file, highlighted_paras
    
import gradio as gr

analysis_services = ['Translate Contract', 'Identify Key Clauses', 'Red Flag Identification', 'Similar Semantic Clause Search', 'Generate Questions for Contract Template', 'Fill Contract Template by Extracting Information']
analysis_label = 'Select Contract Analysis Service'
analysis_choices = analysis_services
analysis_choice = ''
lang_choice = 'english'
translation_label = 'Upload contract for Translation'
translation_src_label = 'Select language of uploaded contract'
translation_tgt_label = 'Select language to translate'
keyclause_label = 'Upload contract for Key Clause Extraction'
redflag_label = 'Upload contract for Red Flag Identification'
similar_label = 'Upload contract for Semantic Similar Clauses'
similar_clause_label = 'Enter clauses to be identified (enter one clause per line)'
generate_questions_label = 'Upload template contract for Question Generation'
rc_file_label = 'Upload template contract with questions to fill'
rc_context_label = 'Enter the text to extract answer from'
delimiter_label = "Input placeholder (pattern or symbol used as blank in template)"
button_label = "Upload and Analyze"


translation_output_label = 'Download your translated contract'
keyclause_output_label = 'Download your key clauses from the contract'
redflag_output_label = 'Download your contract with red flags highlighted'
similar_file_label = 'Download your contract with similar clauses highlighted in yellow'
similar_text_label = 'A quick view of similar clauses'
qg_output_label = 'Download your template contract along with questions'
q_output_label = 'Download only questions to fill the template contract'
rc_output_label = 'Download your template contract along with filled answers'
rc_text_label = 'Unanswered Questions'

def change_analysis(choice):
    global lang_choice, analysis_choices
    lang_choice = choice
    # Re-render the service names in the selected language and hide all
    # inputs/outputs until a service is picked.
    analysis_choices = [translate_paragraph(paragraph, "english", choice) for paragraph in analysis_services]
    return [gr.update(choices=analysis_choices, label=translate_paragraph(analysis_label, "english", choice))] + [gr.update(visible=False)] * 8

def change_inputs(choice):
    # Outputs, in order: input_file, input_text, output_file, output_file2,
    # output_text, translation_target, translation_source, delimiter, button.
    global analysis_choice
    analysis_choice = choice
    if analysis_choice == analysis_choices[0]:
          return [gr.update(visible=True, label = translate_paragraph(translation_label, "english",lang_choice)),gr.update(visible=False), gr.update(visible=True, label=''),gr.update(visible=False),gr.update(visible=False),gr.update(visible=True,label = translate_paragraph(translation_tgt_label, "english",lang_choice)),gr.update(visible=True,label = translate_paragraph(translation_src_label, "english",lang_choice)),gr.update(visible=False), gr.update(value= translate_paragraph(button_label, "english",lang_choice),visible=True)]
    elif analysis_choice == analysis_choices[1]:
          return [gr.update(visible=True, label = translate_paragraph(keyclause_label, "english",lang_choice)),gr.update(visible=False), gr.update(visible=True,label=''),gr.update(visible=False),gr.update(visible=False),gr.update(visible=False),gr.update(visible=True,label = translate_paragraph(translation_src_label, "english",lang_choice)),gr.update(visible=False),gr.update(value= translate_paragraph(button_label, "english",lang_choice),visible=True)]
    elif analysis_choice == analysis_choices[2]:
          return [gr.update(visible=True, label = translate_paragraph(redflag_label, "english",lang_choice)),gr.update(visible=False), gr.update(visible=True,label=''),gr.update(visible=False),gr.update(visible=False),gr.update(visible=False),gr.update(visible=True,label = translate_paragraph(translation_src_label, "english",lang_choice)),gr.update(visible=False),gr.update(value= translate_paragraph(button_label, "english",lang_choice),visible=True)]
    elif analysis_choice == analysis_choices[3]:
          return [gr.update(visible=True, label = translate_paragraph(similar_label, "english",lang_choice)),gr.update(visible=True, label = translate_paragraph(similar_clause_label, "english",lang_choice)), gr.update(visible=True,label=''),gr.update(visible=True,label=''),gr.update(visible=True,label=''),gr.update(visible=False),gr.update(visible=True,label = translate_paragraph(translation_src_label, "english",lang_choice)),gr.update(visible=False),gr.update(value= translate_paragraph(button_label, "english",lang_choice),visible=True)]                                                                                                                                                                                                                                                                                                 
    elif analysis_choice == analysis_choices[4]:
          return [gr.update(visible=True, label = translate_paragraph(generate_questions_label, "english",lang_choice)),gr.update(visible=False), gr.update(visible=True,label=''),gr.update(visible=True,label=''),gr.update(visible=False),gr.update(visible=False),gr.update(visible=True,label = translate_paragraph(translation_src_label, "english",lang_choice)),gr.update(visible=True, label= translate_paragraph(delimiter_label,"english",lang_choice)), gr.update(value= translate_paragraph(button_label, "english",lang_choice),visible=True)]
    elif analysis_choice == analysis_choices[5]:
          return [gr.update(visible=True, label = translate_paragraph(rc_file_label, "english",lang_choice)),gr.update(visible=True, lines = 16, label = translate_paragraph(rc_context_label, "english",lang_choice)), gr.update(visible=True,label=''),gr.update(visible=True,label=''),gr.update(visible=True,label=''),gr.update(visible=False),gr.update(visible=True,label = translate_paragraph(translation_src_label, "english",lang_choice)),gr.update(visible=False),gr.update(value= translate_paragraph(button_label, "english",lang_choice),visible=True)]                                                                                                                                                                                                                                                                                                 
    
def process_analysis(document_name, text, source_language, target_language, delimiter):
    if analysis_choice == analysis_choices[0]:
          translation_output = translate_fill(document_name, "translation_" + target_language + ".docx", source_language , target_language)
          return [gr.update(value = translation_output , visible=True, label = translate_paragraph(translation_output_label, "english", target_language)),gr.update(visible=False),gr.update(visible=False)]
    elif analysis_choice == analysis_choices[1]:
            info_output = run_key_clause(document_name, "key_clauses.txt",source_language)
            return [gr.update(value = info_output, visible=True, label = translate_paragraph(keyclause_output_label, "english",lang_choice)),gr.update(visible=False), gr.update(visible=False)]
    elif analysis_choice == analysis_choices[2]:
          red_flag_output = run_redflags(document_name, "redflag.docx")
          return [gr.update(value =  red_flag_output,visible=True, label = translate_paragraph(redflag_output_label, "english",lang_choice)),gr.update(visible=False), gr.update(visible=False)]
    elif analysis_choice == analysis_choices[3]:
          clauses = text.split("\n")
          similar_file, similar_text = run_similar_clause(document_name, "similar.docx", clauses, source_language)
          similar_text = "\n\n\n".join(similar_text)
          return [gr.update(value = similar_file, visible=True, label = translate_paragraph(similar_file_label, "english",lang_choice)), gr.update(visible=False),gr.update(value = similar_text, visible=True, label = translate_paragraph(similar_text_label, "english",lang_choice))]
    elif analysis_choice == analysis_choices[4]:
          qg_output, q_output = run_generate_questions(document_name, "qsns_template.docx", "qsns_only.txt", delimiter, source_language)
          return [gr.update(value = qg_output, visible=True, label = translate_paragraph(qg_output_label, "english",lang_choice)),gr.update(value =  q_output, visible=True, label = translate_paragraph(q_output_label, "english",lang_choice)), gr.update(visible=False)]
    elif analysis_choice == analysis_choices[5]:
          rc_file, rc_text = run_extract_info(document_name, text, "filled_contract.docx", source_language)
          rc_text = "\n\n".join(rc_text)
          return [gr.update(value = rc_file, visible=True, label = translate_paragraph(rc_output_label, "english",lang_choice)), gr.update(visible=False),gr.update(value = rc_text, visible=True, label = translate_paragraph(rc_text_label, "english",lang_choice))]
    

with gr.Blocks() as demo:
    lang_radio = gr.Radio(list(lang_dict.keys()), value='english', label="Select your language")
    analysis_radio = gr.Radio(analysis_services, label=analysis_label)

    with gr.Row():
        input_file = gr.File(interactive=True, visible=False)
        with gr.Column():
            translation_source = gr.Dropdown(choices=list(lang_dict.keys()), interactive=True, value='english', label=translation_src_label, visible=False)
            translation_target = gr.Dropdown(choices=list(lang_dict.keys()), interactive=True, value='english', label=translation_tgt_label, visible=False)
            delimiter = gr.Textbox(label=delimiter_label, lines=1, interactive=True, visible=False)

    input_text = gr.Textbox(lines=4, interactive=True, visible=False)

    button = gr.Button(value=button_label, visible=False)
    output_file = gr.File(interactive=False, visible=False)
    output_file2 = gr.File(interactive=False, visible=False)
    output_text = gr.Textbox(interactive=False, visible=False)

    lang_radio.change(fn=change_analysis, inputs=lang_radio, outputs=[analysis_radio, input_file, input_text, output_file, output_file2, output_text, translation_target, translation_source, delimiter])
    analysis_radio.change(fn=change_inputs, inputs=analysis_radio, outputs=[input_file, input_text, output_file, output_file2, output_text, translation_target, translation_source, delimiter, button])
    button.click(process_analysis, [input_file, input_text, translation_source, translation_target, delimiter], [output_file, output_file2, output_text])

# Launch outside the Blocks context, once the UI is fully defined.
demo.launch(debug=True)