Spaces:

santoshtyss
/

analysis_demo

Build error

App Files Files

santoshtyss committed on Sep 6, 2022

Commit

735a2f1

1 Parent(s): fc7ba9a

Update app.py

Browse files

Files changed (1) hide show

app.py +105 -4

app.py CHANGED Viewed

@@ -527,7 +527,99 @@ def run_redflags(filename, output_file):
         time.sleep(8)
         doc.save(output_file)
         return output_file
 import docx
 import random
@@ -650,10 +742,9 @@ def run_similar_clause(filename, output_file, clauses, source_language):
             output_file, highlighted_paras = get_similar_clauses(filename, output_file,clauses, source_language)
             return output_file, highlighted_paras
 import gradio as gr
-analysis_services = ['Translate Contract', 'Identify key Clauses', 'Red flag Identification', 'Similar Semantic Clause search', 'Generate Questions for Contract Template']
 analysis_label = 'Select Contract Analysis Service'
 analysis_choices = analysis_services
 analysis_choice = ''
@@ -666,6 +757,8 @@ redflag_label = 'Upload contract for Red Flag Identification'
 similar_label = 'Upload contract for Semantic Similar Clauses'
 similar_clause_label = 'Enter clauses to be identified (enter one clause per line)'
 generate_questions_label = 'Upload template contract for Question Generation'
 delimiter_label = "Input placeholder (pattern or symbol used as blank in template)"
 button_label = "Upload and Analyze"
@@ -677,6 +770,8 @@ similar_file_label = 'Download your contract with  highlighted similar clauses i
 similar_text_label = 'A quick view of similar clauses'
 qg_output_label = 'Download your template contract along with questions'
 q_output_label = 'Download only questions to fill the template contract'
 def change_analysis(choice):
     global lang_choice, analysis_choices
@@ -697,7 +792,9 @@ def change_inputs(choice):
           return [gr.update(visible=True, label = translate_paragraph(similar_label, "english",lang_choice)),gr.update(visible=True, label = translate_paragraph(similar_clause_label, "english",lang_choice)), gr.update(visible=True,label=''),gr.update(visible=True,label=''),gr.update(visible=True,label=''),gr.update(visible=False),gr.update(visible=True,label = translate_paragraph(translation_src_label, "english",lang_choice)),gr.update(visible=False),gr.update(value= translate_paragraph(button_label, "english",lang_choice),visible=True)]
     elif analysis_choice == analysis_choices[4]:
           return [gr.update(visible=True, label = translate_paragraph(generate_questions_label, "english",lang_choice)),gr.update(visible=False), gr.update(visible=True,label=''),gr.update(visible=True,label=''),gr.update(visible=False),gr.update(visible=False),gr.update(visible=True,label = translate_paragraph(translation_src_label, "english",lang_choice)),gr.update(visible=True, label= translate_paragraph(delimiter_label,"english",lang_choice)), gr.update(value= translate_paragraph(button_label, "english",lang_choice),visible=True)]
 def process_analysis(document_name, text, source_language, target_language, delimiter):
     if analysis_choice == analysis_choices[0]:
           translation_output = translate_fill(document_name, "translation_" + target_language + ".docx", source_language , target_language)
@@ -716,6 +813,10 @@ def process_analysis(document_name, text, source_language, target_language, deli
     elif analysis_choice == analysis_choices[4]:
           qg_output, q_output = run_generate_questions(document_name, "qsns_template.docx", "qsns_only.txt", delimiter, source_language)
           return [gr.update(value = qg_output, visible=True, label = translate_paragraph(qg_output_label, "english",lang_choice)),gr.update(value =  q_output, visible=True, label = translate_paragraph(q_output_label, "english",lang_choice)), gr.update(visible=False)]
 with gr.Blocks() as demo:

         time.sleep(8)
         doc.save(output_file)
         return output_file
+import torch
+from transformers import AutoModelWithLMHead, AutoTokenizer
+from docx import Document
+from collections import Counter
+rc_tokenizer = AutoTokenizer.from_pretrained("tuner007/t5_abs_qa")  # tokenizer shared by get_answer and extract_info
+rc_model = AutoModelWithLMHead.from_pretrained("tuner007/t5_abs_qa")  # model from the same checkpoint; NOTE(review): AutoModelWithLMHead looks deprecated in newer transformers releases - confirm against the pinned version
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # CUDA when torch reports it available, otherwise CPU
+rc_model = rc_model.to(device)  # get_answer moves its input tensors to this same device
+def get_answer(question, context):
+      input_text = "context: %s <question for context: %s </s>" % (context,question)
+      features = rc_tokenizer([input_text], return_tensors='pt')
+      out = rc_model.generate(input_ids=features['input_ids'].to(device), attention_mask=features['attention_mask'].to(device))
+      return rc_tokenizer.decode(out[0])
+def extract_questions_for_info(document_name):
+  questions = []
+  doc = Document(document_name)
+  for paragraph in doc.paragraphs:
+        if(paragraph.text.strip()==''):
+                    continue
+        else:
+          q = re.findall(r'\{{(.*?)\}}',paragraph.text.strip())
+          questions.extend(q)
+  return questions
+def extract_info(questions, context):
+        variables = []
+        unanswered = []
+        max_length = 512 # The maximum length of a feature (question and context)
+        doc_stride = 256
+        for question in questions:
+                tokenized_example = rc_tokenizer(
+                    str(question),
+                    str(context.replace('\'','').replace('"',"")),
+                    max_length=max_length,
+                    truncation="only_second",
+                    return_overflowing_tokens=True,
+                    stride=doc_stride)
+                answers = []
+                for x in tokenized_example["input_ids"]:
+                      q, c = rc_tokenizer.decode(x).split("</s>")[0], rc_tokenizer.decode(x).split("</s>")[1]
+                      answers.append(get_answer(q, c).replace('<pad>','').replace('</s>','').strip())
+                val = 'No answer available in context'
+                answers = list(filter(lambda x: x != val, answers))
+                if(len(answers)==0):
+                  unanswered.append(question)
+                else:
+                  fre_list = Counter(answers)
+                  answer = fre_list.most_common(1)[0][0]
+                  variables.append({"{{"+question+"}}" : answer})
+        return variables, unanswered
+input_output_exin = {"lets see":"Employment Qsns.docx"}  # shortcut table: first-paragraph text of an uploaded document -> file that run_extract_info returns instead of running extraction
+def run_extract_info(document_name, context, output_file, source_language):
+  print("Extract")
+  doc = docx.Document(document_name)
+  if doc.paragraphs[0].text in list(input_output_exin.keys()):
+      exin_output =  input_output_exin[doc.paragraphs[0].text]
+      exin_unanswered = extract_questions_for_info(exin_output)
+      time.sleep(5)
+      return exin_output, exin_unanswered
+  else:
+      if source_language != 'english':
+              translation_output = translate_fill(document_name, "exin_translation.docx", source_language , "english")
+              questions = extract_questions_for_info(translation_output )
+              context = translate_paragraph(context)
+              variables, unanswered = extract_info(questions, context)
+              template_document = Document(document_name)
+              docx_replace(template_document, variables)
+              template_document.save("exin_modified.docx")
+              final_exin = translate_fill("exin_modified.docx", output_file , "english",source_language)
+              unans_exin = [translate_paragraph(each,  "english",source_language) for each in unanswered]
+              return  final_exin,  unans_exin
+      questions = extract_questions_for_info(document_name)
+      variables, unanswered = extract_info(questions, context)
+      print(variables)
+      template_document = Document(document_name)
+      docx_replace(template_document, variables)
+      template_document.save(output_file)
+      return output_file, unanswered
 import docx
 import random
             output_file, highlighted_paras = get_similar_clauses(filename, output_file,clauses, source_language)
             return output_file, highlighted_paras
 import gradio as gr
+analysis_services = ['Translate Contract', 'Identify key Clauses', 'Red flag Identification', 'Similar Semantic Clause search', 'Generate Questions for Contract Template', 'Fill Contract Template by extracting information']
 analysis_label = 'Select Contract Analysis Service'
 analysis_choices = analysis_services
 analysis_choice = ''
 similar_label = 'Upload contract for Semantic Similar Clauses'
 similar_clause_label = 'Enter clauses to be identified (enter one clause per line)'
 generate_questions_label = 'Upload template contract for Question Generation'
+rc_file_label = 'Upload template contract with questions to fill'
+rc_context_label = 'Enter the text to extract answer from'
 delimiter_label = "Input placeholder (pattern or symbol used as blank in template)"
 button_label = "Upload and Analyze"
 similar_text_label = 'A quick view of similar clauses'
 qg_output_label = 'Download your template contract along with questions'
 q_output_label = 'Download only questions to fill the template contract'
+rc_output_label = 'Download your template contract along with filled answers'
+rc_text_label = 'Unanswered Questions'
 def change_analysis(choice):
     global lang_choice, analysis_choices
           return [gr.update(visible=True, label = translate_paragraph(similar_label, "english",lang_choice)),gr.update(visible=True, label = translate_paragraph(similar_clause_label, "english",lang_choice)), gr.update(visible=True,label=''),gr.update(visible=True,label=''),gr.update(visible=True,label=''),gr.update(visible=False),gr.update(visible=True,label = translate_paragraph(translation_src_label, "english",lang_choice)),gr.update(visible=False),gr.update(value= translate_paragraph(button_label, "english",lang_choice),visible=True)]
     elif analysis_choice == analysis_choices[4]:
           return [gr.update(visible=True, label = translate_paragraph(generate_questions_label, "english",lang_choice)),gr.update(visible=False), gr.update(visible=True,label=''),gr.update(visible=True,label=''),gr.update(visible=False),gr.update(visible=False),gr.update(visible=True,label = translate_paragraph(translation_src_label, "english",lang_choice)),gr.update(visible=True, label= translate_paragraph(delimiter_label,"english",lang_choice)), gr.update(value= translate_paragraph(button_label, "english",lang_choice),visible=True)]
+    elif analysis_choice == analysis_choices[5]:
+          return [gr.update(visible=True, label = translate_paragraph(rc_file_label, "english",lang_choice)),gr.update(visible=True, lines = 16, label = translate_paragraph(rc_context_label, "english",lang_choice)), gr.update(visible=True,label=''),gr.update(visible=True,label=''),gr.update(visible=True,label=''),gr.update(visible=False),gr.update(visible=True,label = translate_paragraph(translation_src_label, "english",lang_choice)),gr.update(visible=False),gr.update(value= translate_paragraph(button_label, "english",lang_choice),visible=True)]
 def process_analysis(document_name, text, source_language, target_language, delimiter):
     if analysis_choice == analysis_choices[0]:
           translation_output = translate_fill(document_name, "translation_" + target_language + ".docx", source_language , target_language)
     elif analysis_choice == analysis_choices[4]:
           qg_output, q_output = run_generate_questions(document_name, "qsns_template.docx", "qsns_only.txt", delimiter, source_language)
           return [gr.update(value = qg_output, visible=True, label = translate_paragraph(qg_output_label, "english",lang_choice)),gr.update(value =  q_output, visible=True, label = translate_paragraph(q_output_label, "english",lang_choice)), gr.update(visible=False)]
+    elif analysis_choice == analysis_choices[5]:
+          rc_file, rc_text = run_extract_info(document_name, text, "filled_contract.docx", source_language)
+          rc_text = "\n\n".join(rc_text)
+          return [gr.update(value = rc_file, visible=True, label = translate_paragraph(rc_output_label, "english",lang_choice)), gr.update(visible=False),gr.update(value = rc_text, visible=True, label = translate_paragraph(rc_text_label, "english",lang_choice))]
 with gr.Blocks() as demo: