Spaces:

research14Lab
/

LingEval

Runtime error

App Files Files Community

research14 committed on Dec 10, 2023

Commit

b308128

1 Parent(s): c0ac2c5

changes

Browse files

Files changed (2) hide show

app.py +133 -29
run_llm.py +80 -11

app.py CHANGED Viewed

@@ -1,52 +1,152 @@
-import gradio as gr
 import json
-from run_llm import template_all, prompt2_pos, prompt2_chunk, prompt2_parse, demon_pos, demon_chunk, demon_parse, model_mapping
 from tqdm import tqdm
-# Your existing code
-theme = gr.themes.Soft()
 with open('sample_uniform_1k_2.txt', 'r') as f:
     selected_idx = f.readlines()
 selected_idx = [int(i.strip()) for i in selected_idx]#[s:e]
-gid_list = selected_idx[0]
 ptb = []
 with open('ptb.jsonl', 'r') as f:
     for l in f:
         ptb.append(json.loads(l))
 # Function to process text based on model and task
 def process_text(model_name, task, text):
     for gid in tqdm(gid_list, desc='Query'):
             text = ptb[gid]['text']
     # Define prompts for each strategy based on the task
-    strategy_prompts = {
-        'Strategy 1': template_all.format(text),
-        'Strategy 2': {
-            'POS': prompt2_pos.format(text),
-            'Chunking': prompt2_chunk.format(text),
-            'Parsing': prompt2_parse.format(text),
-        }.get(task, "Invalid Task Selection for Strategy 2"),
-        'Strategy 3': {
-            'POS': demon_pos,
-            'Chunking': demon_chunk,
-            'Parsing': demon_parse,
-        }.get(task, "Invalid Task Selection for Strategy 3"),
-    }
-    # Get the selected prompt based on the strategy
-    prompt = strategy_prompts.get(model_name, "Invalid Model Selection")
-    # Add your logic to feed the prompt to the selected model and get the result
-    result = "Processed Result"  # Replace this with your actual result
-    return result
-# Dropdown options for model and task
-model_options = list(model_mapping.keys())
-task_options = ['POS', 'Chunking', 'Parsing']
 # Gradio interface
 iface = gr.Interface(
@@ -54,14 +154,18 @@ iface = gr.Interface(
     inputs=[
         gr.Dropdown(model_options, label="Select Model"),
         gr.Dropdown(task_options, label="Select Task"),
     ],
     outputs=[
         gr.Textbox(label="Strategy 1 QA Result"),
         gr.Textbox(label="Strategy 2 Instruction Result"),
         gr.Textbox(label="Strategy 3 Structured Prompting Result"),
     ],
     theme = theme,
     live=False,
 )
 iface.launch()

+import os
+import sys
 import json
+import time
+import openai
+import pickle
+import argparse
+import requests
 from tqdm import tqdm
+import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM, LlamaForCausalLM, LlamaTokenizer
+from fastchat.model import load_model, get_conversation_template, add_model_args
+from nltk.tag.mapping import _UNIVERSAL_TAGS
+import gradio as gr
+from transformers import pipeline
+# Tag inventories.
+# uni_tags starts as a list copy of NLTK's universal tag collection; its last
+# entry is then renamed to 'PUNC'.
+# NOTE(review): _UNIVERSAL_TAGS is a private NLTK name and this assumes its last
+# element is the punctuation tag -- confirm against the installed NLTK version.
+uni_tags = list(_UNIVERSAL_TAGS)
+uni_tags[-1] = 'PUNC'
+# presumably the begin/inside/outside markers of chunk labels -- verify against the callers
+bio_tags = ['B', 'I', 'O']
+# presumably the phrase types allowed in chunk labels; not referenced elsewhere in the visible code
+chunk_tags = ['ADJP', 'ADVP', 'CONJP', 'INTJ', 'LST', 'NP', 'O', 'PP', 'PRT', 'SBAR', 'UCP', 'VP']
+# presumably the node labels accepted in parse trees; not referenced elsewhere in the visible code
+syntags = ['NP', 'S', 'VP', 'ADJP', 'ADVP', 'SBAR', 'TOP', 'PP', 'POS', 'NAC', "''", 'SINV', 'PRN', 'QP', 'WHNP', 'RB', 'FRAG',
+ 'WHADVP', 'NX', 'PRT', 'VBZ', 'VBP', 'MD', 'NN', 'WHPP', 'SQ', 'SBARQ', 'LST', 'INTJ', 'X', 'UCP', 'CONJP', 'NNP', 'CD', 'JJ',
+ 'VBD', 'WHADJP', 'PRP', 'RRC', 'NNS', 'SYM', 'CC']
+openai.api_key = "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"
+# Builds two parallel lists: `ents` (short labels) and `ents_prompt` (the
+# human-readable name of each label, in the same order).
+# determinant vs. determiner
+# https://wikidiff.com/determiner/determinant
+# The trailing [7:] drops the first seven (word-level) names, so only the
+# entries from 'Noun Phrase' onwards are kept.
+ents_prompt = ['Noun','Verb','Adjective','Adverb','Preposition/Subord','Coordinating Conjunction',# 'Cardinal Number',
+    'Determiner',
+    'Noun Phrase','Verb Phrase','Adjective Phrase','Adverb Phrase','Preposition Phrase','Conjunction Phrase','Coordinate Phrase','Quantitave Phrase','Complex Nominal',
+    'Clause','Dependent Clause','Fragment Clause','T-unit','Complex T-unit',# 'Fragment T-unit',
+][7:]
+# Same slice on the short labels: the first seven are dropped, keeping 'NP' onwards.
+ents = ['NN', 'VB', 'JJ', 'RB', 'IN', 'CC', 'DT', 'NP', 'VP', 'ADJP', 'ADVP', 'PP', 'CONJP', 'CP', 'QP', 'CN', 'C', 'DC', 'FC', 'T', 'CT'][7:]
+# Readable names for the word-level tags.
+# NOTE(review): the order is assumed to match uni_tags entry for entry -- confirm.
+ents_prompt_uni_tags = ['Verb', 'Noun', 'Pronoun', 'Adjective', 'Adverb', 'Preposition and Postposition', 'Coordinating Conjunction',
+                        'Determiner', 'Cardinal Number', 'Particles or other function words',
+                        'Words that cannot be assigned a POS tag', 'Punctuation']
+# The word-level tags come first, followed by the phrase/clause-level labels kept above.
+ents = uni_tags + ents
+ents_prompt = ents_prompt_uni_tags + ents_prompt
+# Prints every label/name pair to stdout each time the module is loaded.
+for i, j in zip(ents, ents_prompt):
+    print(i, j)
+# Short model names mapped to the identifier used to load or query each model.
+# The keys are what the model dropdown displays (see model_options).
+# NOTE(review): the values mix a hosted model name, 'lmsys/...' identifiers and
+# relative local paths ('./llama/...', './alpaca-7B'); the local paths presumably
+# have to exist in the deployment -- confirm.
+model_mapping = {
+    'gpt3.5': 'gpt-3.5-turbo-0613',
+    'vicuna-7b': 'lmsys/vicuna-7b-v1.3',
+    'vicuna-13b': 'lmsys/vicuna-13b-v1.3',
+    'vicuna-33b': 'lmsys/vicuna-33b-v1.3',
+    'fastchat-t5': 'lmsys/fastchat-t5-3b-v1.0',
+    'llama-7b': './llama/hf/7B',
+    'llama-13b': './llama/hf/13B',
+    'llama-30b': './llama/hf/30B',
+    'alpaca': './alpaca-7B',
+}
 # Sampled sentence ids: one integer per line of the index file.
 with open('sample_uniform_1k_2.txt', 'r') as idx_file:
     selected_idx = [int(raw.strip()) for raw in idx_file.readlines()]
 # Corpus records: one JSON document per line.
 with open('ptb.jsonl', 'r') as corpus_file:
     ptb = [json.loads(record) for record in corpus_file]
+## Prompt 1
+# template_all takes one positional field (the sentence); template_single takes
+# two (the category name, then the sentence).
+template_all = '''Please output the <Noun, Verb, Adjective, Adverb, Preposition/Subord, Coordinating Conjunction, Cardinal Number, Determiner, Noun Phrase, Verb Phrase, Adjective Phrase, Adverb Phrase, Preposition Phrase, Conjunction Phrase, Coordinate Phrase, Quantitave Phrase, Complex Nominal, Clause, Dependent Clause, Fragment Clause, T-unit, Complex T-unit, Fragment T-unit> in the following sentence without any additional text in json format: "{}"'''
+template_single = '''Please output any <{}> in the following sentence one per line without any additional text: "{}"'''
+## Prompt 2
+# One instruction per task; each takes the sentence as its only field.
+prompt2_pos = '''Please pos tag the following sentence using Universal POS tag set without generating any additional text: {}'''
+# NOTE(review): this first prompt2_chunk is dead -- the name is reassigned two
+# lines below, so only the BIO-tag wording is ever used.
+prompt2_chunk = '''Please do sentence chunking for the following sentence as in CoNLL 2000 shared task without generating any addtional text: {}'''
+prompt2_parse = '''Generate textual representation of the constituency parse tree of the following sentence using Penn TreeBank tag set without outputing any additional text: {}'''
+# Effective chunking prompt (overrides the definition above).
+prompt2_chunk = '''Please chunk the following sentence in CoNLL 2000 format with BIO tags without outputing any additional text: {}'''
+## Prompt 3
+# Each demonstration file is read in full into a string.
+# NOTE(review): the '3_42' in the file names presumably encodes the number of
+# demonstrations and the sampling seed -- confirm.
+with open('demonstration_3_42_pos.txt', 'r') as f:
+    demon_pos = f.read()
+with open('demonstration_3_42_chunk.txt', 'r') as f:
+    demon_chunk = f.read()
+with open('demonstration_3_42_parse.txt', 'r') as f:
+    demon_parse = f.read()
+# UI theme and the text-generation model shared by every request.
+theme = gr.themes.Soft()
+# NOTE(review): this rebinds the name `pipeline`, shadowing the factory function
+# imported from transformers, so no second pipeline can be created afterwards.
+# The model is fixed here; the model chosen in the dropdown does not affect it.
+pipeline = pipeline(task="text-generation", model="lmsys/vicuna-7b-v1.3")
+# Dropdown options for model and task
+model_options = list(model_mapping.keys())
+task_options = ['POS', 'Chunking', 'Parsing']
 # Function to process text based on model and task
 def process_text(model_name, task, text):
+    gid_list = selected_idx[0:20]
     for gid in tqdm(gid_list, desc='Query'):
             text = ptb[gid]['text']
+    #if model_name is 'gpt3.5': 'gpt-3.5-turbo-0613',
+    #elif model_name is 'vicuna-7b': 'lmsys/vicuna-7b-v1.3',
+    #elif model_name is 'vicuna-13b': 'lmsys/vicuna-13b-v1.3',
+    #elif model_name is 'vicuna-33b': 'lmsys/vicuna-33b-v1.3',
+    #elif model_name is 'fastchat-t5': 'lmsys/fastchat-t5-3b-v1.0',
+    #elif model_name is 'llama-7b': './llama/hf/7B',
+    #elif model_name is 'llama-13b': './llama/hf/13B',
+    #elif model_name is 'llama-30b': './llama/hf/30B',
+    #elif model_name is 'alpaca': './alpaca-7B',
+    if task == 'POS':
+        strategy1 = pipeline(template_all.format(text))
+        strategy2 = pipeline(prompt2_pos.format(text))
+        strategy3 = pipeline(demon_pos)
+        return (strategy1, strategy2, strategy3)
+    elif task == 'Chunking':
+        strategy1 = pipeline(template_all.format(text))
+        strategy2 = pipeline(prompt2_chunk.format(text))
+        strategy3 = pipeline(demon_chunk)
+        return (strategy1, strategy2, strategy3)
+    elif task == 'Parsing':
+        strategy1 = pipeline(template_all.format(text))
+        strategy2 = pipeline(prompt2_parse.format(text))
+        strategy3 = pipeline(demon_parse)
+        return (strategy1, strategy2, strategy3)
     # Define prompts for each strategy based on the task
+    #strategy_prompts = {
+    #    'Strategy 1': template_all.format(text),
+    #    'Strategy 2': {
+    #        'POS': prompt2_pos.format(text),
+    #        'Chunking': prompt2_chunk.format(text),
+    #        'Parsing': prompt2_parse.format(text),
+    #    }.get(task, "Invalid Task Selection for Strategy 2"),
+    #    'Strategy 3': {
+    #        'POS': demon_pos,
+    #        'Chunking': demon_chunk,
+    #        'Parsing': demon_parse,
+    #    }.get(task, "Invalid Task Selection for Strategy 3"),
+    #}
 # Gradio interface
 iface = gr.Interface(
     inputs=[
         gr.Dropdown(model_options, label="Select Model"),
         gr.Dropdown(task_options, label="Select Task"),
+        gr.Textbox(label="Input Text", placeholder="Enter the text to process..."),
     ],
     outputs=[
         gr.Textbox(label="Strategy 1 QA Result"),
         gr.Textbox(label="Strategy 2 Instruction Result"),
         gr.Textbox(label="Strategy 3 Structured Prompting Result"),
     ],
+    title = "LLM Evaluator For Linguistic Scrutiny",
     theme = theme,
     live=False,
 )
 iface.launch()

run_llm.py CHANGED Viewed

@@ -15,6 +15,7 @@ from fastchat.model import load_model, get_conversation_template, add_model_args
 from nltk.tag.mapping import _UNIVERSAL_TAGS
 import gradio as gr
 uni_tags = list(_UNIVERSAL_TAGS)
 uni_tags[-1] = 'PUNC'
@@ -28,7 +29,6 @@ syntags = ['NP', 'S', 'VP', 'ADJP', 'ADVP', 'SBAR', 'TOP', 'PP', 'POS', 'NAC', "
 # Read the OpenAI key from the environment (e.g. a Space secret named
 # OPENAI_API_KEY) instead of hard-coding it in the repository.
 # The key that was previously committed on this line is public and must be revoked.
 import os
 openai.api_key = os.environ.get("OPENAI_API_KEY")
 # determinant vs. determiner
 # https://wikidiff.com/determiner/determinant
 ents_prompt = ['Noun','Verb','Adjective','Adverb','Preposition/Subord','Coordinating Conjunction',# 'Cardinal Number',
@@ -48,33 +48,23 @@ ents_prompt = ents_prompt_uni_tags + ents_prompt
 for i, j in zip(ents, ents_prompt):
     print(i, j)
-# raise
 model_mapping = {
-    # 'gpt3': 'gpt-3',
     'gpt3.5': 'gpt-3.5-turbo-0613',
     'vicuna-7b': 'lmsys/vicuna-7b-v1.3',
     'vicuna-13b': 'lmsys/vicuna-13b-v1.3',
     'vicuna-33b': 'lmsys/vicuna-33b-v1.3',
     'fastchat-t5': 'lmsys/fastchat-t5-3b-v1.0',
-    # 'llama2-7b': 'meta-llama/Llama-2-7b-hf',
-    # 'llama2-13b': 'meta-llama/Llama-2-13b-hf',
-    # 'llama2-70b': 'meta-llama/Llama-2-70b-hf',
     'llama-7b': './llama/hf/7B',
     'llama-13b': './llama/hf/13B',
     'llama-30b': './llama/hf/30B',
-    # 'llama-65b': './llama/hf/65B',
     'alpaca': './alpaca-7B',
-    # 'koala-7b': 'koala-7b',
-    # 'koala-13b': 'koala-13b',
 }
 with open('sample_uniform_1k_2.txt', 'r') as f:
     selected_idx = f.readlines()
 selected_idx = [int(i.strip()) for i in selected_idx]#[s:e]
 ptb = []
 with open('ptb.jsonl', 'r') as f:
     for l in f:
@@ -100,3 +90,82 @@ with open('demonstration_3_42_chunk.txt', 'r') as f:
 with open('demonstration_3_42_parse.txt', 'r') as f:
     demon_parse = f.read()

 from nltk.tag.mapping import _UNIVERSAL_TAGS
 import gradio as gr
+from transformers import pipeline
 uni_tags = list(_UNIVERSAL_TAGS)
 uni_tags[-1] = 'PUNC'
 # Read the OpenAI key from the environment (e.g. a Space secret named
 # OPENAI_API_KEY) instead of hard-coding it in the repository.
 # The key that was previously committed on this line is public and must be revoked.
 import os
 openai.api_key = os.environ.get("OPENAI_API_KEY")
 # determinant vs. determiner
 # https://wikidiff.com/determiner/determinant
 ents_prompt = ['Noun','Verb','Adjective','Adverb','Preposition/Subord','Coordinating Conjunction',# 'Cardinal Number',
 for i, j in zip(ents, ents_prompt):
     print(i, j)
 model_mapping = {
     'gpt3.5': 'gpt-3.5-turbo-0613',
     'vicuna-7b': 'lmsys/vicuna-7b-v1.3',
     'vicuna-13b': 'lmsys/vicuna-13b-v1.3',
     'vicuna-33b': 'lmsys/vicuna-33b-v1.3',
     'fastchat-t5': 'lmsys/fastchat-t5-3b-v1.0',
     'llama-7b': './llama/hf/7B',
     'llama-13b': './llama/hf/13B',
     'llama-30b': './llama/hf/30B',
     'alpaca': './alpaca-7B',
 }
 with open('sample_uniform_1k_2.txt', 'r') as f:
     selected_idx = f.readlines()
 selected_idx = [int(i.strip()) for i in selected_idx]#[s:e]
 ptb = []
 with open('ptb.jsonl', 'r') as f:
     for l in f:
 with open('demonstration_3_42_parse.txt', 'r') as f:
     demon_parse = f.read()
+# UI theme and the text-generation model shared by every request.
+theme = gr.themes.Soft()
+# NOTE(review): this rebinds the name `pipeline`, shadowing the factory function
+# imported from transformers, so no second pipeline can be created afterwards.
+# The model is fixed here; the model chosen in the dropdown does not affect it.
+pipeline = pipeline(task="text-generation", model="lmsys/vicuna-7b-v1.3")
+# Dropdown options for model and task
+model_options = list(model_mapping.keys())
+task_options = ['POS', 'Chunking', 'Parsing']
+# Function to process text based on model and task
+def process_text(model_name, task, text):
+    gid_list = selected_idx[0:20]
+    for gid in tqdm(gid_list, desc='Query'):
+            text = ptb[gid]['text']
+    #if model_name is 'gpt3.5': 'gpt-3.5-turbo-0613',
+    #elif model_name is 'vicuna-7b': 'lmsys/vicuna-7b-v1.3',
+    #elif model_name is 'vicuna-13b': 'lmsys/vicuna-13b-v1.3',
+    #elif model_name is 'vicuna-33b': 'lmsys/vicuna-33b-v1.3',
+    #elif model_name is 'fastchat-t5': 'lmsys/fastchat-t5-3b-v1.0',
+    #elif model_name is 'llama-7b': './llama/hf/7B',
+    #elif model_name is 'llama-13b': './llama/hf/13B',
+    #elif model_name is 'llama-30b': './llama/hf/30B',
+    #elif model_name is 'alpaca': './alpaca-7B',
+    if task == 'POS':
+        strategy1 = pipeline(template_all.format(text))
+        strategy2 = pipeline(prompt2_pos.format(text))
+        strategy3 = pipeline(demon_pos)
+        return (strategy1, strategy2, strategy3)
+    elif task == 'Chunking':
+        strategy1 = pipeline(template_all.format(text))
+        strategy2 = pipeline(prompt2_chunk.format(text))
+        strategy3 = pipeline(demon_chunk)
+        return (strategy1, strategy2, strategy3)
+    elif task == 'Parsing':
+        strategy1 = pipeline(template_all.format(text))
+        strategy2 = pipeline(prompt2_parse.format(text))
+        strategy3 = pipeline(demon_parse)
+        return (strategy1, strategy2, strategy3)
+    # Define prompts for each strategy based on the task
+    #strategy_prompts = {
+    #    'Strategy 1': template_all.format(text),
+    #    'Strategy 2': {
+    #        'POS': prompt2_pos.format(text),
+    #        'Chunking': prompt2_chunk.format(text),
+    #        'Parsing': prompt2_parse.format(text),
+    #    }.get(task, "Invalid Task Selection for Strategy 2"),
+    #    'Strategy 3': {
+    #        'POS': demon_pos,
+    #        'Chunking': demon_chunk,
+    #        'Parsing': demon_parse,
+    #    }.get(task, "Invalid Task Selection for Strategy 3"),
+    #}
+# Gradio interface
+iface = gr.Interface(
+    fn=process_text,
+    inputs=[
+        gr.Dropdown(model_options, label="Select Model"),
+        gr.Dropdown(task_options, label="Select Task"),
+        gr.Textbox(label="Input Text", placeholder="Enter the text to process..."),
+    ],
+    outputs=[
+        gr.Textbox(label="Strategy 1 QA Result"),
+        gr.Textbox(label="Strategy 2 Instruction Result"),
+        gr.Textbox(label="Strategy 3 Structured Prompting Result"),
+    ],
+    title = "LLM Evaluator For Linguistic Scrutiny",
+    theme = theme,
+    live=False,
+)
+iface.launch()