# Spaces: Runtime error  (hosting-page status text captured with this file; not part of the program)
import os | |
import sys | |
import json | |
import time | |
import openai | |
import pickle | |
import argparse | |
import requests | |
from tqdm import tqdm | |
import torch | |
from transformers import AutoTokenizer, AutoModelForCausalLM, LlamaForCausalLM, LlamaTokenizer | |
from fastchat.model import load_model, get_conversation_template, add_model_args | |
from nltk.tag.mapping import _UNIVERSAL_TAGS | |
import gradio as gr | |
# Mutable copy of NLTK's universal tag names, with the final entry replaced
# by the spelled-out label 'PUNC'.
# NOTE(review): presumably the last NLTK entry is the '.' punctuation tag --
# confirm against the installed NLTK version.
uni_tags = list(_UNIVERSAL_TAGS)
uni_tags[-1] = 'PUNC'
# Begin / Inside / Outside position labels.
bio_tags = ['B', 'I', 'O']

# Chunk-type labels.
# NOTE(review): looks like the CoNLL-2000 chunk inventory plus 'O' -- confirm.
chunk_tags = [
    'ADJP', 'ADVP', 'CONJP', 'INTJ', 'LST', 'NP', 'O', 'PP', 'PRT', 'SBAR',
    'UCP', 'VP',
]

# Labels accepted for syntactic (parse-tree) nodes; the list mixes
# phrase-level and word-level tags.
syntags = [
    'NP', 'S', 'VP', 'ADJP', 'ADVP', 'SBAR', 'TOP', 'PP', 'POS', 'NAC', "''",
    'SINV', 'PRN', 'QP', 'WHNP', 'RB', 'FRAG',
    'WHADVP', 'NX', 'PRT', 'VBZ', 'VBP', 'MD', 'NN', 'WHPP', 'SQ', 'SBARQ',
    'LST', 'INTJ', 'X', 'UCP', 'CONJP', 'NNP', 'CD', 'JJ',
    'VBD', 'WHADJP', 'PRP', 'RRC', 'NNS', 'SYM', 'CC',
]
# Take the OpenAI credential from the environment instead of embedding it in
# source. The key that used to be hard-coded on this line has been exposed and
# should be revoked. If OPENAI_API_KEY is unset this assigns None, and any
# OpenAI request will fail until a key is provided.
openai.api_key = os.environ.get("OPENAI_API_KEY")
# determinant vs. determiner
# https://wikidiff.com/determiner/determinant

# Human-readable category names. The trailing [7:] slice drops the first seven
# (word-level) names, so only 'Noun Phrase' onwards is kept.
# 'Cardinal Number' and 'Fragment T-unit' are intentionally left out.
ents_prompt = [
    'Noun', 'Verb', 'Adjective', 'Adverb', 'Preposition/Subord',
    'Coordinating Conjunction',
    'Determiner',
    'Noun Phrase', 'Verb Phrase', 'Adjective Phrase', 'Adverb Phrase',
    'Preposition Phrase', 'Conjunction Phrase', 'Coordinate Phrase',
    'Quantitave Phrase', 'Complex Nominal',
    'Clause', 'Dependent Clause', 'Fragment Clause', 'T-unit',
    'Complex T-unit',
][7:]

# Short tag for each entry of ents_prompt, position by position; sliced the
# same way so the two lists stay aligned.
ents = [
    'NN', 'VB', 'JJ', 'RB', 'IN', 'CC', 'DT',
    'NP', 'VP', 'ADJP', 'ADVP', 'PP', 'CONJP', 'CP', 'QP', 'CN',
    'C', 'DC', 'FC', 'T', 'CT',
][7:]

# Human-readable names for the twelve universal POS tags.
# NOTE(review): assumed to be in the same order as NLTK's universal tag
# tuple -- confirm.
ents_prompt_uni_tags = [
    'Verb', 'Noun', 'Pronoun', 'Adjective', 'Adverb',
    'Preposition and Postposition', 'Coordinating Conjunction',
    'Determiner', 'Cardinal Number', 'Particles or other function words',
    'Words that cannot be assigned a POS tag', 'Punctuation',
]
# Put the universal POS tags (and their readable names) in front of the
# phrase/clause-level entries so both lists cover word- and phrase-level
# categories in the same order.
ents = uni_tags + ents
ents_prompt = ents_prompt_uni_tags + ents_prompt

# Debug listing of every tag next to its readable name. zip() stops at the
# shorter list, so a length mismatch would be truncated silently.
for tag, label in zip(ents, ents_prompt):
    print(tag, label)
# Short model name -> model identifier (an API model name, a hub-style
# "org/name" id, or a local checkpoint path). Commented-out entries are
# models that are currently disabled.
model_mapping = {
    # 'gpt3': 'gpt-3',
    'gpt3.5': 'gpt-3.5-turbo-0613',
    'vicuna-7b': 'lmsys/vicuna-7b-v1.3',
    'vicuna-13b': 'lmsys/vicuna-13b-v1.3',
    'vicuna-33b': 'lmsys/vicuna-33b-v1.3',
    'fastchat-t5': 'lmsys/fastchat-t5-3b-v1.0',
    # 'llama2-7b': 'meta-llama/Llama-2-7b-hf',
    # 'llama2-13b': 'meta-llama/Llama-2-13b-hf',
    # 'llama2-70b': 'meta-llama/Llama-2-70b-hf',
    'llama-7b': './llama/hf/7B',
    'llama-13b': './llama/hf/13B',
    'llama-30b': './llama/hf/30B',
    # 'llama-65b': './llama/hf/65B',
    'alpaca': './alpaca-7B',
    # 'koala-7b': 'koala-7b',
    # 'koala-13b': 'koala-13b',
}
# Selected sample indices: one integer per line. Blank lines (for example a
# trailing newline at end of file) are skipped instead of raising ValueError.
# NOTE(review): presumably these index into `ptb` below -- verify at the use site.
with open('sample_uniform_1k_2.txt', 'r', encoding='utf-8') as f:
    selected_idx = [int(line) for line in f if line.strip()]

# Corpus records in JSON Lines format: one JSON document per non-blank line.
with open('ptb.jsonl', 'r', encoding='utf-8') as f:
    ptb = [json.loads(line) for line in f if line.strip()]
## Prompt 1
# template_all takes one placeholder: the sentence.
# template_single takes two: the category name, then the sentence.
template_all = '''Please output the <Noun, Verb, Adjective, Adverb, Preposition/Subord, Coordinating Conjunction, Cardinal Number, Determiner, Noun Phrase, Verb Phrase, Adjective Phrase, Adverb Phrase, Preposition Phrase, Conjunction Phrase, Coordinate Phrase, Quantitave Phrase, Complex Nominal, Clause, Dependent Clause, Fragment Clause, T-unit, Complex T-unit, Fragment T-unit> in the following sentence without any additional text in json format: "{}"'''
template_single = '''Please output any <{}> in the following sentence one per line without any additional text: "{}"'''

## Prompt 2
# Each template takes one placeholder: the sentence.
prompt2_pos = '''Please pos tag the following sentence using Universal POS tag set without generating any additional text: {}'''
prompt2_parse = '''Generate textual representation of the constituency parse tree of the following sentence using Penn TreeBank tag set without outputing any additional text: {}'''
# An earlier "sentence chunking ... as in CoNLL 2000 shared task" wording was
# assigned to prompt2_chunk and then immediately overwritten; only the wording
# below ever took effect, so the dead assignment has been removed.
prompt2_chunk = '''Please chunk the following sentence in CoNLL 2000 format with BIO tags without outputing any additional text: {}'''
## Prompt 3
# Demonstration text for each task, read whole from its file.
# NOTE(review): presumably prepended to the query as few-shot examples --
# confirm where demon_* is used.
with open('demonstration_3_42_pos.txt', 'r', encoding='utf-8') as f:
    demon_pos = f.read()
with open('demonstration_3_42_chunk.txt', 'r', encoding='utf-8') as f:
    demon_chunk = f.read()
with open('demonstration_3_42_parse.txt', 'r', encoding='utf-8') as f:
    demon_parse = f.read()