Spaces:
Runtime error
Runtime error
Joshua Lansford
committed on
Commit
·
14d74fa
1
Parent(s):
9997114
Added demo and removed duplicate data from examples
Browse files- .vscode/launch.json +42 -2
- examples/phonetic/phonetic.csv +0 -0
- examples/piglattin/pig_lattin.csv +0 -0
- examples/piglattin/prepare_training_data.py +7 -2
- transmorgrify.py +42 -17
.vscode/launch.json
CHANGED
@@ -75,7 +75,7 @@
|
|
75 |
"--verbose",
|
76 |
]
|
77 |
},{
|
78 |
-
"name": "short Execute phonetic
|
79 |
"type": "python",
|
80 |
"request": "launch",
|
81 |
"program": "transmorgrify.py",
|
@@ -87,11 +87,51 @@
|
|
87 |
"--out_csv", "./phonetic_out.csv",
|
88 |
"--a_header", "English",
|
89 |
"--b_header", "Phonetic",
|
90 |
-
"--device", "0:1",
|
91 |
"--model", "phonetics_forward.tm",
|
92 |
"--verbose",
|
93 |
"--include_stats",
|
94 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
95 |
}
|
96 |
]
|
97 |
}
|
|
|
75 |
"--verbose",
|
76 |
]
|
77 |
},{
|
78 |
+
"name": "short Execute phonetic",
|
79 |
"type": "python",
|
80 |
"request": "launch",
|
81 |
"program": "transmorgrify.py",
|
|
|
87 |
"--out_csv", "./phonetic_out.csv",
|
88 |
"--a_header", "English",
|
89 |
"--b_header", "Phonetic",
|
|
|
90 |
"--model", "phonetics_forward.tm",
|
91 |
"--verbose",
|
92 |
"--include_stats",
|
93 |
]
|
94 |
+
},{
|
95 |
+
"name": "short Execute reverse phonetic",
|
96 |
+
"type": "python",
|
97 |
+
"request": "launch",
|
98 |
+
"program": "transmorgrify.py",
|
99 |
+
"console": "integratedTerminal",
|
100 |
+
"justMyCode": true,
|
101 |
+
"args": [
|
102 |
+
"--execute",
|
103 |
+
"--in_csv", "/home/lansford/Sync/projects/tf_over/sentance_transmogrifier/examples/phonetic/phonetic_short.csv",
|
104 |
+
"--out_csv", "./reverse_phonetic_out.csv",
|
105 |
+
"--b_header", "English",
|
106 |
+
"--a_header", "Phonetic",
|
107 |
+
"--model", "phonetics_backwards.tm",
|
108 |
+
"--verbose",
|
109 |
+
"--include_stats",
|
110 |
+
]
|
111 |
+
},{
|
112 |
+
"name": "gradio reverse phonetic",
|
113 |
+
"type": "python",
|
114 |
+
"request": "launch",
|
115 |
+
"program": "transmorgrify.py",
|
116 |
+
"console": "integratedTerminal",
|
117 |
+
"justMyCode": true,
|
118 |
+
"args": [
|
119 |
+
"--gradio",
|
120 |
+
"--model", "phonetics_backwards.tm",
|
121 |
+
"--share",
|
122 |
+
]
|
123 |
+
},{
|
124 |
+
"name": "gradio forward phonetic",
|
125 |
+
"type": "python",
|
126 |
+
"request": "launch",
|
127 |
+
"program": "transmorgrify.py",
|
128 |
+
"console": "integratedTerminal",
|
129 |
+
"justMyCode": true,
|
130 |
+
"args": [
|
131 |
+
"--gradio",
|
132 |
+
"--model", "phonetics_forward.tm",
|
133 |
+
"--share",
|
134 |
+
]
|
135 |
}
|
136 |
]
|
137 |
}
|
examples/phonetic/phonetic.csv
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
examples/piglattin/pig_lattin.csv
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
examples/piglattin/prepare_training_data.py
CHANGED
@@ -49,15 +49,20 @@ def english_to_piglattin( english ):
|
|
49 |
|
50 |
|
51 |
def main():
|
|
|
52 |
with open( "spa.csv", "rt" ) as fin:
|
53 |
with open( "pig_lattin.csv", "wt" ) as f_out:
|
54 |
f_out.write( "English,Piglattin\n" )
|
55 |
for line in fin:
|
56 |
english = line.split( "\t" )[0]
|
57 |
english = english.replace( ",", " " )
|
58 |
-
piglattin = english_to_piglattin( english )
|
59 |
|
60 |
-
|
|
|
|
|
|
|
|
|
|
|
61 |
|
62 |
if __name__ == '__main__':
|
63 |
main()
|
|
|
49 |
|
50 |
|
51 |
def main():
    """Build ``pig_lattin.csv`` from the sentences in ``spa.csv``.

    For every line of ``spa.csv`` the text before the first tab is taken as
    the English sentence, and commas are replaced by spaces so the sentence
    fits in a single CSV cell. Each distinct sentence is written once, in
    input order, as an ``English,Piglattin`` row.
    """
    # A set keeps the duplicate check O(1) per line; a list would rescan
    # every sentence seen so far on each lookup.
    used_englishes = set()
    with open( "spa.csv", "rt" ) as fin:
        with open( "pig_lattin.csv", "wt" ) as f_out:
            f_out.write( "English,Piglattin\n" )
            for line in fin:
                english = line.split( "\t" )[0]
                english = english.replace( ",", " " )

                # Only the first occurrence of a sentence is emitted.
                if english in used_englishes:
                    continue
                used_englishes.add( english )

                piglattin = english_to_piglattin( english )
                f_out.write( f"{english},{piglattin}\n" )
|
66 |
|
67 |
if __name__ == '__main__':
|
68 |
main()
|
transmorgrify.py
CHANGED
@@ -16,7 +16,6 @@ FILE_VERSION = 1
|
|
16 |
|
17 |
class Transmorgrifyer:
|
18 |
def train( self, from_sentances, to_sentances, iterations, device, trailing_context, leading_context, verbose ):
|
19 |
-
|
20 |
X,Y = _parse_for_training( from_sentances, to_sentances, num_pre_context_chars=leading_context, num_post_context_chars=trailing_context )
|
21 |
|
22 |
#train and save the action_model
|
@@ -32,6 +31,7 @@ class Transmorgrifyer:
|
|
32 |
self.iterations = iterations
|
33 |
|
34 |
def save( self, model ):
|
|
|
35 |
with zipfile.ZipFile( model, mode="w", compression=zipfile.ZIP_DEFLATED, compresslevel=9 ) as myzip:
|
36 |
with myzip.open( 'params.json', mode='w' ) as out:
|
37 |
out.write( json.dumps({
|
@@ -48,13 +48,14 @@ class Transmorgrifyer:
|
|
48 |
os.unlink( temp_filename )
|
49 |
|
50 |
def load( self, model ):
|
|
|
51 |
with zipfile.ZipFile( model, mode='r' ) as zip:
|
52 |
with zip.open( 'params.json' ) as fin:
|
53 |
params = json.loads( fin.read().decode() )
|
54 |
if params['version'] > FILE_VERSION: raise Exception( f"Version {params['version']} greater than {FILE_VERSION}" )
|
55 |
-
self.leading_context = params['leading_context']
|
56 |
-
self.trailing_context = params['trailing_context']
|
57 |
-
self.iterations = params['iterations']
|
58 |
temp_filename = _mktemp()
|
59 |
with zip.open( 'action.cb' ) as fin:
|
60 |
with open( temp_filename, "wb" ) as fout:
|
@@ -80,6 +81,19 @@ class Transmorgrifyer:
|
|
80 |
if verbose and i % 10 == 0:
|
81 |
print( f"{i} of {len(from_sentances)}" )
|
82 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
83 |
def _list_trace( trace ):
|
84 |
if trace.parrent is None:
|
85 |
result = [trace]
|
@@ -331,7 +345,7 @@ def _train_catboost( X, y, iterations, device, verbose, model_piece, learning_ra
|
|
331 |
model.fit( train_pool, eval_set=validation_pool, verbose=True )
|
332 |
passed = True
|
333 |
|
334 |
-
if( verbose ): print( '{} is fitted: {}'
|
335 |
if( verbose ): print( '{} params:\n{}'.format(model_piece,model.get_params()))
|
336 |
|
337 |
return model
|
@@ -511,35 +525,40 @@ def execute( include_stats, in_csv, out_csv, a_header, b_header, model, execute_
|
|
511 |
})
|
512 |
pd_results.to_csv( out_csv )
|
513 |
|
514 |
-
|
|
|
|
|
|
|
515 |
|
516 |
def main():
|
517 |
parser = argparse.ArgumentParser(
|
518 |
prog = 'transmorgrify.py',
|
519 |
description = 'Converts text from one to another according to a model.',
|
520 |
epilog = '(C) Joshua Lansford')
|
521 |
-
parser.add_argument('-
|
|
|
|
|
|
|
|
|
522 |
parser.add_argument('-o', '--out_csv', help='The csv to write conversion to', default='out.csv' )
|
523 |
parser.add_argument('-a', '--a_header', help='The column header for training or transforming from', default="source" )
|
524 |
parser.add_argument('-b', '--b_header', help='The column header for training the transformation to', default="target" )
|
525 |
parser.add_argument('-m', '--model',help='The model file to create during training or use during transformation', default='model.tm' )
|
526 |
-
parser.add_argument('-n', '--iterations', help='The number of iterations to train', default=
|
527 |
parser.add_argument('-d', '--device', help='Which device, i.e. if useing GPU', default='cpu' )
|
528 |
parser.add_argument('-x', '--context', help='The number of leading and trailing chars to use as context', default=7 )
|
529 |
-
parser.add_argument('-t', '--train', action='store_true', help='Train a model instead of executing a model')
|
530 |
parser.add_argument('-p', '--train_percentage', help="The percentage of data to train on, leaving the rest for testing.")
|
531 |
-
parser.add_argument('-e', '--execute', action='store_true', help='Use an existing trained model.')
|
532 |
parser.add_argument('-v', '--verbose', action='store_true', help='Talks alot?' )
|
533 |
-
parser.add_argument('-
|
534 |
|
535 |
|
536 |
args = parser.parse_args()
|
537 |
|
538 |
-
if not args.train and not args.execute: print( "Must include --execute and/or --
|
539 |
|
540 |
|
541 |
if args.train:
|
542 |
-
train_percentage = args.train_percentage
|
543 |
if train_percentage is None:
|
544 |
if args.execute:
|
545 |
train_percentage = 50
|
@@ -550,10 +569,10 @@ def main():
|
|
550 |
a_header=args.a_header,
|
551 |
b_header=args.b_header,
|
552 |
model=args.model,
|
553 |
-
iterations=args.iterations,
|
554 |
device=args.device,
|
555 |
-
leading_context=args.context,
|
556 |
-
trailing_context=args.context,
|
557 |
train_percentage=train_percentage,
|
558 |
verbose=args.verbose,
|
559 |
)
|
@@ -566,7 +585,7 @@ def main():
|
|
566 |
else:
|
567 |
execute_percentage = 100
|
568 |
else:
|
569 |
-
execute_percentage = 100-args.train_percentage
|
570 |
execute(
|
571 |
include_stats=args.include_stats,
|
572 |
in_csv=args.in_csv,
|
@@ -579,6 +598,12 @@ def main():
|
|
579 |
)
|
580 |
|
581 |
|
|
|
|
|
|
|
|
|
|
|
|
|
582 |
|
583 |
if __name__ == '__main__':
|
584 |
main()
|
|
|
16 |
|
17 |
class Transmorgrifyer:
|
18 |
def train( self, from_sentances, to_sentances, iterations, device, trailing_context, leading_context, verbose ):
|
|
|
19 |
X,Y = _parse_for_training( from_sentances, to_sentances, num_pre_context_chars=leading_context, num_post_context_chars=trailing_context )
|
20 |
|
21 |
#train and save the action_model
|
|
|
31 |
self.iterations = iterations
|
32 |
|
33 |
def save( self, model ):
|
34 |
+
self.name = model
|
35 |
with zipfile.ZipFile( model, mode="w", compression=zipfile.ZIP_DEFLATED, compresslevel=9 ) as myzip:
|
36 |
with myzip.open( 'params.json', mode='w' ) as out:
|
37 |
out.write( json.dumps({
|
|
|
48 |
os.unlink( temp_filename )
|
49 |
|
50 |
def load( self, model ):
|
51 |
+
self.name = model
|
52 |
with zipfile.ZipFile( model, mode='r' ) as zip:
|
53 |
with zip.open( 'params.json' ) as fin:
|
54 |
params = json.loads( fin.read().decode() )
|
55 |
if params['version'] > FILE_VERSION: raise Exception( f"Version {params['version']} greater than {FILE_VERSION}" )
|
56 |
+
self.leading_context = int(params['leading_context'])
|
57 |
+
self.trailing_context = int(params['trailing_context'])
|
58 |
+
self.iterations = int(params['iterations'])
|
59 |
temp_filename = _mktemp()
|
60 |
with zip.open( 'action.cb' ) as fin:
|
61 |
with open( temp_filename, "wb" ) as fout:
|
|
|
81 |
if verbose and i % 10 == 0:
|
82 |
print( f"{i} of {len(from_sentances)}" )
|
83 |
|
84 |
+
def demo( self, share=False ):
    """Launch an interactive gradio page for this model.

    The page shows a title plus an input and an output textbox. Every
    change of the input text is passed through ``self.execute`` and the
    first result is displayed in the output box.

    share: forwarded unchanged to gradio's ``launch( share=... )``.
    """
    # Imported here so gradio is only required when a demo is requested.
    import gradio as gr

    def gradio_function( text ):
        # execute() takes a collection of sentences; wrap the single input
        # and unwrap the single result.
        return list(self.execute( [text] ))[0]

    # ``name`` is only assigned by save()/load(); fall back to a generic
    # title so a freshly trained, unsaved model can still be demoed.
    title = getattr( self, "name", "Transmorgrifyer" )

    with gr.Blocks() as demo:
        gr.Markdown( title )
        inp = gr.Textbox( label="Input" )
        out = gr.Textbox( label="Output" )
        inp.change( gradio_function, inputs=[inp], outputs=[out] )
    demo.launch( share=share )
|
96 |
+
|
97 |
def _list_trace( trace ):
|
98 |
if trace.parrent is None:
|
99 |
result = [trace]
|
|
|
345 |
model.fit( train_pool, eval_set=validation_pool, verbose=True )
|
346 |
passed = True
|
347 |
|
348 |
+
if( verbose ): print( '{} is fitted: {}'.format(model_piece,model.is_fitted()))
|
349 |
if( verbose ): print( '{} params:\n{}'.format(model_piece,model.get_params()))
|
350 |
|
351 |
return model
|
|
|
525 |
})
|
526 |
pd_results.to_csv( out_csv )
|
527 |
|
528 |
+
def safe_float( str ):
    """Convert ``str`` to a float, passing ``None`` through unchanged.

    Returns ``float(str)`` for any non-None argument and ``None`` otherwise.
    """
    # NOTE: the parameter name shadows the builtin ``str``; it is kept so
    # the function's signature stays the same for existing callers.
    return None if str is None else float(str)
|
532 |
|
533 |
def main():
|
534 |
parser = argparse.ArgumentParser(
|
535 |
prog = 'transmorgrify.py',
|
536 |
description = 'Converts text from one to another according to a model.',
|
537 |
epilog = '(C) Joshua Lansford')
|
538 |
+
parser.add_argument('-t', '--train', action='store_true', help='Train a model instead of executing a model')
|
539 |
+
parser.add_argument('-e', '--execute', action='store_true', help='Use an existing trained model.')
|
540 |
+
parser.add_argument('-g', '--gradio', action='store_true', help='Start a gradio demo with the selected model.' )
|
541 |
+
parser.add_argument('-s', '--share', action='store_true', help="Share the gradio app with a temporary public URL." )
|
542 |
+
parser.add_argument('-i', '--in_csv', help='The csv to read training or input data from', default='in.csv' )
|
543 |
parser.add_argument('-o', '--out_csv', help='The csv to write conversion to', default='out.csv' )
|
544 |
parser.add_argument('-a', '--a_header', help='The column header for training or transforming from', default="source" )
|
545 |
parser.add_argument('-b', '--b_header', help='The column header for training the transformation to', default="target" )
|
546 |
parser.add_argument('-m', '--model',help='The model file to create during training or use during transformation', default='model.tm' )
|
547 |
+
parser.add_argument('-n', '--iterations', help='The number of iterations to train', default=2000 )
|
548 |
parser.add_argument('-d', '--device', help='Which device, i.e. if useing GPU', default='cpu' )
|
549 |
parser.add_argument('-x', '--context', help='The number of leading and trailing chars to use as context', default=7 )
|
|
|
550 |
parser.add_argument('-p', '--train_percentage', help="The percentage of data to train on, leaving the rest for testing.")
|
|
|
551 |
parser.add_argument('-v', '--verbose', action='store_true', help='Talks alot?' )
|
552 |
+
parser.add_argument('-c', '--include_stats', action='store_true', help='Use b_header to compute stats and add to output csv.')
|
553 |
|
554 |
|
555 |
args = parser.parse_args()
|
556 |
|
557 |
+
if not args.train and not args.execute and not args.gradio: print( "Must include --execute, --train and/or --gradio to do something." )
|
558 |
|
559 |
|
560 |
if args.train:
|
561 |
+
train_percentage = safe_float(args.train_percentage)
|
562 |
if train_percentage is None:
|
563 |
if args.execute:
|
564 |
train_percentage = 50
|
|
|
569 |
a_header=args.a_header,
|
570 |
b_header=args.b_header,
|
571 |
model=args.model,
|
572 |
+
iterations=int(args.iterations),
|
573 |
device=args.device,
|
574 |
+
leading_context=int(args.context),
|
575 |
+
trailing_context=int(args.context),
|
576 |
train_percentage=train_percentage,
|
577 |
verbose=args.verbose,
|
578 |
)
|
|
|
585 |
else:
|
586 |
execute_percentage = 100
|
587 |
else:
|
588 |
+
execute_percentage = 100-safe_float(args.train_percentage)
|
589 |
execute(
|
590 |
include_stats=args.include_stats,
|
591 |
in_csv=args.in_csv,
|
|
|
598 |
)
|
599 |
|
600 |
|
601 |
+
if args.gradio:
    tm = Transmorgrifyer()
    tm.load( args.model )

    # --share is a store_true flag, so args.share is always a bool and
    # never None; pass it through so the public URL is strictly opt-in.
    tm.demo( share=args.share )
|
606 |
+
|
607 |
|
608 |
if __name__ == '__main__':
|
609 |
main()
|