Joshua Lansford committed on
Commit
14d74fa
·
1 Parent(s): 9997114

Added demo and removed duplicate data from examples

Browse files
.vscode/launch.json CHANGED
@@ -75,7 +75,7 @@
75
  "--verbose",
76
  ]
77
  },{
78
- "name": "short Execute phonetic gpu",
79
  "type": "python",
80
  "request": "launch",
81
  "program": "transmorgrify.py",
@@ -87,11 +87,51 @@
87
  "--out_csv", "./phonetic_out.csv",
88
  "--a_header", "English",
89
  "--b_header", "Phonetic",
90
- "--device", "0:1",
91
  "--model", "phonetics_forward.tm",
92
  "--verbose",
93
  "--include_stats",
94
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  }
96
  ]
97
  }
 
75
  "--verbose",
76
  ]
77
  },{
78
+ "name": "short Execute phonetic",
79
  "type": "python",
80
  "request": "launch",
81
  "program": "transmorgrify.py",
 
87
  "--out_csv", "./phonetic_out.csv",
88
  "--a_header", "English",
89
  "--b_header", "Phonetic",
 
90
  "--model", "phonetics_forward.tm",
91
  "--verbose",
92
  "--include_stats",
93
  ]
94
+ },{
95
+ "name": "short Execute reverse phonetic",
96
+ "type": "python",
97
+ "request": "launch",
98
+ "program": "transmorgrify.py",
99
+ "console": "integratedTerminal",
100
+ "justMyCode": true,
101
+ "args": [
102
+ "--execute",
103
+ "--in_csv", "/home/lansford/Sync/projects/tf_over/sentance_transmogrifier/examples/phonetic/phonetic_short.csv",
104
+ "--out_csv", "./reverse_phonetic_out.csv",
105
+ "--b_header", "English",
106
+ "--a_header", "Phonetic",
107
+ "--model", "phonetics_backwards.tm",
108
+ "--verbose",
109
+ "--include_stats",
110
+ ]
111
+ },{
112
+ "name": "gradio reverse phonetic",
113
+ "type": "python",
114
+ "request": "launch",
115
+ "program": "transmorgrify.py",
116
+ "console": "integratedTerminal",
117
+ "justMyCode": true,
118
+ "args": [
119
+ "--gradio",
120
+ "--model", "phonetics_backwards.tm",
121
+ "--share",
122
+ ]
123
+ },{
124
+ "name": "gradio forward phonetic",
125
+ "type": "python",
126
+ "request": "launch",
127
+ "program": "transmorgrify.py",
128
+ "console": "integratedTerminal",
129
+ "justMyCode": true,
130
+ "args": [
131
+ "--gradio",
132
+ "--model", "phonetics_forward.tm",
133
+ "--share",
134
+ ]
135
  }
136
  ]
137
  }
examples/phonetic/phonetic.csv CHANGED
The diff for this file is too large to render. See raw diff
 
examples/piglattin/pig_lattin.csv CHANGED
The diff for this file is too large to render. See raw diff
 
examples/piglattin/prepare_training_data.py CHANGED
@@ -49,15 +49,20 @@ def english_to_piglattin( english ):
49
 
50
 
51
  def main():
 
52
  with open( "spa.csv", "rt" ) as fin:
53
  with open( "pig_lattin.csv", "wt" ) as f_out:
54
  f_out.write( "English,Piglattin\n" )
55
  for line in fin:
56
  english = line.split( "\t" )[0]
57
  english = english.replace( ",", " " )
58
- piglattin = english_to_piglattin( english )
59
 
60
- f_out.write( f"{english},{piglattin}\n" )
 
 
 
 
 
61
 
62
  if __name__ == '__main__':
63
  main()
 
49
 
50
 
51
  def main():
52
+ used_englishes = []
53
  with open( "spa.csv", "rt" ) as fin:
54
  with open( "pig_lattin.csv", "wt" ) as f_out:
55
  f_out.write( "English,Piglattin\n" )
56
  for line in fin:
57
  english = line.split( "\t" )[0]
58
  english = english.replace( ",", " " )
 
59
 
60
+ if english not in used_englishes:
61
+ used_englishes.append(english)
62
+
63
+ piglattin = english_to_piglattin( english )
64
+
65
+ f_out.write( f"{english},{piglattin}\n" )
66
 
67
  if __name__ == '__main__':
68
  main()
transmorgrify.py CHANGED
@@ -16,7 +16,6 @@ FILE_VERSION = 1
16
 
17
  class Transmorgrifyer:
18
  def train( self, from_sentances, to_sentances, iterations, device, trailing_context, leading_context, verbose ):
19
-
20
  X,Y = _parse_for_training( from_sentances, to_sentances, num_pre_context_chars=leading_context, num_post_context_chars=trailing_context )
21
 
22
  #train and save the action_model
@@ -32,6 +31,7 @@ class Transmorgrifyer:
32
  self.iterations = iterations
33
 
34
  def save( self, model ):
 
35
  with zipfile.ZipFile( model, mode="w", compression=zipfile.ZIP_DEFLATED, compresslevel=9 ) as myzip:
36
  with myzip.open( 'params.json', mode='w' ) as out:
37
  out.write( json.dumps({
@@ -48,13 +48,14 @@ class Transmorgrifyer:
48
  os.unlink( temp_filename )
49
 
50
  def load( self, model ):
 
51
  with zipfile.ZipFile( model, mode='r' ) as zip:
52
  with zip.open( 'params.json' ) as fin:
53
  params = json.loads( fin.read().decode() )
54
  if params['version'] > FILE_VERSION: raise Exception( f"Version {params['version']} greater than {FILE_VERSION}" )
55
- self.leading_context = params['leading_context']
56
- self.trailing_context = params['trailing_context']
57
- self.iterations = params['iterations']
58
  temp_filename = _mktemp()
59
  with zip.open( 'action.cb' ) as fin:
60
  with open( temp_filename, "wb" ) as fout:
@@ -80,6 +81,19 @@ class Transmorgrifyer:
80
  if verbose and i % 10 == 0:
81
  print( f"{i} of {len(from_sentances)}" )
82
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
  def _list_trace( trace ):
84
  if trace.parrent is None:
85
  result = [trace]
@@ -331,7 +345,7 @@ def _train_catboost( X, y, iterations, device, verbose, model_piece, learning_ra
331
  model.fit( train_pool, eval_set=validation_pool, verbose=True )
332
  passed = True
333
 
334
- if( verbose ): print( '{} is fitted: {}',format(model_piece,model.is_fitted()))
335
  if( verbose ): print( '{} params:\n{}'.format(model_piece,model.get_params()))
336
 
337
  return model
@@ -511,35 +525,40 @@ def execute( include_stats, in_csv, out_csv, a_header, b_header, model, execute_
511
  })
512
  pd_results.to_csv( out_csv )
513
 
514
-
 
 
 
515
 
516
  def main():
517
  parser = argparse.ArgumentParser(
518
  prog = 'transmorgrify.py',
519
  description = 'Converts text from one to another according to a model.',
520
  epilog = '(C) Joshua Lansford')
521
- parser.add_argument('-i', '--in_csv', help='The csv to read training or input data from', required=True )
 
 
 
 
522
  parser.add_argument('-o', '--out_csv', help='The csv to write conversion to', default='out.csv' )
523
  parser.add_argument('-a', '--a_header', help='The column header for training or transforming from', default="source" )
524
  parser.add_argument('-b', '--b_header', help='The column header for training the transformation to', default="target" )
525
  parser.add_argument('-m', '--model',help='The model file to create during training or use during transformation', default='model.tm' )
526
- parser.add_argument('-n', '--iterations', help='The number of iterations to train', default=1000 )
527
  parser.add_argument('-d', '--device', help='Which device, i.e. if useing GPU', default='cpu' )
528
  parser.add_argument('-x', '--context', help='The number of leading and trailing chars to use as context', default=7 )
529
- parser.add_argument('-t', '--train', action='store_true', help='Train a model instead of executing a model')
530
  parser.add_argument('-p', '--train_percentage', help="The percentage of data to train on, leaving the rest for testing.")
531
- parser.add_argument('-e', '--execute', action='store_true', help='Use an existing trained model.')
532
  parser.add_argument('-v', '--verbose', action='store_true', help='Talks alot?' )
533
- parser.add_argument('-s', '--include_stats', action='store_true', help='Use b_header to compute stats and add to output csv.')
534
 
535
 
536
  args = parser.parse_args()
537
 
538
- if not args.train and not args.execute: print( "Must include --execute and/or --train to do something." )
539
 
540
 
541
  if args.train:
542
- train_percentage = args.train_percentage
543
  if train_percentage is None:
544
  if args.execute:
545
  train_percentage = 50
@@ -550,10 +569,10 @@ def main():
550
  a_header=args.a_header,
551
  b_header=args.b_header,
552
  model=args.model,
553
- iterations=args.iterations,
554
  device=args.device,
555
- leading_context=args.context,
556
- trailing_context=args.context,
557
  train_percentage=train_percentage,
558
  verbose=args.verbose,
559
  )
@@ -566,7 +585,7 @@ def main():
566
  else:
567
  execute_percentage = 100
568
  else:
569
- execute_percentage = 100-args.train_percentage
570
  execute(
571
  include_stats=args.include_stats,
572
  in_csv=args.in_csv,
@@ -579,6 +598,12 @@ def main():
579
  )
580
 
581
 
 
 
 
 
 
 
582
 
583
  if __name__ == '__main__':
584
  main()
 
16
 
17
  class Transmorgrifyer:
18
  def train( self, from_sentances, to_sentances, iterations, device, trailing_context, leading_context, verbose ):
 
19
  X,Y = _parse_for_training( from_sentances, to_sentances, num_pre_context_chars=leading_context, num_post_context_chars=trailing_context )
20
 
21
  #train and save the action_model
 
31
  self.iterations = iterations
32
 
33
  def save( self, model ):
34
+ self.name = model
35
  with zipfile.ZipFile( model, mode="w", compression=zipfile.ZIP_DEFLATED, compresslevel=9 ) as myzip:
36
  with myzip.open( 'params.json', mode='w' ) as out:
37
  out.write( json.dumps({
 
48
  os.unlink( temp_filename )
49
 
50
  def load( self, model ):
51
+ self.name = model
52
  with zipfile.ZipFile( model, mode='r' ) as zip:
53
  with zip.open( 'params.json' ) as fin:
54
  params = json.loads( fin.read().decode() )
55
  if params['version'] > FILE_VERSION: raise Exception( f"Version {params['version']} greater than {FILE_VERSION}" )
56
+ self.leading_context = int(params['leading_context'])
57
+ self.trailing_context = int(params['trailing_context'])
58
+ self.iterations = int(params['iterations'])
59
  temp_filename = _mktemp()
60
  with zip.open( 'action.cb' ) as fin:
61
  with open( temp_filename, "wb" ) as fout:
 
81
  if verbose and i % 10 == 0:
82
  print( f"{i} of {len(from_sentances)}" )
83
 
84
+ def demo( self, share=False ):
85
+ import gradio as gr
86
+
87
+ def gradio_function( text ):
88
+ return list(self.execute( [text] ))[0]
89
+
90
+ with gr.Blocks() as demo:
91
+ name = gr.Markdown( self.name )
92
+ inp = gr.Textbox( label="Input" )
93
+ out = gr.Textbox( label="Output" )
94
+ inp.change( gradio_function, inputs=[inp], outputs=[out] )
95
+ demo.launch( share=share )
96
+
97
  def _list_trace( trace ):
98
  if trace.parrent is None:
99
  result = [trace]
 
345
  model.fit( train_pool, eval_set=validation_pool, verbose=True )
346
  passed = True
347
 
348
+ if( verbose ): print( '{} is fitted: {}'.format(model_piece,model.is_fitted()))
349
  if( verbose ): print( '{} params:\n{}'.format(model_piece,model.get_params()))
350
 
351
  return model
 
525
  })
526
  pd_results.to_csv( out_csv )
527
 
528
+ def safe_float( str ):
529
+ if str is not None:
530
+ return float(str)
531
+ return None #explicit None return.
532
 
533
  def main():
534
  parser = argparse.ArgumentParser(
535
  prog = 'transmorgrify.py',
536
  description = 'Converts text from one to another according to a model.',
537
  epilog = '(C) Joshua Lansford')
538
+ parser.add_argument('-t', '--train', action='store_true', help='Train a model instead of executing a model')
539
+ parser.add_argument('-e', '--execute', action='store_true', help='Use an existing trained model.')
540
+ parser.add_argument('-g', '--gradio', action='store_true', help='Start a gradio demo with the selected model.' )
541
+ parser.add_argument('-s', '--share', action='store_true', help="Share the gradio app with a temporary public URL." )
542
+ parser.add_argument('-i', '--in_csv', help='The csv to read training or input data from', default='in.csv' )
543
  parser.add_argument('-o', '--out_csv', help='The csv to write conversion to', default='out.csv' )
544
  parser.add_argument('-a', '--a_header', help='The column header for training or transforming from', default="source" )
545
  parser.add_argument('-b', '--b_header', help='The column header for training the transformation to', default="target" )
546
  parser.add_argument('-m', '--model',help='The model file to create during training or use during transformation', default='model.tm' )
547
+ parser.add_argument('-n', '--iterations', help='The number of iterations to train', default=2000 )
548
  parser.add_argument('-d', '--device', help='Which device, i.e. if useing GPU', default='cpu' )
549
  parser.add_argument('-x', '--context', help='The number of leading and trailing chars to use as context', default=7 )
 
550
  parser.add_argument('-p', '--train_percentage', help="The percentage of data to train on, leaving the rest for testing.")
 
551
  parser.add_argument('-v', '--verbose', action='store_true', help='Talks alot?' )
552
+ parser.add_argument('-c', '--include_stats', action='store_true', help='Use b_header to compute stats and add to output csv.')
553
 
554
 
555
  args = parser.parse_args()
556
 
557
+ if not args.train and not args.execute and not args.gradio: print( "Must include --execute, --train and/or --gradio to do something." )
558
 
559
 
560
  if args.train:
561
+ train_percentage = safe_float(args.train_percentage)
562
  if train_percentage is None:
563
  if args.execute:
564
  train_percentage = 50
 
569
  a_header=args.a_header,
570
  b_header=args.b_header,
571
  model=args.model,
572
+ iterations=int(args.iterations),
573
  device=args.device,
574
+ leading_context=int(args.context),
575
+ trailing_context=int(args.context),
576
  train_percentage=train_percentage,
577
  verbose=args.verbose,
578
  )
 
585
  else:
586
  execute_percentage = 100
587
  else:
588
+ execute_percentage = 100-safe_float(args.train_percentage)
589
  execute(
590
  include_stats=args.include_stats,
591
  in_csv=args.in_csv,
 
598
  )
599
 
600
 
601
+ if args.gradio:
602
+ tm = Transmorgrifyer()
603
+ tm.load( args.model )
604
+
605
+ tm.demo( args.share is not None )
606
+
607
 
608
  if __name__ == '__main__':
609
  main()