Spaces:

JEdward7777
/

SentenceTransmorgrifier

Runtime error

App Files Files Community

Joshua Lansford committed on Nov 3, 2022

Commit

14d74fa

1 Parent(s): 9997114

Added demo and removed duplicate data from examples

Browse files

Files changed (5) hide show

.vscode/launch.json +42 -2
examples/phonetic/phonetic.csv +0 -0
examples/piglattin/pig_lattin.csv +0 -0
examples/piglattin/prepare_training_data.py +7 -2
transmorgrify.py +42 -17

.vscode/launch.json CHANGED Viewed

@@ -75,7 +75,7 @@
                 "--verbose",
             ]
         },{
-            "name": "short Execute phonetic gpu",
             "type": "python",
             "request": "launch",
             "program": "transmorgrify.py",
@@ -87,11 +87,51 @@
                 "--out_csv", "./phonetic_out.csv",
                 "--a_header", "English",
                 "--b_header", "Phonetic",
-                "--device", "0:1",
                 "--model", "phonetics_forward.tm",
                 "--verbose",
                 "--include_stats",
             ]
         }
     ]
 }

                 "--verbose",
             ]
         },{
+            "name": "short Execute phonetic",
             "type": "python",
             "request": "launch",
             "program": "transmorgrify.py",
                 "--out_csv", "./phonetic_out.csv",
                 "--a_header", "English",
                 "--b_header", "Phonetic",
                 "--model", "phonetics_forward.tm",
                 "--verbose",
                 "--include_stats",
             ]
+        },{
+            "name": "short Execute reverse phonetic",
+            "type": "python",
+            "request": "launch",
+            "program": "transmorgrify.py",
+            "console": "integratedTerminal",
+            "justMyCode": true,
+            "args": [
+                "--execute",
+                "--in_csv", "/home/lansford/Sync/projects/tf_over/sentance_transmogrifier/examples/phonetic/phonetic_short.csv",
+                "--out_csv", "./reverse_phonetic_out.csv",
+                "--b_header", "English",
+                "--a_header", "Phonetic",
+                "--model", "phonetics_backwards.tm",
+                "--verbose",
+                "--include_stats",
+            ]
+        },{
+            "name": "gradio reverse phonetic",
+            "type": "python",
+            "request": "launch",
+            "program": "transmorgrify.py",
+            "console": "integratedTerminal",
+            "justMyCode": true,
+            "args": [
+                "--gradio",
+                "--model", "phonetics_backwards.tm",
+                "--share",
+            ]
+        },{
+            "name": "gradio forward phonetic",
+            "type": "python",
+            "request": "launch",
+            "program": "transmorgrify.py",
+            "console": "integratedTerminal",
+            "justMyCode": true,
+            "args": [
+                "--gradio",
+                "--model", "phonetics_forward.tm",
+                "--share",
+            ]
         }
     ]
 }

examples/phonetic/phonetic.csv CHANGED Viewed

The diff for this file is too large to render. See raw diff

examples/piglattin/pig_lattin.csv CHANGED Viewed

The diff for this file is too large to render. See raw diff

examples/piglattin/prepare_training_data.py CHANGED Viewed

@@ -49,15 +49,20 @@ def english_to_piglattin( english ):
 def main():
     with open( "spa.csv", "rt" ) as fin:
         with open( "pig_lattin.csv", "wt" ) as f_out:
             f_out.write( "English,Piglattin\n" )
             for line in fin:
                 english = line.split( "\t" )[0]
                 english = english.replace( ",", " " )
-                piglattin = english_to_piglattin( english )
-                f_out.write( f"{english},{piglattin}\n" )
 if __name__ == '__main__':
     main()

 def main():
     """Write pig_lattin.csv from the sentences found in spa.csv.

     The first tab-separated field of every input line is taken as the
     English sentence.  Commas are replaced by spaces so the sentence fits
     in a single CSV column, and each distinct sentence is written once
     next to its english_to_piglattin() conversion.
     """
     # Set membership is O(1), so de-duplication stays linear in the number
     # of input lines.
     used_englishes = set()
     with open( "spa.csv", "rt" ) as fin, open( "pig_lattin.csv", "wt" ) as f_out:
         f_out.write( "English,Piglattin\n" )
         for line in fin:
             english = line.split( "\t" )[0].replace( ",", " " )
             if english in used_englishes:
                 # Already emitted this sentence; skip the duplicate row.
                 continue
             used_englishes.add( english )
             piglattin = english_to_piglattin( english )
             f_out.write( f"{english},{piglattin}\n" )
 if __name__ == '__main__':
     main()

transmorgrify.py CHANGED Viewed

@@ -16,7 +16,6 @@ FILE_VERSION = 1
 class Transmorgrifyer:
     def train( self, from_sentances, to_sentances, iterations, device, trailing_context, leading_context, verbose ):
         X,Y = _parse_for_training( from_sentances, to_sentances, num_pre_context_chars=leading_context, num_post_context_chars=trailing_context )
         #train and save the action_model
@@ -32,6 +31,7 @@ class Transmorgrifyer:
         self.iterations = iterations
     def save( self, model ):
         with zipfile.ZipFile( model, mode="w", compression=zipfile.ZIP_DEFLATED, compresslevel=9 ) as myzip:
             with myzip.open( 'params.json', mode='w' ) as out:
                 out.write( json.dumps({
@@ -48,13 +48,14 @@ class Transmorgrifyer:
             os.unlink( temp_filename )
     def load( self, model ):
         with zipfile.ZipFile( model, mode='r' ) as zip:
             with zip.open( 'params.json' ) as fin:
                 params = json.loads( fin.read().decode() )
                 if params['version'] > FILE_VERSION: raise Exception( f"Version {params['version']} greater than {FILE_VERSION}" )
-                self.leading_context = params['leading_context']
-                self.trailing_context = params['trailing_context']
-                self.iterations = params['iterations']
             temp_filename = _mktemp()
             with zip.open( 'action.cb' ) as fin:
                 with open( temp_filename, "wb" ) as fout:
@@ -80,6 +81,19 @@ class Transmorgrifyer:
             if verbose and i % 10 == 0:
                 print( f"{i} of {len(from_sentances)}" )
 def _list_trace( trace ):
     if trace.parrent is None:
         result = [trace]
@@ -331,7 +345,7 @@ def _train_catboost( X, y, iterations, device, verbose, model_piece, learning_ra
         model.fit( train_pool, eval_set=validation_pool, verbose=True )
         passed = True
-    if( verbose ): print( '{} is fitted: {}',format(model_piece,model.is_fitted()))
     if( verbose ): print( '{} params:\n{}'.format(model_piece,model.get_params()))
     return model
@@ -511,35 +525,40 @@ def execute( include_stats, in_csv, out_csv, a_header, b_header, model, execute_
         })
         pd_results.to_csv( out_csv )
 def main():
     parser = argparse.ArgumentParser(
                     prog = 'transmorgrify.py',
                     description = 'Converts text from one to another according to a model.',
                     epilog = '(C) Joshua Lansford')
-    parser.add_argument('-i', '--in_csv',  help='The csv to read training or input data from', required=True )
     parser.add_argument('-o', '--out_csv',  help='The csv to write conversion to', default='out.csv' )
     parser.add_argument('-a', '--a_header', help='The column header for training or transforming from', default="source" )
     parser.add_argument('-b', '--b_header',   help='The column header for training the transformation to', default="target"  )
     parser.add_argument('-m', '--model',help='The model file to create during training or use during transformation', default='model.tm' )
-    parser.add_argument('-n', '--iterations', help='The number of iterations to train', default=1000 )
     parser.add_argument('-d', '--device',  help='Which device, i.e. if useing GPU', default='cpu' )
     parser.add_argument('-x', '--context', help='The number of leading and trailing chars to use as context', default=7 )
-    parser.add_argument('-t', '--train', action='store_true', help='Train a model instead of executing a model')
     parser.add_argument('-p', '--train_percentage', help="The percentage of data to train on, leaving the rest for testing.")
-    parser.add_argument('-e', '--execute', action='store_true', help='Use an existing trained model.')
     parser.add_argument('-v', '--verbose', action='store_true', help='Talks alot?' )
-    parser.add_argument('-s', '--include_stats',   action='store_true', help='Use b_header to compute stats and add to output csv.')
     args = parser.parse_args()
-    if not args.train and not args.execute: print( "Must include --execute and/or --train to do something." )
     if args.train:
-        train_percentage = args.train_percentage
         if train_percentage is None:
             if args.execute:
                 train_percentage = 50
@@ -550,10 +569,10 @@ def main():
                a_header=args.a_header,
                b_header=args.b_header,
                model=args.model,
-               iterations=args.iterations,
                device=args.device,
-               leading_context=args.context,
-               trailing_context=args.context,
                train_percentage=train_percentage,
                verbose=args.verbose,
                )
@@ -566,7 +585,7 @@ def main():
             else:
                 execute_percentage = 100
         else:
-            execute_percentage = 100-args.train_percentage
         execute(
             include_stats=args.include_stats,
             in_csv=args.in_csv,
@@ -579,6 +598,12 @@ def main():
         )
 if __name__ == '__main__':
     main()

 class Transmorgrifyer:
     def train( self, from_sentances, to_sentances, iterations, device, trailing_context, leading_context, verbose ):
         X,Y = _parse_for_training( from_sentances, to_sentances, num_pre_context_chars=leading_context, num_post_context_chars=trailing_context )
         #train and save the action_model
         self.iterations = iterations
     def save( self, model ):
+        self.name = model
         with zipfile.ZipFile( model, mode="w", compression=zipfile.ZIP_DEFLATED, compresslevel=9 ) as myzip:
             with myzip.open( 'params.json', mode='w' ) as out:
                 out.write( json.dumps({
             os.unlink( temp_filename )
     def load( self, model ):
+        self.name = model
         with zipfile.ZipFile( model, mode='r' ) as zip:
             with zip.open( 'params.json' ) as fin:
                 params = json.loads( fin.read().decode() )
                 if params['version'] > FILE_VERSION: raise Exception( f"Version {params['version']} greater than {FILE_VERSION}" )
+                self.leading_context = int(params['leading_context'])
+                self.trailing_context = int(params['trailing_context'])
+                self.iterations = int(params['iterations'])
             temp_filename = _mktemp()
             with zip.open( 'action.cb' ) as fin:
                 with open( temp_filename, "wb" ) as fout:
             if verbose and i % 10 == 0:
                 print( f"{i} of {len(from_sentances)}" )
+    def demo( self, share=False ):
+        import gradio as gr
+        def gradio_function( text ):
+            return list(self.execute( [text] ))[0]
+        with gr.Blocks() as demo:
+            name = gr.Markdown( self.name )
+            inp = gr.Textbox( label="Input" )
+            out = gr.Textbox( label="Output" )
+            inp.change( gradio_function, inputs=[inp], outputs=[out] )
+        demo.launch( share=share )
 def _list_trace( trace ):
     if trace.parrent is None:
         result = [trace]
         model.fit( train_pool, eval_set=validation_pool, verbose=True )
         passed = True
+    if( verbose ): print( '{} is fitted: {}'.format(model_piece,model.is_fitted()))
     if( verbose ): print( '{} params:\n{}'.format(model_piece,model.get_params()))
     return model
         })
         pd_results.to_csv( out_csv )
+def safe_float( str ):
+    if str is not None:
+        return float(str)
+    return None #explicit None return.
 def main():
     parser = argparse.ArgumentParser(
                     prog = 'transmorgrify.py',
                     description = 'Converts text from one to another according to a model.',
                     epilog = '(C) Joshua Lansford')
+    parser.add_argument('-t', '--train', action='store_true', help='Train a model instead of executing a model')
+    parser.add_argument('-e', '--execute', action='store_true', help='Use an existing trained model.')
+    parser.add_argument('-g', '--gradio', action='store_true', help='Start a gradio demo with the selected model.' )
+    parser.add_argument('-s', '--share', action='store_true', help="Share the gradio app with a temporary public URL." )
+    parser.add_argument('-i', '--in_csv',  help='The csv to read training or input data from', default='in.csv' )
     parser.add_argument('-o', '--out_csv',  help='The csv to write conversion to', default='out.csv' )
     parser.add_argument('-a', '--a_header', help='The column header for training or transforming from', default="source" )
     parser.add_argument('-b', '--b_header',   help='The column header for training the transformation to', default="target"  )
     parser.add_argument('-m', '--model',help='The model file to create during training or use during transformation', default='model.tm' )
+    parser.add_argument('-n', '--iterations', help='The number of iterations to train', default=2000 )
     parser.add_argument('-d', '--device',  help='Which device, i.e. if useing GPU', default='cpu' )
     parser.add_argument('-x', '--context', help='The number of leading and trailing chars to use as context', default=7 )
     parser.add_argument('-p', '--train_percentage', help="The percentage of data to train on, leaving the rest for testing.")
     parser.add_argument('-v', '--verbose', action='store_true', help='Talks alot?' )
+    parser.add_argument('-c', '--include_stats',   action='store_true', help='Use b_header to compute stats and add to output csv.')
     args = parser.parse_args()
+    if not args.train and not args.execute and not args.gradio: print( "Must include --execute, --train and/or --gradio to do something." )
     if args.train:
+        train_percentage = safe_float(args.train_percentage)
         if train_percentage is None:
             if args.execute:
                 train_percentage = 50
                a_header=args.a_header,
                b_header=args.b_header,
                model=args.model,
+               iterations=int(args.iterations),
                device=args.device,
+               leading_context=int(args.context),
+               trailing_context=int(args.context),
                train_percentage=train_percentage,
                verbose=args.verbose,
                )
             else:
                 execute_percentage = 100
         else:
+            execute_percentage = 100-safe_float(args.train_percentage)
         execute(
             include_stats=args.include_stats,
             in_csv=args.in_csv,
         )
+    if args.gradio:
+        tm = Transmorgrifyer()
+        tm.load( args.model )
+        tm.demo( args.share is not None )
 if __name__ == '__main__':
     main()