"""Gradio demo that asks a fine-tuned GPT-2 checkpoint to multiply two numbers.

The operands are rewritten as reversed, space-separated characters before
being handed to the model, and the model output is un-reversed before being
shown. The prediction is then compared with Python's exact integer product.
"""
import spaces
import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM

model_name = 'yuntian-deng/gpt2-implicit-cot-multiplication'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)


def _normalize(num):
    """Return *num* as a string without surrounding or interior spaces."""
    return str(num).strip().replace(' ', '')


def preprocess(num):
    """Convert an operand into the prompt format used by this demo.

    The operand is stripped of spaces, its characters are reversed, and the
    reversed characters are joined with single spaces
    (e.g. ``'123'`` -> ``'3 2 1'``).
    """
    return ' '.join(_normalize(num)[::-1])


def postprocess(raw_output):
    """Undo the prompt format: drop spaces, then reverse the characters."""
    return raw_output.replace(' ', '')[::-1]


@spaces.GPU
def predict_product(num1, num2):
    """Predict ``num1 * num2`` with the model and grade the prediction.

    Args:
        num1: First operand as entered by the user.
        num2: Second operand as entered by the user.

    Returns:
        A ``(prediction, result_html)`` tuple: the model's decoded answer as
        a string, and an HTML snippet stating whether it matches the exact
        product (or that the input could not be parsed as integers).
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Build the prompt from the reversed, space-separated operands.
    input_text = f'{preprocess(num1)} * {preprocess(num2)} ='
    inputs = tokenizer(input_text, return_tensors='pt').to(device)
    model.to(device)

    # Generate, then keep only the tokens produced after the prompt.
    outputs = model.generate(**inputs, max_new_tokens=40)
    prompt_length = inputs['input_ids'].shape[-1]
    raw_output = tokenizer.decode(
        outputs[0][prompt_length:], skip_special_tokens=True
    )
    prediction = postprocess(raw_output)

    # Grade against the exact product. The operands are parsed from the same
    # space-stripped text the model was given, so an input such as "12 345"
    # is graded instead of being reported as invalid.
    try:
        correct_product = int(_normalize(num1)) * int(_normalize(num2))
    except ValueError:
        result_color = "black"
        result_message = "Invalid input. Could not evaluate correctness."
    else:
        try:
            is_correct = int(prediction) == correct_product
        except ValueError:
            # The model produced something that is not an integer.
            is_correct = False
        result_color = "green" if is_correct else "red"
        result_message = (
            "Correct!"
            if is_correct
            else f"Incorrect! The correct product is {correct_product}."
        )

    # Single-line literal: the message is wrapped so that result_color is
    # actually applied when the HTML component renders it.
    result_html = f"<div style='color: {result_color};'>{result_message}</div>"
    return prediction, result_html


demo = gr.Interface(
    fn=predict_product,
    inputs=[
        gr.Textbox(label='First Number (up to 12 digits)', value='12345'),
        gr.Textbox(label='Second Number (up to 12 digits)', value='67890'),
    ],
    outputs=[
        gr.Textbox(label='Predicted Product'),
        gr.HTML(label='Result Message'),
    ],
    title='GPT2 Direct Multiplication Calculator (Without Using Chain-of-Thought)',
    description=(
        'This demo uses GPT2 to directly predict the product of two numbers '
        'without using any intermediate reasoning steps. The GPT2 model has '
        'been fine-tuned to internalize chain-of-thought reasoning within its '
        'hidden states, following our stepwise internalization approach '
        'detailed in the paper linked at the bottom of this page.'
    ),
    # One list item per line so the markdown renders as a bullet list.
    article="""
- [Paper: From Explicit CoT to Implicit CoT: Learning to Internalize CoT Step by Step](https://arxiv.org/pdf/2405.14838)
- [Code Repository](https://github.com/da03/Internalize_CoT_Step_by_Step)
- [Tweet Announcement](https://twitter.com/yuntiandeng/status/1795854740879774036)
""",
    clear_btn=None,
    submit_btn="Multiply!",
    live=False,
)

demo.launch()