lvwerra HF staff commited on
Commit
fee0cbb
·
1 Parent(s): 5bc0a7e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -54
app.py CHANGED
@@ -19,38 +19,6 @@ FIM_SUFFIX = "<fim_suffix>"
19
 
20
  FIM_INDICATOR = "<FILL_HERE>"
21
 
22
- FORMATS = """## Model Formats
23
- The model is pretrained on code and is formatted with special tokens in addition to the pure code data,\
24
- such as prefixes specifying the source of the file or tokens separating code from a commit message.\
25
- Use these templates to explore the model's capabilities:
26
- ### 1. Prefixes 🏷️
27
- For pure code files, use any combination of the following prefixes:
28
- ```
29
- <reponame>REPONAME<filename>FILENAME<gh_stars>STARS\ncode<|endoftext|>
30
- ```
31
- STARS can be one of: 0, 1-10, 10-100, 100-1000, 1000+
32
- ### 2. Commits 💾
33
- The commits data is formatted as follows:
34
- ```
35
- <commit_before>code<commit_msg>text<commit_after>code<|endoftext|>
36
- ```
37
- ### 3. Jupyter Notebooks 📓
38
- The model is trained on Jupyter notebooks as Python scripts and structured formats like:
39
- ```
40
- <start_jupyter><jupyter_text>text<jupyter_code>code<jupyter_output>output<jupyter_text>
41
- ```
42
- ### 4. Issues 🐛
43
- We also trained on GitHub issues using the following formatting:
44
- ```
45
- <issue_start><issue_comment>text<issue_comment>...<issue_closed>
46
- ```
47
- ### 5. Fill-in-the-middle 🧩
48
- Fill in the middle requires rearranging the model inputs. The playground handles this for you - all you need is to specify where to fill:
49
- ```
50
- code before<FILL_HERE>code after
51
- ```
52
- """
53
-
54
  theme = gr.themes.Monochrome(
55
  primary_hue="indigo",
56
  secondary_hue="blue",
@@ -284,19 +252,17 @@ css += share_btn_css + monospace_css + ".gradio-container {color: black}"
284
 
285
  description = """
286
  <div style="text-align: center;">
287
- <h1> TRL + TextEnvironment <span style='color: #e6b800;'>Models</span> Playground</h1>
 
288
  </div>
289
  <div style="text-align: left;">
290
  <p>This is a demo to generate text and code with the following StarCoderBase models:</p>
291
  <ul>
292
- <li><a href="https://huggingface.co/bigcode/starcoderplus" style='color: #e6b800;'>StarCoderBase TriviaQA</a>: A finetuned version of StarCoderBase on the TriviaQA dataset using reinforcement learning via TRL's TextEnvironment (https://github.com/huggingface/trl/pull/424)</li>
293
- <li><a href="https://huggingface.co/bigcode/starcoderbase" style='color: #e6b800;'>StarCoderBase GSM8K</a>: A finetuned version of StarCoderBase on the GSM8K dataset using reinforcement learning via TRL's TextEnvironment (https://github.com/huggingface/trl/pull/424).</li>
294
  </ul>
295
- <p><b>Please note:</b> These models are not designed for instruction purposes. If you're looking for instruction or want to chat with a fine-tuned model, you can visit the <a href="https://huggingface.co/spaces/HuggingFaceH4/starchat-playground">StarChat Playground</a>.</p>
296
  </div>
297
  """
298
- disclaimer = """⚠️<b>Any use or sharing of this demo constitutes your acceptance of the BigCode [OpenRAIL-M](https://huggingface.co/spaces/bigcode/bigcode-model-license-agreement) License Agreement and the use restrictions included within.</b>\
299
- <br>**Intended Use**: this app and its [supporting model](https://huggingface.co/bigcode) are provided for demonstration purposes; not to serve as replacement for human expertise. For more details on the model's limitations in terms of factuality and biases, see the [model card.](hf.co/bigcode)"""
300
 
301
  with gr.Blocks(theme=theme, analytics_enabled=False, css=css) as demo:
302
  with gr.Column():
@@ -379,33 +345,18 @@ with gr.Blocks(theme=theme, analytics_enabled=False, css=css) as demo:
379
  )
380
 
381
  gr.Markdown(disclaimer)
382
- with gr.Group(elem_id="share-btn-container"):
383
- community_icon = gr.HTML(community_icon_html, visible=True)
384
- loading_icon = gr.HTML(loading_icon_html, visible=True)
385
- share_button = gr.Button(
386
- "Share to community", elem_id="share-btn", visible=True
387
- )
388
  gr_examples = gr.Examples(
389
  examples=[example for client in clients.values() for example in client[3]],
390
  inputs=[instruction],
391
  cache_examples=False,
392
  )
393
 
394
- # def update(version):
395
- # return clients[version][2],
396
-
397
- # version.select(
398
- # lambda x: (clients[x][2], clients[x][3]),
399
- # inputs=[version],
400
- # outputs=[system_prompt, gr_examples],
401
- # )
402
 
403
- # gr.Markdown(FORMATS)
404
 
405
  submit.click(
406
  generate,
407
  inputs=[instruction, system_prompt, version, temperature, max_new_tokens, top_p, repetition_penalty],
408
  outputs=[output, output2],
409
  )
410
- share_button.click(None, [], [], _js=share_js)
411
  demo.queue(concurrency_count=16).launch(debug=True)
 
19
 
20
  FIM_INDICATOR = "<FILL_HERE>"
21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  theme = gr.themes.Monochrome(
23
  primary_hue="indigo",
24
  secondary_hue="blue",
 
252
 
253
  description = """
254
  <div style="text-align: center;">
255
+ <h1> TRL + TextEnvironment </h1>
256
+ <h2> Teaching Language Models to use tools. </h2>
257
  </div>
258
  <div style="text-align: left;">
259
  <p>This is a demo to generate text and code with the following StarCoderBase models:</p>
260
  <ul>
261
+ <li><a href="https://huggingface.co/vwxyzjn/starcoderbase-triviaqa" style='color: #e6b800;'>StarCoderBase TriviaQA</a>: A finetuned version of StarCoderBase on the TriviaQA dataset using reinforcement learning via [TRL's TextEnvironment](https://github.com/huggingface/trl/pull/424)</li>
262
+ <li><a href="https://huggingface.co/lvwerra/starcoderbase-gsm8k" style='color: #e6b800;'>StarCoderBase GSM8K</a>: A finetuned version of StarCoderBase on the GSM8K dataset using reinforcement learning via TRL's [TextEnvironment](https://github.com/huggingface/trl/pull/424).</li>
263
  </ul>
 
264
  </div>
265
  """
 
 
266
 
267
  with gr.Blocks(theme=theme, analytics_enabled=False, css=css) as demo:
268
  with gr.Column():
 
345
  )
346
 
347
  gr.Markdown(disclaimer)
348
+
 
 
 
 
 
349
  gr_examples = gr.Examples(
350
  examples=[example for client in clients.values() for example in client[3]],
351
  inputs=[instruction],
352
  cache_examples=False,
353
  )
354
 
 
 
 
 
 
 
 
 
355
 
 
356
 
357
  submit.click(
358
  generate,
359
  inputs=[instruction, system_prompt, version, temperature, max_new_tokens, top_p, repetition_penalty],
360
  outputs=[output, output2],
361
  )
 
362
  demo.queue(concurrency_count=16).launch(debug=True)