youngtsai committed on
Commit
43b1e8f
·
1 Parent(s): c9b8261

def generate_paragraph_evaluate(model, max_tokens, sys_content, paragraph):

Browse files
Files changed (1) hide show
  1. app.py +102 -95
app.py CHANGED
@@ -579,7 +579,7 @@ def generate_paragraph(topic_sentence, supporting_sentences, conclusion_sentence
579
  paragraph = f"{topic_sentence} {supporting_sentences} {conclusion_sentence}"
580
  return paragraph
581
 
582
- def generate_paragraph_evaluate(model, sys_content, paragraph, user_generate_paragraph_evaluate_prompt):
583
  """
584
  根據用戶輸入的段落,調用 LLM API 生成相關的段落分析。
585
 
@@ -592,6 +592,92 @@ def generate_paragraph_evaluate(model, sys_content, paragraph, user_generate_par
592
  Returns:
593
  gr.update: 包含評估結果的表格更新
594
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
595
  def parse_evaluation_response(content):
596
  """解析 LLM 回應內容"""
597
  try:
@@ -654,7 +740,7 @@ def generate_paragraph_evaluate(model, sys_content, paragraph, user_generate_par
654
  prompt=f"{sys_content}\n{user_content}" if "gemini" in model.lower() else None,
655
  messages=messages,
656
  model=model,
657
- max_tokens=4000,
658
  response_format={"type": "json_object"}
659
  )
660
 
@@ -2854,92 +2940,6 @@ with gr.Blocks(theme=THEME, css=CSS) as demo:
2854
  inputs=[paragraph_output],
2855
  outputs=[paragraph_output_download]
2856
  )
2857
- with gr.Row(visible=False) as paragraph_evaluate_params:
2858
- default_user_generate_paragraph_evaluate_prompt = """
2859
- Based on the final paragraph provided, evaluate the writing in terms of content, organization, grammar, and vocabulary. Provide feedback in simple and supportive language.
2860
-
2861
- -- 根據上述的文章,以「內容(content)」層面評分。
2862
- Assess the student's writing by focusing on the 'Content' category according to the established rubric. Determine the clarity of the theme or thesis statement and whether it is supported by specific and complete details relevant to the topic. Use the following levels to guide your evaluation:
2863
-
2864
- - Excellent (5-4 points): Look for a clear and pertinent theme or thesis, directly related to the topic, with detailed support.
2865
- - Good (3 points): The theme should be present but may lack clarity or emphasis; some narrative development related to the theme should be evident.
2866
- - Fair (2-1 points): Identify if the theme is unclear or if the majority of the narrative is undeveloped or irrelevant to the theme.
2867
- - Poor (0 points): Determine if the response is off-topic or not written at all. Remember that any response that is off-topic or unwritten should receive zero points in all aspects.
2868
-
2869
- Your detailed feedback should explain the score you assign, including specific examples from the text to illustrate how well the student's content meets the criteria.
2870
- Translate your feedback into Traditional Chinese (zh-tw) as the final result (#中文解釋 zh-TW).
2871
-
2872
- 評分結果以 JSON 格式輸出: content: {
2873
- "level": "#Excellent(5-4 pts)/Good(3 pts)/Fair(2-1 pts)/Poor(0 pts)",
2874
- "explanation": "#中文解釋 zh-TW"
2875
- }
2876
-
2877
- -- 根據上述的文章,以「組織(organization)」層面評分。
2878
- Evaluate the student's writing with a focus on 'Organization' according to the grading rubric. Consider the structure of the text, including the presence of a clear introduction, development, and conclusion, as well as the coherence throughout the piece and the use of transitional phrases. Use the following levels to structure your feedback:
2879
-
2880
- - Excellent (5-4 points): Look for clear key points with a logical introduction, development, and conclusion, and note whether transitions are coherent and effectively used.
2881
- - Good (3 points): The key points should be identifiable but may not be well-arranged; observe any imbalance in development and transitional phrase usage.
2882
- - Fair (2-1 points): Identify if the key points are unclear and if the text lacks coherence.
2883
- - Poor (0 points): Check if the writing is completely unorganized or not written according to the prompts. Texts that are entirely unorganized should receive zero points.
2884
-
2885
- Your detailed feedback should explain the score you assign, including specific examples from the text to illustrate how well the student's Organization meets the criteria. Translate your feedback into Traditional Chinese (zh_tw) as the final result (#中文解釋).
2886
-
2887
- 評分結果以 JSON 格式輸出: organization: {
2888
- "level": "#Excellent(5-4 pts)/Good(3 pts)/Fair(2-1 pts)/Poor(0 pts)",
2889
- "explanation": "#中文解釋 zh-TW"
2890
- }
2891
-
2892
- -- 根據上述的文章,以「文法和用法(Grammar and usage)」層面評分。
2893
- Review the student's writing, paying special attention to 'Grammar/Sentence Structure'. Assess the accuracy of grammar and the variety of sentence structures throughout the essay. Use the rubric levels to judge the work as follows:
2894
-
2895
- - Excellent (5-4 points): Search for text with minimal grammatical errors and a diverse range of sentence structures.
2896
- - Good (3 points): There may be some grammatical errors, but they should not affect the overall meaning or flow of the text.
2897
- - Fair (2-1 points): Determine if grammatical errors are frequent and if they significantly affect the meaning of the text.
2898
- - Poor (0 points): If the essay contains severe grammatical errors throughout, leading to an unclear meaning, it should be marked accordingly.
2899
-
2900
- Your detailed feedback should explain the score you assign, including specific examples from the text to illustrate how well the student's Grammar/Sentence Structure meets the criteria. Translate your feedback into Traditional Chinese (zh_tw) as the final result (#中文解釋).
2901
-
2902
- 評分結果以 JSON 格式輸出: grammar_and_usage: {
2903
- "level": "#Excellent(5-4 pts)/Good(3 pts)/Fair(2-1 pts)/Poor(0 pts)",
2904
- "explanation": "#中文解釋 zh-TW"
2905
- }
2906
-
2907
- -- 根據上述的文章,以「詞彙(Vocabulary )」層面評分。
2908
- Assess the use of 'Vocabulary/Spelling' in the student's writing based on the criteria provided. Evaluate the precision and appropriateness of the vocabulary and the presence of spelling errors. Reference the following scoring levels in your analysis:
2909
-
2910
- - Excellent (5-4 points): The writing should contain accurate and appropriate vocabulary with almost no spelling mistakes.
2911
- - Good (3 points): Vocabulary might be somewhat repetitive or mundane; there may be occasional misused words and minor spelling mistakes, but they should not impede understanding.
2912
- - Fair (2-1 points): Notice if there are many vocabulary errors and spelling mistakes that clearly affect the clarity of the text's meaning.
2913
- - Poor (0 points): Writing that only contains scattered words related to the topic or is copied should be scored as such.
2914
-
2915
- Your detailed feedback should explain the score you assign, including specific examples from the text to illustrate how well the student's Vocabulary/Spelling meets the criteria. Translate your feedback into Traditional Chinese (zh_tw) as the final result (#中文解釋).
2916
-
2917
- 評分結果以 JSON 格式輸出: vocabulary: {
2918
- "level": "#Excellent(5-4 pts)/Good(3 pts)/Fair(2-1 pts)/Poor(0 pts)",
2919
- "explanation": "#中文解釋 zh-TW"
2920
- }
2921
-
2922
- -- 根據上述的文章,以「連貫性和連接詞(Coherence and Cohesion)」層面評分。
2923
- - 評分等級有三級:beginner, intermediate, advanced.
2924
- - 以繁體中文 zh-TW 解釋
2925
- 評分結果以 JSON 格式輸出: coherence_and_cohesion: {
2926
- "level": "#beginner/intermediate/advanced",
2927
- "explanation": "#中文解釋 zh-TW"
2928
- }
2929
-
2930
- Restrictions:
2931
- - the _explanation should be in Traditional Chinese (zh-TW), it's very important.
2932
-
2933
- Final Output JSON Format:
2934
- {{
2935
- "content": {{content's dict}},
2936
- "organization": {{organization'dict}},
2937
- "grammar_and_usage": {{grammar_and_usage'dict}},
2938
- "vocabulary": {{vocabulary'dict}},
2939
- "coherence_and_cohesion": {{coherence_and_cohesion'dict}}
2940
- }}
2941
- """
2942
- user_generate_paragraph_evaluate_prompt = gr.Textbox(label="Paragraph evaluate Prompt", value=default_user_generate_paragraph_evaluate_prompt, visible=False)
2943
  with gr.Row():
2944
  generate_paragraph_evaluate_button = gr.Button("✨ 段落分析", variant="primary")
2945
  with gr.Row():
@@ -3065,9 +3065,9 @@ with gr.Blocks(theme=THEME, css=CSS) as demo:
3065
  fn=generate_paragraph_evaluate,
3066
  inputs=[
3067
  model,
 
3068
  sys_content_input,
3069
- paragraph_output,
3070
- user_generate_paragraph_evaluate_prompt
3071
  ],
3072
  outputs=paragraph_evaluate_output
3073
  ).then(
@@ -3233,8 +3233,6 @@ with gr.Blocks(theme=THEME, css=CSS) as demo:
3233
  with gr.Column():
3234
  with gr.Row(visible=False) as full_paragraph_params:
3235
  full_paragraph_sys_content_input = gr.Textbox(label="System Prompt", value="You are an English teacher who is practicing with me to improve my English writing skill.")
3236
- default_user_generate_full_paragraph_evaluate_prompt = default_user_generate_paragraph_evaluate_prompt
3237
- user_generate_full_paragraph_evaluate_prompt = gr.Textbox(label="Paragraph evaluate Prompt", value=default_user_generate_full_paragraph_evaluate_prompt, visible=False)
3238
  with gr.Row():
3239
  gr.Markdown("# 📊 英文段落寫作評分")
3240
  # 輸入段落全文
@@ -3313,7 +3311,12 @@ with gr.Blocks(theme=THEME, css=CSS) as demo:
3313
 
3314
  full_paragraph_evaluate_button.click(
3315
  fn=generate_paragraph_evaluate,
3316
- inputs=[model, sys_content_input, full_paragraph_input, user_generate_full_paragraph_evaluate_prompt],
 
 
 
 
 
3317
  outputs=full_paragraph_evaluate_output
3318
  ).then(
3319
  fn=update_paragraph_correct_grammatical_spelling_errors_input,
@@ -3430,7 +3433,6 @@ with gr.Blocks(theme=THEME, css=CSS) as demo:
3430
  with gr.Column():
3431
  with gr.Row():
3432
  past_exam_evaluation_sys_content_prompt = gr.Textbox(label="System Prompt", value="You are an English teacher who is practicing with me to improve my English writing skill.", visible=False)
3433
- past_exam_evaluation_user_prompt = gr.Textbox(label="Paragraph evaluate Prompt", value=default_user_generate_paragraph_evaluate_prompt, visible=False)
3434
  past_exam_evaluation_input = gr.TextArea("",label="這是你的原始寫作內容,參考 JUTOR 的建議,你可以選擇是否修改:")
3435
  with gr.Column():
3436
  with gr.Row():
@@ -3559,7 +3561,12 @@ with gr.Blocks(theme=THEME, css=CSS) as demo:
3559
 
3560
  past_exam_evaluation_button.click(
3561
  fn=generate_paragraph_evaluate,
3562
- inputs=[model, past_exam_evaluation_sys_content_prompt, past_exam_evaluation_input, past_exam_evaluation_user_prompt],
 
 
 
 
 
3563
  outputs=past_exam_evaluation_output
3564
  ).then(
3565
  fn=update_paragraph_correct_grammatical_spelling_errors_input,
 
579
  paragraph = f"{topic_sentence} {supporting_sentences} {conclusion_sentence}"
580
  return paragraph
581
 
582
+ def generate_paragraph_evaluate(model, max_tokens, sys_content, paragraph):
583
  """
584
  根據用戶輸入的段落,調用 LLM API 生成相關的段落分析。
585
 
 
592
  Returns:
593
  gr.update: 包含評估結果的表格更新
594
  """
595
+
596
+ user_generate_paragraph_evaluate_prompt = """
597
+ Based on the final paragraph provided, evaluate the writing in terms of content, organization, grammar, and vocabulary. Provide feedback in simple and supportive language.
598
+
599
+ -- 根據上述的文章,以「內容(content)」層面評分。
600
+ Assess the student's writing by focusing on the 'Content' category according to the established rubric. Determine the clarity of the theme or thesis statement and whether it is supported by specific and complete details relevant to the topic. Use the following levels to guide your evaluation:
601
+
602
+ - Excellent (5-4 points): Look for a clear and pertinent theme or thesis, directly related to the topic, with detailed support.
603
+ - Good (3 points): The theme should be present but may lack clarity or emphasis; some narrative development related to the theme should be evident.
604
+ - Fair (2-1 points): Identify if the theme is unclear or if the majority of the narrative is undeveloped or irrelevant to the theme.
605
+ - Poor (0 points): Determine if the response is off-topic or not written at all. Remember that any response that is off-topic or unwritten should receive zero points in all aspects.
606
+
607
+ Your detailed feedback should explain the score you assign, including specific examples from the text to illustrate how well the student's content meets the criteria.
608
+ Translate your feedback into Traditional Chinese (zh-tw) as the final result (#中文解釋 zh-TW).
609
+
610
+ 評分結果以 JSON 格式輸出: content: {
611
+ "level": "#Excellent(5-4 pts)/Good(3 pts)/Fair(2-1 pts)/Poor(0 pts)",
612
+ "explanation": "#中文解釋 zh-TW"
613
+ }
614
+
615
+ -- 根據上述的文章,以「組織(organization)」層面評分。
616
+ Evaluate the student's writing with a focus on 'Organization' according to the grading rubric. Consider the structure of the text, including the presence of a clear introduction, development, and conclusion, as well as the coherence throughout the piece and the use of transitional phrases. Use the following levels to structure your feedback:
617
+
618
+ - Excellent (5-4 points): Look for clear key points with a logical introduction, development, and conclusion, and note whether transitions are coherent and effectively used.
619
+ - Good (3 points): The key points should be identifiable but may not be well-arranged; observe any imbalance in development and transitional phrase usage.
620
+ - Fair (2-1 points): Identify if the key points are unclear and if the text lacks coherence.
621
+ - Poor (0 points): Check if the writing is completely unorganized or not written according to the prompts. Texts that are entirely unorganized should receive zero points.
622
+
623
+ Your detailed feedback should explain the score you assign, including specific examples from the text to illustrate how well the student's Organization meets the criteria. Translate your feedback into Traditional Chinese (zh_tw) as the final result (#中文解釋).
624
+
625
+ 評分結果以 JSON 格式輸出: organization: {
626
+ "level": "#Excellent(5-4 pts)/Good(3 pts)/Fair(2-1 pts)/Poor(0 pts)",
627
+ "explanation": "#中文解釋 zh-TW"
628
+ }
629
+
630
+ -- 根據上述的文章,以「文法和用法(Grammar and usage)」層面評分。
631
+ Review the student's writing, paying special attention to 'Grammar/Sentence Structure'. Assess the accuracy of grammar and the variety of sentence structures throughout the essay. Use the rubric levels to judge the work as follows:
632
+
633
+ - Excellent (5-4 points): Search for text with minimal grammatical errors and a diverse range of sentence structures.
634
+ - Good (3 points): There may be some grammatical errors, but they should not affect the overall meaning or flow of the text.
635
+ - Fair (2-1 points): Determine if grammatical errors are frequent and if they significantly affect the meaning of the text.
636
+ - Poor (0 points): If the essay contains severe grammatical errors throughout, leading to an unclear meaning, it should be marked accordingly.
637
+
638
+ Your detailed feedback should explain the score you assign, including specific examples from the text to illustrate how well the student's Grammar/Sentence Structure meets the criteria. Translate your feedback into Traditional Chinese (zh_tw) as the final result (#中文解釋).
639
+
640
+ 評分結果以 JSON 格式輸出: grammar_and_usage: {
641
+ "level": "#Excellent(5-4 pts)/Good(3 pts)/Fair(2-1 pts)/Poor(0 pts)",
642
+ "explanation": "#中文解釋 zh-TW"
643
+ }
644
+
645
+ -- 根據上述的文章,以「詞彙(Vocabulary )」層面評分。
646
+ Assess the use of 'Vocabulary/Spelling' in the student's writing based on the criteria provided. Evaluate the precision and appropriateness of the vocabulary and the presence of spelling errors. Reference the following scoring levels in your analysis:
647
+
648
+ - Excellent (5-4 points): The writing should contain accurate and appropriate vocabulary with almost no spelling mistakes.
649
+ - Good (3 points): Vocabulary might be somewhat repetitive or mundane; there may be occasional misused words and minor spelling mistakes, but they should not impede understanding.
650
+ - Fair (2-1 points): Notice if there are many vocabulary errors and spelling mistakes that clearly affect the clarity of the text's meaning.
651
+ - Poor (0 points): Writing that only contains scattered words related to the topic or is copied should be scored as such.
652
+
653
+ Your detailed feedback should explain the score you assign, including specific examples from the text to illustrate how well the student's Vocabulary/Spelling meets the criteria. Translate your feedback into Traditional Chinese (zh_tw) as the final result (#中文解釋).
654
+
655
+ 評分結果以 JSON 格式輸出: vocabulary: {
656
+ "level": "#Excellent(5-4 pts)/Good(3 pts)/Fair(2-1 pts)/Poor(0 pts)",
657
+ "explanation": "#中文解釋 zh-TW"
658
+ }
659
+
660
+ -- 根據上述的文章,以「連貫性和連接詞(Coherence and Cohesion)」層面評分。
661
+ - 評分等級有三級:beginner, intermediate, advanced.
662
+ - 以繁體中文 zh-TW 解釋
663
+ 評分結果以 JSON 格式輸出: coherence_and_cohesion: {
664
+ "level": "#beginner/intermediate/advanced",
665
+ "explanation": "#中文解釋 zh-TW"
666
+ }
667
+
668
+ Restrictions:
669
+ - the _explanation should be in Traditional Chinese (zh-TW), it's very important.
670
+
671
+ Final Output JSON Format:
672
+ {{
673
+ "content": {{content's dict}},
674
+ "organization": {{organization'dict}},
675
+ "grammar_and_usage": {{grammar_and_usage'dict}},
676
+ "vocabulary": {{vocabulary'dict}},
677
+ "coherence_and_cohesion": {{coherence_and_cohesion'dict}}
678
+ }}
679
+ """
680
+
681
  def parse_evaluation_response(content):
682
  """解析 LLM 回應內容"""
683
  try:
 
740
  prompt=f"{sys_content}\n{user_content}" if "gemini" in model.lower() else None,
741
  messages=messages,
742
  model=model,
743
+ max_tokens=max_tokens,
744
  response_format={"type": "json_object"}
745
  )
746
 
 
2940
  inputs=[paragraph_output],
2941
  outputs=[paragraph_output_download]
2942
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2943
  with gr.Row():
2944
  generate_paragraph_evaluate_button = gr.Button("✨ 段落分析", variant="primary")
2945
  with gr.Row():
 
3065
  fn=generate_paragraph_evaluate,
3066
  inputs=[
3067
  model,
3068
+ max_tokens,
3069
  sys_content_input,
3070
+ paragraph_output
 
3071
  ],
3072
  outputs=paragraph_evaluate_output
3073
  ).then(
 
3233
  with gr.Column():
3234
  with gr.Row(visible=False) as full_paragraph_params:
3235
  full_paragraph_sys_content_input = gr.Textbox(label="System Prompt", value="You are an English teacher who is practicing with me to improve my English writing skill.")
 
 
3236
  with gr.Row():
3237
  gr.Markdown("# 📊 英文段落寫作評分")
3238
  # 輸入段落全文
 
3311
 
3312
  full_paragraph_evaluate_button.click(
3313
  fn=generate_paragraph_evaluate,
3314
+ inputs=[
3315
+ model,
3316
+ max_tokens,
3317
+ sys_content_input,
3318
+ full_paragraph_input
3319
+ ],
3320
  outputs=full_paragraph_evaluate_output
3321
  ).then(
3322
  fn=update_paragraph_correct_grammatical_spelling_errors_input,
 
3433
  with gr.Column():
3434
  with gr.Row():
3435
  past_exam_evaluation_sys_content_prompt = gr.Textbox(label="System Prompt", value="You are an English teacher who is practicing with me to improve my English writing skill.", visible=False)
 
3436
  past_exam_evaluation_input = gr.TextArea("",label="這是你的原始寫作內容,參考 JUTOR 的建議,你可以選擇是否修改:")
3437
  with gr.Column():
3438
  with gr.Row():
 
3561
 
3562
  past_exam_evaluation_button.click(
3563
  fn=generate_paragraph_evaluate,
3564
+ inputs=[
3565
+ model,
3566
+ max_tokens,
3567
+ past_exam_evaluation_sys_content_prompt,
3568
+ past_exam_evaluation_input
3569
+ ],
3570
  outputs=past_exam_evaluation_output
3571
  ).then(
3572
  fn=update_paragraph_correct_grammatical_spelling_errors_input,