Pisethan committed on
Commit
8a8c652
·
verified ·
1 Parent(s): 35b7945

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -7
app.py CHANGED
@@ -3,16 +3,36 @@ import gradio as gr
3
  from transformers import pipeline, AutoTokenizer
4
  import torch
5
  import spaces
 
 
6
 
7
- # Shared dropdown options
 
 
 
 
 
8
  grade_options = ["1", "2", "3", "4", "5", "6"]
9
  topic_options = ["Addition", "Subtraction", "Counting", "Number Recognition", "Multiplication", "Division"]
10
  level_options = ["Beginner", "Intermediate", "Advanced"]
11
 
12
- # Load tokenizer separately so we can access eos_token_id
13
- HF_TOKEN = os.environ.get("HF_TOKEN")
14
  tokenizer = AutoTokenizer.from_pretrained("Pisethan/khmer-lesson-model", token=HF_TOKEN)
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  @spaces.GPU
17
  def generate_lesson(grade, topic, level):
18
  device = 0 if torch.cuda.is_available() else -1
@@ -41,8 +61,20 @@ TaRL Level: {level}
41
  """
42
 
43
  output = pipe(prompt, max_new_tokens=300, temperature=0.7, do_sample=True, eos_token_id=tokenizer.eos_token_id)
44
- return output[0]['generated_text']
45
-
 
 
 
 
 
 
 
 
 
 
 
 
46
  @spaces.GPU
47
  def generate_all_lessons():
48
  device = 0 if torch.cuda.is_available() else -1
@@ -64,10 +96,21 @@ Grade: {grade}
64
  Topic: {topic}
65
  TaRL Level: {level}"""
66
  output = pipe(prompt, max_new_tokens=200, temperature=0.7, do_sample=True)
67
- results += f"πŸ”Ή αžαŸ’αž“αžΆαž€αŸ‹ {grade} | {topic} | {level}\n{output[0]['generated_text']}\n\n{'-'*50}\n\n"
 
 
 
 
 
 
 
 
 
 
 
68
  return results
69
 
70
- # Gradio UI
71
  with gr.Blocks() as demo:
72
  gr.Markdown("## πŸ€– αž’αŸ’αž“αž€αž‡αŸ†αž“αž½αž™αž”αž„αŸ’αž€αžΎαžαž˜αŸαžšαŸ€αž“αž‚αžŽαž·αžαžœαž·αž‘αŸ’αž™αžΆ")
73
  gr.Markdown("αž‡αŸ’αžšαžΎαžŸαžšαžΎαžŸαžαŸ’αž“αžΆαž€αŸ‹ αž”αŸ’αžšαž’αžΆαž“αž”αž‘ αž“αž·αž„αž€αž˜αŸ’αžšαž·αžαžŸαž·αžŸαŸ’αžŸ αžšαž½αž…αž…αž»αž…αž”αž„αŸ’αž€αžΎαžαž˜αŸαžšαŸ€αž“αŸ” αž¬αž…αž»αž…αž”αŸŠαžΌαžαž»αž„αžαžΆαž„αž€αŸ’αžšαŸ„αž˜αžŸαž˜αŸ’αžšαžΆαž”αŸ‹αž”αž„αŸ’αž€αžΎαžαž˜αŸαžšαŸ€αž“αž‘αžΆαŸ†αž„αž’αžŸαŸ‹αŸ”")
 
3
  from transformers import pipeline, AutoTokenizer
4
  import torch
5
  import spaces
6
+ import json
7
+ from huggingface_hub import HfApi, upload_file
8
 
9
+ # --- Constants ---
10
+ HF_TOKEN = os.environ.get("HF_TOKEN")
11
+ DATASET_REPO = "Pisethan/khmer-lesson-dataset-generated"
12
+ LOCAL_JSONL = "generated_lessons.jsonl"
13
+
14
+ # --- Options ---
15
  grade_options = ["1", "2", "3", "4", "5", "6"]
16
  topic_options = ["Addition", "Subtraction", "Counting", "Number Recognition", "Multiplication", "Division"]
17
  level_options = ["Beginner", "Intermediate", "Advanced"]
18
 
19
+ # --- Tokenizer (global) ---
 
20
  tokenizer = AutoTokenizer.from_pretrained("Pisethan/khmer-lesson-model", token=HF_TOKEN)
21
 
22
+ # --- Helper to save and upload ---
23
def save_to_jsonl(record):
    """Append *record* to the local JSONL log, then sync the log to the Hub.

    The record is serialized as one JSON object per line (non-ASCII text is
    written as-is, not escaped) and appended to ``LOCAL_JSONL``. The whole
    file is then uploaded to ``DATASET_REPO`` as ``generated_lessons.jsonl``.

    The upload is best-effort: the record is already persisted locally by the
    time it runs, and every call re-uploads the complete file, so a failed
    sync is made up for by the next successful one. A Hub/network failure is
    therefore logged instead of being allowed to abort the caller.

    Args:
        record: A JSON-serializable mapping describing one generated lesson.

    Raises:
        TypeError: If *record* cannot be serialized to JSON.
        OSError: If the local file cannot be opened or written.
    """
    # Imported locally so this helper does not depend on a file-level import.
    import logging

    # Serialize before opening the file so that an unserializable record does
    # not leave behind an empty or partially written log.
    line = json.dumps(record, ensure_ascii=False) + "\n"
    with open(LOCAL_JSONL, "a", encoding="utf-8") as f:
        f.write(line)

    try:
        upload_file(
            path_or_fileobj=LOCAL_JSONL,
            path_in_repo="generated_lessons.jsonl",
            repo_id=DATASET_REPO,
            repo_type="dataset",
            token=HF_TOKEN,
        )
    except Exception:
        # Broad on purpose: this is a side-effect boundary (network + Hub API)
        # and the set of exception types it can raise depends on the installed
        # huggingface_hub version. The failure is logged with its traceback,
        # never silently dropped.
        logging.getLogger(__name__).exception(
            "Could not upload %s to dataset repo %s; record kept locally",
            LOCAL_JSONL,
            DATASET_REPO,
        )
34
+
35
+ # --- Generation for one lesson ---
36
  @spaces.GPU
37
  def generate_lesson(grade, topic, level):
38
  device = 0 if torch.cuda.is_available() else -1
 
61
  """
62
 
63
  output = pipe(prompt, max_new_tokens=300, temperature=0.7, do_sample=True, eos_token_id=tokenizer.eos_token_id)
64
+ result = output[0]['generated_text']
65
+
66
+ # Save to dataset
67
+ record = {
68
+ "grade": grade,
69
+ "topic": topic,
70
+ "level": level,
71
+ "prompt": prompt.strip(),
72
+ "completion": result.strip()
73
+ }
74
+ save_to_jsonl(record)
75
+ return result
76
+
77
+ # --- Generation for all combinations ---
78
  @spaces.GPU
79
  def generate_all_lessons():
80
  device = 0 if torch.cuda.is_available() else -1
 
96
  Topic: {topic}
97
  TaRL Level: {level}"""
98
  output = pipe(prompt, max_new_tokens=200, temperature=0.7, do_sample=True)
99
+ result = output[0]['generated_text']
100
+
101
+ record = {
102
+ "grade": grade,
103
+ "topic": topic,
104
+ "level": level,
105
+ "prompt": prompt.strip(),
106
+ "completion": result.strip()
107
+ }
108
+ save_to_jsonl(record)
109
+
110
+ results += f"πŸ”Ή αžαŸ’αž“αžΆαž€αŸ‹ {grade} | {topic} | {level}\n{result}\n\n{'-'*50}\n\n"
111
  return results
112
 
113
+ # --- UI ---
114
  with gr.Blocks() as demo:
115
  gr.Markdown("## πŸ€– αž’αŸ’αž“αž€αž‡αŸ†αž“αž½αž™αž”αž„αŸ’αž€αžΎαžαž˜αŸαžšαŸ€αž“αž‚αžŽαž·αžαžœαž·αž‘αŸ’αž™αžΆ")
116
  gr.Markdown("αž‡αŸ’αžšαžΎαžŸαžšαžΎαžŸαžαŸ’αž“αžΆαž€αŸ‹ αž”αŸ’αžšαž’αžΆαž“αž”αž‘ αž“αž·αž„αž€αž˜αŸ’αžšαž·αžαžŸαž·αžŸαŸ’αžŸ αžšαž½αž…αž…αž»αž…αž”αž„αŸ’αž€αžΎαžαž˜αŸαžšαŸ€αž“αŸ” αž¬αž…αž»αž…αž”αŸŠαžΌαžαž»αž„αžαžΆαž„αž€αŸ’αžšαŸ„αž˜αžŸαž˜αŸ’αžšαžΆαž”αŸ‹αž”αž„αŸ’αž€αžΎαžαž˜αŸαžšαŸ€αž“αž‘αžΆαŸ†αž„αž’αžŸαŸ‹αŸ”")