sumuks committed on
Commit
67a2d65
·
verified ·
1 Parent(s): 1d375c6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -18
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import gradio as gr
2
  import spaces
3
  import torch
 
4
  from threading import Thread
5
  from transformers import AutoModel, AutoTokenizer, AutoModelForCausalLM, AutoModelForSeq2SeqLM, TextIteratorStreamer
6
 
@@ -34,6 +35,30 @@ def load_model():
34
 
35
  return model, tokenizer
36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  @spaces.GPU(duration=60)
38
  def clean_text(text):
39
  model, tokenizer = load_model()
@@ -65,28 +90,53 @@ def clean_text(text):
65
  thread = Thread(target=model.generate, kwargs=generation_kwargs)
66
  thread.start()
67
 
68
- # Yield text as it's generated
69
  generated_text = ""
 
 
 
70
  for new_text in streamer:
71
- generated_text += new_text
72
- yield generated_text
 
 
 
73
 
74
  thread.join()
 
 
 
 
75
 
76
- iface = gr.Interface(
77
- fn=clean_text,
78
- inputs=gr.Textbox(
79
- lines=5,
80
- placeholder="Enter text to clean...",
81
- label="Input Text"
82
- ),
83
- outputs=gr.Textbox(
84
- lines=5,
85
- label="Cleaned Text"
86
- ),
87
- title="TextClean-4B Demo",
88
- description="Simple demo for text cleaning using textcleanlm/textclean-4B model"
89
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
 
91
  if __name__ == "__main__":
92
- iface.launch()
 
1
  import gradio as gr
2
  import spaces
3
  import torch
4
+ import difflib
5
  from threading import Thread
6
  from transformers import AutoModel, AutoTokenizer, AutoModelForCausalLM, AutoModelForSeq2SeqLM, TextIteratorStreamer
7
 
 
35
 
36
  return model, tokenizer
37
 
38
def create_diff_html(original, cleaned):
    """Render a unified diff between two texts as an HTML fragment.

    Args:
        original: The text before cleaning. ``None`` is treated as empty.
        cleaned: The text after cleaning. ``None`` is treated as empty.

    Returns:
        A string holding one wrapper ``<div>`` with one child ``<div>`` per
        unified-diff line. File headers, hunk headers, additions and
        deletions are styled differently. When the texts are identical the
        wrapper is empty.
    """
    # Local import keeps this block self-contained; ``difflib`` is already
    # imported at module level.
    import html

    header_style = ' style="color: #666;"'
    hunk_style = ' style="color: #0066cc; font-weight: bold;"'
    added_style = ' style="background-color: #e6ffed; color: #24292e;"'
    removed_style = ' style="background-color: #ffeef0; color: #24292e;"'

    # Split WITHOUT line endings: each diff line gets its own <div>, so a
    # kept "\n" would show up as an extra blank line under pre-wrap.
    original_lines = (original or "").splitlines()
    cleaned_lines = (cleaned or "").splitlines()

    diff_lines = difflib.unified_diff(
        original_lines,
        cleaned_lines,
        fromfile='Original',
        tofile='Cleaned',
        lineterm='',
    )

    parts = [
        '<div style="font-family: monospace; font-size: 12px; '
        'white-space: pre-wrap;">'
    ]

    for index, line in enumerate(diff_lines):
        if index < 2:
            # unified_diff emits the '---' / '+++' file headers exactly once,
            # as its first two lines. Detecting them by position avoids
            # misclassifying a removed line whose text starts with "--"
            # (rendered as "---...") or an added line starting with "++".
            style = header_style
        elif line.startswith('@@'):
            style = hunk_style
        elif line.startswith('+'):
            style = added_style
        elif line.startswith('-'):
            style = removed_style
        else:
            style = ''
        # Escape the text: it comes from user input / model output and must
        # not be interpreted as markup by the HTML component.
        parts.append(f'<div{style}>{html.escape(line)}</div>')

    parts.append('</div>')
    return ''.join(parts)
61
+
62
  @spaces.GPU(duration=60)
63
  def clean_text(text):
64
  model, tokenizer = load_model()
 
90
  thread = Thread(target=model.generate, kwargs=generation_kwargs)
91
  thread.start()
92
 
93
+ # Collect generated text, skipping the input
94
  generated_text = ""
95
+ input_length = len(formatted_text)
96
+ full_output = ""
97
+
98
  for new_text in streamer:
99
+ full_output += new_text
100
+ # Only yield the part after the input
101
+ if len(full_output) > input_length:
102
+ generated_text = full_output[input_length:].strip()
103
+ yield generated_text, ""
104
 
105
  thread.join()
106
+
107
+ # After generation is complete, create diff
108
+ diff_html = create_diff_html(text, generated_text)
109
+ yield generated_text, diff_html
110
 
111
# Assemble the UI with gr.Blocks so the layout and the event wiring are
# spelled out explicitly.
with gr.Blocks(title="TextClean-4B Demo") as demo:
    # Page heading and short description.
    gr.Markdown("# TextClean-4B Demo")
    gr.Markdown("Simple demo for text cleaning using textcleanlm/textclean-4B model")

    # Input area: the text to clean and the button that starts the run.
    with gr.Row(), gr.Column():
        input_text = gr.Textbox(
            label="Input Text",
            placeholder="Enter text to clean...",
            lines=5,
        )
        submit_btn = gr.Button("Clean Text", variant="primary")

    # Read-only box that receives the cleaned text.
    with gr.Row():
        output_text = gr.Textbox(
            label="Cleaned Text",
            lines=5,
            interactive=False,
        )

    # HTML component that receives the diff markup.
    with gr.Row():
        diff_display = gr.HTML(label="Diff View")

    # clean_text yields (cleaned text, diff HTML) pairs; map them onto the
    # two output components in that order.
    submit_btn.click(
        fn=clean_text,
        inputs=input_text,
        outputs=[output_text, diff_display],
    )


if __name__ == "__main__":
    demo.launch()