File size: 15,906 Bytes
613af8d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
if !exists("g:whisper_dir")
    let g:whisper_dir = expand($WHISPER_CPP_HOME)
    if g:whisper_dir == ""
        echoerr "Please provide a path to the whisper.cpp repo in either the $WHISPER_CPP_HOME environment variable, or g:whisper_dir"
    endif
endif
if !exists("g:whisper_lsp_path")
    let g:whisper_lsp_path = g:whisper_dir .. "lsp"
    if !filereadable(g:whisper_lsp_path)
        echoerr "Was not able to locate a lsp executable at: " .. g:whisper_lsp_path
        throw "Executable not found"
    endif
endif
if !exists("g:whisper_model_path")
    " TODO: allow custom paths relative to the repo dir
    let g:whisper_model_path = g:whisper_dir .. "models/ggml-base.en.bin"
    if !filereadable(g:whisper_model_path)
        echoerr "Could not find model at: " .. g:whisper_model_path
        throw "Model not found"
    endif
endif
let s:output_buffer = bufnr("whisper_log", v:true)
call setbufvar(s:output_buffer,"&buftype","nofile")
let s:lsp_command = [g:whisper_lsp_path,"-m",g:whisper_model_path]
" For faster execution. TODO: server load multiple models/run multiple servers?
" let s:lsp_command = [g:whisper_lsp_path, "-m", g:whisper_dir .. "models/ggml-tiny.en.bin", "-ac", "128"]

" requestCommands([params_dict])
func whisper#requestCommands(...)
    let l:req = {"method": "guided", "params": {"commandset_index": 0}}
    if a:0 > 0
        call extend(l:req.params, a:1)
    endif
    let resp = ch_sendexpr(g:lsp_job, l:req, {"callback": function("s:commandCallback", [l:req.params, 0])})
endfunction

" doTranscription([params_dict])
func whisper#doTranscription(...)
    let l:req = {"method": "unguided", "params": {}}
    if a:0 > 0
        call extend(l:req.params, a:1)
    endif
    let resp = ch_sendexpr(g:lsp_job, l:req, {"callback": function("s:transcriptionCallback", [function("s:insertText"),function("s:endTranscription")])})
endfunction

" For testing
func whisper#uppertest(cha)
    echo tr(a:cha, s:c_lowerkeys, s:c_upperkeys)
endfunction


" (upper, exit, count, motion, command, insert/append, save run) "base"
" (upper, exit, count, motion, command, inside/around)           "motion/visual"
" (upper, exit, count, motion, line,    inside/around)           "command already entered"
" (upper, exit, key,                                 )           "from/till"

" upper and lower keys is used to translate between cases with tr
" Must be sunchronized
let s:c_lowerkeys = "1234567890-=qwertyuiop[]\\asdfghjkl;'zxcvbnm,./\""
let s:c_upperkeys = "!@#$%^&*()_+QWERTYUIOP{}|ASDFGHJKL:\"ZXCVBNM<>?'"
let s:c_count = split("1234567890\"",'\zs')
let s:c_command = split("ryuogpdxcv.iam", '\zs')
let s:c_motion = split("wetf'hjklnb$^)",'\zs')
" object words: Word, Sentence, Paragraph, [, (, <, Tag, {. ", '
let s:c_area = split("wsp])>t}\"'",'\zs')
"Special commands.
let s:c_special_always = ["exit", "upper"]
let s:c_special_normal = ["save", "run", "space"]

" If not in dict, key is spoken word,
" If key resolves to string, value is used for normal/motion, but key for chars
" If key resolves to dict, {0: "normal",1: "motion",2:"single char",3: "area"}
" Missing entries fall back as follows {0: "required", 1: 0, 2: "key", 3: 0}
let s:spoken_dict = {"w": "word", "e": "end", "r": "replace", "t": {0: "till", 3: "tag"}, "y": "yank", "u": "undo", "i": {0: "insert", 1: "inside"}, "o": "open", "p": {0: "paste", 3: "paragraph"},  "a": {0: "append", 1: "around"}, "s": {0: "substitute", 3: "sentence"}, "d": "delete", "f": "from", "g": "go", "h": "left", "j": "down", "k": "up", "l": "right", "c": "change", "v": "visual", "b": "back", "n": "next", "m": "mark", ".": {0: "repeat", 2: "period"}, "]": {0: "bracket", 2: "bracket"}, "'": {0: "jump", 2: "apostrophe", 3:  "apostrophe"}, '"': {0: 'register', 2: "quotation", 3: "quotation"}, "-": {0: "minus", 2: "minus"}, "$": {0: "dollar", 2: "dollar"}, "^": {0: "carrot", 2: "carrot"}, ")": {0: "sentence", 2: "parenthesis",  3: "parenthesis"}, "}": {0: "paragraph", 2: "brace", 3: "brace"}, ">": {0: "indent", 2: "angle", 3: "angle"}}

" Give this another pass. This seems overly hacky even if it's functional
let s:sub_tran_msg = ""
func s:subTranProg(msg)
    if s:sub_tran_msg != ""
        let s:sub_tran_msg = s:sub_tran_msg .. a:msg
        if mode() !=? 'v'
            exe "normal" "u" .. s:sub_tran_msg
        endif
    else
        if s:command_backlog == ""
            " this should not occur
            call s:logCallback(0, "Warning: Encountered sub transcription without prior command")
            let s:command_backlog = "a"
        endif
        if a:msg[0] == ' '
            let s:sub_tran_msg = s:command_backlog .. a:msg[1:-1]
        else
            let s:sub_tran_msg = s:command_backlog  .. a:msg
        endif
        if mode() !=? 'v'
            exe "normal" s:sub_tran_msg
        endif
    endif
    call appendbufline(s:output_buffer, "$", s:sub_tran_msg ..  ":" .. string(a:msg ))
endfunction

func s:subTranFinish(params, timestamp)
    let s:repeat_command = s:sub_tran_msg
    " Visual selection is lot if used with streaming, so streaming of partial
    " transcriptions is disabled in visual mode
    if mode() ==? 'v'
        exe "normal" s:sub_tran_msg
    endif
    let s:sub_tran_msg = ""
    let s:command_backlog = ""
    exe "normal a\<C-G>u"
    let l:params = a:params
    let l:params.timestamp = a:timestamp
    if exists("l:params.commandset_index")
        unlet l:params.commandset_index
    endif
    call whisper#requestCommands(a:params)
endfunction

func s:logCallback(channel, msg)
    call appendbufline(s:output_buffer,"$",a:msg)
endfunction


func s:transcriptionCallback(progressCallback, finishedCallback, channel, msg)
    let l:tr = a:msg.result.transcription

    let l:ex_ind = match(tolower(l:tr),"exit", len(l:tr)-6)
    " The worst case I've observed so far is " Exit.", which is 6 characters
    if l:ex_ind != -1
        call a:progressCallback(strpart(l:tr,0,l:ex_ind-1))
        call a:finishedCallback(a:msg.result.timestamp)
    else
        call a:progressCallback(l:tr)
        let req = {"method": "unguided", "params": {"timestamp": a:msg.result.timestamp, "no_context": v:true}}
        let resp = ch_sendexpr(g:lsp_job, req, {"callback": function("s:transcriptionCallback", [a:progressCallback, a:finishedCallback])})
    endif
endfunc
func s:insertText(msg)
    exe "normal a" .. a:msg
endfunction
func s:endTranscription(timestamp)
    call appendbufline(s:output_buffer, "$", "Ending unguided transcription")
endfunction



" If a command does not include a whole actionable step, attempting to execute
" it discards the remainder of things. There is likely a simpler solution,
" but it can be made functional now by storing a backbuffer until actionable
let s:command_backlog = ""
let s:repeat_command = ""
let s:preceeding_upper = v:false
func s:commandCallback(params, commandset_index, channel, msg)
    let l:command_index = a:msg.result.command_index
    let l:do_execute = v:false
    let l:next_mode = a:commandset_index
    let l:command = s:commandset_list[a:commandset_index][l:command_index]
    call s:logCallback(0, string(a:msg) .. " " .. a:commandset_index .. " " .. l:command)
    if l:command_index == 0
        "exit
        "if s:command_backlog == ""
        call s:logCallback(0,"Stopping command mode")
        echo "No longer listening"
        let s:command_backlog = ""
        return
        "else
        " Legacy code to clear an existing buffer with exit.
        " Was found to be rarely desired and is better introduced as a
        " standalone command (clear?)
        "   call s:logCallback(0,"Clearing command_backlog" .. s:command_backlog)
        "   let s:command_backlog = ""
        "   let s:preceeding_upper = v:false
        " endif
    elseif l:command_index == 1
        " upper
        let s:preceeding_upper = !s:preceeding_upper
    elseif l:command == "save"
        " save and run can only happen in commandset 0,
        exe "w"
    elseif l:command == "run"
        exe "make run"
    elseif l:command == "space"
        exe "normal i \<ESC>l"
    elseif has_key(s:c_user, l:command)
        let Userfunc = s:c_user[l:command]
        if type(Userfunc) == v:t_string
            let Userfunc = function(Userfunc)
        endif
        call Userfunc()
    else
        if s:preceeding_upper
            " Upper should keep commandset
            let s:preceeding_upper = v:false
            let l:visual_command = tr(l:command, s:c_lowerkeys, s:c_upperkeys)
        else
            let l:visual_command = l:command
        endif
        echo s:command_backlog .. " - " .. l:visual_command
        let s:command_backlog = s:command_backlog .. l:visual_command
        if a:commandset_index == 2 || a:commandset_index == 3
            " single key, either completes motion, replace, or register
            " Should move to execute unless part of a register
            " Change will be caught at execute
            if s:command_backlog[-2:-2] !=# '"'
                call s:logCallback(0,"not register")
                let l:do_execute = v:true
            end
            let l:next_mode = 0
            " commandset index only matters for a/i
        elseif (l:command == "a" || l:command == "i") && a:commandset_index == 1
            " inside/around. Is commandset 3
            let l:next_mode = 3
        elseif l:command ==# '"'
            let l:next_mode = 2
        elseif index(s:c_count, l:command) != -1
            let l:next_mode = a:commandset_index
        elseif index(s:c_motion, l:command) != -1
            if l:command == 't' || l:command == 'f' || l:command == "'"
                " prompt single key
                let l:next_mode = 2
            else
                let l:do_execute = v:true
                let l:next_mode = 0
            endif
        elseif index(s:c_command, l:command) != -1
            if index(["y","g","d","c"], s:command_backlog[-1:-1]) != -1 && s:command_backlog[-1:-1] != s:command_backlog[-2:-2] && mode() !=? 'v'
                " need motion or repeated command
                " Potential for bad state here if disparaging command keys are
                " entered (i.e. yd), but vim can handle checks for this at exe
                " And checking for cases like y123d would complicate things
                let l:next_mode = 1
            elseif index(["i","a","c", "o", "s"], l:command) != -1 || s:command_backlog[-1:-1] ==# 'R'
                "'Insert' mode, do general transcription
                let l:req = {"method": "unguided", "params": a:params}
                let l:req.params.timestamp = a:msg.result.timestamp
                let l:req.params.no_context = v:true
                let resp = ch_sendexpr(g:lsp_job, req, {"callback": function("s:transcriptionCallback", [function("s:subTranProg"), function("s:subTranFinish", [a:params])])})
                return
            elseif l:command == 'r' || l:command == 'm'
                let l:next_mode = 2
            elseif l:command == '.'
                let l:next_mode = 0
                let l:do_execute = v:true
                let s:command_backlog = s:command_backlog[0:-2] .. s:repeat_command
            else
                if l:command ==? 'v'
                    let l:next_mode = 1
                else
                    let l:next_mode = 0
                endif
                let l:do_execute = v:true
            endif
        else
            throw "Invalid command state: " .. l:command .. " " .. a:commandset_index .. " " .. s:command_backlog
        endif
    endif
    if l:do_execute
        if mode() ==?'v' && l:next_mode == 0
            let l:next_mode = 1
        elseif match(s:command_backlog, 'c') != -1
            let l:req = {"method": "unguided", "params": a:params}
            let l:req.params.timestamp = a:msg.result.timestamp
            let l:req.params.no_context = v:true
            let resp = ch_sendexpr(g:lsp_job, req, {"callback": function("s:transcriptionCallback", [function("s:subTranProg"), function("s:subTranFinish", [a:params])])})
            return
        endif
        exe "normal" s:command_backlog
        if index(s:c_motion + ["u"],l:command) == -1
            exe "normal a\<C-G>u"
            let s:repeat_command = s:command_backlog
            call s:logCallback(0, s:command_backlog)
        endif
        let s:command_backlog = ""
    endif
    let l:req = {"method": "guided", "params": a:params}
    let l:req.params.timestamp = a:msg.result.timestamp
    let l:req.params.commandset_index = l:next_mode
    let resp = ch_sendexpr(g:lsp_job, l:req, {"callback": function("s:commandCallback",[a:params, l:next_mode])})
endfunction

func s:loadedCallback(channel, msg)
    echo "Loading complete"
    call s:logCallback(a:channel, a:msg)
endfunction

func s:registerCommandset(commandlist, is_final)
    let req = {"method": "registerCommandset"}
    let req.params = a:commandlist
    call s:logCallback(0, join(a:commandlist))
    call add(g:whisper_commandlist_spoken, a:commandlist)
    if a:is_final
        let resp = ch_sendexpr(g:lsp_job, req, {"callback": "s:loadedCallback"})
    else
        let resp = ch_sendexpr(g:lsp_job, req, {"callback": "s:logCallback"})
    endif
endfunction

func s:registerAllCommands()
    let l:normal = s:c_special_always + s:c_special_normal + s:c_count + s:c_command + s:c_motion + keys(s:c_user)
    let l:visual = s:c_special_always + s:c_count + s:c_command + s:c_motion
    " Currently the same as visual.
    " let l:post_command = s:c_special_always + s:c_count + s:c_command + s:c_motion
    let l:single_key = s:c_special_always + split(s:c_lowerkeys, '\zs')
    let l:area = s:c_special_always + s:c_area

    " Used only for compatibility with the testing script
    let g:whisper_commandlist_spoken = []

    let s:commandset_list = [l:normal, l:visual, l:single_key, l:area]
    call s:registerCommandset(s:commandsetToSpoken(l:normal, 0), v:false)
    call s:registerCommandset(s:commandsetToSpoken(l:visual, 1), v:false)
    call s:registerCommandset(s:commandsetToSpoken(l:single_key, 2), v:false)
    call s:registerCommandset(s:commandsetToSpoken(l:area, 3), v:true)
endfunction

func s:commandsetToSpoken(commandset, spoken_index)
    let l:spoken_list = []
    for l:command in a:commandset
        if has_key(s:spoken_dict, l:command)
            let l:spoken_value = s:spoken_dict[l:command]
            if type(l:spoken_value) == v:t_dict
                if has_key(l:spoken_value, a:spoken_index)
                    let l:spoken_value = l:spoken_value[a:spoken_index]
                else
                    if a:spoken_index == 2
                        let l:spoken_value = l:command
                    else
                        let l:spoken_value = l:spoken_value[0]
                    endif
                endif
            else
                if a:spoken_index == 2
                    let l:spoken_value = l:command
                endif
            endif
        else
            let l:spoken_value = l:command
        endif
        call add(l:spoken_list, l:spoken_value)
    endfor
    return l:spoken_list
endfunction

" TODO: Check lifetime. If the script is resourced, is the existing
" s:lsp_job dropped and therefore killed?
" This seems to not be the case and I've had to deal with zombie processes
" that survive exiting vim, even though said behavior conflicts with my
" understanding of the provided documentation
let s:lsp_opts = {"in_mode": "lsp", "out_mode": "lsp", "err_mode": "nl", "err_io": "buffer", "err_buf": s:output_buffer}
if !exists("g:lsp_job")
    if exists("g:whisper_user_commands")
        let s:c_user = g:whisper_user_commands
    else
        let s:c_user = {}
    endif
    let g:lsp_job = job_start(s:lsp_command, s:lsp_opts)
    if job_status(g:lsp_job) == "fail"
        echoerr "Failed to start whisper job"
    endif
    call s:registerAllCommands()
endif