-- j
-- initial commit
-- 402daee
--[[
Transcript.lua - Speech transcription data model
]]--
-- Transcript: holds every transcript segment plus the filtered/sorted view
-- derived from them. Declared through the project's Polo helper.
Transcript = Polo {
  -- Preferred left-to-right order for known columns. _sort_columns appends
  -- any column not listed here afterwards, sorted alphabetically.
  COLUMN_ORDER = {"id", "seek", "start", "end", "text", "score", "file"},

  -- Columns that TranscriptSegment.default_hide reports as hidden by default.
  DEFAULT_HIDE = {
    avg_logprob = true,
    compression_ratio = true,
    no_speech_prob = true,
    seek = true,
    temperature = true,
    tokens = true,
  },

  -- Initializer (presumably run by Polo when an instance is created):
  -- starts the transcript out empty.
  init = function(self)
    self:clear()
  end
}
-- Returns the item's D_POSITION minus the take's D_STARTOFFS. Callers add the
-- result to segment/word times before handing them to REAPER.
-- Plain function (no receiver): Transcript.calculate_offset(item, take).
Transcript.calculate_offset = function (item, take)
  local item_position = reaper.GetMediaItemInfo_Value(item, 'D_POSITION')
  local take_start_offset = reaper.GetMediaItemTakeInfo_Value(take, 'D_STARTOFFS')
  return item_position - take_start_offset
end
-- Resets the transcript to its empty state: no segments, no filtered or
-- sorted view, and an empty search string.
function Transcript:clear()
  self.search = ''
  -- Three distinct tables, so the lists never alias each other after a reset.
  self.init_data, self.filtered_data, self.data = {}, {}, {}
end
-- Lists the column names available for display.
-- The names are "score" plus every key of the FIRST segment's data table that
-- does not start with an underscore, arranged by _sort_columns.
-- Returns an empty table when no segments have been added.
function Transcript:get_columns()
  if #self.init_data == 0 then
    return {}
  end

  local names = {"score"}
  for key in pairs(self.init_data[1].data) do
    -- Keys with a leading underscore are treated as private and left out.
    if key:sub(1, 1) ~= '_' then
      names[#names + 1] = key
    end
  end
  return self:_sort_columns(names)
end
-- Arranges column names for display.
-- Columns named in COLUMN_ORDER come first, in exactly that order; every other
-- column follows, sorted alphabetically.
-- @param columns list of column names
-- @return a new list; `columns` itself is not modified
function Transcript:_sort_columns(columns)
  -- Set of the requested names, for O(1) membership tests.
  local requested = {}
  for _, column in pairs(columns) do
    requested[column] = true
  end

  local result = {}
  local preferred = {}
  -- ipairs, not pairs: the position inside COLUMN_ORDER is the whole point,
  -- and pairs gives no ordering guarantee.
  for _, column in ipairs(self.COLUMN_ORDER) do
    preferred[column] = true
    if requested[column] then
      result[#result + 1] = column
    end
  end

  -- Everything without a preferred position is gathered and sorted by name.
  local extras = {}
  for _, column in pairs(columns) do
    if not preferred[column] then
      extras[#extras + 1] = column
    end
  end
  table.sort(extras)

  -- ipairs again so the sorted order is carried over unchanged.
  for _, column in ipairs(extras) do
    result[#result + 1] = column
  end
  return result
end
-- Appends a segment to the unfiltered segment list (init_data).
-- The filtered/sorted views are not touched here; they are rebuilt by
-- update() and sort().
function Transcript:add_segment(segment)
  local all_segments = self.init_data
  all_segments[#all_segments + 1] = segment
end
-- Returns true when at least one segment has been added, regardless of the
-- current search filter.
function Transcript:has_segments()
  local segment_count = #self.init_data
  return segment_count > 0
end
-- Returns the current view of the transcript (self.data): the segments left
-- after the last update(), in the order produced by the last sort().
-- The table is returned by reference, not copied.
function Transcript:get_segments()
  local current_view = self.data
  return current_view
end
-- Rebuilds self.data as a copy of the filtered segments sorted by `column`.
-- @param column     column name passed to segment:get()
-- @param ascending  truthy for ascending order, falsy for descending
--
-- Missing values are treated as the empty string. Values of different types
-- (for example a number next to a missing value) are ordered by type name
-- instead of raising "attempt to compare ..." errors; values that are neither
-- numbers nor strings are compared through tostring().
function Transcript:sort(column, ascending)
  -- Copy with a loop rather than {table.unpack(...)}, which is bounded by the
  -- Lua stack size and fails on very long transcripts.
  local rows = {}
  local source = self.filtered_data
  for i = 1, #source do
    rows[i] = source[i]
  end

  local function sort_key(segment)
    local value = segment:get(column)
    if value == nil then
      return ''
    end
    return value
  end

  -- Strict "comes before" relation that never raises on mixed types.
  local function precedes(x, y)
    local type_x, type_y = type(x), type(y)
    if type_x ~= type_y then
      return type_x < type_y
    end
    if type_x == 'number' or type_x == 'string' then
      return x < y
    end
    return tostring(x) < tostring(y)
  end

  table.sort(rows, function (a, b)
    local a_val, b_val = sort_key(a), sort_key(b)
    if ascending then
      return precedes(a_val, b_val)
    end
    return precedes(b_val, a_val)
  end)

  self.data = rows
end
-- Converts the current view (self.data) into a plain table of the form
-- { segments = { <segment:to_table()>, ... } }.
-- Segments appear in the same order as in self.data.
function Transcript:to_table()
  local segments = {}
  -- ipairs keeps the exported list in view order; pairs does not guarantee it.
  for index, segment in ipairs(self.data) do
    segments[index] = segment:to_table()
  end
  return {segments = segments}
end
-- Serializes the current view: the result of to_table() passed through
-- json.encode.
function Transcript:to_json()
  local plain = self:to_table()
  return json.encode(plain)
end
-- Recomputes filtered_data and data from init_data using self.search.
--
-- * No segments: the transcript is cleared (this also resets the search).
-- * Empty search: every segment is kept.
-- * Otherwise a segment is kept when the search text occurs in any of its
--   column values. An all-lower-case query matches case-insensitively; a
--   query containing upper-case characters matches case-sensitively.
--
-- The query is matched as literal text, not as a Lua pattern, so characters
-- such as "(", "[", "%" or "." neither raise errors nor act as wildcards.
-- Only string, number and boolean values are searched: a missing value would
-- stringify to "nil" and a table to "table: 0x...", which used to make short
-- queries match every row.
function Transcript:update()
  if #self.init_data == 0 then
    self:clear()
    return
  end

  local search = self.search
  if #search == 0 then
    self.filtered_data = self.init_data
    self.data = self.filtered_data
    return
  end

  local columns = self:get_columns()
  local search_lower = search:lower()
  local match_case = (search ~= search_lower)
  local needle = match_case and search or search_lower

  local searchable = {string = true, number = true, boolean = true}

  local function matches(segment)
    for _, column in ipairs(columns) do
      local value = segment.data[column]
      if searchable[type(value)] then
        local haystack = tostring(value)
        if not match_case then
          haystack = haystack:lower()
        end
        -- Fourth argument `true` = plain substring search.
        if haystack:find(needle, 1, true) then
          return true
        end
      end
    end
    return false
  end

  local filtered = {}
  -- ipairs keeps the filtered list in the original segment order.
  for _, segment in ipairs(self.init_data) do
    if matches(segment) then
      filtered[#filtered + 1] = segment
    end
  end

  self.filtered_data = filtered
  self.data = filtered
end
-- Creates one project marker (or region) per segment, or per word, for every
-- segment in the current view.
-- @param proj     project handle; defaults to 0
-- @param regions  truthy to create regions instead of markers; defaults to false
-- @param words    truthy to create one marker per word instead of per segment
--
-- The requested marker index is the segment's `id`, falling back to the
-- segment's position in the view; all words of one segment share that index.
-- When word markers are requested, a segment without word data contributes no
-- markers (it is treated like an empty word list instead of raising an error).
function Transcript:create_markers(proj, regions, words)
  proj = proj or 0
  regions = regions or false
  local color = 0

  for i, segment in ipairs(self.data) do
    -- Shift transcript times by the item position / take start offset.
    local offset = self.calculate_offset(segment.item, segment.take)
    local want_index = segment:get('id', i)

    if words then
      for _, word in ipairs(segment.words or {}) do
        reaper.AddProjectMarker2(
          proj, regions,
          word.start + offset, word.end_ + offset,
          word.word, want_index, color)
      end
    else
      reaper.AddProjectMarker2(
        proj, regions,
        segment.start + offset, segment.end_ + offset,
        segment.text, want_index, color)
    end
  end
end
-- Inserts a new track named "Speech" at index 0 and fills it with empty items
-- whose notes carry the transcript text: one item per segment, or one per
-- word when `words` is truthy.
-- @param words  truthy for word-level items, falsy for segment-level items
--
-- The edit cursor position is saved first and restored at the end.
-- Segment-level notes are created with the stretch flag set, word-level notes
-- without it. When word-level items are requested, a segment without word
-- data contributes no items (treated like an empty word list instead of
-- raising an error).
function Transcript:create_notes_track(words)
  local cur_pos = reaper.GetCursorPosition()

  local index = 0
  reaper.InsertTrackAtIndex(index, false)
  local track = reaper.GetTrack(0, index)
  -- _insert_empty_item runs a REAPER action, so the new track has to be the
  -- only selected one.
  reaper.SetOnlyTrackSelected(track)
  reaper.GetSetMediaTrackInfo_String(track, 'P_NAME', 'Speech', true)

  for _, segment in ipairs(self.data) do
    local offset = self.calculate_offset(segment.item, segment.take)

    if words then
      for _, word in ipairs(segment.words or {}) do
        self:_create_note(word.start + offset, word.end_ + offset, word.word, false)
      end
    else
      self:_create_note(segment.start + offset, segment.end_ + offset, segment.text, true)
    end
  end

  reaper.SetEditCurPos(cur_pos, true, true)
end
-- Creates an empty item spanning [start, end_] and stores `text` in its
-- notes. `stretch` is forwarded unchanged to _set_note_text.
function Transcript:_create_note(start, end_, text, stretch)
  self:_set_note_text(self:_create_empty_item(start, end_), text, stretch)
end
-- Inserts an empty media item and moves/resizes it to span [start, end_].
-- Returns the new item.
-- Relies on the freshly inserted item being the first selected item; the
-- selection is cleared again right after the item has been picked up.
function Transcript:_create_empty_item(start, end_)
  local project = 0

  self:_insert_empty_item()
  local new_item = reaper.GetSelectedMediaItem(project, 0)
  reaper.SelectAllMediaItems(project, false)

  reaper.SetMediaItemPosition(new_item, start, true)
  local length = end_ - start
  reaper.SetMediaItemLength(new_item, length, true)

  return new_item
end
-- Runs REAPER main-section action 40142 (going by this method's name, the
-- "insert empty item" action).
function Transcript:_insert_empty_item()
  local insert_empty_item_command = 40142
  reaper.Main_OnCommand(insert_empty_item_command, 0)
end
-- Writes `text` into the item's notes by editing its state chunk.
-- @param item     media item to modify
-- @param text     note text; surrounding whitespace is trimmed
-- @param stretch  truthy to also write "IMGRESOURCEFLAGS 11"
--                 (NOTE(review): presumably the stretch-to-fit display flag;
--                 confirm against REAPER's chunk format)
--
-- The NOTES block is inserted immediately before the chunk's final ">" only.
-- A global replace of ">" would inject the notes in front of every ">"
-- occurring anywhere in the chunk.
function Transcript:_set_note_text(item, text, stretch)
  local _, chunk = reaper.GetItemStateChunk(item, "", false)

  local trimmed = text:match("^%s*(.-)%s*$")
  -- Inside a NOTES block every text line carries a leading "|"; prefix the
  -- continuation lines too so multi-line text cannot break out of the block.
  local body = trimmed:gsub("\r?\n", "\n|")

  local notes_chunk = ("<NOTES\n|%s\n>\n"):format(body)
  local flags_chunk = (stretch and "IMGRESOURCEFLAGS 11\n" or "")
  local insertion = notes_chunk .. flags_chunk

  -- A function replacement is used verbatim, so "%" in the text needs no
  -- escaping. The pattern is anchored to the end: only the closing ">" (plus
  -- any trailing whitespace, which is preserved) can match.
  chunk = chunk:gsub('>(%s*)$', function (trailing)
    return insertion .. '>' .. trailing
  end)

  reaper.SetItemStateChunk(item, chunk, false)
end
-- TranscriptSegment: one transcribed segment. Instances carry `data` (the
-- segment's fields), `item` and `take`, and optionally `words`.
TranscriptSegment = Polo {
  -- Attribute name on the segment -> key inside segment.data.
  -- `end_` stands in for "end", which is a reserved word in Lua and therefore
  -- cannot be written as segment.end.
  _proxy_fields = {
    text = 'text',
    start = 'start',
    end_ = 'end',
  }
}
-- Lookup handler for keys that are missing on a segment.
-- Names listed in _proxy_fields are read from o.data under the mapped key;
-- every other name falls back to TranscriptSegment itself (methods and
-- class-level fields).
TranscriptSegment.__index = function(o, key)
  local data_key = TranscriptSegment._proxy_fields[key]
  if not data_key then
    return TranscriptSegment[key]
  end
  return o.data[data_key]
end
-- Initializer: validates the required fields and takes ownership of `data`.
-- Raises an assertion error when data, item or take is missing.
-- `data` is replaced by a shallow copy, so the caller's table is not modified,
-- and a 'file' entry is added from get_file().
function TranscriptSegment:init()
  assert(self.data, 'missing data')
  assert(self.item, 'missing item')
  assert(self.take, 'missing take')

  local own_data = self._copy(self.data)
  self.data = own_data
  own_data['file'] = self:get_file()
end
-- Builds transcript segments from one segment table of Whisper-style output.
-- @param segment  table with at least `text`; may carry a `words` list whose
--                 entries have `word`, `probability`, `start` and `end`
-- @param item     media item the segment belongs to
-- @param take     take the segment belongs to
-- @return a list containing a single TranscriptSegment
--
-- The input table is not modified: its fields are shallow-copied, the text is
-- trimmed, and `words` is converted into TranscriptWord objects that are
-- stored on the segment rather than inside its data.
TranscriptSegment.from_whisper = function(segment, item, take)
  local words = segment.words

  local data = TranscriptSegment._copy(segment)
  data.text = data.text:match("^%s*(.-)%s*$")
  data.words = nil

  local transcript_words = nil
  if words then
    transcript_words = {}
    -- ipairs keeps the words in spoken order; pairs does not guarantee it.
    for _, word in ipairs(words) do
      transcript_words[#transcript_words + 1] = TranscriptWord.new({
        word = word.word:match("^%s*(.-)%s*$"),
        probability = word.probability,
        start = word.start,
        end_ = word['end']
      })
    end
  end

  -- A nil `words` entry is the same as leaving the key out, so a single
  -- constructor call covers both the with-words and without-words cases.
  return {
    TranscriptSegment.new({
      data = data,
      item = item,
      take = take,
      words = transcript_words
    })
  }
end
-- Returns the Transcript.DEFAULT_HIDE entry for `column` (true for the listed
-- columns), or false when the column has no entry.
TranscriptSegment.default_hide = function(column)
  local hidden = Transcript.DEFAULT_HIDE[column]
  if hidden then
    return hidden
  end
  return false
end
-- Replaces words[index1] and words[index2] with one merged word, in place.
-- The merged word concatenates both texts (no separator), starts where the
-- first word starts, ends where the second word ends, and averages the two
-- probabilities.
-- NOTE(review): the removal order assumes index1 < index2 — confirm callers.
TranscriptSegment.merge_words = function(words, index1, index2)
  local first, second = words[index1], words[index2]

  local merged = TranscriptWord.new {
    word = first.word .. second.word,
    start = first.start,
    end_ = second.end_,
    probability = (first.probability + second.probability) / 2
  }

  -- Remove the later entry first so the earlier index is still valid.
  table.remove(words, index2)
  table.remove(words, index1)
  table.insert(words, index1, merged)
end
-- Splits words[index] into two words, in place.
-- The text is cut after floor(n / 2) characters (n = character count), the
-- time span is cut at its midpoint, and both halves keep the original
-- probability.
--
-- The cut always lands on a UTF-8 character boundary. utf8.offset returns the
-- byte where a character STARTS, so the first half must end one byte before
-- the start of the second half; ending it at the start byte of its last
-- character would truncate a multibyte character and drop its trailing bytes.
-- A one-character word becomes "" followed by the character. Text that is not
-- valid UTF-8 is cut by bytes.
TranscriptSegment.split_word = function(words, index)
  local word = words[index]
  local text = word.word

  -- Byte index of the first byte belonging to the second half.
  local split_at
  local length = utf8.len(text)
  if length then
    split_at = utf8.offset(text, math.floor(length / 2) + 1)
  else
    -- utf8.len returned nil: the text is not valid UTF-8.
    split_at = math.floor(#text / 2) + 1
  end

  local midpoint = word.start + (word.end_ - word.start) / 2

  local new_word1 = TranscriptWord.new {
    word = text:sub(1, split_at - 1),
    start = word.start,
    end_ = midpoint,
    probability = word.probability
  }
  local new_word2 = TranscriptWord.new {
    word = text:sub(split_at),
    start = midpoint,
    end_ = word.end_,
    probability = word.probability
  }

  table.remove(words, index)
  -- Insert the second half first so the first half ends up in front of it.
  table.insert(words, index, new_word2)
  table.insert(words, index, new_word1)
end
-- Returns a shallow copy of `data`: a new table with the same keys and values.
-- Nested tables are shared with the original, not duplicated.
TranscriptSegment._copy = function(data)
  local duplicate = {}
  for key, value in pairs(data) do
    duplicate[key] = value
  end
  return duplicate
end
-- Returns the mean of word:score() over the segment's words, or 0.0 when the
-- segment has no words.
function TranscriptSegment:score()
  local words = self.words
  if not words or #words == 0 then
    return 0.0
  end

  local total = 0.0
  for _, word in pairs(words) do
    total = total + word:score()
  end
  return total / #words
end
-- Returns the value shown for `column`.
-- "score" is computed through score(); any other column is read from
-- self.data. `default` is returned when the stored value is nil or false.
function TranscriptSegment:get(column, default)
  if column == 'score' then
    return self:score()
  end

  local value = self.data[column]
  if value then
    return value
  end
  return default
end
-- Replaces the segment's word list, then regenerates the segment text from
-- the new words via update_text().
function TranscriptSegment:set_words(words)
  local replacement = words
  self.words = replacement
  self:update_text()
end
-- Rebuilds data.text by joining the segment's words with single spaces.
function TranscriptSegment:update_text()
  local pieces = {}
  -- ipairs guarantees the words are joined in list order; pairs leaves the
  -- traversal order unspecified, which could scramble the sentence.
  for index, word in ipairs(self.words) do
    pieces[index] = word.word
  end
  self.data['text'] = table.concat(pieces, ' ')
end
-- Returns the file name of the take's media source, without directories.
-- @param include_extensions  truthy to keep the extension; by default
--                            everything from the last "." on is removed
-- @return the name, or '' when the take has no source
--
-- The lookup runs inside app:trap (presumably an error-catching wrapper —
-- confirm), so '' is also what remains if the lookup does not complete.
function TranscriptSegment:get_file(include_extensions)
  local keep_extension = include_extensions or false
  local name = ''

  local function resolve()
    local source = reaper.GetMediaItemTake_Source(self.take)
    if not source then
      return
    end

    local path = reaper.GetMediaSourceFileName(source)
    -- Keep what follows the last "/" or "\".
    name = path:gsub(".*[\\/](.*)", "%1")
    if not keep_extension then
      name = name:gsub("(.*)[.].*", "%1")
    end
  end

  app:trap(resolve)
  return name
end
-- Convenience wrapper: get_file() with the extension kept.
function TranscriptSegment:get_file_with_extension()
  local include_extensions = true
  return self:get_file(include_extensions)
end
-- Moves the edit cursor to this segment, or to one of its words.
-- @param word_index  optional index into self.words; when given, the word's
--                    start time is used instead of the segment's
-- @param autoplay    truthy to start playback if the transport is not playing
--
-- The segment's item is selected and the cursor is placed at its start, then
-- moved by (start time - take start offset). Whenever the transport reports
-- playing afterwards, the play action is triggered again.
function TranscriptSegment:navigate(word_index, autoplay)
  -- Bit 0 of the play state is the "playing" flag.
  local function is_playing()
    return reaper.GetPlayState() & 1 == 1
  end

  local target_time = self.start
  if word_index then
    target_time = self.words[word_index].start
  end

  local take_start = reaper.GetMediaItemTakeInfo_Value(self.take, 'D_STARTOFFS')
  local offset = target_time - take_start

  self:_navigate_to_media_item(self.item)
  reaper.MoveEditCursor(offset, false)

  if autoplay and not is_playing() then
    self:_transport_play()
  end

  -- Checked separately, after the possible autoplay start above.
  if is_playing() then
    self:_transport_play()
  end
end
-- Makes `item` the only selected media item, then moves the edit cursor to
-- the start of the selection.
function TranscriptSegment:_navigate_to_media_item(item)
  local project = 0
  reaper.SelectAllMediaItems(project, false)
  reaper.SetMediaItemSelected(item, true)
  self:_move_cursor_to_start_of_items()
end
-- Runs REAPER main-section action 41173 (going by this method's name, "move
-- cursor to start of items").
function TranscriptSegment:_move_cursor_to_start_of_items()
  local move_cursor_command = 41173
  reaper.Main_OnCommand(move_cursor_command, 0)
end
-- Runs REAPER main-section action 1007 (going by this method's name, the
-- transport "play" action).
function TranscriptSegment:_transport_play()
  local play_command = 1007
  reaper.Main_OnCommand(play_command, 0)
end
-- Serializes this segment: the result of to_table() passed through
-- json.encode.
function TranscriptSegment:to_json()
  local plain = self:to_table()
  return json.encode(plain)
end
-- Returns a plain-table form of the segment: a shallow copy of its data,
-- plus a `words` list (each word converted with word:to_table()) when the
-- segment has words.
function TranscriptSegment:to_table()
  local result = self._copy(self.data)

  if self.words then
    local words = {}
    -- ipairs keeps the exported words in list order; pairs does not
    -- guarantee it.
    for index, word in ipairs(self.words) do
      words[index] = word:to_table()
    end
    result['words'] = words
  end

  return result
end
-- Sets REAPER's loop/time range (GetSet_LoopTimeRange) to this segment's
-- span.
-- @param offset  optional amount added to both start and end; defaults to 0
function TranscriptSegment:select_in_timeline(offset)
  local shift = offset or 0
  local range_start = self.start + shift
  local range_end = self.end_ + shift
  reaper.GetSet_LoopTimeRange(true, true, range_start, range_end, false)
end
-- TranscriptWord: a single word with timing (`start`, `end_`) and a
-- `probability`. No class-level fields; instances come from
-- TranscriptWord.new.
TranscriptWord = Polo({})
-- Initializer: checks that every required field was supplied.
-- Raises an assertion error naming the first missing field. A value of 0
-- passes, since only nil and false are falsy in Lua.
function TranscriptWord:init()
  local word, start = self.word, self.start
  local end_, probability = self.end_, self.probability

  assert(word, 'missing word')
  assert(start, 'missing start')
  assert(end_, 'missing end_')
  assert(probability, 'missing probability')
end
-- Returns a new TranscriptWord carrying the same word, start, end_ and
-- probability as this one.
function TranscriptWord:copy()
  local fields = {
    word = self.word,
    start = self.start,
    end_ = self.end_,
    probability = self.probability,
  }
  return TranscriptWord.new(fields)
end
-- Returns the word's score, which is its stored probability.
function TranscriptWord:score()
  local probability = self.probability
  return probability
end
-- Returns a plain-table form of the word. The end time is exported under the
-- key "end" (not "end_"), the same key from_whisper reads on input.
function TranscriptWord:to_table()
  local plain = {}
  plain.word = self.word
  plain.start = self.start
  plain['end'] = self.end_
  plain.probability = self.probability
  return plain
end
-- Sets REAPER's loop/time range (GetSet_LoopTimeRange) to this word's span.
-- @param offset  optional amount added to both start and end; defaults to 0
function TranscriptWord:select_in_timeline(offset)
  local shift = offset or 0
  local range_start = self.start + shift
  local range_end = self.end_ + shift
  reaper.GetSet_LoopTimeRange(true, true, range_start, range_end, false)
end