diff --git a/doc/gp.nvim.txt b/doc/gp.nvim.txt index 3eda6a8..921f598 100644 --- a/doc/gp.nvim.txt +++ b/doc/gp.nvim.txt @@ -1,4 +1,4 @@ -*gp.nvim.txt* For NVIM v0.8.0 Last change: 2024 January 14 +*gp.nvim.txt* For NVIM v0.8.0 Last change: 2024 January 20 ============================================================================== Table of Contents *gp.nvim-table-of-contents* diff --git a/lua/gp/config.lua b/lua/gp/config.lua index 09b1715..463e996 100644 --- a/lua/gp/config.lua +++ b/lua/gp/config.lua @@ -213,12 +213,21 @@ local config = { -- decrease this number to pick up only louder sounds as possible speech -- you can disable silence trimming by setting this a very high number (like 1000.0) whisper_silence = "1.75", - -- whisper max recording time (mm:ss) - whisper_max_time = "05:00", -- whisper tempo (1.0 is normal speed) whisper_tempo = "1.75", -- The language of the input audio, in ISO-639-1 format. whisper_language = "en", + -- command to use for recording; can be nil (unset) for automatic selection, + -- a string ("sox", "arecord", "ffmpeg"), or a table with command and arguments: + -- sox is the most universal, but can have start/end cropping issues caused by latency + -- arecord is Linux only, but has no cropping issues and is faster + -- ffmpeg in the default configuration is macOS only, but can be used on any platform + -- (see https://trac.ffmpeg.org/wiki/Capture/Desktop for more info) + -- below is the default configuration for all three commands: + -- whisper_rec_cmd = {"sox", "-c", "1", "--buffer", "32", "-d", "rec.wav", "trim", "0", "60:00"}, + -- whisper_rec_cmd = {"arecord", "-c", "1", "-f", "S16_LE", "-r", "48000", "-d", "3600", "rec.wav"}, + -- whisper_rec_cmd = {"ffmpeg", "-y", "-f", "avfoundation", "-i", ":0", "-t", "3600", "rec.wav"}, + whisper_rec_cmd = nil, -- image generation settings -- image prompt prefix for asking user for input (supports {{agent}} template variable) diff --git a/lua/gp/init.lua b/lua/gp/init.lua index 4177189..54c9ec6 
100644 --- a/lua/gp/init.lua +++ b/lua/gp/init.lua @@ -17,6 +17,7 @@ local deprecated = { chat_system_prompt = "`chat_system_prompt`\n" .. switch_to_agent, command_prompt_prefix = "`command_prompt_prefix`\nPlease use `command_prompt_prefix_template`" .. " with support for \n`{{agent}}` variable so you know which agent is currently active", + whisper_max_time = "`whisper_max_time`\nPlease use fully customizable `whisper_rec_cmd`", } -------------------------------------------------------------------------------- @@ -2863,41 +2864,35 @@ M.Whisper = function(callback) return end + local rec_file = M.config.whisper_dir .. "/rec.wav" local rec_options = { sox = { cmd = "sox", opts = { - -- single channel "-c", "1", - -- small buffer "--buffer", "32", "-d", - -- output file - M.config.whisper_dir .. "/rec.wav", - -- max recording time + "rec.wav", "trim", "0", - M.config.whisper_max_time, + "3600", }, exit_code = 0, }, arecord = { cmd = "arecord", opts = { - -- single channel "-c", "1", "-f", "S16_LE", "-r", "48000", - -- max recording time "-d", 3600, - -- output file - M.config.whisper_dir .. "/rec.wav", + "rec.wav", }, exit_code = 1, }, @@ -2911,7 +2906,7 @@ M.Whisper = function(callback) ":0", "-t", "3600", - M.config.whisper_dir .. 
"/rec.wav", + "rec.wav", }, exit_code = 255, }, @@ -3047,25 +3042,48 @@ M.Whisper = function(callback) end) end - local rec_cmd = "sox" - if vim.fn.executable("ffmpeg") == 1 then - local devices = vim.fn.system("ffmpeg -devices -v quiet | grep -i avfoundation | wc -l") - devices = string.gsub(devices, "^%s*(.-)%s*$", "%1") - if devices == "1" then - rec_cmd = "ffmpeg" + local cmd = {} + + local rec_cmd = M.config.whisper_rec_cmd + -- if rec_cmd not set explicitly, try to autodetect + if not rec_cmd then + rec_cmd = "sox" + if vim.fn.executable("ffmpeg") == 1 then + local devices = vim.fn.system("ffmpeg -devices -v quiet | grep -i avfoundation | wc -l") + devices = string.gsub(devices, "^%s*(.-)%s*$", "%1") + if devices == "1" then + rec_cmd = "ffmpeg" + end end + if vim.fn.executable("arecord") == 1 then + rec_cmd = "arecord" + end + end + + if type(rec_cmd) == "table" and rec_cmd[1] and rec_options[rec_cmd[1]] then + rec_cmd = vim.deepcopy(rec_cmd) + cmd.cmd = table.remove(rec_cmd, 1) + cmd.exit_code = rec_options[cmd.cmd].exit_code + cmd.opts = rec_cmd + elseif type(rec_cmd) == "string" and rec_options[rec_cmd] then + cmd = rec_options[rec_cmd] + else + M.error(string.format("Whisper got invalid recording command: %s", rec_cmd)) + close() + return end - if vim.fn.executable("arecord") == 1 then - rec_cmd = "arecord" + for i, v in ipairs(cmd.opts) do + if v == "rec.wav" then + cmd.opts[i] = rec_file + end end - local cmd = rec_options[rec_cmd] M._H.process(nil, cmd.cmd, cmd.opts, function(code, signal, stdout, stderr) close() if code and code ~= cmd.exit_code then M.error( - rec_cmd + cmd.cmd .. " exited with code and signal:\ncode: " .. code .. ", signal: "