Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
82 commits
Select commit Hold shift + click to select a range
1c0b0cb
feat: add keyboard event types and word grouping algorithm to cap-pro…
cursoragent Feb 18, 2026
86640c2
feat: add keyboard field to MultipleSegment recording metadata
cursoragent Feb 18, 2026
03019a1
feat: add caption/keyboard track segments, keyboard settings, and bac…
cursoragent Feb 18, 2026
73b5d07
feat: record keyboard presses alongside cursor in studio recording
cursoragent Feb 18, 2026
0f6dadb
feat: add keyboard events to RenderSegment, SegmentMedia, and export …
cursoragent Feb 18, 2026
5b121ee
feat: add keyboard overlay rendering layer with fade and character bu…
cursoragent Feb 18, 2026
17e9327
feat: add caption and keyboard track types to editor context and time…
cursoragent Feb 18, 2026
78763c2
feat: add CaptionsTrack and KeyboardTrack timeline components with fu…
cursoragent Feb 18, 2026
c0f843b
feat: add KeyboardTab sidebar, per-segment caption overrides, and key…
cursoragent Feb 18, 2026
5a43fb1
feat: add generate_keyboard_segments Tauri command for keyboard track…
cursoragent Feb 18, 2026
83348f2
chore: format Rust code with cargo fmt
cursoragent Feb 18, 2026
783d887
fix: adjust caption and keyboard segments when clip timescale changes
cursoragent Feb 18, 2026
2388f2d
Merge branch 'main' into cursor/keyboard-and-captions-tracks-8d45
richiemcilroy Feb 18, 2026
b59adc8
fix(recording): update Meta keycode to LMeta
richiemcilroy Feb 19, 2026
5d85c7a
feat(project): add keyboard path fallback resolution for segments
richiemcilroy Feb 19, 2026
6349804
feat(project): add keyboard and caption segment fields to structs
richiemcilroy Feb 19, 2026
9dedbbf
feat(rendering): add recording_time field to ProjectUniforms
richiemcilroy Feb 19, 2026
6f5a42c
refactor(rendering): simplify caption layer to use timeline segments
richiemcilroy Feb 19, 2026
d59897d
refactor(rendering): simplify keyboard layer fade with per-segment ov…
richiemcilroy Feb 19, 2026
13ea9d5
chore: update auto-generated tauri bindings
richiemcilroy Feb 19, 2026
41b04c4
chore: update auto-generated icon imports
richiemcilroy Feb 19, 2026
d27b706
feat(editor): add badge prop to Field component
richiemcilroy Feb 19, 2026
2f78e95
feat(editor): migrate CaptionsTab to timeline-based caption segments
richiemcilroy Feb 19, 2026
72192cb
feat(editor): add keyboard segment generation and redesign settings UI
richiemcilroy Feb 19, 2026
c9cd085
feat(editor): add keyboard segment selection and config panel
richiemcilroy Feb 19, 2026
315cafe
style(editor): update caption track icon and empty state text
richiemcilroy Feb 19, 2026
572802f
style(editor): update keyboard track segment colors to gray
richiemcilroy Feb 19, 2026
b15209b
Merge branch 'main' into cursor/keyboard-and-captions-tracks-8d45
richiemcilroy Mar 23, 2026
0b40e55
chore(biome): extend formatter ignores and relax CSS lint rules
richiemcilroy Mar 25, 2026
f5b55a8
chore(vendor/tao): allow dead_code on macOS-only APIs
richiemcilroy Mar 25, 2026
cebf249
refactor(storybook): type Storybook package path helper as string
richiemcilroy Mar 25, 2026
a1f9b6f
refactor(web-domain): tighten optional schema generic constraints
richiemcilroy Mar 25, 2026
ed219f1
fix(database): guard session token id with optional chaining
richiemcilroy Mar 25, 2026
0cd47c0
chore(ui-solid): declare lucide chevron icon auto-imports
richiemcilroy Mar 25, 2026
0338478
fix(discord-bot): validate GitHub workflow token claims
richiemcilroy Mar 25, 2026
eb4cdc6
chore(media-server): tighten tests and drop unused imports
richiemcilroy Mar 25, 2026
ba966d4
test(web): prefer optional chaining in schema unit tests
richiemcilroy Mar 25, 2026
a4332b8
fix(web): remove documentation search autofocus
richiemcilroy Mar 25, 2026
452e298
style(web): remove unnecessary important from prose overrides
richiemcilroy Mar 25, 2026
858d4c1
refactor(web): harden docs headings and release metadata parsing
richiemcilroy Mar 25, 2026
958ac83
refactor(web): prefix unused transcribe workflow helpers
richiemcilroy Mar 25, 2026
a4021a2
refactor(web): silence unused translated transcript variable
richiemcilroy Mar 25, 2026
31b4459
feat(project): add binary keyboard events and update styling defaults
richiemcilroy Mar 25, 2026
051d785
chore(editor): add caption and keyboard segments to playback benchmar…
richiemcilroy Mar 25, 2026
cebe8ce
fix(enc-avfoundation): satisfy clippy in mp4 encoder tests
richiemcilroy Mar 25, 2026
a7bb64e
refactor(recording): simplify benchmark runner string formatting
richiemcilroy Mar 25, 2026
c0a631c
refactor(recording): use async muxer setup in pipeline tests
richiemcilroy Mar 25, 2026
c769272
fix(recording): ignore duplicate camera feed sender registrations
richiemcilroy Mar 25, 2026
6bb6b50
feat(recording): improve recovery inspection and keyboard capture pip…
richiemcilroy Mar 25, 2026
b3367a2
feat(rendering): stack keyboard overlay around active captions
richiemcilroy Mar 25, 2026
9aa3b80
refactor(rendering): simplify cursor decimation unit test
richiemcilroy Mar 25, 2026
b0ef6db
feat(desktop): extend general settings for hints and keyboard capture
richiemcilroy Mar 25, 2026
ce240b5
feat(desktop): pass transcription hints into whisper initial prompt
richiemcilroy Mar 25, 2026
919cd95
feat(desktop): wire inspect recovery, remux, and keyboard capture toggle
richiemcilroy Mar 25, 2026
fa938a6
fix(desktop): restore camera window safely and widen settings layout
richiemcilroy Mar 25, 2026
aeffebc
feat(desktop): add shared general settings helpers
richiemcilroy Mar 25, 2026
37de84b
test(desktop): cover transcription hint normalization
richiemcilroy Mar 25, 2026
aaf75b7
refactor(desktop): read general settings types from shared helper
richiemcilroy Mar 25, 2026
08ffa66
feat(desktop): add editor caption utilities and text style controls
richiemcilroy Mar 25, 2026
4e5ffd4
feat(desktop): add transcription settings page and route
richiemcilroy Mar 25, 2026
b708c68
feat(desktop): surface studio recording toggles in general settings
richiemcilroy Mar 25, 2026
fb97d3e
refactor(desktop): tighten settings typing and external actions
richiemcilroy Mar 25, 2026
ad5d36d
refactor(desktop): harden window chrome and client mount guards
richiemcilroy Mar 25, 2026
d3e9107
refactor(desktop): tighten shared utility typings
richiemcilroy Mar 25, 2026
e5972ce
refactor(desktop): remove unsafe assertions in chrome and overlay routes
richiemcilroy Mar 25, 2026
d543352
feat(desktop): add captions and keyboard data to editor timeline
richiemcilroy Mar 25, 2026
403f39c
feat(desktop): expand captions and keyboard editor side panels
richiemcilroy Mar 25, 2026
40a71bc
feat(desktop): integrate new tracks into editor shell and playback
richiemcilroy Mar 25, 2026
472ac37
bits
richiemcilroy Mar 25, 2026
0e0d11e
refactor(desktop): generic composeEventHandlers for keyboard inputs
richiemcilroy Mar 25, 2026
251d421
refactor(desktop): simplify Tauri event listener payload typing
richiemcilroy Mar 25, 2026
a12c215
fix(editor): read hovered mask time once when previewing segments
richiemcilroy Mar 25, 2026
79e1948
fix(editor): preserve section markers with a single adjacent boundary
richiemcilroy Mar 25, 2026
f9d7150
fix(editor): initialize keyboard track when importing captions
richiemcilroy Mar 25, 2026
154a18e
refactor(editor): narrow ComingSoonTooltip prop types
richiemcilroy Mar 25, 2026
804b79d
fix(web): skip doc headings when regex captures are missing
richiemcilroy Mar 25, 2026
10df2da
fix(recording): label space key for keyboard event capture
richiemcilroy Mar 25, 2026
405c3e4
fix(recording): build keyboard recovery paths without unwrap
richiemcilroy Mar 25, 2026
b6c1de1
fix(project): skip empty keyboard segments on group flush
richiemcilroy Mar 25, 2026
3cf1eff
test(project): cover backspace-to-empty keyboard segment regression
richiemcilroy Mar 25, 2026
4d90f02
fix(editor): generate unique IDs for split keyboard and caption segments
richiemcilroy Mar 25, 2026
f515eae
fix(editor): defer caption track enable until generation succeeds
richiemcilroy Mar 25, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

44 changes: 39 additions & 5 deletions apps/desktop/src-tauri/src/captions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use whisper_rs::{FullParams, SamplingStrategy, WhisperContext, WhisperContextPar

pub use cap_project::{CaptionSegment, CaptionSettings, CaptionWord};

use crate::http_client;
use crate::{general_settings::GeneralSettingsStore, http_client};

#[derive(Debug, Serialize, Deserialize, Type, Clone)]
pub struct CaptionData {
Expand Down Expand Up @@ -529,6 +529,7 @@ fn process_with_whisper(
audio_path: &PathBuf,
context: Arc<WhisperContext>,
language: &str,
transcription_hints: &[String],
) -> Result<CaptionData, String> {
log::info!("=== WHISPER TRANSCRIPTION START ===");
log::info!("Processing audio file: {audio_path:?}");
Expand All @@ -544,6 +545,10 @@ fn process_with_whisper(
params.set_language(Some(if language == "auto" { "auto" } else { language }));
params.set_max_len(i32::MAX);

if let Some(initial_prompt) = build_initial_prompt(transcription_hints) {
params.set_initial_prompt(&initial_prompt);
}

log::info!("Whisper params - translate: false, token_timestamps: true, max_len: MAX");

let mut audio_file = File::open(audio_path)
Expand Down Expand Up @@ -783,10 +788,32 @@ fn process_with_whisper(
})
}

/// Builds the initial prompt text from the user's transcription hints.
///
/// Every hint has its NUL characters removed and its surrounding whitespace trimmed.
/// Hints that are empty after that, or that exactly repeat a hint already kept, are
/// dropped; the first occurrence wins and the original order is preserved.
///
/// Returns `None` when no usable hint remains, otherwise a fixed introductory sentence
/// followed by the kept hints joined with `"; "`.
fn build_initial_prompt(transcription_hints: &[String]) -> Option<String> {
    // NOTE(review): NUL stripping presumably protects a C-string conversion further
    // down in the Whisper bindings - confirm against `set_initial_prompt`.
    let kept = transcription_hints
        .iter()
        .fold(Vec::<String>::new(), |mut acc, raw| {
            let candidate = raw.replace('\0', "").trim().to_string();
            if !candidate.is_empty() && !acc.contains(&candidate) {
                acc.push(candidate);
            }
            acc
        });

    if kept.is_empty() {
        return None;
    }

    let joined = kept.join("; ");
    Some(format!(
        "Preferred spellings, names, and capitalization for this transcript: {}",
        joined
    ))
}

#[tauri::command]
#[specta::specta]
#[instrument]
pub async fn transcribe_audio(
app: AppHandle,
video_path: String,
model_path: String,
language: String,
Expand Down Expand Up @@ -843,11 +870,18 @@ pub async fn transcribe_audio(
}
};

let transcription_hints = GeneralSettingsStore::get(&app)
.ok()
.flatten()
.map(|settings| settings.transcription_hints)
.unwrap_or_default();

log::info!("Starting Whisper transcription in blocking task...");
let whisper_result =
tokio::task::spawn_blocking(move || process_with_whisper(&audio_path, context, &language))
.await
.map_err(|e| format!("Whisper task panicked: {e}"))?;
let whisper_result = tokio::task::spawn_blocking(move || {
process_with_whisper(&audio_path, context, &language, &transcription_hints)
})
.await
.map_err(|e| format!("Whisper task panicked: {e}"))?;

match whisper_result {
Ok(captions) => {
Expand Down
1 change: 1 addition & 0 deletions apps/desktop/src-tauri/src/export.rs
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,7 @@ pub async fn generate_export_preview(
.iter()
.map(|s| RenderSegment {
cursor: s.cursor.clone(),
keyboard: s.keyboard.clone(),
decoders: s.decoders.clone(),
})
.collect();
Expand Down
19 changes: 17 additions & 2 deletions apps/desktop/src-tauri/src/general_settings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,10 @@ pub struct GeneralSettingsStore {
skip_serializing_if = "no"
)]
pub enable_native_camera_preview: bool,
#[serde(default)]
#[serde(default = "default_true")]
pub auto_zoom_on_clicks: bool,
#[serde(default = "default_true")]
pub capture_keyboard_events: bool,
#[serde(default)]
pub post_deletion_behaviour: PostDeletionBehaviour,
#[serde(default = "default_excluded_windows")]
Expand All @@ -137,6 +139,8 @@ pub struct GeneralSettingsStore {
pub crash_recovery_recording: bool,
#[serde(default = "default_max_fps")]
pub max_fps: u32,
#[serde(default = "default_transcription_hints")]
pub transcription_hints: Vec<String>,
#[serde(default)]
pub editor_preview_quality: EditorPreviewQuality,
#[serde(default)]
Expand Down Expand Up @@ -167,6 +171,15 @@ fn default_max_fps() -> u32 {
60
}

/// Returns the transcription hints used when the stored settings do not provide any.
///
/// The list is four fixed entries, returned as freshly allocated `String`s in a
/// stable order.
fn default_transcription_hints() -> Vec<String> {
    ["Cap", "TypeScript", "My Brand Name", "mywebsite.com"]
        .iter()
        .copied()
        .map(String::from)
        .collect()
}

fn default_server_url() -> String {
std::option_env!("VITE_SERVER_URL")
.unwrap_or("https://cap.so")
Expand Down Expand Up @@ -202,14 +215,16 @@ impl Default for GeneralSettingsStore {
server_url: default_server_url(),
recording_countdown: Some(3),
enable_native_camera_preview: default_enable_native_camera_preview(),
auto_zoom_on_clicks: false,
auto_zoom_on_clicks: true,
capture_keyboard_events: true,
post_deletion_behaviour: PostDeletionBehaviour::DoNothing,
excluded_windows: default_excluded_windows(),
delete_instant_recordings_after_upload: false,
instant_mode_max_resolution: 1920,
default_project_name_template: None,
crash_recovery_recording: true,
max_fps: 60,
transcription_hints: default_transcription_hints(),
editor_preview_quality: EditorPreviewQuality::Half,
main_window_position: None,
camera_window_position: None,
Expand Down
2 changes: 2 additions & 0 deletions apps/desktop/src-tauri/src/import.rs
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,7 @@ pub async fn start_video_import(app: AppHandle, source_path: PathBuf) -> Result<
mic: None,
system_audio: None,
cursor: None,
keyboard: None,
}],
cursors: Cursors::default(),
status: Some(StudioRecordingStatus::InProgress),
Expand Down Expand Up @@ -599,6 +600,7 @@ pub async fn start_video_import(app: AppHandle, source_path: PathBuf) -> Result<
mic: None,
system_audio,
cursor: None,
keyboard: None,
}],
cursors: Cursors::default(),
status: Some(StudioRecordingStatus::Complete),
Expand Down
73 changes: 68 additions & 5 deletions apps/desktop/src-tauri/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2123,6 +2123,51 @@ async fn generate_zoom_segments_from_clicks(
Ok(zoom_segments)
}

#[tauri::command]
#[specta::specta]
#[instrument(skip(editor_instance))]
async fn generate_keyboard_segments(
editor_instance: WindowEditorInstance,
grouping_threshold_ms: f64,
linger_duration_ms: f64,
show_modifiers: bool,
show_special_keys: bool,
) -> Result<Vec<cap_project::KeyboardTrackSegment>, String> {
let meta = editor_instance.meta();

let RecordingMetaInner::Studio(studio_meta) = &meta.inner else {
return Ok(vec![]);
};

let segments = match studio_meta.as_ref() {
StudioRecordingMeta::MultipleSegments { inner, .. } => &inner.segments,
_ => return Ok(vec![]),
};

let mut all_events = cap_project::KeyboardEvents { presses: vec![] };

for segment in segments {
let events = segment.keyboard_events(meta);
all_events.presses.extend(events.presses);
}

all_events.presses.sort_by(|a, b| {
a.time_ms
.partial_cmp(&b.time_ms)
.unwrap_or(std::cmp::Ordering::Equal)
});
Comment on lines +2154 to +2158
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

partial_cmp + unwrap_or(Equal) can hide NaNs and makes ordering less explicit. Since this is f64, total_cmp is a nice drop-in here.

Suggested change
all_events.presses.sort_by(|a, b| {
a.time_ms
.partial_cmp(&b.time_ms)
.unwrap_or(std::cmp::Ordering::Equal)
});
all_events
.presses
.sort_by(|a, b| a.time_ms.total_cmp(&b.time_ms));


let grouped = cap_project::group_key_events(
&all_events,
grouping_threshold_ms,
linger_duration_ms,
show_modifiers,
show_special_keys,
);
Comment on lines +2160 to +2166
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor robustness: if these come from the UI as floats, clamping to non-negative avoids end < start segments when values go negative.

Suggested change
let grouped = cap_project::group_key_events(
&all_events,
grouping_threshold_ms,
linger_duration_ms,
show_modifiers,
show_special_keys,
);
let grouping_threshold_ms = grouping_threshold_ms.max(0.0);
let linger_duration_ms = linger_duration_ms.max(0.0);
let grouped = cap_project::group_key_events(
&all_events,
grouping_threshold_ms,
linger_duration_ms,
show_modifiers,
show_special_keys,
);


Ok(grouped)
}

#[tauri::command]
#[specta::specta]
#[instrument]
Expand Down Expand Up @@ -3105,6 +3150,7 @@ pub async fn run(recording_logging_handle: LoggingHandle, logs_dir: PathBuf) {
set_project_config,
update_project_config_in_memory,
generate_zoom_segments_from_clicks,
generate_keyboard_segments,
permissions::open_permission_settings,
permissions::do_permissions_check,
permissions::request_permission,
Expand Down Expand Up @@ -3673,13 +3719,14 @@ pub async fn run(recording_logging_handle: LoggingHandle, logs_dir: PathBuf) {
id,
CapWindowId::TargetSelectOverlay { .. }
| CapWindowId::Main
| CapWindowId::Camera
)
{
let _ = window.show();
}
}

restore_camera_window(app);

#[cfg(target_os = "windows")]
if !has_open_editor_window(app) {
reopen_main_window(app);
Expand All @@ -3694,12 +3741,12 @@ pub async fn run(recording_logging_handle: LoggingHandle, logs_dir: PathBuf) {
id,
CapWindowId::TargetSelectOverlay { .. }
| CapWindowId::Main
| CapWindowId::Camera
)
{
let _ = window.show();
}
}
restore_camera_window(app);
return;
}
CapWindowId::TargetSelectOverlay { display_id } => {
Expand Down Expand Up @@ -3901,9 +3948,25 @@ fn restore_main_windows_if_no_editors(app: &AppHandle) {
if let Some(main) = CapWindowId::Main.get(app) {
let _ = main.show();
}
if let Some(camera) = CapWindowId::Camera.get(app) {
let _ = camera.show();
}

restore_camera_window(app);
}
}

/// Shows the camera window again, but only when a camera is currently selected.
///
/// The app state is read with a non-blocking `try_read`; if that read does not
/// succeed, the function behaves as if no camera were selected and does nothing.
/// Otherwise it spawns a task that takes the `CameraWindowOperationLock` and then
/// shows the camera window (not centered). The task's handle is not kept and the
/// result of showing the window is discarded.
fn restore_camera_window(app: &AppHandle) {
    let camera_selected = app
        .state::<ArcLock<App>>()
        .try_read()
        .map_or(false, |state| state.selected_camera_id.is_some());

    if !camera_selected {
        return;
    }

    let handle = app.clone();
    tokio::spawn(async move {
        // Hold the operation lock for the duration of the show call.
        let camera_lock = handle.state::<CameraWindowOperationLock>();
        let _camera_guard = camera_lock.lock().await;
        let _ = ShowCapWindow::Camera { centered: false }
            .show(&handle)
            .await;
    });
}

Expand Down
10 changes: 9 additions & 1 deletion apps/desktop/src-tauri/src/recording.rs
Original file line number Diff line number Diff line change
Expand Up @@ -878,6 +878,12 @@ pub async fn start_recording(
.map(|s| s.custom_cursor_capture)
.unwrap_or_default(),
)
.with_keyboard_capture(
general_settings
.as_ref()
.map(|s| s.capture_keyboard_events)
.unwrap_or(true),
)
.with_fragmented(
general_settings
.as_ref()
Expand Down Expand Up @@ -2370,6 +2376,8 @@ fn project_config_from_recording(
scene_segments: Vec::new(),
mask_segments: Vec::new(),
text_segments: Vec::new(),
caption_segments: Vec::new(),
keyboard_segments: Vec::new(),
});

config
Expand All @@ -2391,7 +2399,7 @@ pub fn needs_fragment_remux(recording_dir: &Path, meta: &StudioRecordingMeta) ->
}

pub fn remux_fragmented_recording(recording_dir: &Path) -> Result<(), String> {
let incomplete_recording = RecoveryManager::find_incomplete_single(recording_dir);
let incomplete_recording = RecoveryManager::inspect_recording(recording_dir);

if let Some(recording) = incomplete_recording {
RecoveryManager::recover(&recording)
Expand Down
Loading
Loading