//! src/renderer.zig — Vulkan-based cell renderer (Wayland surface backend).

const std = @import("std");
const vk = @import("vulkan");

const dl = @cImport({
    @cInclude("dlfcn.h");
});

// Precompiled SPIR-V shader binaries for the cell pipeline, embedded at build time.
pub const cell_vert_spv: []const u8 = @embedFile("cell.vert.spv");
pub const cell_frag_spv: []const u8 = @embedFile("cell.frag.spv");

// Cached handle to the dynamically loaded Vulkan loader library; populated
// lazily by getVkGetInstanceProcAddr and kept open for the process lifetime.
var vk_lib_handle: ?*anyopaque = null;

/// Lazily dlopen the Vulkan loader and resolve `vkGetInstanceProcAddr`.
/// The library handle is cached in `vk_lib_handle` and never closed (it
/// lives for the process lifetime).
/// Errors: `error.VulkanLibraryNotFound` when no loader library can be
/// opened, `error.NoVkGetInstanceProcAddr` when the symbol is missing.
fn getVkGetInstanceProcAddr() !vk.PfnGetInstanceProcAddr {
    if (vk_lib_handle == null) {
        // Prefer the versioned SONAME; fall back to the unversioned name,
        // which is all that exists on some development installs.
        vk_lib_handle = dl.dlopen("libvulkan.so.1", dl.RTLD_NOW);
        if (vk_lib_handle == null) {
            vk_lib_handle = dl.dlopen("libvulkan.so", dl.RTLD_NOW);
        }
    }
    const handle = vk_lib_handle orelse return error.VulkanLibraryNotFound;
    const sym = dl.dlsym(handle, "vkGetInstanceProcAddr") orelse return error.NoVkGetInstanceProcAddr;
    return @ptrCast(@alignCast(sym));
}

// Wrap the raw PfnGetInstanceProcAddr so it matches the anytype loader signature
// expected by BaseWrapper.load (accepts instance + name, returns optional fn ptr).
// NOTE(review): this is currently an identity passthrough — the raw pointer type
// already satisfies the loader shape. Confirm callers still need the wrapper
// before removing it.
fn makeBaseLoader(pfn: vk.PfnGetInstanceProcAddr) vk.PfnGetInstanceProcAddr {
    return pfn;
}

/// Physical device selected by `pickPhysicalDevice`, plus the queue family
/// indices to use for graphics submission and surface presentation (they
/// may be the same index).
const PhysicalDeviceInfo = struct {
    physical: vk.PhysicalDevice,
    graphics_queue_family: u32,
    present_queue_family: u32,
};

/// Output of `createSwapchain`. The `images` and `image_views` slices are
/// allocated with the allocator passed to `createSwapchain`; the caller owns
/// them and must destroy the views and free both slices on teardown.
const SwapchainResult = struct {
    swapchain: vk.SwapchainKHR,
    format: vk.Format,
    extent: vk.Extent2D,
    images: []vk.Image,
    image_views: []vk.ImageView,
};

/// Select the first physical device that exposes both a graphics-capable
/// queue family and a queue family able to present to `surface`.
/// Temporary allocations are freed before returning.
fn pickPhysicalDevice(
    alloc: std.mem.Allocator,
    vki: vk.InstanceWrapper,
    instance: vk.Instance,
    surface: vk.SurfaceKHR,
) !PhysicalDeviceInfo {
    var device_count: u32 = 0;
    _ = try vki.enumeratePhysicalDevices(instance, &device_count, null);
    if (device_count == 0) return error.NoVulkanDevices;

    const candidates = try alloc.alloc(vk.PhysicalDevice, device_count);
    defer alloc.free(candidates);
    _ = try vki.enumeratePhysicalDevices(instance, &device_count, candidates.ptr);

    for (candidates[0..device_count]) |candidate| {
        var family_count: u32 = 0;
        vki.getPhysicalDeviceQueueFamilyProperties(candidate, &family_count, null);
        const families = try alloc.alloc(vk.QueueFamilyProperties, family_count);
        defer alloc.free(families);
        vki.getPhysicalDeviceQueueFamilyProperties(candidate, &family_count, families.ptr);

        var graphics: ?u32 = null;
        var present: ?u32 = null;
        for (families[0..family_count], 0..) |family, idx| {
            if (family.queue_flags.graphics_bit) graphics = @intCast(idx);

            const can_present = try vki.getPhysicalDeviceSurfaceSupportKHR(candidate, @intCast(idx), surface);
            if (can_present == vk.Bool32.true) present = @intCast(idx);

            // Stop scanning once both roles are covered.
            if (graphics != null and present != null) break;
        }

        const graphics_family = graphics orelse continue;
        const present_family = present orelse continue;
        return .{
            .physical = candidate,
            .graphics_queue_family = graphics_family,
            .present_queue_family = present_family,
        };
    }
    return error.NoSuitableDevice;
}

/// Create a swapchain for `surface` plus its images and image views.
/// On success the returned `images`/`image_views` slices are owned by the
/// caller (allocated with `alloc`). `old_swapchain` may be `.null_handle`;
/// when set, it is retired by the driver as part of swapchain creation.
fn createSwapchain(
    alloc: std.mem.Allocator,
    vki: vk.InstanceWrapper,
    vkd: vk.DeviceWrapper,
    pd_info: PhysicalDeviceInfo,
    surface: vk.SurfaceKHR,
    device: vk.Device,
    width: u32,
    height: u32,
    old_swapchain: vk.SwapchainKHR,
) !SwapchainResult {
    const caps = try vki.getPhysicalDeviceSurfaceCapabilitiesKHR(pd_info.physical, surface);

    // Pick a surface format, preferring BGRA8 UNORM with sRGB nonlinear
    // color space; otherwise take whatever the surface lists first.
    var fmt_count: u32 = 0;
    _ = try vki.getPhysicalDeviceSurfaceFormatsKHR(pd_info.physical, surface, &fmt_count, null);
    if (fmt_count == 0) return error.NoSurfaceFormats;
    const formats = try alloc.alloc(vk.SurfaceFormatKHR, fmt_count);
    defer alloc.free(formats);
    _ = try vki.getPhysicalDeviceSurfaceFormatsKHR(pd_info.physical, surface, &fmt_count, formats.ptr);

    var chosen = formats[0];
    for (formats[0..fmt_count]) |f| {
        if (f.format == .b8g8r8a8_unorm and f.color_space == .srgb_nonlinear_khr) {
            chosen = f;
            break;
        }
    }

    // current_extent of 0xFFFFFFFF means the surface size is determined by
    // the swapchain — use the caller-provided dimensions in that case.
    var extent = caps.current_extent;
    if (extent.width == 0xFFFFFFFF) {
        extent = .{ .width = width, .height = height };
    }

    // One more image than the minimum reduces stalls; clamp to the maximum
    // when the surface imposes one (0 means "no maximum").
    var image_count: u32 = caps.min_image_count + 1;
    if (caps.max_image_count > 0 and image_count > caps.max_image_count) {
        image_count = caps.max_image_count;
    }

    // Prefer MAILBOX (non-blocking, no tearing) over IMMEDIATE, fall back to FIFO.
    // FIFO blocks until compositor releases a buffer, requiring a pumped Wayland event loop.
    var pm_count: u32 = 0;
    _ = try vki.getPhysicalDeviceSurfacePresentModesKHR(pd_info.physical, surface, &pm_count, null);
    const present_modes = try alloc.alloc(vk.PresentModeKHR, pm_count);
    defer alloc.free(present_modes);
    _ = try vki.getPhysicalDeviceSurfacePresentModesKHR(pd_info.physical, surface, &pm_count, present_modes.ptr);
    var present_mode: vk.PresentModeKHR = .fifo_khr;
    for (present_modes[0..pm_count]) |pm| {
        if (pm == .mailbox_khr) {
            present_mode = .mailbox_khr;
            break;
        }
    }
    if (present_mode == .fifo_khr) {
        for (present_modes[0..pm_count]) |pm| {
            if (pm == .immediate_khr) {
                present_mode = .immediate_khr;
                break;
            }
        }
    }

    // Use CONCURRENT sharing only when graphics and present queues differ.
    const same_family = pd_info.graphics_queue_family == pd_info.present_queue_family;
    const families = [_]u32{ pd_info.graphics_queue_family, pd_info.present_queue_family };

    const swapchain = try vkd.createSwapchainKHR(device, &vk.SwapchainCreateInfoKHR{
        .surface = surface,
        .min_image_count = image_count,
        .image_format = chosen.format,
        .image_color_space = chosen.color_space,
        .image_extent = extent,
        .image_array_layers = 1,
        .image_usage = .{ .color_attachment_bit = true },
        .image_sharing_mode = if (same_family) .exclusive else .concurrent,
        .queue_family_index_count = if (same_family) 0 else 2,
        .p_queue_family_indices = if (same_family) null else &families,
        .pre_transform = caps.current_transform,
        .composite_alpha = .{ .opaque_bit_khr = true },
        .present_mode = present_mode,
        .clipped = .true,
        .old_swapchain = old_swapchain,
    }, null);
    // BUGFIX: the swapchain handle previously leaked if any step below failed
    // (image query, slice allocation, or image-view creation).
    errdefer vkd.destroySwapchainKHR(device, swapchain, null);

    var sc_count: u32 = 0;
    _ = try vkd.getSwapchainImagesKHR(device, swapchain, &sc_count, null);
    const images = try alloc.alloc(vk.Image, sc_count);
    errdefer alloc.free(images);
    _ = try vkd.getSwapchainImagesKHR(device, swapchain, &sc_count, images.ptr);

    const image_views = try alloc.alloc(vk.ImageView, sc_count);
    errdefer alloc.free(image_views);
    // errdefers run LIFO: on failure the created views are destroyed first,
    // then the slices freed, then the swapchain destroyed.
    var views_created: usize = 0;
    errdefer {
        for (image_views[0..views_created]) |view| vkd.destroyImageView(device, view, null);
    }

    for (images[0..sc_count], 0..) |img, i| {
        image_views[i] = try vkd.createImageView(device, &vk.ImageViewCreateInfo{
            .image = img,
            .view_type = .@"2d",
            .format = chosen.format,
            .components = .{ .r = .identity, .g = .identity, .b = .identity, .a = .identity },
            .subresource_range = .{
                .aspect_mask = .{ .color_bit = true },
                .base_mip_level = 0,
                .level_count = 1,
                .base_array_layer = 0,
                .layer_count = 1,
            },
        }, null);
        views_created += 1;
    }

    return .{
        .swapchain = swapchain,
        .format = chosen.format,
        .extent = extent,
        .images = images,
        .image_views = image_views,
    };
}

/// Create one framebuffer per swapchain image view for `render_pass`.
/// The returned slice is allocated with `alloc` and owned by the caller;
/// on error, every framebuffer created so far is destroyed.
fn createFramebuffers(
    alloc: std.mem.Allocator,
    vkd: vk.DeviceWrapper,
    device: vk.Device,
    render_pass: vk.RenderPass,
    extent: vk.Extent2D,
    image_views: []vk.ImageView,
) ![]vk.Framebuffer {
    const framebuffers = try alloc.alloc(vk.Framebuffer, image_views.len);
    errdefer alloc.free(framebuffers);

    // `done` counts fully created framebuffers so the error path destroys
    // exactly those and nothing more.
    var done: usize = 0;
    errdefer for (framebuffers[0..done]) |fb| vkd.destroyFramebuffer(device, fb, null);

    while (done < image_views.len) : (done += 1) {
        framebuffers[done] = try vkd.createFramebuffer(device, &vk.FramebufferCreateInfo{
            .render_pass = render_pass,
            .attachment_count = 1,
            .p_attachments = @ptrCast(&image_views[done]),
            .width = extent.width,
            .height = extent.height,
            .layers = 1,
        }, null);
    }

    return framebuffers;
}

/// Push constants layout matching cell.vert.
/// `coverage_params` carries two f32 values — presumably the pair produced
/// by `coverageVariantParams`; confirm at the call site that fills it.
pub const PushConstants = extern struct {
    viewport_size: [2]f32,
    cell_size: [2]f32,
    coverage_params: [2]f32,
};

/// Per-vertex data (binding 0, per-vertex rate).
/// One corner of the unit quad in [0,1]×[0,1]; six of these form the
/// two-triangle quad uploaded in `Context.init`.
pub const Vertex = extern struct {
    unit_pos: [2]f32, // location 0
};

/// Per-instance data (binding 1, per-instance rate).
/// Field offsets must stay in sync with the vertex attribute descriptions
/// (locations 1–6) built in `Context.init` via `@offsetOf`.
pub const Instance = extern struct {
    cell_pos: [2]f32,     // location 1
    glyph_size: [2]f32,   // location 2
    glyph_bearing: [2]f32, // location 3
    uv_rect: [4]f32,      // location 4
    fg: [4]f32,           // location 5
    bg: [4]f32,           // location 6
};

/// A buffer handle paired with its backing device memory, as returned by
/// `createHostVisibleBuffer`. Caller destroys the buffer and frees the memory.
const BufferResult = struct {
    buffer: vk.Buffer,
    memory: vk.DeviceMemory,
};

/// Create a buffer backed by freshly allocated HOST_VISIBLE | HOST_COHERENT
/// device memory and bind the two together. The caller owns both handles.
fn createHostVisibleBuffer(
    vki: vk.InstanceWrapper,
    physical_device: vk.PhysicalDevice,
    vkd: vk.DeviceWrapper,
    device: vk.Device,
    size: vk.DeviceSize,
    usage: vk.BufferUsageFlags,
) !BufferResult {
    const buf = try vkd.createBuffer(device, &vk.BufferCreateInfo{
        .size = size,
        .usage = usage,
        .sharing_mode = .exclusive,
    }, null);
    errdefer vkd.destroyBuffer(device, buf, null);

    const requirements = vkd.getBufferMemoryRequirements(device, buf);
    const wanted = vk.MemoryPropertyFlags{ .host_visible_bit = true, .host_coherent_bit = true };
    const type_index = try findMemoryType(vki, physical_device, requirements.memory_type_bits, wanted);

    const mem = try vkd.allocateMemory(device, &vk.MemoryAllocateInfo{
        .allocation_size = requirements.size,
        .memory_type_index = type_index,
    }, null);
    errdefer vkd.freeMemory(device, mem, null);

    try vkd.bindBufferMemory(device, buf, mem, 0);
    return .{ .buffer = buf, .memory = mem };
}

/// Find the index of a memory type that is allowed by `type_filter` (a
/// bitmask from VkMemoryRequirements) and has all flags in `properties`.
fn findMemoryType(
    vki: vk.InstanceWrapper,
    physical_device: vk.PhysicalDevice,
    type_filter: u32,
    properties: vk.MemoryPropertyFlags,
) !u32 {
    const mem_props = vki.getPhysicalDeviceMemoryProperties(physical_device);
    for (0..mem_props.memory_type_count) |idx| {
        const allowed = (type_filter & (@as(u32, 1) << @intCast(idx))) != 0;
        if (!allowed) continue;
        if (mem_props.memory_types[idx].property_flags.contains(properties)) {
            return @intCast(idx);
        }
    }
    return error.NoSuitableMemoryType;
}

/// Grow-by-doubling policy for the instance buffer: starting from
/// `current` (treated as at least 1), double until `needed` fits.
/// If doubling would overflow u32, return `needed` exactly.
fn nextInstanceCapacity(current: u32, needed: u32) u32 {
    var cap: u32 = if (current == 0) 1 else current;
    while (cap < needed) {
        cap = std.math.mul(u32, cap, 2) catch return needed;
    }
    return cap;
}

/// Selectable glyph-coverage adjustment presets.
pub const CoverageVariant = enum(u32) {
    baseline,
    mild,
    medium,
    crisp,
};

/// Map a coverage variant to its two shader parameters (the pair that
/// fills the `coverage_params` push constant).
pub fn coverageVariantParams(variant: CoverageVariant) [2]f32 {
    const scale: f32 = switch (variant) {
        .baseline => 1.0,
        .mild => 1.15,
        .medium => 1.3,
        .crisp => 1.55,
    };
    const offset: f32 = switch (variant) {
        .baseline, .mild, .medium => 0.0,
        .crisp => -0.08,
    };
    return .{ scale, offset };
}

/// Input to `planInstanceUpload`: an intended write of `write_len` instances
/// at element offset `offset_instances` into a buffer currently sized for
/// `current_capacity` instances.
const InstanceUploadRequest = struct {
    current_capacity: u32,
    offset_instances: u32,
    write_len: u32,
};

/// How an instance upload should proceed: `partial` fits in the current
/// buffer, `full` requires growing it, `invalid_range` means offset + len
/// overflowed u32.
const InstanceUploadMode = enum {
    partial,
    full,
    invalid_range,
};

/// Result of `planInstanceUpload`; `needed_capacity` is null only for
/// `.invalid_range`.
const InstanceUploadDecision = struct {
    needed_capacity: ?u32,
    upload_mode: InstanceUploadMode,
};

/// Byte range of an instance write, computed by `planInstanceRangeWrite`.
const InstanceRangeWrite = struct {
    byte_offset: vk.DeviceSize,
    byte_len: vk.DeviceSize,
};

/// Outcome classification returned by `planUploadInstanceRangeAction`
/// (adds `no_op` for zero-length writes on top of InstanceUploadMode).
const InstanceRangeUploadAction = enum {
    no_op,
    partial,
    full,
    invalid_range,
};

/// Decide how a write of `req.write_len` instances at `req.offset_instances`
/// should be performed given `req.current_capacity`. Returns `.invalid_range`
/// (with null capacity) when offset + len overflows u32; otherwise `.full`
/// when the write exceeds the current capacity, `.partial` when it fits.
fn planInstanceUpload(req: InstanceUploadRequest) InstanceUploadDecision {
    const end = std.math.add(u32, req.offset_instances, req.write_len) catch {
        return .{ .needed_capacity = null, .upload_mode = .invalid_range };
    };
    const mode: InstanceUploadMode = if (end > req.current_capacity) .full else .partial;
    return .{ .needed_capacity = end, .upload_mode = mode };
}

/// Convert an instance-element range into the corresponding byte range
/// within the instance buffer (widened to vk.DeviceSize before multiplying).
fn planInstanceRangeWrite(offset_instances: u32, len_instances: u32) InstanceRangeWrite {
    const stride: vk.DeviceSize = @sizeOf(Instance);
    return .{
        .byte_offset = stride * offset_instances,
        .byte_len = stride * len_instances,
    };
}

/// Classify an instance-range upload: `.no_op` for empty writes, otherwise
/// the mode chosen by `planInstanceUpload` mapped onto the action enum.
fn planUploadInstanceRangeAction(
    current_capacity: u32,
    offset_instances: u32,
    len_instances: u32,
) InstanceRangeUploadAction {
    if (len_instances == 0) return .no_op;

    const decision = planInstanceUpload(.{
        .current_capacity = current_capacity,
        .offset_instances = offset_instances,
        .write_len = len_instances,
    });
    return switch (decision.upload_mode) {
        .partial => .partial,
        .full => .full,
        .invalid_range => .invalid_range,
    };
}

/// Copy `instances` into `target` starting at element `offset_instances`.
/// Returns `false` when the data was written in place (or there was nothing
/// to write), `true` when the write does not fit in `target` and the caller
/// must grow the buffer and do a full re-upload instead.
/// Errors: `error.InvalidInstanceRange` when a length does not fit in u32 or
/// offset + len overflows.
fn writeInstanceRange(
    target: []Instance,
    offset_instances: u32,
    instances: []const Instance,
) !bool {
    // Empty write: nothing copied, no full upload required.
    if (instances.len == 0) return false;

    const decision = planInstanceUpload(.{
        .current_capacity = std.math.cast(u32, target.len) orelse return error.InvalidInstanceRange,
        .offset_instances = offset_instances,
        .write_len = std.math.cast(u32, instances.len) orelse return error.InvalidInstanceRange,
    });
    switch (decision.upload_mode) {
        .invalid_range => return error.InvalidInstanceRange,
        // Exceeds current capacity: caller must reallocate and re-upload.
        .full => return true,
        .partial => {},
    }

    const offset: usize = @intCast(offset_instances);
    @memcpy(target[offset .. offset + instances.len], instances);
    return false;
}

/// True when presentation reported the swapchain is still usable but no
/// longer optimal (VK_SUBOPTIMAL_KHR), meaning it should be recreated.
fn swapchainNeedsRebuild(result: vk.Result) bool {
    return switch (result) {
        .suboptimal_khr => true,
        else => false,
    };
}

pub const Context = struct {
    alloc: std.mem.Allocator,
    vkb: vk.BaseWrapper,
    instance: vk.Instance,
    vki: vk.InstanceWrapper,
    surface: vk.SurfaceKHR,
    physical_device: vk.PhysicalDevice,
    graphics_queue_family: u32,
    present_queue_family: u32,
    device: vk.Device,
    vkd: vk.DeviceWrapper,
    graphics_queue: vk.Queue,
    present_queue: vk.Queue,
    swapchain: vk.SwapchainKHR,
    swapchain_format: vk.Format,
    swapchain_extent: vk.Extent2D,
    swapchain_images: []vk.Image,
    swapchain_image_views: []vk.ImageView,
    // Render pass + framebuffers
    render_pass: vk.RenderPass,
    framebuffers: []vk.Framebuffer,
    // Descriptor set layout + pool + set
    descriptor_set_layout: vk.DescriptorSetLayout,
    descriptor_pool: vk.DescriptorPool,
    descriptor_set: vk.DescriptorSet,
    // Pipeline
    pipeline_layout: vk.PipelineLayout,
    pipeline: vk.Pipeline,
    // Commands
    command_pool: vk.CommandPool,
    command_buffer: vk.CommandBuffer,
    // Sync
    image_available: vk.Semaphore,
    render_finished: vk.Semaphore,
    in_flight_fence: vk.Fence,
    // Static unit-quad vertex buffer
    quad_vertex_buffer: vk.Buffer,
    quad_vertex_memory: vk.DeviceMemory,
    // Per-frame instance buffer
    instance_buffer: vk.Buffer,
    instance_memory: vk.DeviceMemory,
    instance_capacity: u32,
    // GPU glyph atlas texture
    atlas_image: vk.Image,
    atlas_memory: vk.DeviceMemory,
    atlas_view: vk.ImageView,
    atlas_sampler: vk.Sampler,
    atlas_width: u32,
    atlas_height: u32,
    // Persistent atlas staging buffer (reused across frames)
    atlas_staging_buffer: vk.Buffer,
    atlas_staging_memory: vk.DeviceMemory,
    // Dedicated transfer command buffer + fence
    atlas_transfer_cb: vk.CommandBuffer,
    atlas_transfer_fence: vk.Fence,

    pub fn init(
        alloc: std.mem.Allocator,
        wl_display: *anyopaque,
        wl_surface: *anyopaque,
        width: u32,
        height: u32,
    ) !Context {
        const get_proc_addr = try getVkGetInstanceProcAddr();
        const vkb = vk.BaseWrapper.load(get_proc_addr);

        // Create instance
        const app_info = vk.ApplicationInfo{
            .p_application_name = "waystty",
            .application_version = @bitCast(vk.makeApiVersion(0, 0, 0, 1)),
            .p_engine_name = "waystty",
            .engine_version = @bitCast(vk.makeApiVersion(0, 0, 0, 1)),
            .api_version = @bitCast(vk.API_VERSION_1_2),
        };

        const instance_exts = [_][*:0]const u8{
            vk.extensions.khr_surface.name,
            vk.extensions.khr_wayland_surface.name,
        };

        const instance = try vkb.createInstance(&vk.InstanceCreateInfo{
            .p_application_info = &app_info,
            .enabled_extension_count = instance_exts.len,
            .pp_enabled_extension_names = &instance_exts,
        }, null);

        const vki = vk.InstanceWrapper.load(instance, vkb.dispatch.vkGetInstanceProcAddr.?);
        errdefer vki.destroyInstance(instance, null);

        // Create wayland surface
        const surface = try vki.createWaylandSurfaceKHR(instance, &vk.WaylandSurfaceCreateInfoKHR{
            .display = @ptrCast(wl_display),
            .surface = @ptrCast(wl_surface),
        }, null);
        errdefer vki.destroySurfaceKHR(instance, surface, null);

        // Pick physical device + queue families
        const pd_info = try pickPhysicalDevice(alloc, vki, instance, surface);

        // Create logical device
        const priority: f32 = 1.0;
        var queue_create_infos: [2]vk.DeviceQueueCreateInfo = undefined;
        var queue_count: u32 = 1;
        queue_create_infos[0] = .{
            .queue_family_index = pd_info.graphics_queue_family,
            .queue_count = 1,
            .p_queue_priorities = @ptrCast(&priority),
        };
        if (pd_info.graphics_queue_family != pd_info.present_queue_family) {
            queue_create_infos[1] = .{
                .queue_family_index = pd_info.present_queue_family,
                .queue_count = 1,
                .p_queue_priorities = @ptrCast(&priority),
            };
            queue_count = 2;
        }

        const device_exts = [_][*:0]const u8{vk.extensions.khr_swapchain.name};

        const empty_layer_name: *const u8 = @ptrFromInt(1); // dummy, count=0 so never dereferenced
        const device = try vki.createDevice(pd_info.physical, &vk.DeviceCreateInfo{
            .queue_create_info_count = queue_count,
            .p_queue_create_infos = &queue_create_infos,
            .enabled_layer_count = 0,
            .pp_enabled_layer_names = &empty_layer_name,
            .enabled_extension_count = device_exts.len,
            .pp_enabled_extension_names = &device_exts,
        }, null);

        const vkd = vk.DeviceWrapper.load(device, vki.dispatch.vkGetDeviceProcAddr.?);
        errdefer vkd.destroyDevice(device, null);

        const graphics_queue = vkd.getDeviceQueue(device, pd_info.graphics_queue_family, 0);
        const present_queue = vkd.getDeviceQueue(device, pd_info.present_queue_family, 0);

        // Create swapchain
        const sc = try createSwapchain(alloc, vki, vkd, pd_info, surface, device, width, height, .null_handle);
        errdefer {
            for (sc.image_views) |view| vkd.destroyImageView(device, view, null);
            alloc.free(sc.image_views);
            alloc.free(sc.images);
            vkd.destroySwapchainKHR(device, sc.swapchain, null);
        }

        // Create render pass
        const color_attachment = vk.AttachmentDescription{
            .format = sc.format,
            .samples = .{ .@"1_bit" = true },
            .load_op = .clear,
            .store_op = .store,
            .stencil_load_op = .dont_care,
            .stencil_store_op = .dont_care,
            .initial_layout = .undefined,
            .final_layout = .present_src_khr,
        };

        const color_ref = vk.AttachmentReference{
            .attachment = 0,
            .layout = .color_attachment_optimal,
        };

        const subpass = vk.SubpassDescription{
            .pipeline_bind_point = .graphics,
            .color_attachment_count = 1,
            .p_color_attachments = @ptrCast(&color_ref),
        };

        const dep = vk.SubpassDependency{
            .src_subpass = vk.SUBPASS_EXTERNAL,
            .dst_subpass = 0,
            .src_stage_mask = .{ .color_attachment_output_bit = true },
            .dst_stage_mask = .{ .color_attachment_output_bit = true },
            .src_access_mask = .{},
            .dst_access_mask = .{ .color_attachment_write_bit = true },
        };

        const render_pass = try vkd.createRenderPass(device, &vk.RenderPassCreateInfo{
            .attachment_count = 1,
            .p_attachments = @ptrCast(&color_attachment),
            .subpass_count = 1,
            .p_subpasses = @ptrCast(&subpass),
            .dependency_count = 1,
            .p_dependencies = @ptrCast(&dep),
        }, null);
        errdefer vkd.destroyRenderPass(device, render_pass, null);

        // Create framebuffers (one per swapchain image view)
        const framebuffers = try createFramebuffers(
            alloc,
            vkd,
            device,
            render_pass,
            sc.extent,
            sc.image_views,
        );

        // Create descriptor set layout (single combined image sampler for glyph atlas)
        const dsl_binding = vk.DescriptorSetLayoutBinding{
            .binding = 0,
            .descriptor_type = .combined_image_sampler,
            .descriptor_count = 1,
            .stage_flags = .{ .fragment_bit = true },
        };
        const descriptor_set_layout = try vkd.createDescriptorSetLayout(device, &vk.DescriptorSetLayoutCreateInfo{
            .binding_count = 1,
            .p_bindings = @ptrCast(&dsl_binding),
        }, null);
        errdefer vkd.destroyDescriptorSetLayout(device, descriptor_set_layout, null);

        // Create pipeline layout (push constants + descriptor set)
        const push_range = vk.PushConstantRange{
            .stage_flags = .{ .vertex_bit = true, .fragment_bit = true },
            .offset = 0,
            .size = @sizeOf(PushConstants),
        };
        const pipeline_layout = try vkd.createPipelineLayout(device, &vk.PipelineLayoutCreateInfo{
            .set_layout_count = 1,
            .p_set_layouts = @ptrCast(&descriptor_set_layout),
            .push_constant_range_count = 1,
            .p_push_constant_ranges = @ptrCast(&push_range),
        }, null);
        errdefer vkd.destroyPipelineLayout(device, pipeline_layout, null);

        // Create shader modules
        const vert_module = try vkd.createShaderModule(device, &vk.ShaderModuleCreateInfo{
            .code_size = cell_vert_spv.len,
            .p_code = @ptrCast(@alignCast(cell_vert_spv.ptr)),
        }, null);
        defer vkd.destroyShaderModule(device, vert_module, null);

        const frag_module = try vkd.createShaderModule(device, &vk.ShaderModuleCreateInfo{
            .code_size = cell_frag_spv.len,
            .p_code = @ptrCast(@alignCast(cell_frag_spv.ptr)),
        }, null);
        defer vkd.destroyShaderModule(device, frag_module, null);

        // Shader stages
        const shader_stages = [_]vk.PipelineShaderStageCreateInfo{
            .{
                .stage = .{ .vertex_bit = true },
                .module = vert_module,
                .p_name = "main",
            },
            .{
                .stage = .{ .fragment_bit = true },
                .module = frag_module,
                .p_name = "main",
            },
        };

        // Vertex input
        const binding_descs = [_]vk.VertexInputBindingDescription{
            .{ .binding = 0, .stride = @sizeOf(Vertex), .input_rate = .vertex },
            .{ .binding = 1, .stride = @sizeOf(Instance), .input_rate = .instance },
        };

        const attr_descs = [_]vk.VertexInputAttributeDescription{
            .{ .location = 0, .binding = 0, .format = .r32g32_sfloat, .offset = 0 },
            .{ .location = 1, .binding = 1, .format = .r32g32_sfloat, .offset = @offsetOf(Instance, "cell_pos") },
            .{ .location = 2, .binding = 1, .format = .r32g32_sfloat, .offset = @offsetOf(Instance, "glyph_size") },
            .{ .location = 3, .binding = 1, .format = .r32g32_sfloat, .offset = @offsetOf(Instance, "glyph_bearing") },
            .{ .location = 4, .binding = 1, .format = .r32g32b32a32_sfloat, .offset = @offsetOf(Instance, "uv_rect") },
            .{ .location = 5, .binding = 1, .format = .r32g32b32a32_sfloat, .offset = @offsetOf(Instance, "fg") },
            .{ .location = 6, .binding = 1, .format = .r32g32b32a32_sfloat, .offset = @offsetOf(Instance, "bg") },
        };

        const vertex_input_info = vk.PipelineVertexInputStateCreateInfo{
            .vertex_binding_description_count = binding_descs.len,
            .p_vertex_binding_descriptions = &binding_descs,
            .vertex_attribute_description_count = attr_descs.len,
            .p_vertex_attribute_descriptions = &attr_descs,
        };

        const input_assembly = vk.PipelineInputAssemblyStateCreateInfo{
            .topology = .triangle_list,
            .primitive_restart_enable = .false,
        };

        // Dynamic viewport + scissor (set at draw time)
        const dynamic_states = [_]vk.DynamicState{ .viewport, .scissor };
        const dynamic_state = vk.PipelineDynamicStateCreateInfo{
            .dynamic_state_count = dynamic_states.len,
            .p_dynamic_states = &dynamic_states,
        };

        const viewport_state = vk.PipelineViewportStateCreateInfo{
            .viewport_count = 1,
            .scissor_count = 1,
        };

        const rasterizer = vk.PipelineRasterizationStateCreateInfo{
            .depth_clamp_enable = .false,
            .rasterizer_discard_enable = .false,
            .polygon_mode = .fill,
            .cull_mode = .{ .back_bit = true },
            .front_face = .clockwise,
            .depth_bias_enable = .false,
            .depth_bias_constant_factor = 0.0,
            .depth_bias_clamp = 0.0,
            .depth_bias_slope_factor = 0.0,
            .line_width = 1.0,
        };

        const multisampling = vk.PipelineMultisampleStateCreateInfo{
            .rasterization_samples = .{ .@"1_bit" = true },
            .sample_shading_enable = .false,
            .min_sample_shading = 1.0,
            .alpha_to_coverage_enable = .false,
            .alpha_to_one_enable = .false,
        };

        const color_blend_attachment = vk.PipelineColorBlendAttachmentState{
            .blend_enable = .true,
            .src_color_blend_factor = .src_alpha,
            .dst_color_blend_factor = .one_minus_src_alpha,
            .color_blend_op = .add,
            .src_alpha_blend_factor = .one,
            .dst_alpha_blend_factor = .zero,
            .alpha_blend_op = .add,
            .color_write_mask = .{ .r_bit = true, .g_bit = true, .b_bit = true, .a_bit = true },
        };

        const color_blend = vk.PipelineColorBlendStateCreateInfo{
            .logic_op_enable = .false,
            .logic_op = .copy,
            .attachment_count = 1,
            .p_attachments = @ptrCast(&color_blend_attachment),
            .blend_constants = .{ 0.0, 0.0, 0.0, 0.0 },
        };

        const pipeline_create_info = vk.GraphicsPipelineCreateInfo{
            .stage_count = shader_stages.len,
            .p_stages = &shader_stages,
            .p_vertex_input_state = &vertex_input_info,
            .p_input_assembly_state = &input_assembly,
            .p_viewport_state = &viewport_state,
            .p_rasterization_state = &rasterizer,
            .p_multisample_state = &multisampling,
            .p_color_blend_state = &color_blend,
            .p_dynamic_state = &dynamic_state,
            .layout = pipeline_layout,
            .render_pass = render_pass,
            .subpass = 0,
            .base_pipeline_index = -1,
        };

        var pipeline: vk.Pipeline = undefined;
        _ = try vkd.createGraphicsPipelines(device, .null_handle, 1, @ptrCast(&pipeline_create_info), null, @ptrCast(&pipeline));
        errdefer vkd.destroyPipeline(device, pipeline, null);

        // Descriptor pool + set
        const pool_size = vk.DescriptorPoolSize{
            .type = .combined_image_sampler,
            .descriptor_count = 1,
        };
        const descriptor_pool = try vkd.createDescriptorPool(device, &vk.DescriptorPoolCreateInfo{
            .max_sets = 1,
            .pool_size_count = 1,
            .p_pool_sizes = @ptrCast(&pool_size),
        }, null);
        errdefer vkd.destroyDescriptorPool(device, descriptor_pool, null);

        var descriptor_set: vk.DescriptorSet = undefined;
        try vkd.allocateDescriptorSets(device, &vk.DescriptorSetAllocateInfo{
            .descriptor_pool = descriptor_pool,
            .descriptor_set_count = 1,
            .p_set_layouts = @ptrCast(&descriptor_set_layout),
        }, @ptrCast(&descriptor_set));

        // Command pool + buffer
        const command_pool = try vkd.createCommandPool(device, &vk.CommandPoolCreateInfo{
            .flags = .{ .reset_command_buffer_bit = true },
            .queue_family_index = pd_info.graphics_queue_family,
        }, null);
        errdefer vkd.destroyCommandPool(device, command_pool, null);

        var command_buffer: vk.CommandBuffer = undefined;
        try vkd.allocateCommandBuffers(device, &vk.CommandBufferAllocateInfo{
            .command_pool = command_pool,
            .level = .primary,
            .command_buffer_count = 1,
        }, @ptrCast(&command_buffer));

        // Sync objects
        const image_available = try vkd.createSemaphore(device, &vk.SemaphoreCreateInfo{}, null);
        errdefer vkd.destroySemaphore(device, image_available, null);

        const render_finished = try vkd.createSemaphore(device, &vk.SemaphoreCreateInfo{}, null);
        errdefer vkd.destroySemaphore(device, render_finished, null);

        const in_flight_fence = try vkd.createFence(device, &vk.FenceCreateInfo{
            .flags = .{ .signaled_bit = true }, // start signaled so first wait returns immediately
        }, null);
        errdefer vkd.destroyFence(device, in_flight_fence, null);

        // --- Quad vertex buffer ---
        const quad_verts = [_]Vertex{
            .{ .unit_pos = .{ 0, 0 } },
            .{ .unit_pos = .{ 1, 0 } },
            .{ .unit_pos = .{ 1, 1 } },
            .{ .unit_pos = .{ 0, 0 } },
            .{ .unit_pos = .{ 1, 1 } },
            .{ .unit_pos = .{ 0, 1 } },
        };
        const quad_vb_size: vk.DeviceSize = @sizeOf(@TypeOf(quad_verts));
        const quad = try createHostVisibleBuffer(vki, pd_info.physical, vkd, device, quad_vb_size, .{ .vertex_buffer_bit = true });
        errdefer {
            vkd.destroyBuffer(device, quad.buffer, null);
            vkd.freeMemory(device, quad.memory, null);
        }
        {
            const mapped = try vkd.mapMemory(device, quad.memory, 0, quad_vb_size, .{});
            @memcpy(
                @as([*]Vertex, @ptrCast(@alignCast(mapped)))[0..quad_verts.len],
                &quad_verts,
            );
            vkd.unmapMemory(device, quad.memory);
        }

        // --- Instance buffer ---
        const max_instances: u32 = 200 * 80;
        const instance_size: vk.DeviceSize = @sizeOf(Instance) * max_instances;
        const inst = try createHostVisibleBuffer(vki, pd_info.physical, vkd, device, instance_size, .{ .vertex_buffer_bit = true });
        errdefer {
            vkd.destroyBuffer(device, inst.buffer, null);
            vkd.freeMemory(device, inst.memory, null);
        }

        // --- Atlas texture ---
        const atlas_width: u32 = 1024;
        const atlas_height: u32 = 1024;

        const atlas_image = try vkd.createImage(device, &vk.ImageCreateInfo{
            .image_type = .@"2d",
            .format = .r8_unorm,
            .extent = .{ .width = atlas_width, .height = atlas_height, .depth = 1 },
            .mip_levels = 1,
            .array_layers = 1,
            .samples = .{ .@"1_bit" = true },
            .tiling = .optimal,
            .usage = .{ .transfer_dst_bit = true, .sampled_bit = true },
            .sharing_mode = .exclusive,
            .initial_layout = .undefined,
        }, null);
        errdefer vkd.destroyImage(device, atlas_image, null);

        const img_reqs = vkd.getImageMemoryRequirements(device, atlas_image);
        const img_mem_idx = try findMemoryType(vki, pd_info.physical, img_reqs.memory_type_bits, .{ .device_local_bit = true });
        const atlas_memory = try vkd.allocateMemory(device, &vk.MemoryAllocateInfo{
            .allocation_size = img_reqs.size,
            .memory_type_index = img_mem_idx,
        }, null);
        errdefer vkd.freeMemory(device, atlas_memory, null);
        try vkd.bindImageMemory(device, atlas_image, atlas_memory, 0);

        const atlas_view = try vkd.createImageView(device, &vk.ImageViewCreateInfo{
            .image = atlas_image,
            .view_type = .@"2d",
            .format = .r8_unorm,
            .components = .{ .r = .identity, .g = .identity, .b = .identity, .a = .identity },
            .subresource_range = .{
                .aspect_mask = .{ .color_bit = true },
                .base_mip_level = 0,
                .level_count = 1,
                .base_array_layer = 0,
                .layer_count = 1,
            },
        }, null);
        errdefer vkd.destroyImageView(device, atlas_view, null);

        const atlas_sampler = try vkd.createSampler(device, &vk.SamplerCreateInfo{
            .mag_filter = .nearest,
            .min_filter = .nearest,
            .mipmap_mode = .nearest,
            .address_mode_u = .clamp_to_edge,
            .address_mode_v = .clamp_to_edge,
            .address_mode_w = .clamp_to_edge,
            .mip_lod_bias = 0,
            .anisotropy_enable = .false,
            .max_anisotropy = 1,
            .compare_enable = .false,
            .compare_op = .always,
            .min_lod = 0,
            .max_lod = 0,
            .border_color = .int_opaque_black,
            .unnormalized_coordinates = .false,
        }, null);
        errdefer vkd.destroySampler(device, atlas_sampler, null);

        // --- Atlas staging buffer (persistent, reused across frames) ---
        const atlas_staging_size: vk.DeviceSize = @as(vk.DeviceSize, atlas_width) * atlas_height;
        const atlas_staging = try createHostVisibleBuffer(vki, pd_info.physical, vkd, device, atlas_staging_size, .{ .transfer_src_bit = true });
        errdefer {
            vkd.destroyBuffer(device, atlas_staging.buffer, null);
            vkd.freeMemory(device, atlas_staging.memory, null);
        }

        // --- Dedicated atlas transfer command buffer ---
        var atlas_transfer_cb: vk.CommandBuffer = undefined;
        try vkd.allocateCommandBuffers(device, &vk.CommandBufferAllocateInfo{
            .command_pool = command_pool,
            .level = .primary,
            .command_buffer_count = 1,
        }, @ptrCast(&atlas_transfer_cb));

        // --- Atlas transfer fence (starts signaled so first wait is a no-op) ---
        const atlas_transfer_fence = try vkd.createFence(device, &vk.FenceCreateInfo{
            .flags = .{ .signaled_bit = true },
        }, null);
        errdefer vkd.destroyFence(device, atlas_transfer_fence, null);

        // Bind atlas to descriptor set
        const img_info = vk.DescriptorImageInfo{
            .sampler = atlas_sampler,
            .image_view = atlas_view,
            .image_layout = .shader_read_only_optimal,
        };
        vkd.updateDescriptorSets(device, 1, @ptrCast(&vk.WriteDescriptorSet{
            .dst_set = descriptor_set,
            .dst_binding = 0,
            .dst_array_element = 0,
            .descriptor_count = 1,
            .descriptor_type = .combined_image_sampler,
            .p_image_info = @ptrCast(&img_info),
            .p_buffer_info = undefined,
            .p_texel_buffer_view = undefined,
        }), 0, null);

        return .{
            .alloc = alloc,
            .vkb = vkb,
            .instance = instance,
            .vki = vki,
            .surface = surface,
            .physical_device = pd_info.physical,
            .graphics_queue_family = pd_info.graphics_queue_family,
            .present_queue_family = pd_info.present_queue_family,
            .device = device,
            .vkd = vkd,
            .graphics_queue = graphics_queue,
            .present_queue = present_queue,
            .swapchain = sc.swapchain,
            .swapchain_format = sc.format,
            .swapchain_extent = sc.extent,
            .swapchain_images = sc.images,
            .swapchain_image_views = sc.image_views,
            .render_pass = render_pass,
            .framebuffers = framebuffers,
            .descriptor_set_layout = descriptor_set_layout,
            .descriptor_pool = descriptor_pool,
            .descriptor_set = descriptor_set,
            .pipeline_layout = pipeline_layout,
            .pipeline = pipeline,
            .command_pool = command_pool,
            .command_buffer = command_buffer,
            .image_available = image_available,
            .render_finished = render_finished,
            .in_flight_fence = in_flight_fence,
            .quad_vertex_buffer = quad.buffer,
            .quad_vertex_memory = quad.memory,
            .instance_buffer = inst.buffer,
            .instance_memory = inst.memory,
            .instance_capacity = max_instances,
            .atlas_image = atlas_image,
            .atlas_memory = atlas_memory,
            .atlas_view = atlas_view,
            .atlas_sampler = atlas_sampler,
            .atlas_width = atlas_width,
            .atlas_height = atlas_height,
            .atlas_staging_buffer = atlas_staging.buffer,
            .atlas_staging_memory = atlas_staging.memory,
            .atlas_transfer_cb = atlas_transfer_cb,
            .atlas_transfer_fence = atlas_transfer_fence,
        };
    }

    /// Tear down every Vulkan object owned by the context, then the device,
    /// surface, and instance (in reverse order of creation). The context must
    /// not be used after this returns.
    pub fn deinit(self: *Context) void {
        // Wait for device to be idle before destroying anything
        // (errors are deliberately ignored: teardown must proceed regardless).
        _ = self.vkd.deviceWaitIdle(self.device) catch {};

        // Atlas + buffers (in reverse order of creation)
        self.vkd.destroySampler(self.device, self.atlas_sampler, null);
        self.vkd.destroyImageView(self.device, self.atlas_view, null);
        self.vkd.destroyImage(self.device, self.atlas_image, null);
        self.vkd.freeMemory(self.device, self.atlas_memory, null);
        self.vkd.destroyBuffer(self.device, self.atlas_staging_buffer, null);
        self.vkd.freeMemory(self.device, self.atlas_staging_memory, null);
        self.vkd.destroyFence(self.device, self.atlas_transfer_fence, null);
        self.vkd.destroyBuffer(self.device, self.instance_buffer, null);
        self.vkd.freeMemory(self.device, self.instance_memory, null);
        self.vkd.destroyBuffer(self.device, self.quad_vertex_buffer, null);
        self.vkd.freeMemory(self.device, self.quad_vertex_memory, null);

        // Sync objects
        self.vkd.destroyFence(self.device, self.in_flight_fence, null);
        self.vkd.destroySemaphore(self.device, self.render_finished, null);
        self.vkd.destroySemaphore(self.device, self.image_available, null);

        // Command pool (also frees command buffers, including the dedicated
        // atlas transfer command buffer)
        self.vkd.destroyCommandPool(self.device, self.command_pool, null);

        // Pipeline
        self.vkd.destroyPipeline(self.device, self.pipeline, null);
        self.vkd.destroyPipelineLayout(self.device, self.pipeline_layout, null);

        // Descriptor pool (also frees descriptor sets) + layout
        self.vkd.destroyDescriptorPool(self.device, self.descriptor_pool, null);
        self.vkd.destroyDescriptorSetLayout(self.device, self.descriptor_set_layout, null);

        // Framebuffers, image views, and the swapchain itself
        self.destroySwapchainResources();

        // Render pass
        self.vkd.destroyRenderPass(self.device, self.render_pass, null);

        // Device-level handle, then instance-level handles last.
        self.vkd.destroyDevice(self.device, null);
        self.vki.destroySurfaceKHR(self.instance, self.surface, null);
        self.vki.destroyInstance(self.instance, null);
    }

    /// Destroy the framebuffers, swapchain image views, and the swapchain
    /// handle, and free the CPU-side slices that track them. Swapchain images
    /// themselves are owned by the swapchain and are not destroyed directly.
    /// Caller must ensure the GPU is no longer using these resources.
    fn destroySwapchainResources(self: *Context) void {
        for (self.framebuffers) |fb| self.vkd.destroyFramebuffer(self.device, fb, null);
        self.alloc.free(self.framebuffers);

        for (self.swapchain_image_views) |view| self.vkd.destroyImageView(self.device, view, null);
        self.alloc.free(self.swapchain_image_views);
        self.alloc.free(self.swapchain_images);
        self.vkd.destroySwapchainKHR(self.device, self.swapchain, null);
    }

    /// Replace the swapchain (and dependent framebuffers) with one sized
    /// `width` x `height`. Called after a resize or an OutOfDateKHR result.
    /// On failure the existing swapchain state is left untouched.
    pub fn recreateSwapchain(self: *Context, width: u32, height: u32) !void {
        // The old framebuffers/image views may still be referenced by an
        // in-flight frame; drain the GPU before destroying them below.
        _ = try self.vkd.deviceWaitIdle(self.device);

        const sc = try createSwapchain(
            self.alloc,
            self.vki,
            self.vkd,
            .{
                .physical = self.physical_device,
                .graphics_queue_family = self.graphics_queue_family,
                .present_queue_family = self.present_queue_family,
            },
            self.surface,
            self.device,
            width,
            height,
            self.swapchain, // passed as oldSwapchain so the driver can reuse resources
        );
        errdefer {
            for (sc.image_views) |view| self.vkd.destroyImageView(self.device, view, null);
            self.alloc.free(sc.image_views);
            self.alloc.free(sc.images);
            self.vkd.destroySwapchainKHR(self.device, sc.swapchain, null);
        }

        const framebuffers = try createFramebuffers(
            self.alloc,
            self.vkd,
            self.device,
            self.render_pass,
            sc.extent,
            sc.image_views,
        );
        errdefer {
            for (framebuffers) |fb| self.vkd.destroyFramebuffer(self.device, fb, null);
            self.alloc.free(framebuffers);
        }

        // Swap in the new resources, then destroy the old ones.
        const old_swapchain = self.swapchain;
        const old_images = self.swapchain_images;
        const old_image_views = self.swapchain_image_views;
        const old_framebuffers = self.framebuffers;

        self.swapchain = sc.swapchain;
        self.swapchain_format = sc.format;
        self.swapchain_extent = sc.extent;
        self.swapchain_images = sc.images;
        self.swapchain_image_views = sc.image_views;
        self.framebuffers = framebuffers;

        for (old_framebuffers) |fb| self.vkd.destroyFramebuffer(self.device, fb, null);
        self.alloc.free(old_framebuffers);

        for (old_image_views) |view| self.vkd.destroyImageView(self.device, view, null);
        self.alloc.free(old_image_views);
        self.alloc.free(old_images);
        self.vkd.destroySwapchainKHR(self.device, old_swapchain, null);
    }

    /// Ensure the instance buffer can hold at least `needed` instances,
    /// reallocating it (growth factor chosen by nextInstanceCapacity) when it
    /// cannot. The old buffer is destroyed only after the device goes idle,
    /// since an in-flight frame may still reference it.
    fn ensureInstanceCapacity(self: *Context, needed: u32) !void {
        if (needed <= self.instance_capacity) return;

        const new_capacity = nextInstanceCapacity(self.instance_capacity, needed);
        const replacement = try createHostVisibleBuffer(
            self.vki,
            self.physical_device,
            self.vkd,
            self.device,
            @as(vk.DeviceSize, @sizeOf(Instance)) * new_capacity,
            .{ .vertex_buffer_bit = true },
        );
        // If the wait below fails, release the replacement we just created.
        errdefer {
            self.vkd.destroyBuffer(self.device, replacement.buffer, null);
            self.vkd.freeMemory(self.device, replacement.memory, null);
        }

        // Drain the GPU so the old buffer is guaranteed unused before freeing.
        _ = try self.vkd.deviceWaitIdle(self.device);
        self.vkd.destroyBuffer(self.device, self.instance_buffer, null);
        self.vkd.freeMemory(self.device, self.instance_memory, null);
        self.instance_buffer = replacement.buffer;
        self.instance_memory = replacement.memory;
        self.instance_capacity = new_capacity;
    }

    /// Record a command buffer that begins the render pass with the given clear color and presents.
    /// Does not bind the pipeline or draw — just clear + present.
    /// Blocks until the previous frame's fence signals.
    /// Returns error.OutOfDateKHR when the swapchain must be recreated.
    pub fn drawClear(self: *Context, clear_color: [4]f32) !void {
        // Wait for previous frame to finish
        _ = try self.vkd.waitForFences(self.device, 1, @ptrCast(&self.in_flight_fence), .true, std.math.maxInt(u64));

        // Acquire next image
        const acquire = self.vkd.acquireNextImageKHR(
            self.device,
            self.swapchain,
            std.math.maxInt(u64),
            self.image_available,
            .null_handle,
        ) catch |err| switch (err) {
            error.OutOfDateKHR => return error.OutOfDateKHR,
            else => return err,
        };
        if (swapchainNeedsRebuild(acquire.result)) return error.OutOfDateKHR;
        const image_index = acquire.image_index;

        // Only reset the fence once we know this frame will submit work that
        // signals it again. Resetting before acquire could deadlock: an
        // OutOfDateKHR early-return would leave the fence unsignaled forever,
        // and the next waitForFences would never return.
        try self.vkd.resetFences(self.device, 1, @ptrCast(&self.in_flight_fence));

        // Record command buffer
        try self.vkd.resetCommandBuffer(self.command_buffer, .{});
        try self.vkd.beginCommandBuffer(self.command_buffer, &vk.CommandBufferBeginInfo{
            .flags = .{ .one_time_submit_bit = true },
        });

        const clear_value = vk.ClearValue{
            .color = .{ .float_32 = clear_color },
        };

        self.vkd.cmdBeginRenderPass(self.command_buffer, &vk.RenderPassBeginInfo{
            .render_pass = self.render_pass,
            .framebuffer = self.framebuffers[image_index],
            .render_area = .{
                .offset = .{ .x = 0, .y = 0 },
                .extent = self.swapchain_extent,
            },
            .clear_value_count = 1,
            .p_clear_values = @ptrCast(&clear_value),
        }, .@"inline");

        // Don't bind pipeline or draw — just clear.

        self.vkd.cmdEndRenderPass(self.command_buffer);
        try self.vkd.endCommandBuffer(self.command_buffer);

        // Submit
        const wait_stage = vk.PipelineStageFlags{ .color_attachment_output_bit = true };
        try self.vkd.queueSubmit(self.graphics_queue, 1, @ptrCast(&vk.SubmitInfo{
            .wait_semaphore_count = 1,
            .p_wait_semaphores = @ptrCast(&self.image_available),
            .p_wait_dst_stage_mask = @ptrCast(&wait_stage),
            .command_buffer_count = 1,
            .p_command_buffers = @ptrCast(&self.command_buffer),
            .signal_semaphore_count = 1,
            .p_signal_semaphores = @ptrCast(&self.render_finished),
        }), self.in_flight_fence);

        // Present
        const present_result = self.vkd.queuePresentKHR(self.present_queue, &vk.PresentInfoKHR{
            .wait_semaphore_count = 1,
            .p_wait_semaphores = @ptrCast(&self.render_finished),
            .swapchain_count = 1,
            .p_swapchains = @ptrCast(&self.swapchain),
            .p_image_indices = @ptrCast(&image_index),
        }) catch |err| switch (err) {
            error.OutOfDateKHR => return error.OutOfDateKHR,
            else => return err,
        };
        if (swapchainNeedsRebuild(present_result)) return error.OutOfDateKHR;
    }

    /// Upload CPU R8 pixels into the GPU atlas image.
    /// Uses a staging buffer + one-shot command buffer.
    /// Transitions: UNDEFINED -> TRANSFER_DST -> SHADER_READ_ONLY.
    /// `pixels` must hold at least atlas_width * atlas_height bytes.
    pub fn uploadAtlas(self: *Context, pixels: []const u8) !void {
        // The copy below always reads a full atlas worth of texels from the
        // staging buffer (sized by pixels.len); a short slice would make the
        // GPU read past the end of the buffer.
        std.debug.assert(pixels.len >= @as(usize, self.atlas_width) * @as(usize, self.atlas_height));

        const size: vk.DeviceSize = @intCast(pixels.len);

        // Create staging buffer
        const staging = try createHostVisibleBuffer(
            self.vki,
            self.physical_device,
            self.vkd,
            self.device,
            size,
            .{ .transfer_src_bit = true },
        );
        defer {
            self.vkd.destroyBuffer(self.device, staging.buffer, null);
            self.vkd.freeMemory(self.device, staging.memory, null);
        }

        // Copy pixels into staging buffer
        const mapped = try self.vkd.mapMemory(self.device, staging.memory, 0, size, .{});
        @memcpy(@as([*]u8, @ptrCast(mapped))[0..pixels.len], pixels);
        self.vkd.unmapMemory(self.device, staging.memory);

        // One-shot command buffer
        var cb: vk.CommandBuffer = undefined;
        try self.vkd.allocateCommandBuffers(self.device, &vk.CommandBufferAllocateInfo{
            .command_pool = self.command_pool,
            .level = .primary,
            .command_buffer_count = 1,
        }, @ptrCast(&cb));
        defer self.vkd.freeCommandBuffers(self.device, self.command_pool, 1, @ptrCast(&cb));

        try self.vkd.beginCommandBuffer(cb, &vk.CommandBufferBeginInfo{
            .flags = .{ .one_time_submit_bit = true },
        });

        // Barrier: UNDEFINED -> TRANSFER_DST (discard any prior contents)
        const barrier_to_transfer = vk.ImageMemoryBarrier{
            .src_access_mask = .{},
            .dst_access_mask = .{ .transfer_write_bit = true },
            .old_layout = .undefined,
            .new_layout = .transfer_dst_optimal,
            .src_queue_family_index = vk.QUEUE_FAMILY_IGNORED,
            .dst_queue_family_index = vk.QUEUE_FAMILY_IGNORED,
            .image = self.atlas_image,
            .subresource_range = .{
                .aspect_mask = .{ .color_bit = true },
                .base_mip_level = 0,
                .level_count = 1,
                .base_array_layer = 0,
                .layer_count = 1,
            },
        };
        self.vkd.cmdPipelineBarrier(
            cb,
            .{ .top_of_pipe_bit = true },
            .{ .transfer_bit = true },
            .{},
            0, null,
            0, null,
            1, @ptrCast(&barrier_to_transfer),
        );

        // Copy buffer -> image (row length 0 == tightly packed)
        const region = vk.BufferImageCopy{
            .buffer_offset = 0,
            .buffer_row_length = 0,
            .buffer_image_height = 0,
            .image_subresource = .{
                .aspect_mask = .{ .color_bit = true },
                .mip_level = 0,
                .base_array_layer = 0,
                .layer_count = 1,
            },
            .image_offset = .{ .x = 0, .y = 0, .z = 0 },
            .image_extent = .{ .width = self.atlas_width, .height = self.atlas_height, .depth = 1 },
        };
        self.vkd.cmdCopyBufferToImage(cb, staging.buffer, self.atlas_image, .transfer_dst_optimal, 1, @ptrCast(&region));

        // Barrier: TRANSFER_DST -> SHADER_READ_ONLY
        const barrier_to_shader = vk.ImageMemoryBarrier{
            .src_access_mask = .{ .transfer_write_bit = true },
            .dst_access_mask = .{ .shader_read_bit = true },
            .old_layout = .transfer_dst_optimal,
            .new_layout = .shader_read_only_optimal,
            .src_queue_family_index = vk.QUEUE_FAMILY_IGNORED,
            .dst_queue_family_index = vk.QUEUE_FAMILY_IGNORED,
            .image = self.atlas_image,
            .subresource_range = .{
                .aspect_mask = .{ .color_bit = true },
                .base_mip_level = 0,
                .level_count = 1,
                .base_array_layer = 0,
                .layer_count = 1,
            },
        };
        self.vkd.cmdPipelineBarrier(
            cb,
            .{ .transfer_bit = true },
            .{ .fragment_shader_bit = true },
            .{},
            0, null,
            0, null,
            1, @ptrCast(&barrier_to_shader),
        );

        try self.vkd.endCommandBuffer(cb);

        // Submit and block until the transfer completes; the staging buffer is
        // freed by the defers above, so it must not outlive this wait.
        try self.vkd.queueSubmit(self.graphics_queue, 1, @ptrCast(&vk.SubmitInfo{
            .command_buffer_count = 1,
            .p_command_buffers = @ptrCast(&cb),
        }), .null_handle);
        try self.vkd.queueWaitIdle(self.graphics_queue);
    }

    /// Upload a horizontal band of the atlas (y_start..y_end) to the GPU.
    /// Uses the persistent staging buffer and dedicated transfer command buffer.
    /// If `full` is true, transitions from UNDEFINED (for initial/reset uploads).
    /// Otherwise transitions from SHADER_READ_ONLY (preserves existing data).
    /// `pixels` is the whole CPU-side atlas; only the band is copied.
    pub fn uploadAtlasRegion(
        self: *Context,
        pixels: []const u8,
        y_start: u32,
        y_end: u32,
        full: bool,
    ) !void {
        if (y_start >= y_end) return;
        // The band must lie inside the atlas; this also guarantees it fits in
        // the persistent staging buffer, which holds one full atlas.
        std.debug.assert(y_end <= self.atlas_height);

        const byte_offset: usize = @as(usize, y_start) * self.atlas_width;
        const byte_len: usize = @as(usize, y_end - y_start) * self.atlas_width;
        // The caller's pixel slice must actually contain the requested band.
        std.debug.assert(byte_offset + byte_len <= pixels.len);

        // Wait for any prior atlas transfer to finish before reusing staging buffer
        _ = try self.vkd.waitForFences(self.device, 1, @ptrCast(&self.atlas_transfer_fence), .true, std.math.maxInt(u64));
        try self.vkd.resetFences(self.device, 1, @ptrCast(&self.atlas_transfer_fence));

        // Copy dirty band into staging buffer
        const mapped = try self.vkd.mapMemory(self.device, self.atlas_staging_memory, 0, @intCast(byte_len), .{});
        @memcpy(@as([*]u8, @ptrCast(mapped))[0..byte_len], pixels[byte_offset .. byte_offset + byte_len]);
        self.vkd.unmapMemory(self.device, self.atlas_staging_memory);

        // Record transfer command
        try self.vkd.resetCommandBuffer(self.atlas_transfer_cb, .{});
        try self.vkd.beginCommandBuffer(self.atlas_transfer_cb, &vk.CommandBufferBeginInfo{
            .flags = .{ .one_time_submit_bit = true },
        });

        // Barrier: old_layout -> TRANSFER_DST. A `full` upload discards prior
        // contents (UNDEFINED); otherwise we wait on fragment-shader reads.
        const old_layout: vk.ImageLayout = if (full) .undefined else .shader_read_only_optimal;
        const barrier_to_transfer = vk.ImageMemoryBarrier{
            .src_access_mask = if (full) @as(vk.AccessFlags, .{}) else .{ .shader_read_bit = true },
            .dst_access_mask = .{ .transfer_write_bit = true },
            .old_layout = old_layout,
            .new_layout = .transfer_dst_optimal,
            .src_queue_family_index = vk.QUEUE_FAMILY_IGNORED,
            .dst_queue_family_index = vk.QUEUE_FAMILY_IGNORED,
            .image = self.atlas_image,
            .subresource_range = .{
                .aspect_mask = .{ .color_bit = true },
                .base_mip_level = 0,
                .level_count = 1,
                .base_array_layer = 0,
                .layer_count = 1,
            },
        };
        const src_stage: vk.PipelineStageFlags = if (full) .{ .top_of_pipe_bit = true } else .{ .fragment_shader_bit = true };
        self.vkd.cmdPipelineBarrier(
            self.atlas_transfer_cb,
            src_stage,
            .{ .transfer_bit = true },
            .{},
            0, null,
            0, null,
            1, @ptrCast(&barrier_to_transfer),
        );

        // Copy staging buffer -> image (dirty band only)
        const region = vk.BufferImageCopy{
            .buffer_offset = 0,
            .buffer_row_length = 0,
            .buffer_image_height = 0,
            .image_subresource = .{
                .aspect_mask = .{ .color_bit = true },
                .mip_level = 0,
                .base_array_layer = 0,
                .layer_count = 1,
            },
            .image_offset = .{ .x = 0, .y = @intCast(y_start), .z = 0 },
            .image_extent = .{ .width = self.atlas_width, .height = y_end - y_start, .depth = 1 },
        };
        self.vkd.cmdCopyBufferToImage(
            self.atlas_transfer_cb,
            self.atlas_staging_buffer,
            self.atlas_image,
            .transfer_dst_optimal,
            1,
            @ptrCast(&region),
        );

        // Barrier: TRANSFER_DST -> SHADER_READ_ONLY
        const barrier_to_shader = vk.ImageMemoryBarrier{
            .src_access_mask = .{ .transfer_write_bit = true },
            .dst_access_mask = .{ .shader_read_bit = true },
            .old_layout = .transfer_dst_optimal,
            .new_layout = .shader_read_only_optimal,
            .src_queue_family_index = vk.QUEUE_FAMILY_IGNORED,
            .dst_queue_family_index = vk.QUEUE_FAMILY_IGNORED,
            .image = self.atlas_image,
            .subresource_range = .{
                .aspect_mask = .{ .color_bit = true },
                .base_mip_level = 0,
                .level_count = 1,
                .base_array_layer = 0,
                .layer_count = 1,
            },
        };
        self.vkd.cmdPipelineBarrier(
            self.atlas_transfer_cb,
            .{ .transfer_bit = true },
            .{ .fragment_shader_bit = true },
            .{},
            0, null,
            0, null,
            1, @ptrCast(&barrier_to_shader),
        );

        try self.vkd.endCommandBuffer(self.atlas_transfer_cb);

        // Submit with dedicated fence (no queueWaitIdle); the next call waits
        // on this fence before reusing the staging buffer.
        try self.vkd.queueSubmit(self.graphics_queue, 1, @ptrCast(&vk.SubmitInfo{
            .command_buffer_count = 1,
            .p_command_buffers = @ptrCast(&self.atlas_transfer_cb),
        }), self.atlas_transfer_fence);
    }

    /// Map the instance buffer, copy instances in, unmap.
    /// Grows the buffer first when `instances` exceeds the current capacity.
    pub fn uploadInstances(self: *Context, instances: []const Instance) !void {
        try self.ensureInstanceCapacity(@intCast(instances.len));
        // vkMapMemory requires size > 0; an empty upload has nothing to copy.
        if (instances.len == 0) return;
        const size: vk.DeviceSize = @sizeOf(Instance) * instances.len;
        const mapped = try self.vkd.mapMemory(self.device, self.instance_memory, 0, size, .{});
        @memcpy(@as([*]Instance, @ptrCast(@alignCast(mapped)))[0..instances.len], instances);
        self.vkd.unmapMemory(self.device, self.instance_memory);
    }

    /// Upload a contiguous instance subrange when the existing buffer is large enough.
    /// Returns true when the caller must fall back to a full upload instead.
    pub fn uploadInstanceRange(
        self: *Context,
        offset_instances: u32,
        instances: []const Instance,
    ) !bool {
        const write_len = std.math.cast(u32, instances.len) orelse
            return error.InvalidInstanceRange;

        // Decide how this write can be serviced before touching any memory.
        switch (planUploadInstanceRangeAction(self.instance_capacity, offset_instances, write_len)) {
            .no_op => return false,
            .full => return true,
            .invalid_range => return error.InvalidInstanceRange,
            .partial => {},
        }

        // Map only the byte window being overwritten.
        const window = planInstanceRangeWrite(offset_instances, write_len);
        const dst = try self.vkd.mapMemory(
            self.device,
            self.instance_memory,
            window.byte_offset,
            window.byte_len,
            .{},
        );
        defer self.vkd.unmapMemory(self.device, self.instance_memory);
        @memcpy(@as([*]Instance, @ptrCast(@alignCast(dst)))[0..instances.len], instances);
        return false;
    }

    /// Full draw pass: bind pipeline, push constants, vertex + instance buffers, draw, present.
    /// Blocks until the previous frame's fence signals.
    /// Returns error.OutOfDateKHR when the swapchain must be recreated.
    pub fn drawCells(
        self: *Context,
        instance_count: u32,
        cell_size: [2]f32,
        clear_color: [4]f32,
        coverage_params: [2]f32,
    ) !void {
        // Wait for previous frame to finish
        _ = try self.vkd.waitForFences(self.device, 1, @ptrCast(&self.in_flight_fence), .true, std.math.maxInt(u64));

        // Acquire next image
        const acquire = self.vkd.acquireNextImageKHR(
            self.device,
            self.swapchain,
            std.math.maxInt(u64),
            self.image_available,
            .null_handle,
        ) catch |err| switch (err) {
            error.OutOfDateKHR => return error.OutOfDateKHR,
            else => return err,
        };
        if (swapchainNeedsRebuild(acquire.result)) return error.OutOfDateKHR;
        const image_index = acquire.image_index;

        // Only reset the fence once we know this frame will submit work that
        // signals it again. Resetting before acquire could deadlock: an
        // OutOfDateKHR early-return would leave the fence unsignaled forever,
        // and the next waitForFences would never return.
        try self.vkd.resetFences(self.device, 1, @ptrCast(&self.in_flight_fence));

        // Record command buffer
        try self.vkd.resetCommandBuffer(self.command_buffer, .{});
        try self.vkd.beginCommandBuffer(self.command_buffer, &vk.CommandBufferBeginInfo{
            .flags = .{ .one_time_submit_bit = true },
        });

        const clear_value = vk.ClearValue{ .color = .{ .float_32 = clear_color } };

        self.vkd.cmdBeginRenderPass(self.command_buffer, &vk.RenderPassBeginInfo{
            .render_pass = self.render_pass,
            .framebuffer = self.framebuffers[image_index],
            .render_area = .{
                .offset = .{ .x = 0, .y = 0 },
                .extent = self.swapchain_extent,
            },
            .clear_value_count = 1,
            .p_clear_values = @ptrCast(&clear_value),
        }, .@"inline");

        self.vkd.cmdBindPipeline(self.command_buffer, .graphics, self.pipeline);

        // Dynamic viewport + scissor
        const viewport = vk.Viewport{
            .x = 0.0,
            .y = 0.0,
            .width = @floatFromInt(self.swapchain_extent.width),
            .height = @floatFromInt(self.swapchain_extent.height),
            .min_depth = 0.0,
            .max_depth = 1.0,
        };
        self.vkd.cmdSetViewport(self.command_buffer, 0, 1, @ptrCast(&viewport));

        const scissor = vk.Rect2D{
            .offset = .{ .x = 0, .y = 0 },
            .extent = self.swapchain_extent,
        };
        self.vkd.cmdSetScissor(self.command_buffer, 0, 1, @ptrCast(&scissor));

        // Push constants (shared by vertex + fragment stages)
        const pc = PushConstants{
            .viewport_size = .{
                @floatFromInt(self.swapchain_extent.width),
                @floatFromInt(self.swapchain_extent.height),
            },
            .cell_size = cell_size,
            .coverage_params = coverage_params,
        };
        self.vkd.cmdPushConstants(
            self.command_buffer,
            self.pipeline_layout,
            .{ .vertex_bit = true, .fragment_bit = true },
            0,
            @sizeOf(PushConstants),
            @ptrCast(&pc),
        );

        // Bind descriptor set (atlas sampler)
        self.vkd.cmdBindDescriptorSets(
            self.command_buffer,
            .graphics,
            self.pipeline_layout,
            0, 1, @ptrCast(&self.descriptor_set),
            0, null,
        );

        // Bind vertex buffers: binding 0 = quad, binding 1 = instances
        const buffers = [_]vk.Buffer{ self.quad_vertex_buffer, self.instance_buffer };
        const offsets = [_]vk.DeviceSize{ 0, 0 };
        self.vkd.cmdBindVertexBuffers(self.command_buffer, 0, 2, &buffers, &offsets);

        // 6 vertices = two triangles forming the unit quad, instanced per cell.
        self.vkd.cmdDraw(self.command_buffer, 6, instance_count, 0, 0);

        self.vkd.cmdEndRenderPass(self.command_buffer);
        try self.vkd.endCommandBuffer(self.command_buffer);

        // Submit
        const wait_stage = vk.PipelineStageFlags{ .color_attachment_output_bit = true };
        try self.vkd.queueSubmit(self.graphics_queue, 1, @ptrCast(&vk.SubmitInfo{
            .wait_semaphore_count = 1,
            .p_wait_semaphores = @ptrCast(&self.image_available),
            .p_wait_dst_stage_mask = @ptrCast(&wait_stage),
            .command_buffer_count = 1,
            .p_command_buffers = @ptrCast(&self.command_buffer),
            .signal_semaphore_count = 1,
            .p_signal_semaphores = @ptrCast(&self.render_finished),
        }), self.in_flight_fence);

        // Present
        const present_result = self.vkd.queuePresentKHR(self.present_queue, &vk.PresentInfoKHR{
            .wait_semaphore_count = 1,
            .p_wait_semaphores = @ptrCast(&self.render_finished),
            .swapchain_count = 1,
            .p_swapchains = @ptrCast(&self.swapchain),
            .p_image_indices = @ptrCast(&image_index),
        }) catch |err| switch (err) {
            error.OutOfDateKHR => return error.OutOfDateKHR,
            else => return err,
        };
        if (swapchainNeedsRebuild(present_result)) return error.OutOfDateKHR;
    }
};

test "vulkan module imports" {
    // Reference the vulkan binding so a broken import fails at compile time.
    _ = vk;
}

test "shaders are embedded with SPIR-V magic" {
    // Every SPIR-V stream begins with the magic word 0x07230203 (little-endian).
    const spirv_magic: u32 = 0x07230203;
    inline for (.{ cell_vert_spv, cell_frag_spv }) |spv| {
        try std.testing.expect(spv.len > 0);
        try std.testing.expectEqual(spirv_magic, std.mem.readInt(u32, spv[0..4], .little));
    }
}

test "nextInstanceCapacity grows geometrically" {
    // Sufficient capacity is returned unchanged; otherwise it doubles until it fits.
    const cases = [_]struct { current: u32, needed: u32, expected: u32 }{
        .{ .current = 16_000, .needed = 8_000, .expected = 16_000 },
        .{ .current = 16_000, .needed = 16_001, .expected = 32_000 },
        .{ .current = 1, .needed = 3, .expected = 4 },
    };
    for (cases) |case| {
        try std.testing.expectEqual(case.expected, nextInstanceCapacity(case.current, case.needed));
    }
}

test "swapchainNeedsRebuild flags suboptimal result" {
    // A suboptimal present/acquire result should trigger recreation; success should not.
    try std.testing.expectEqual(true, swapchainNeedsRebuild(.suboptimal_khr));
    try std.testing.expectEqual(false, swapchainNeedsRebuild(.success));
}

test "coverageVariantParams returns baseline values" {
    // Baseline: unity slope, zero offset.
    const params = coverageVariantParams(.baseline);
    try std.testing.expectEqual(@as(f32, 1.0), params[0]);
    try std.testing.expectEqual(@as(f32, 0.0), params[1]);
}

test "PushConstants carries baseline coverage params" {
    // Coverage params should pass through the push-constant struct unchanged.
    const pc = PushConstants{
        .viewport_size = .{ 1920.0, 1080.0 },
        .cell_size = .{ 9.0, 18.0 },
        .coverage_params = coverageVariantParams(.baseline),
    };
    try std.testing.expectEqual(@as(f32, 1.0), pc.coverage_params[0]);
    try std.testing.expectEqual(@as(f32, 0.0), pc.coverage_params[1]);
}

test "coverageVariantParams steepens progressively" {
    const variants = [_][2]f32{
        coverageVariantParams(.mild),
        coverageVariantParams(.medium),
        coverageVariantParams(.crisp),
    };
    try std.testing.expectEqualDeep([2]f32{ 1.15, 0.0 }, variants[0]);
    try std.testing.expectEqualDeep([2]f32{ 1.3, 0.0 }, variants[1]);
    try std.testing.expectEqualDeep([2]f32{ 1.55, -0.08 }, variants[2]);
    // The slope (first component) must strictly increase from mild to crisp.
    for (variants[0 .. variants.len - 1], variants[1..]) |softer, steeper| {
        try std.testing.expect(softer[0] < steeper[0]);
    }
}

test "range upload falls back to full upload when capacity must grow" {
    // Writing 4 instances at offset 6 needs capacity 10, but only 8 exist.
    const decision = planInstanceUpload(.{
        .current_capacity = 8,
        .offset_instances = 6,
        .write_len = 4,
    });
    try std.testing.expectEqual(InstanceUploadMode.full, decision.upload_mode);
    try std.testing.expectEqual(@as(?u32, 10), decision.needed_capacity);
}

test "range upload stays partial when capacity is sufficient" {
    // 4 + 3 = 7 fits comfortably inside capacity 16.
    const decision = planInstanceUpload(.{
        .current_capacity = 16,
        .offset_instances = 4,
        .write_len = 3,
    });
    try std.testing.expectEqual(InstanceUploadMode.partial, decision.upload_mode);
    try std.testing.expectEqual(@as(?u32, 7), decision.needed_capacity);
}

test "range upload stays partial on an exact fit" {
    // 4 + 3 = 7 exactly equals the capacity; no growth is required.
    const decision = planInstanceUpload(.{
        .current_capacity = 7,
        .offset_instances = 4,
        .write_len = 3,
    });
    try std.testing.expectEqual(InstanceUploadMode.partial, decision.upload_mode);
    try std.testing.expectEqual(@as(?u32, 7), decision.needed_capacity);
}

test "range upload allows zero-length writes without forcing growth" {
    // A zero-length write ending exactly at the capacity boundary is allowed.
    const decision = planInstanceUpload(.{
        .current_capacity = 8,
        .offset_instances = 8,
        .write_len = 0,
    });
    try std.testing.expectEqual(InstanceUploadMode.partial, decision.upload_mode);
    try std.testing.expectEqual(@as(?u32, 8), decision.needed_capacity);
}

test "range upload reports overflow explicitly" {
    // offset + len overflows u32; the planner must flag it rather than wrap.
    const decision = planInstanceUpload(.{
        .current_capacity = std.math.maxInt(u32),
        .offset_instances = std.math.maxInt(u32),
        .write_len = 1,
    });
    try std.testing.expectEqual(InstanceUploadMode.invalid_range, decision.upload_mode);
    try std.testing.expectEqual(@as(?u32, null), decision.needed_capacity);
}

/// Builds a deterministic `Instance` whose every component is an offset of
/// `base`, so tests can assert exact per-slot contents via expectEqualDeep
/// and distinguish any two instances built from different bases.
fn testInstance(base: f32) Instance {
    return .{
        .cell_pos = .{ base, base + 1.0 },
        .glyph_size = .{ base + 2.0, base + 3.0 },
        .glyph_bearing = .{ base + 4.0, base + 5.0 },
        .uv_rect = .{ base + 6.0, base + 7.0, base + 8.0, base + 9.0 },
        .fg = .{ base + 10.0, base + 11.0, base + 12.0, base + 13.0 },
        .bg = .{ base + 14.0, base + 15.0, base + 16.0, base + 17.0 },
    };
}

// The byte window is the instance window scaled by @sizeOf(Instance).
// (Renamed: the function under test is planInstanceRangeWrite, not
// uploadInstanceRangeWrite — the old name pointed at a nonexistent symbol.)
test "planInstanceRangeWrite computes byte offset from instance offset" {
    const write = planInstanceRangeWrite(3, 2);
    try std.testing.expectEqual(@as(vk.DeviceSize, 3 * @sizeOf(Instance)), write.byte_offset);
    try std.testing.expectEqual(@as(vk.DeviceSize, 2 * @sizeOf(Instance)), write.byte_len);
}

// A successful in-range write must touch only [offset, offset+len) and
// return false (no full-upload fallback required).
test "writeInstanceRange overwrites only the requested window" {
    var dest = [_]Instance{
        testInstance(0),
        testInstance(20),
        testInstance(40),
        testInstance(60),
    };
    const patch = [_]Instance{
        testInstance(100),
        testInstance(120),
    };

    const needs_full = try writeInstanceRange(dest[0..], 1, patch[0..]);
    try std.testing.expect(!needs_full);

    // Slots 1 and 2 are replaced; 0 and 3 stay untouched.
    const expected = [_]Instance{
        testInstance(0),
        testInstance(100),
        testInstance(120),
        testInstance(60),
    };
    for (expected, dest) |want, got| {
        try std.testing.expectEqualDeep(want, got);
    }
}

// Two instances starting at index 1 overflow a 2-slot buffer: the call
// must return true (full-upload fallback) and leave the buffer untouched.
test "writeInstanceRange reports full-upload fallback when capacity is too small" {
    var dest = [_]Instance{ testInstance(0), testInstance(20) };
    const patch = [_]Instance{ testInstance(100), testInstance(120) };

    try std.testing.expect(try writeInstanceRange(dest[0..], 1, patch[0..]));
    try std.testing.expectEqualDeep(testInstance(0), dest[0]);
    try std.testing.expectEqualDeep(testInstance(20), dest[1]);
}

// Even a wildly out-of-bounds offset is acceptable when there is nothing
// to write: no fallback is requested and the buffer is untouched.
test "writeInstanceRange treats zero-length writes as a no-op without fallback" {
    var dest = [_]Instance{ testInstance(0), testInstance(20) };
    const nothing = [_]Instance{};

    const needs_full = try writeInstanceRange(dest[0..], 99, nothing[0..]);
    try std.testing.expect(!needs_full);
    try std.testing.expectEqualDeep(testInstance(0), dest[0]);
    try std.testing.expectEqualDeep(testInstance(20), dest[1]);
}

// offset + len overflowing u32 must surface as an explicit error rather
// than wrapping around into a bogus (seemingly valid) range.
test "writeInstanceRange rejects overflowing ranges" {
    var dest = [_]Instance{ testInstance(0), testInstance(20) };
    const patch = [_]Instance{testInstance(100)};

    try std.testing.expectError(
        error.InvalidInstanceRange,
        writeInstanceRange(dest[0..], std.math.maxInt(u32), patch[0..]),
    );
}

// len == 0 short-circuits before any capacity/offset validation.
test "uploadInstanceRange contract treats zero-length writes as a no-op" {
    const result = planUploadInstanceRangeAction(4, 99, 0);
    try std.testing.expectEqual(InstanceRangeUploadAction.no_op, result);
}

// 3 + 2 = 5 slots fit in a capacity of 8, so a partial write suffices.
test "uploadInstanceRange contract reports partial writes when capacity fits" {
    const result = planUploadInstanceRangeAction(8, 3, 2);
    try std.testing.expectEqual(InstanceRangeUploadAction.partial, result);
}

// 3 + 2 = 5 slots exceed a capacity of 4: growth means a full upload.
test "uploadInstanceRange contract reports full-upload fallback on growth" {
    const result = planUploadInstanceRangeAction(4, 3, 2);
    try std.testing.expectEqual(InstanceRangeUploadAction.full, result);
}

// u32 overflow in offset + len must map to an explicit invalid_range.
test "uploadInstanceRange contract reports invalid ranges explicitly" {
    const result = planUploadInstanceRangeAction(std.math.maxInt(u32), std.math.maxInt(u32), 1);
    try std.testing.expectEqual(InstanceRangeUploadAction.invalid_range, result);
}