e2859d88
feat(renderer): glyph atlas upload + instanced draw
a73x 2026-04-08 09:22
Add a GPU glyph atlas texture (R8, 1024x1024), a unit-quad vertex buffer, a per-instance buffer, uploadAtlas/uploadInstances/drawCells methods, and a --draw-smoke-test mode that renders a single 'M' glyph near the window center. Fix the FIFO swapchain hang by preferring the MAILBOX/IMMEDIATE present modes. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
diff --git a/src/main.zig b/src/main.zig index 0cf4c25..9b5fdfb 100644 --- a/src/main.zig +++ b/src/main.zig @@ -3,6 +3,7 @@ const vt = @import("vt"); const pty = @import("pty"); const wayland_client = @import("wayland-client"); const renderer = @import("renderer"); const font = @import("font"); pub fn main() !void { var gpa: std.heap.DebugAllocator(.{}) = .init; @@ -28,9 +29,92 @@ pub fn main() !void { return runRenderSmokeTest(alloc); } if (args.len >= 2 and std.mem.eql(u8, args[1], "--draw-smoke-test")) { return runDrawSmokeTest(alloc); } std.debug.print("waystty (run with --headless for CLI dump mode)\n", .{}); } fn runDrawSmokeTest(alloc: std.mem.Allocator) !void { var conn = try wayland_client.Connection.init(); defer conn.deinit(); std.debug.print("wayland connected\n", .{}); const window = try conn.createWindow(alloc, "waystty-draw-smoke"); defer window.deinit(); std.debug.print("window created (w={d} h={d})\n", .{ window.width, window.height }); _ = conn.display.roundtrip(); var ctx = try renderer.Context.init( alloc, @ptrCast(conn.display), @ptrCast(window.surface), window.width, window.height, ); defer ctx.deinit(); std.debug.print("vulkan context created\n", .{}); // Load monospace font and create atlas var font_lookup = try font.lookupMonospace(alloc); defer font_lookup.deinit(alloc); const px_size: u32 = 16; var face = try font.Face.init(alloc, font_lookup.path, font_lookup.index, px_size); defer face.deinit(); var atlas = try font.Atlas.init(alloc, 1024, 1024); defer atlas.deinit(); // Rasterize 'M' into the atlas const glyph_uv = try atlas.getOrInsert(&face, 'M'); std.debug.print("glyph 'M': uv=({d:.3},{d:.3})->({d:.3},{d:.3}) size={d}x{d}\n", .{ glyph_uv.u0, glyph_uv.v0, glyph_uv.u1, glyph_uv.v1, glyph_uv.width, glyph_uv.height, }); // Upload atlas pixels to GPU try ctx.uploadAtlas(atlas.pixels); std.debug.print("atlas uploaded\n", .{}); // Cell size from font metrics const cell_w: f32 = @floatFromInt(face.cellWidth()); const cell_h: f32 = 
@floatFromInt(face.cellHeight()); std.debug.print("cell size: {d}x{d}\n", .{ cell_w, cell_h }); // One Instance: 'M' at cell position (40, 12) — near center of 80x24 grid const instances = [_]renderer.Instance{ .{ .cell_pos = .{ 40.0, 12.0 }, .uv_rect = .{ glyph_uv.u0, glyph_uv.v0, glyph_uv.u1, glyph_uv.v1 }, .fg = .{ 1.0, 1.0, 1.0, 1.0 }, .bg = .{ 0.0, 0.0, 0.0, 1.0 }, }, }; try ctx.uploadInstances(&instances); std.debug.print("instances uploaded, rendering 60 frames...\n", .{}); var i: u32 = 0; while (i < 60) : (i += 1) { // Non-blocking Wayland event read: prepare + read + dispatch. // The Vulkan WSI (FIFO) needs wl_buffer.release events from the compositor // to be read off the socket before it can release an acquire slot. _ = conn.display.flush(); if (conn.display.prepareRead()) { _ = conn.display.readEvents(); } _ = conn.display.dispatchPending(); try ctx.drawCells(1, .{ cell_w, cell_h }); _ = conn.display.flush(); } _ = try ctx.vkd.deviceWaitIdle(ctx.device); std.debug.print("done\n", .{}); } fn runRenderSmokeTest(alloc: std.mem.Allocator) !void { var conn = try wayland_client.Connection.init(); defer conn.deinit(); diff --git a/src/renderer.zig b/src/renderer.zig index c5c6161..a8adf59 100644 --- a/src/renderer.zig +++ b/src/renderer.zig @@ -119,6 +119,23 @@ fn createSwapchain( image_count = caps.max_image_count; } // Prefer MAILBOX (non-blocking, no tearing) over IMMEDIATE, fall back to FIFO. // FIFO blocks until compositor releases a buffer, requiring a pumped Wayland event loop. 
var pm_count: u32 = 0; _ = try vki.getPhysicalDeviceSurfacePresentModesKHR(pd_info.physical, surface, &pm_count, null); const present_modes = try alloc.alloc(vk.PresentModeKHR, pm_count); defer alloc.free(present_modes); _ = try vki.getPhysicalDeviceSurfacePresentModesKHR(pd_info.physical, surface, &pm_count, present_modes.ptr); var present_mode: vk.PresentModeKHR = .fifo_khr; for (present_modes[0..pm_count]) |pm| { if (pm == .mailbox_khr) { present_mode = .mailbox_khr; break; } } if (present_mode == .fifo_khr) { for (present_modes[0..pm_count]) |pm| { if (pm == .immediate_khr) { present_mode = .immediate_khr; break; } } } const same_family = pd_info.graphics_queue_family == pd_info.present_queue_family; const families = [_]u32{ pd_info.graphics_queue_family, pd_info.present_queue_family }; @@ -135,7 +152,7 @@ fn createSwapchain( .p_queue_family_indices = if (same_family) null else &families, .pre_transform = caps.current_transform, .composite_alpha = .{ .opaque_bit_khr = true }, .present_mode = .fifo_khr, .present_mode = present_mode, .clipped = .true, }, null); @@ -200,6 +217,60 @@ pub const Instance = extern struct { bg: [4]f32, // location 4 }; const BufferResult = struct { buffer: vk.Buffer, memory: vk.DeviceMemory, }; fn createHostVisibleBuffer( vki: vk.InstanceWrapper, physical_device: vk.PhysicalDevice, vkd: vk.DeviceWrapper, device: vk.Device, size: vk.DeviceSize, usage: vk.BufferUsageFlags, ) !BufferResult { const buffer = try vkd.createBuffer(device, &vk.BufferCreateInfo{ .size = size, .usage = usage, .sharing_mode = .exclusive, }, null); errdefer vkd.destroyBuffer(device, buffer, null); const reqs = vkd.getBufferMemoryRequirements(device, buffer); const mem_idx = try findMemoryType( vki, physical_device, reqs.memory_type_bits, .{ .host_visible_bit = true, .host_coherent_bit = true }, ); const memory = try vkd.allocateMemory(device, &vk.MemoryAllocateInfo{ .allocation_size = reqs.size, .memory_type_index = mem_idx, }, null); errdefer 
vkd.freeMemory(device, memory, null); try vkd.bindBufferMemory(device, buffer, memory, 0); return .{ .buffer = buffer, .memory = memory }; } fn findMemoryType( vki: vk.InstanceWrapper, physical_device: vk.PhysicalDevice, type_filter: u32, properties: vk.MemoryPropertyFlags, ) !u32 { const mem_props = vki.getPhysicalDeviceMemoryProperties(physical_device); var i: u32 = 0; while (i < mem_props.memory_type_count) : (i += 1) { if ((type_filter & (@as(u32, 1) << @intCast(i))) != 0) { const type_props = mem_props.memory_types[i].property_flags; if (type_props.contains(properties)) return i; } } return error.NoSuitableMemoryType; } pub const Context = struct { alloc: std.mem.Allocator, vkb: vk.BaseWrapper, @@ -235,6 +306,20 @@ pub const Context = struct { image_available: vk.Semaphore, render_finished: vk.Semaphore, in_flight_fence: vk.Fence, // Static unit-quad vertex buffer quad_vertex_buffer: vk.Buffer, quad_vertex_memory: vk.DeviceMemory, // Per-frame instance buffer instance_buffer: vk.Buffer, instance_memory: vk.DeviceMemory, instance_capacity: u32, // GPU glyph atlas texture atlas_image: vk.Image, atlas_memory: vk.DeviceMemory, atlas_view: vk.ImageView, atlas_sampler: vk.Sampler, atlas_width: u32, atlas_height: u32, pub fn init( alloc: std.mem.Allocator, @@ -582,6 +667,117 @@ pub const Context = struct { }, null); errdefer vkd.destroyFence(device, in_flight_fence, null); // --- Quad vertex buffer --- const quad_verts = [_]Vertex{ .{ .unit_pos = .{ 0, 0 } }, .{ .unit_pos = .{ 1, 0 } }, .{ .unit_pos = .{ 1, 1 } }, .{ .unit_pos = .{ 0, 0 } }, .{ .unit_pos = .{ 1, 1 } }, .{ .unit_pos = .{ 0, 1 } }, }; const quad_vb_size: vk.DeviceSize = @sizeOf(@TypeOf(quad_verts)); const quad = try createHostVisibleBuffer(vki, pd_info.physical, vkd, device, quad_vb_size, .{ .vertex_buffer_bit = true }); errdefer { vkd.destroyBuffer(device, quad.buffer, null); vkd.freeMemory(device, quad.memory, null); } { const mapped = try vkd.mapMemory(device, quad.memory, 0, quad_vb_size, .{}); 
@memcpy( @as([*]Vertex, @ptrCast(@alignCast(mapped)))[0..quad_verts.len], &quad_verts, ); vkd.unmapMemory(device, quad.memory); } // --- Instance buffer --- const max_instances: u32 = 200 * 80; const instance_size: vk.DeviceSize = @sizeOf(Instance) * max_instances; const inst = try createHostVisibleBuffer(vki, pd_info.physical, vkd, device, instance_size, .{ .vertex_buffer_bit = true }); errdefer { vkd.destroyBuffer(device, inst.buffer, null); vkd.freeMemory(device, inst.memory, null); } // --- Atlas texture --- const atlas_width: u32 = 1024; const atlas_height: u32 = 1024; const atlas_image = try vkd.createImage(device, &vk.ImageCreateInfo{ .image_type = .@"2d", .format = .r8_unorm, .extent = .{ .width = atlas_width, .height = atlas_height, .depth = 1 }, .mip_levels = 1, .array_layers = 1, .samples = .{ .@"1_bit" = true }, .tiling = .optimal, .usage = .{ .transfer_dst_bit = true, .sampled_bit = true }, .sharing_mode = .exclusive, .initial_layout = .undefined, }, null); errdefer vkd.destroyImage(device, atlas_image, null); const img_reqs = vkd.getImageMemoryRequirements(device, atlas_image); const img_mem_idx = try findMemoryType(vki, pd_info.physical, img_reqs.memory_type_bits, .{ .device_local_bit = true }); const atlas_memory = try vkd.allocateMemory(device, &vk.MemoryAllocateInfo{ .allocation_size = img_reqs.size, .memory_type_index = img_mem_idx, }, null); errdefer vkd.freeMemory(device, atlas_memory, null); try vkd.bindImageMemory(device, atlas_image, atlas_memory, 0); const atlas_view = try vkd.createImageView(device, &vk.ImageViewCreateInfo{ .image = atlas_image, .view_type = .@"2d", .format = .r8_unorm, .components = .{ .r = .identity, .g = .identity, .b = .identity, .a = .identity }, .subresource_range = .{ .aspect_mask = .{ .color_bit = true }, .base_mip_level = 0, .level_count = 1, .base_array_layer = 0, .layer_count = 1, }, }, null); errdefer vkd.destroyImageView(device, atlas_view, null); const atlas_sampler = try vkd.createSampler(device, 
&vk.SamplerCreateInfo{ .mag_filter = .nearest, .min_filter = .nearest, .mipmap_mode = .nearest, .address_mode_u = .clamp_to_edge, .address_mode_v = .clamp_to_edge, .address_mode_w = .clamp_to_edge, .mip_lod_bias = 0, .anisotropy_enable = .false, .max_anisotropy = 1, .compare_enable = .false, .compare_op = .always, .min_lod = 0, .max_lod = 0, .border_color = .int_opaque_black, .unnormalized_coordinates = .false, }, null); errdefer vkd.destroySampler(device, atlas_sampler, null); // Bind atlas to descriptor set const img_info = vk.DescriptorImageInfo{ .sampler = atlas_sampler, .image_view = atlas_view, .image_layout = .shader_read_only_optimal, }; vkd.updateDescriptorSets(device, 1, @ptrCast(&vk.WriteDescriptorSet{ .dst_set = descriptor_set, .dst_binding = 0, .dst_array_element = 0, .descriptor_count = 1, .descriptor_type = .combined_image_sampler, .p_image_info = @ptrCast(&img_info), .p_buffer_info = undefined, .p_texel_buffer_view = undefined, }), 0, null); return .{ .alloc = alloc, .vkb = vkb, @@ -612,6 +808,17 @@ pub const Context = struct { .image_available = image_available, .render_finished = render_finished, .in_flight_fence = in_flight_fence, .quad_vertex_buffer = quad.buffer, .quad_vertex_memory = quad.memory, .instance_buffer = inst.buffer, .instance_memory = inst.memory, .instance_capacity = max_instances, .atlas_image = atlas_image, .atlas_memory = atlas_memory, .atlas_view = atlas_view, .atlas_sampler = atlas_sampler, .atlas_width = atlas_width, .atlas_height = atlas_height, }; } @@ -619,6 +826,16 @@ pub const Context = struct { // Wait for device to be idle before destroying anything _ = self.vkd.deviceWaitIdle(self.device) catch {}; // Atlas + buffers (in reverse order of creation) self.vkd.destroySampler(self.device, self.atlas_sampler, null); self.vkd.destroyImageView(self.device, self.atlas_view, null); self.vkd.destroyImage(self.device, self.atlas_image, null); self.vkd.freeMemory(self.device, self.atlas_memory, null); 
self.vkd.destroyBuffer(self.device, self.instance_buffer, null); self.vkd.freeMemory(self.device, self.instance_memory, null); self.vkd.destroyBuffer(self.device, self.quad_vertex_buffer, null); self.vkd.freeMemory(self.device, self.quad_vertex_memory, null); // Sync objects self.vkd.destroyFence(self.device, self.in_flight_fence, null); self.vkd.destroySemaphore(self.device, self.render_finished, null); @@ -718,6 +935,246 @@ pub const Context = struct { .p_image_indices = @ptrCast(&image_index), }); } /// Upload CPU R8 pixels into the GPU atlas image. /// Uses a staging buffer + one-shot command buffer. /// Transitions: UNDEFINED -> TRANSFER_DST -> SHADER_READ_ONLY. pub fn uploadAtlas(self: *Context, pixels: []const u8) !void { const size: vk.DeviceSize = @intCast(pixels.len); // Create staging buffer const staging = try createHostVisibleBuffer( self.vki, self.physical_device, self.vkd, self.device, size, .{ .transfer_src_bit = true }, ); defer { self.vkd.destroyBuffer(self.device, staging.buffer, null); self.vkd.freeMemory(self.device, staging.memory, null); } // Copy pixels into staging buffer const mapped = try self.vkd.mapMemory(self.device, staging.memory, 0, size, .{}); @memcpy(@as([*]u8, @ptrCast(mapped))[0..pixels.len], pixels); self.vkd.unmapMemory(self.device, staging.memory); // One-shot command buffer var cb: vk.CommandBuffer = undefined; try self.vkd.allocateCommandBuffers(self.device, &vk.CommandBufferAllocateInfo{ .command_pool = self.command_pool, .level = .primary, .command_buffer_count = 1, }, @ptrCast(&cb)); defer self.vkd.freeCommandBuffers(self.device, self.command_pool, 1, @ptrCast(&cb)); try self.vkd.beginCommandBuffer(cb, &vk.CommandBufferBeginInfo{ .flags = .{ .one_time_submit_bit = true }, }); // Barrier: UNDEFINED -> TRANSFER_DST const barrier_to_transfer = vk.ImageMemoryBarrier{ .src_access_mask = .{}, .dst_access_mask = .{ .transfer_write_bit = true }, .old_layout = .undefined, .new_layout = .transfer_dst_optimal, 
.src_queue_family_index = vk.QUEUE_FAMILY_IGNORED, .dst_queue_family_index = vk.QUEUE_FAMILY_IGNORED, .image = self.atlas_image, .subresource_range = .{ .aspect_mask = .{ .color_bit = true }, .base_mip_level = 0, .level_count = 1, .base_array_layer = 0, .layer_count = 1, }, }; self.vkd.cmdPipelineBarrier( cb, .{ .top_of_pipe_bit = true }, .{ .transfer_bit = true }, .{}, 0, null, 0, null, 1, @ptrCast(&barrier_to_transfer), ); // Copy buffer -> image const region = vk.BufferImageCopy{ .buffer_offset = 0, .buffer_row_length = 0, .buffer_image_height = 0, .image_subresource = .{ .aspect_mask = .{ .color_bit = true }, .mip_level = 0, .base_array_layer = 0, .layer_count = 1, }, .image_offset = .{ .x = 0, .y = 0, .z = 0 }, .image_extent = .{ .width = self.atlas_width, .height = self.atlas_height, .depth = 1 }, }; self.vkd.cmdCopyBufferToImage(cb, staging.buffer, self.atlas_image, .transfer_dst_optimal, 1, @ptrCast(®ion)); // Barrier: TRANSFER_DST -> SHADER_READ_ONLY const barrier_to_shader = vk.ImageMemoryBarrier{ .src_access_mask = .{ .transfer_write_bit = true }, .dst_access_mask = .{ .shader_read_bit = true }, .old_layout = .transfer_dst_optimal, .new_layout = .shader_read_only_optimal, .src_queue_family_index = vk.QUEUE_FAMILY_IGNORED, .dst_queue_family_index = vk.QUEUE_FAMILY_IGNORED, .image = self.atlas_image, .subresource_range = .{ .aspect_mask = .{ .color_bit = true }, .base_mip_level = 0, .level_count = 1, .base_array_layer = 0, .layer_count = 1, }, }; self.vkd.cmdPipelineBarrier( cb, .{ .transfer_bit = true }, .{ .fragment_shader_bit = true }, .{}, 0, null, 0, null, 1, @ptrCast(&barrier_to_shader), ); try self.vkd.endCommandBuffer(cb); try self.vkd.queueSubmit(self.graphics_queue, 1, @ptrCast(&vk.SubmitInfo{ .command_buffer_count = 1, .p_command_buffers = @ptrCast(&cb), }), .null_handle); try self.vkd.queueWaitIdle(self.graphics_queue); } /// Map the instance buffer, copy instances in, unmap. 
pub fn uploadInstances(self: *Context, instances: []const Instance) !void { if (instances.len > self.instance_capacity) return error.TooManyInstances; const size: vk.DeviceSize = @sizeOf(Instance) * instances.len; const mapped = try self.vkd.mapMemory(self.device, self.instance_memory, 0, size, .{}); @memcpy(@as([*]Instance, @ptrCast(@alignCast(mapped)))[0..instances.len], instances); self.vkd.unmapMemory(self.device, self.instance_memory); } /// Full draw pass: bind pipeline, push constants, vertex + instance buffers, draw, present. pub fn drawCells(self: *Context, instance_count: u32, cell_size: [2]f32) !void { // Wait for previous frame to finish _ = try self.vkd.waitForFences(self.device, 1, @ptrCast(&self.in_flight_fence), .true, std.math.maxInt(u64)); try self.vkd.resetFences(self.device, 1, @ptrCast(&self.in_flight_fence)); // Acquire next image const acquire = try self.vkd.acquireNextImageKHR( self.device, self.swapchain, std.math.maxInt(u64), self.image_available, .null_handle, ); const image_index = acquire.image_index; // Record command buffer try self.vkd.resetCommandBuffer(self.command_buffer, .{}); try self.vkd.beginCommandBuffer(self.command_buffer, &vk.CommandBufferBeginInfo{ .flags = .{ .one_time_submit_bit = true }, }); const clear_value = vk.ClearValue{ .color = .{ .float_32 = .{ 0.0, 0.0, 0.0, 1.0 } }, }; self.vkd.cmdBeginRenderPass(self.command_buffer, &vk.RenderPassBeginInfo{ .render_pass = self.render_pass, .framebuffer = self.framebuffers[image_index], .render_area = .{ .offset = .{ .x = 0, .y = 0 }, .extent = self.swapchain_extent, }, .clear_value_count = 1, .p_clear_values = @ptrCast(&clear_value), }, .@"inline"); self.vkd.cmdBindPipeline(self.command_buffer, .graphics, self.pipeline); // Dynamic viewport + scissor const viewport = vk.Viewport{ .x = 0.0, .y = 0.0, .width = @floatFromInt(self.swapchain_extent.width), .height = @floatFromInt(self.swapchain_extent.height), .min_depth = 0.0, .max_depth = 1.0, }; 
self.vkd.cmdSetViewport(self.command_buffer, 0, 1, @ptrCast(&viewport)); const scissor = vk.Rect2D{ .offset = .{ .x = 0, .y = 0 }, .extent = self.swapchain_extent, }; self.vkd.cmdSetScissor(self.command_buffer, 0, 1, @ptrCast(&scissor)); // Push constants const pc = PushConstants{ .viewport_size = .{ @floatFromInt(self.swapchain_extent.width), @floatFromInt(self.swapchain_extent.height), }, .cell_size = cell_size, }; self.vkd.cmdPushConstants( self.command_buffer, self.pipeline_layout, .{ .vertex_bit = true }, 0, @sizeOf(PushConstants), @ptrCast(&pc), ); // Bind descriptor set (atlas sampler) self.vkd.cmdBindDescriptorSets( self.command_buffer, .graphics, self.pipeline_layout, 0, 1, @ptrCast(&self.descriptor_set), 0, null, ); // Bind vertex buffers: binding 0 = quad, binding 1 = instances const buffers = [_]vk.Buffer{ self.quad_vertex_buffer, self.instance_buffer }; const offsets = [_]vk.DeviceSize{ 0, 0 }; self.vkd.cmdBindVertexBuffers(self.command_buffer, 0, 2, &buffers, &offsets); self.vkd.cmdDraw(self.command_buffer, 6, instance_count, 0, 0); self.vkd.cmdEndRenderPass(self.command_buffer); try self.vkd.endCommandBuffer(self.command_buffer); // Submit const wait_stage = vk.PipelineStageFlags{ .color_attachment_output_bit = true }; try self.vkd.queueSubmit(self.graphics_queue, 1, @ptrCast(&vk.SubmitInfo{ .wait_semaphore_count = 1, .p_wait_semaphores = @ptrCast(&self.image_available), .p_wait_dst_stage_mask = @ptrCast(&wait_stage), .command_buffer_count = 1, .p_command_buffers = @ptrCast(&self.command_buffer), .signal_semaphore_count = 1, .p_signal_semaphores = @ptrCast(&self.render_finished), }), self.in_flight_fence); // Present _ = try self.vkd.queuePresentKHR(self.present_queue, &vk.PresentInfoKHR{ .wait_semaphore_count = 1, .p_wait_semaphores = @ptrCast(&self.render_finished), .swapchain_count = 1, .p_swapchains = @ptrCast(&self.swapchain), .p_image_indices = @ptrCast(&image_index), }); } }; test "vulkan module imports" {