6402847a
Use explicit (clock, timestamp, oid) tiebreak for linked-commit dedup
alex emery 2026-04-12 10:38
Render-time dedup of `IssueCommitLink` events previously relied on the revwalk's topological order (Sort::TOPOLOGICAL | Sort::REVERSE) and a first-seen-wins guard. That breaks for cross-machine concurrent emissions: when Alice and Bob both link the same SHA on different machines and the DAGs merge, the topo order does not deterministically correspond to the spec's "earliest by (clock, then timestamp)" rule. Replace the in-place dedup with an accumulator (HashMap<sha, (key, link)>) that compares an explicit (clock, timestamp, oid_hex) sort key per event and keeps the minimum. After the walk, flush entries into linked_commits sorted by the same key so the rendered order is stable across runs (the old code happened to be stable only because of HashMap iteration luck). Add a regression test that appends two events for the same SHA in linear order but with the second appended event carrying a LOWER clock than the first. Naive first-seen-wins surfaces the wrong event; the new logic correctly keeps the lower-clock entry. The test uses a low-level helper that bypasses `dag::append_event` to set an explicit clock value. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/src/state.rs b/src/state.rs index ff007e1..9e53bc9 100644 --- a/src/state.rs +++ b/src/state.rs @@ -1,3 +1,4 @@ use std::collections::HashMap; use std::fmt; use chrono::{DateTime, Utc}; @@ -259,6 +260,14 @@ impl IssueState { // Higher clock wins; on tie, lexicographically higher OID wins. let mut status_key: Option<(u64, String)> = None; // Accumulator for IssueCommitLink dedup. Keyed by commit SHA, valued by // the (clock, timestamp, oid_hex) sort key plus the LinkedCommit payload. // We keep the entry with the minimum sort key per SHA — i.e. the // earliest emission by (clock, timestamp, oid). Topological walk order // alone does not guarantee this for cross-machine concurrent events // that are reconciled via merge commits, so we sort explicitly. let mut link_acc: HashMap<String, ((u64, String, String), LinkedCommit)> = HashMap::new(); for (oid, event) in events { let ts = parse_timestamp(&event.timestamp); if latest.as_ref().is_none_or(|(prev, _)| ts > *prev) { @@ -350,18 +359,26 @@ impl IssueState { } } Action::IssueCommitLink { commit } => { if let Some(ref mut s) = state { // Render-time dedup by commit SHA. The revwalk is // topo-oldest-first (Sort::TOPOLOGICAL | Sort::REVERSE), // so the first event we see for a given SHA is the // earliest emission — exactly what the spec requires. if !s.linked_commits.iter().any(|lc| lc.commit == commit) { s.linked_commits.push(LinkedCommit { commit, event_author: event.author.clone(), event_timestamp: event.timestamp.clone(), }); } if state.is_some() { // Render-time dedup by commit SHA. We compare an // explicit (clock, timestamp, oid_hex) key per event // and keep the minimum so the surviving entry is the // earliest emission per the spec, regardless of how // the merged DAG happens to topo-order itself. let key = (event.clock, event.timestamp.clone(), oid.to_string()); let new_link = LinkedCommit { commit: commit.clone(), event_author: event.author.clone(), event_timestamp: event.timestamp.clone(), }; link_acc .entry(commit) .and_modify(|existing| { if key < existing.0 { *existing = (key.clone(), new_link.clone()); } }) .or_insert((key, new_link)); } } _ => {} @@ -370,6 +387,15 @@ impl IssueState { if let Some(ref mut s) = state { s.last_updated = latest.map(|(_, raw)| raw).unwrap_or_default(); // Flush the linked-commit accumulator into state.linked_commits in // a stable order. Sort by the same (clock, timestamp, oid) key we // used for the per-SHA min so the rendered list is deterministic // across runs (HashMap iteration order is randomized). let mut entries: Vec<((u64, String, String), LinkedCommit)> = link_acc.into_values().collect(); entries.sort_by(|a, b| a.0.cmp(&b.0)); s.linked_commits = entries.into_iter().map(|(_, lc)| lc).collect(); } state.ok_or_else(|| git2::Error::from_str("no IssueOpen event found in DAG").into()) } diff --git a/tests/commit_link_test.rs b/tests/commit_link_test.rs index dabf514..5e33150 100644 --- a/tests/commit_link_test.rs +++ b/tests/commit_link_test.rs @@ -1,11 +1,61 @@ //! Unit tests for the Action::IssueCommitLink event variant. use git2::Repository; use git_collab::event::{Action, Author, Event}; use git_collab::identity::author_signature; use git_collab::signing::sign_event; use git_collab::state::IssueState; use tempfile::TempDir; mod common; use common::{add_commit_link, alice, init_repo, open_issue, ScopedTestConfig}; use common::{ add_commit_link, alice, init_repo, open_issue, test_signing_key, ScopedTestConfig, }; /// Append a commit-link event with an explicitly chosen `clock` value, /// bypassing `dag::append_event`'s automatic clock-bump. The new commit's /// parent is the current ref tip. Returns the new tip OID. fn append_commit_link_with_clock( repo: &Repository, ref_name: &str, author: &Author, commit_sha: &str, timestamp: &str, clock: u64, ) -> git2::Oid { let sk = test_signing_key(); let event = Event { timestamp: timestamp.to_string(), author: author.clone(), action: Action::IssueCommitLink { commit: commit_sha.to_string(), }, clock, }; let detached = sign_event(&event, &sk).unwrap(); let event_json = serde_json::to_vec_pretty(&event).unwrap(); let manifest = br#"{"version":1,"format":"git-collab"}"#; let event_blob = repo.blob(&event_json).unwrap(); let sig_blob = repo.blob(detached.signature.as_bytes()).unwrap(); let pubkey_blob = repo.blob(detached.pubkey.as_bytes()).unwrap(); let manifest_blob = repo.blob(manifest).unwrap(); let mut tb = repo.treebuilder(None).unwrap(); tb.insert("event.json", event_blob, 0o100644).unwrap(); tb.insert("signature", sig_blob, 0o100644).unwrap(); tb.insert("pubkey", pubkey_blob, 0o100644).unwrap(); tb.insert("manifest.json", manifest_blob, 0o100644).unwrap(); let tree_oid = tb.write().unwrap(); let tree = repo.find_tree(tree_oid).unwrap(); let sig = author_signature(author).unwrap(); let parent_oid = repo.refname_to_id(ref_name).unwrap(); let parent = repo.find_commit(parent_oid).unwrap(); repo.commit(Some(ref_name), &sig, &sig, "issue.commit_link", &tree, &[&parent]) .unwrap() } fn test_author() -> Author { Author { @@ -70,6 +120,53 @@ fn issue_state_surfaces_commit_links_in_order() { } #[test] fn issue_state_dedups_commit_links_keeps_lower_clock_even_when_appended_later() { // This locks in the (clock, timestamp, oid) tiebreak rule. We append two // events for the same commit SHA in linear order, but the SECOND event we // append has a LOWER clock than the first — simulating a cross-machine // case where Bob's locally-appended event was actually authored earlier // (in clock terms) than Alice's. A naive first-seen-wins implementation // would surface Alice's event because it appears first in the topo walk; // the spec-correct dedup must surface Bob's lower-clock event instead. let _config = ScopedTestConfig::new(); let dir = TempDir::new().unwrap(); let repo = init_repo(dir.path(), &alice()); let (ref_name, id) = open_issue(&repo, &alice(), "bug"); let target_sha = sha(0xdd); // Alice appends with clock 50 first. append_commit_link_with_clock( &repo, &ref_name, &alice(), &target_sha, "2026-04-12T12:00:00Z", 50, ); // Bob appends second, but with a LOWER clock (10) — as if his event was // authored earlier on his machine and we're now seeing the merged DAG. append_commit_link_with_clock( &repo, &ref_name, &common::bob(), &target_sha, "2026-04-12T11:00:00Z", 10, ); let issue = IssueState::from_ref_uncached(&repo, &ref_name, &id).unwrap(); assert_eq!(issue.linked_commits.len(), 1); assert_eq!(issue.linked_commits[0].commit, target_sha); // The lower-clock event wins, even though it was appended later. assert_eq!( issue.linked_commits[0].event_author.name, "Bob", "expected Bob's lower-clock event to win the tiebreak" ); } #[test] fn issue_state_dedups_commit_links_by_sha_keeping_earliest() { let _config = ScopedTestConfig::new(); let dir = TempDir::new().unwrap();