a73x

00acf735

Implement commit_link::scan_and_link orchestrator

alex emery   2026-04-12 06:59

Walks refs/heads/* (dedup'd via HashSet<Oid>), parses Issue: trailers
from each commit, resolves to an issue ref, and appends IssueCommitLink
events. Per-sync cache keyed by resolved ref name absorbs repeat
matches; all per-commit and per-issue errors become stderr warnings so
the scan never breaks sync. Archived issues are skipped with a warning.

diff --git a/src/commit_link.rs b/src/commit_link.rs
index cd5f631..f4ffa51 100644
--- a/src/commit_link.rs
+++ b/src/commit_link.rs
@@ -2,13 +2,13 @@
//!
//! See: docs/superpowers/specs/2026-04-12-commit-issue-link-design.md

use std::collections::HashSet;
use std::collections::{HashMap, HashSet};

use git2::Repository;
use git2::{Oid, Repository, Sort};

use crate::dag;
use crate::error::Error;
use crate::event::Action;
use crate::event::{Action, Author, Event};

/// Walk an issue's event DAG and return every commit SHA that has an
/// `IssueCommitLink` event attached. Called lazily on first match per issue
@@ -133,3 +133,147 @@ fn match_issue_line(line: &str) -> Option<String> {
    }
    Some(value.to_string())
}

/// Ref-name prefix that `scan_and_link` requires on a resolved issue ref
/// before it will append a link event to that ref.
const ACTIVE_ISSUE_PREFIX: &str = "refs/collab/issues/";
/// Ref-name prefix that `scan_and_link` treats as an archived issue: a
/// trailer resolving under it is skipped with a warning instead of linked.
const ARCHIVED_ISSUE_PREFIX: &str = "refs/collab/archive/issues/";

/// Walk every commit reachable from `refs/heads/*`, parse `Issue:` trailers,
/// resolve each to an issue, and emit an `IssueCommitLink` event for any
/// (issue, commit) pair that doesn't already have one.
///
/// Commit messages are decoded lossily, so a commit whose message is not
/// valid UTF-8 still has its trailers scanned instead of being treated as
/// an empty message.
///
/// **Never breaks sync.** Per-commit and per-issue errors are logged as
/// one-line stderr warnings and iteration continues. The only errors that
/// propagate are "couldn't even start" failures (opening the repo, building
/// the revwalk). Callers treat a returned `Err` as "skip the link scan for
/// this sync" and proceed.
///
/// Archived issues, and refs outside the active issue namespace, are skipped
/// with a warning. An issue whose existing link events cannot be read is
/// skipped for the remainder of the scan.
///
/// # Errors
///
/// Returns `Err` only when the revwalk cannot be created or configured, or
/// when the `refs/heads/*` glob cannot be enumerated.
///
/// Returns the number of events actually emitted.
pub fn scan_and_link(
    repo: &Repository,
    author: &Author,
    sk: &ed25519_dalek::SigningKey,
) -> Result<usize, Error> {
    // Build a revwalk seeded from every local branch tip.
    let mut revwalk = repo.revwalk()?;
    revwalk.set_sorting(Sort::TOPOLOGICAL)?;

    let mut seeded_any = false;
    for reference in repo.references_glob("refs/heads/*")? {
        // Unreadable refs and refs without a direct target are ignored.
        let Ok(reference) = reference else { continue };
        let Some(target) = reference.target() else {
            continue;
        };
        // `revwalk.push` dedups commits across branch tips internally.
        if revwalk.push(target).is_ok() {
            seeded_any = true;
        }
    }
    if !seeded_any {
        // Detached HEAD with no local branches. Silent no-op per spec.
        return Ok(0);
    }

    // Per-sync dedup of commits already visited.
    let mut visited: HashSet<Oid> = HashSet::new();
    // Cache of existing link SHAs per resolved issue ref. `None` = poisoned.
    let mut link_cache: HashMap<String, Option<HashSet<String>>> = HashMap::new();
    let mut emitted: usize = 0;

    for oid_result in revwalk {
        let oid = match oid_result {
            Ok(o) => o,
            Err(e) => {
                eprintln!("warning: revwalk error, stopping scan: {}", e);
                break;
            }
        };
        if !visited.insert(oid) {
            continue;
        }
        let commit = match repo.find_commit(oid) {
            Ok(c) => c,
            Err(e) => {
                eprintln!("warning: cannot load commit {}: {}", oid, e);
                continue;
            }
        };

        // `Commit::message()` returns `None` when the message is not valid
        // UTF-8, which would silently hide any trailers in it. Decode the raw
        // bytes lossily instead so ASCII `Issue:` trailers are still found.
        let message = String::from_utf8_lossy(commit.message_bytes());
        let trailers = parse_issue_trailers(&message);
        if trailers.is_empty() {
            continue;
        }

        for prefix in trailers {
            let resolved_ref = match crate::state::resolve_issue_ref(repo, &prefix) {
                Ok((resolved_ref, _resolved_id)) => resolved_ref,
                Err(e) => {
                    // resolve_issue_ref error message already distinguishes
                    // "no issue found" from "ambiguous prefix".
                    eprintln!(
                        "warning: commit {}: Issue: {} — {}, skipping",
                        oid, prefix, e
                    );
                    continue;
                }
            };

            if resolved_ref.starts_with(ARCHIVED_ISSUE_PREFIX) {
                eprintln!(
                    "warning: commit {}: Issue: {} — issue is archived, skipping",
                    oid, prefix
                );
                continue;
            }
            if !resolved_ref.starts_with(ACTIVE_ISSUE_PREFIX) {
                // Unknown namespace. Should not happen with current
                // resolver, but belt-and-braces.
                eprintln!(
                    "warning: commit {}: Issue: {} — resolved to unexpected ref {}, skipping",
                    oid, prefix, resolved_ref
                );
                continue;
            }

            // Load the issue's existing links at most once per scan; a failed
            // load is remembered as `None` so the warning is printed once.
            let entry = link_cache.entry(resolved_ref.clone()).or_insert_with(|| {
                match collect_linked_shas(repo, &resolved_ref) {
                    Ok(set) => Some(set),
                    Err(e) => {
                        eprintln!(
                            "warning: cannot read link events for {}: {} — skipping issue for the rest of this sync",
                            resolved_ref, e
                        );
                        None
                    }
                }
            });
            let Some(set) = entry.as_mut() else { continue };

            let sha = oid.to_string();
            if set.contains(&sha) {
                continue;
            }

            let event = Event {
                timestamp: chrono::Utc::now().to_rfc3339(),
                author: author.clone(),
                action: Action::IssueCommitLink {
                    commit: sha.clone(),
                },
                clock: 0,
            };
            match dag::append_event(repo, &resolved_ref, &event, sk) {
                Ok(_) => {
                    // Record the link only after a successful append, so a
                    // failed append is retried if the pair shows up again.
                    set.insert(sha);
                    emitted += 1;
                }
                Err(e) => {
                    eprintln!(
                        "warning: failed to emit IssueCommitLink on {}: {}",
                        resolved_ref, e
                    );
                }
            }
        }
    }

    Ok(emitted)
}
diff --git a/tests/sync_test.rs b/tests/sync_test.rs
index 070a63a..96bca94 100644
--- a/tests/sync_test.rs
+++ b/tests/sync_test.rs
@@ -1185,3 +1185,67 @@ fn test_corrupted_state_file_handled_gracefully() {
        "corrupted state file should be deleted"
    );
}

// ---------------------------------------------------------------------------
// Commit-link tests (src/commit_link.rs)
// ---------------------------------------------------------------------------

use git_collab::commit_link;

/// Test helper: append a commit carrying `message` to `refs/heads/main` in
/// `repo`, reusing the parent's tree, and return the new commit's id.
///
/// If `repo` has no local `refs/heads/main`, it is first created at the
/// commit `refs/remotes/origin/main` points to.
fn make_commit_with_message(cluster: &TestCluster, repo: &Repository, message: &str) -> git2::Oid {
    // `cluster` is accepted but not used; bind it to avoid an unused warning.
    let _ = cluster;
    let who = git2::Signature::now("Alice", "alice@example.com").unwrap();

    // The TestCluster bare repo commits onto refs/heads/main while its HEAD
    // stays on the default refs/heads/master, so a clone does not get a local
    // refs/heads/main for free. Seed it from origin/main when it is missing.
    let tip = match repo.find_reference("refs/heads/main") {
        Ok(local_main) => local_main.target().unwrap(),
        Err(_) => {
            let origin_main = repo
                .find_reference("refs/remotes/origin/main")
                .expect("origin/main should exist on the cloned test repo");
            let seed = origin_main.target().unwrap();
            repo.reference("refs/heads/main", seed, false, "seed local main")
                .unwrap();
            seed
        }
    };

    // Reuse the parent's tree unchanged: only the message matters here.
    let parent = repo.find_commit(tip).unwrap();
    let tree = repo.find_tree(parent.tree().unwrap().id()).unwrap();

    repo.commit(Some("refs/heads/main"), &who, &who, message, &tree, &[&parent])
        .unwrap()
}

/// A commit whose `Issue:` trailer names an open issue by an 8-character id
/// prefix should produce exactly one link event on that issue.
#[test]
fn commit_link_scan_emits_event_for_matching_trailer() {
    let cluster = TestCluster::new();
    let repo = cluster.alice_repo();

    // Arrange: one open issue, plus one commit pointing at it via a trailer.
    let (issue_ref, issue_id) = open_issue(&repo, &alice(), "fix the walker");
    let short_id = &issue_id[..8];
    let linked_oid = make_commit_with_message(
        &cluster,
        &repo,
        &format!("Fix walker\n\nIssue: {}", short_id),
    );

    // Act: call the scanner directly (sync integration is tested elsewhere).
    let author = git_collab::identity::get_author(&repo).unwrap();
    let key_dir = signing::signing_key_dir().unwrap();
    let sk = signing::load_signing_key(&key_dir).unwrap();
    assert_eq!(
        commit_link::scan_and_link(&repo, &author, &sk).unwrap(),
        1
    );

    // Assert: replaying the issue's event log shows the single linked commit.
    let state = IssueState::from_ref_uncached(&repo, &issue_ref, &issue_id).unwrap();
    assert_eq!(state.linked_commits.len(), 1);
    assert_eq!(state.linked_commits[0].commit, linked_oid.to_string());
}