a73x

src/commit_link.rs

Ref:   Size: 10.4 KiB

//! Auto-link commits to issues via `Issue:` git trailers during sync.
//!
//! See: docs/superpowers/specs/2026-04-12-commit-issue-link-design.md

use std::collections::{HashMap, HashSet};

use git2::{Oid, Repository, Sort};

use crate::dag;
use crate::error::Error;
use crate::event::{Action, Author, Event};

/// Gather the commit SHAs already linked to an issue.
///
/// Reads the issue's events via `dag::walk_events` and keeps the `commit`
/// payload of every `Action::IssueCommitLink` event; all other actions are
/// ignored. `scan_and_link` calls this at most once per resolved issue ref
/// and caches the outcome for the remainder of the scan.
///
/// # Errors
///
/// Propagates any error returned by `dag::walk_events`.
pub fn collect_linked_shas(repo: &Repository, issue_ref: &str) -> Result<HashSet<String>, Error> {
    let linked = dag::walk_events(repo, issue_ref)?
        .into_iter()
        .filter_map(|(_, event)| match event.action {
            Action::IssueCommitLink { commit } => Some(commit),
            _ => None,
        })
        .collect();
    Ok(linked)
}

/// Extract the values of `Issue:` trailers from a commit message.
///
/// Only the final paragraph of `message` is inspected, where a paragraph is
/// a run of lines delimited by blank (empty or whitespace-only) lines.
/// Trailing blank lines are ignored. A message made of a single paragraph is
/// treated as its own final paragraph.
///
/// The paragraph counts as a trailer block only if *every* line in it is
/// trailer-shaped (see `is_trailer_shaped`). One prose line disqualifies the
/// whole paragraph, so a body such as `"Thanks Bob.\nIssue: abc"` yields
/// nothing.
///
/// Within a qualifying block, each line whose key matches `issue`
/// (ASCII case-insensitive) and whose value is a single whitespace-free
/// token contributes that token. Values with interior whitespace, such as
/// `abc fixes thing`, are dropped rather than truncated.
///
/// Returns the accepted values in order of appearance; the vector is empty
/// when the message has no qualifying trailer block.
pub fn parse_issue_trailers(message: &str) -> Vec<String> {
    fn is_blank(line: &str) -> bool {
        line.trim().is_empty()
    }

    // Scan from the bottom: discard trailing blank lines, then keep lines
    // until the blank line that separates the previous paragraph. The result
    // is the final paragraph in reverse order (possibly empty).
    let reversed_paragraph: Vec<&str> = message
        .lines()
        .rev()
        .skip_while(|line| is_blank(line))
        .take_while(|line| !is_blank(line))
        .collect();

    // A single line that is not trailer-shaped rejects the whole paragraph.
    let all_trailers = reversed_paragraph
        .iter()
        .all(|line| is_trailer_shaped(line));
    if !all_trailers {
        return Vec::new();
    }

    // Undo the reversal so values come out in the order they were written.
    reversed_paragraph
        .iter()
        .rev()
        .filter_map(|line| match_issue_line(line))
        .collect()
}

/// Reports whether `line` has the shape of a git trailer, `<token>: <value>`.
///
/// Leading whitespace on the line is ignored. The token is everything before
/// the first `:` with trailing whitespace removed; it must begin with an
/// ASCII letter and continue with ASCII letters, digits, or `-` only. The
/// value is everything after that first `:` and must contain at least one
/// non-whitespace character.
fn is_trailer_shaped(line: &str) -> bool {
    let Some((before_colon, after_colon)) = line.trim_start().split_once(':') else {
        return false;
    };

    // Whitespace between the token and the colon is tolerated so that
    // `ISSUE : abc` is judged the same way `match_issue_line` judges it;
    // otherwise that one line would sink the whole trailer paragraph.
    let mut token_chars = before_colon.trim_end().chars();

    // An empty token yields `None` here and is rejected along with tokens
    // that start with a non-letter.
    let starts_with_letter = matches!(token_chars.next(), Some(c) if c.is_ascii_alphabetic());

    starts_with_letter
        && token_chars.all(|c| c.is_ascii_alphanumeric() || c == '-')
        && !after_colon.trim().is_empty()
}

/// Returns the value of an `Issue: <token>` trailer line, if `line` is one.
///
/// The key is the text before the first `:` (surrounding whitespace
/// ignored) and must equal `issue`, compared ASCII case-insensitively. The
/// value after the colon must consist of exactly one whitespace-free token;
/// an empty value or one with several tokens gives `None`.
fn match_issue_line(line: &str) -> Option<String> {
    let (key, rest) = line.trim_start().split_once(':')?;
    if !key.trim_end().eq_ignore_ascii_case("issue") {
        return None;
    }

    // Pull at most two tokens: exactly one must exist. Commentary such as
    // `abc fixes thing` must parse to nothing rather than silently to `abc`,
    // so the user can see that the line is being ignored.
    let mut tokens = rest.split_whitespace();
    match (tokens.next(), tokens.next()) {
        (Some(only), None) => Some(only.to_string()),
        _ => None,
    }
}

/// Ref-name prefix that `scan_and_link` requires of a resolved issue ref
/// before it will append an `IssueCommitLink` event to it.
const ACTIVE_ISSUE_PREFIX: &str = "refs/collab/issues/";
/// Ref-name prefix that `scan_and_link` treats as an archived issue; trailers
/// resolving to a ref under it are skipped with a warning.
const ARCHIVED_ISSUE_PREFIX: &str = "refs/collab/archive/issues/";

/// Link commits to issues by scanning local branches for `Issue:` trailers.
///
/// Every commit reachable from a `refs/heads/*` tip is visited in
/// topological order. Each `Issue:` trailer value found by
/// `parse_issue_trailers` is resolved with `crate::state::resolve_issue_ref`,
/// and an `IssueCommitLink` event is appended to the issue for every
/// (issue, commit) pair that does not already carry one.
///
/// **Never breaks sync.** Problems with an individual commit, trailer, or
/// issue are reported as a one-line warning on stderr and the scan moves on.
/// An error while iterating the revwalk stops the scan early but still
/// returns the count emitted so far. Trailers that resolve to an archived
/// issue, or to a ref outside the active issue namespace, are skipped with a
/// warning.
///
/// If no branch tip could be pushed onto the revwalk (for example a detached
/// HEAD with no local branches) the function is a silent no-op.
///
/// # Errors
///
/// Only setup failures propagate: creating the revwalk, setting its sort
/// order, or listing `refs/heads/*`. Callers treat a returned `Err` as "skip
/// the link scan for this sync" and proceed.
///
/// # Returns
///
/// The number of `IssueCommitLink` events actually emitted.
pub fn scan_and_link(
    repo: &Repository,
    author: &Author,
    sk: &ed25519_dalek::SigningKey,
) -> Result<usize, Error> {
    let mut walker = repo.revwalk()?;
    walker.set_sorting(Sort::TOPOLOGICAL)?;

    // Seed the walk from each local branch tip. References that fail to load
    // or whose `target()` is `None` are skipped. `push` dedups commits shared
    // between branch tips internally.
    let mut seeded_any = false;
    let branch_tips = repo
        .references_glob("refs/heads/*")?
        .filter_map(|branch| branch.ok()?.target());
    for tip in branch_tips {
        seeded_any |= walker.push(tip).is_ok();
    }
    if !seeded_any {
        // Nothing to walk: silent no-op per spec.
        return Ok(0);
    }

    // Commits already handled during this scan.
    let mut seen: HashSet<Oid> = HashSet::new();
    // Known link SHAs per resolved issue ref, loaded on first use. A `None`
    // entry marks an issue whose events could not be read; it stays skipped
    // for the rest of the scan.
    let mut links_by_issue: HashMap<String, Option<HashSet<String>>> = HashMap::new();
    let mut emitted = 0usize;

    for step in walker {
        let oid = match step {
            Ok(oid) => oid,
            Err(e) => {
                eprintln!("warning: revwalk error, stopping scan: {}", e);
                break;
            }
        };
        if !seen.insert(oid) {
            continue;
        }
        let commit = match repo.find_commit(oid) {
            Ok(commit) => commit,
            Err(e) => {
                eprintln!("warning: cannot load commit {}: {}", oid, e);
                continue;
            }
        };

        // A message that is not valid UTF-8 is treated as empty, which
        // yields no trailers and therefore no work for this commit.
        for prefix in parse_issue_trailers(commit.message().unwrap_or("")) {
            let issue_ref = match crate::state::resolve_issue_ref(repo, &prefix) {
                Ok((resolved, _)) => resolved,
                Err(e) => {
                    // The resolver's own message already distinguishes
                    // "no issue found" from "ambiguous prefix".
                    eprintln!(
                        "warning: commit {}: Issue: {} — {}, skipping",
                        oid, prefix, e
                    );
                    continue;
                }
            };

            if issue_ref.starts_with(ARCHIVED_ISSUE_PREFIX) {
                eprintln!(
                    "warning: commit {}: Issue: {} — issue is archived, skipping",
                    oid, prefix
                );
                continue;
            }
            if !issue_ref.starts_with(ACTIVE_ISSUE_PREFIX) {
                // Not expected from the current resolver; kept as a guard
                // against refs in an unknown namespace.
                eprintln!(
                    "warning: commit {}: Issue: {} — resolved to unexpected ref {}, skipping",
                    oid, prefix, issue_ref
                );
                continue;
            }

            // Load the issue's existing links the first time it is seen.
            let cached = links_by_issue.entry(issue_ref.clone()).or_insert_with(|| {
                collect_linked_shas(repo, &issue_ref)
                    .map_err(|e| {
                        eprintln!(
                            "warning: cannot read link events for {}: {} — skipping issue for the rest of this sync",
                            issue_ref, e
                        )
                    })
                    .ok()
            });
            let Some(known_links) = cached.as_mut() else {
                continue;
            };

            let sha = oid.to_string();
            if known_links.contains(&sha) {
                continue;
            }

            let event = Event {
                timestamp: chrono::Utc::now().to_rfc3339(),
                author: author.clone(),
                action: Action::IssueCommitLink {
                    commit: sha.clone(),
                },
                clock: 0,
            };
            if let Err(e) = dag::append_event(repo, &issue_ref, &event, sk) {
                eprintln!(
                    "warning: commit {}: failed to emit IssueCommitLink on {}: {}",
                    oid, issue_ref, e
                );
                continue;
            }

            // Record the new link so a repeated trailer for the same issue
            // on this commit does not emit a second event.
            known_links.insert(sha);
            emitted += 1;
        }
    }

    Ok(emitted)
}