a73x

979ee6bc

Add src/commit_link.rs with strict Issue: trailer parser

alex emery   2026-04-12 06:38

Implements the spec's strict trailer-block semantics: the final
paragraph qualifies only if every non-empty line is trailer-shaped,
which rejects prose false positives like 'Thanks Bob.\nIssue: a3f9'.
The Issue: value must be a single token, so loose commentary like
'Issue: abc fixes thing' is also rejected. The parser is pure (no git
or I/O) and is covered by 14 unit tests.

diff --git a/src/commit_link.rs b/src/commit_link.rs
new file mode 100644
index 0000000..0e1955d
--- /dev/null
+++ b/src/commit_link.rs
@@ -0,0 +1,112 @@
//! Auto-link commits to issues via `Issue:` git trailers during sync.
//!
//! See: docs/superpowers/specs/2026-04-12-commit-issue-link-design.md

/// Extract the values of `Issue:` trailers from a commit message.
///
/// Only the *final paragraph* of `message` is inspected: the last run of
/// consecutive non-blank lines, ignoring any blank or whitespace-only lines
/// that follow it. That paragraph counts as a trailer block only when every
/// one of its lines is trailer-shaped (`token: value`). A single prose line
/// disqualifies the whole paragraph, so a body such as
/// `"Thanks Bob.\nIssue: abc"` yields nothing. This is modelled on git
/// trailer blocks but is deliberately all-or-nothing.
///
/// Within a qualifying block, a line contributes a value when its key equals
/// `issue` (ASCII case-insensitive) and its value is exactly one
/// whitespace-free token. Lines such as `Issue: abc fixes thing` are skipped
/// rather than truncated to `abc`.
///
/// Returns the matched values in the order they appear in the message; the
/// result is empty when there is no qualifying block or no `Issue:` line.
pub fn parse_issue_trailers(message: &str) -> Vec<String> {
    /// True for lines that are empty or consist solely of whitespace.
    fn is_blank(line: &&str) -> bool {
        line.trim().is_empty()
    }

    // Walk the message backwards: drop trailing blank lines, then take lines
    // until the blank line that separates the last paragraph from the rest.
    let mut block: Vec<&str> = message
        .lines()
        .rev()
        .skip_while(is_blank)
        .take_while(|line| !is_blank(line))
        .collect();
    // The lines were gathered bottom-up; restore message order.
    block.reverse();

    // All-or-nothing: one non-trailer line invalidates the entire paragraph.
    // An empty block passes vacuously and simply produces no values below.
    if !block.iter().all(|line| is_trailer_shaped(line)) {
        return Vec::new();
    }

    block
        .iter()
        .filter_map(|line| match_issue_line(line))
        .collect()
}

/// Reports whether `line` has the shape of a trailer, `<token>: <value>`.
///
/// Leading whitespace is ignored and the line is split at its first `:`.
/// The token (with whitespace before the colon stripped) must begin with an
/// ASCII letter and otherwise contain only ASCII letters, digits, or `-`.
/// The value must contain at least one non-whitespace character.
fn is_trailer_shaped(line: &str) -> bool {
    // No colon at all means the line cannot be a trailer.
    let Some((raw_token, raw_value)) = line.trim_start().split_once(':') else {
        return false;
    };

    // Whitespace between the token and the colon is tolerated so that
    // `ISSUE : abc` is treated the same way `match_issue_line` treats it;
    // otherwise that one line would disqualify its whole paragraph.
    let mut token_chars = raw_token.trim_end().chars();

    // An empty token yields `None` here and is therefore rejected too.
    let starts_with_letter = matches!(token_chars.next(), Some(c) if c.is_ascii_alphabetic());

    starts_with_letter
        && token_chars.all(|c| c == '-' || c.is_ascii_alphanumeric())
        && !raw_value.trim().is_empty()
}

/// Returns the value of an `Issue: <token>` trailer line, if `line` is one.
///
/// The key is everything before the first `:` with surrounding whitespace
/// removed, compared to `issue` ignoring ASCII case. The value must consist
/// of exactly one whitespace-delimited token; an empty value or a value with
/// more than one token gives `None`.
fn match_issue_line(line: &str) -> Option<String> {
    let (raw_key, raw_value) = line.trim_start().split_once(':')?;
    if !raw_key.trim_end().eq_ignore_ascii_case("issue") {
        return None;
    }

    // Look at the first two tokens only: exactly one must exist. A value such
    // as `abc fixes thing` must not silently collapse to `abc`; it parses to
    // nothing so the commentary is visibly ignored.
    let mut tokens = raw_value.split_whitespace();
    match (tokens.next(), tokens.next()) {
        (Some(only), None) => Some(only.to_string()),
        _ => None,
    }
}
diff --git a/src/lib.rs b/src/lib.rs
index 29c501f..399889c 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,5 +1,6 @@
pub mod cache;
pub mod cli;
pub mod commit_link;
pub mod dag;
pub mod editor;
pub mod error;
diff --git a/tests/commit_link_test.rs b/tests/commit_link_test.rs
index d8506a3..bd07976 100644
--- a/tests/commit_link_test.rs
+++ b/tests/commit_link_test.rs
@@ -86,3 +86,98 @@ fn issue_state_dedups_commit_links_by_sha_keeping_earliest() {
    assert_eq!(issue.linked_commits[0].commit, sha(0xcc));
    assert_eq!(issue.linked_commits[0].event_author.name, "Alice");
}

use git_collab::commit_link::parse_issue_trailers;

/// A subject-only message with no `token: value` line yields no issues.
#[test]
fn parser_no_trailer_block() {
    let parsed = parse_issue_trailers("Just a plain commit");
    let expected: Vec<String> = Vec::new();
    assert_eq!(parsed, expected);
}

/// The empty message has no paragraphs at all, so nothing is extracted.
#[test]
fn parser_empty_message() {
    let parsed = parse_issue_trailers("");
    let expected: Vec<String> = Vec::new();
    assert_eq!(parsed, expected);
}

/// A final paragraph consisting of a single `Issue:` line is picked up even
/// when prose paragraphs precede it.
#[test]
fn parser_single_trailer_in_pure_block() {
    let parsed = parse_issue_trailers("Fix thing\n\nSome context in the body.\n\nIssue: abc");
    let expected = vec![String::from("abc")];
    assert_eq!(parsed, expected);
}

/// Key casing, a space before the colon, and surrounding whitespace must not
/// change the extracted value.
#[test]
fn parser_case_variants() {
    let variants = [
        "subject\n\nissue: abc",
        "subject\n\nISSUE : abc",
        "subject\n\n  Issue:  abc  ",
    ];
    for msg in variants.iter().copied() {
        assert_eq!(parse_issue_trailers(msg), vec!["abc".to_string()]);
    }
}

/// Several `Issue:` lines in one block are all returned, in message order.
#[test]
fn parser_two_trailers_in_pure_block() {
    let parsed = parse_issue_trailers("subject\n\nIssue: abc\nIssue: def");
    let expected = vec![String::from("abc"), String::from("def")];
    assert_eq!(parsed, expected);
}

/// An `Issue:` line outside the final paragraph is ignored.
#[test]
fn parser_issue_in_body_but_not_final_paragraph() {
    // Here the final paragraph is the Signed-off-by line. It is a valid
    // trailer block, but it carries no `Issue:` key, so the result is empty
    // even though an earlier paragraph contains one.
    let parsed =
        parse_issue_trailers("subject\n\nIssue: abc\n\nSigned-off-by: alice <a@example.com>");
    let expected: Vec<String> = Vec::new();
    assert_eq!(parsed, expected);
}

/// A key that merely starts with `Issue` (here `Issues`) does not match.
#[test]
fn parser_wrong_key() {
    let parsed = parse_issue_trailers("subject\n\nIssues: abc");
    let expected: Vec<String> = Vec::new();
    assert_eq!(parsed, expected);
}

/// Mentioning an issue in running prose, without a trailer line, is ignored.
#[test]
fn parser_prose_mention() {
    let parsed = parse_issue_trailers("subject\n\nthis fixes issue abc in the body");
    let expected: Vec<String> = Vec::new();
    assert_eq!(parsed, expected);
}

/// When the message is a single line, that line is the final paragraph and
/// is parsed as a trailer block.
#[test]
fn parser_single_paragraph_whole_message_is_trailer_block() {
    let parsed = parse_issue_trailers("Issue: abc");
    let expected = vec![String::from("abc")];
    assert_eq!(parsed, expected);
}

/// Guards against false positives: prose mixed into the final paragraph
/// invalidates every trailer in it.
#[test]
fn parser_mixed_final_paragraph_rejects_all() {
    // "Thanks to Bob ..." is not trailer-shaped, so the paragraph is not a
    // trailer block and the `Issue:` line below it must not be extracted.
    let parsed = parse_issue_trailers("subject\n\nThanks to Bob for the catch.\nIssue: a3f9");
    let expected: Vec<String> = Vec::new();
    assert_eq!(parsed, expected);
}

/// Trailing blank or whitespace-only lines do not count as a paragraph.
#[test]
fn parser_trailing_whitespace_paragraph_does_not_shadow_trailer_block() {
    // The message ends with a whitespace-only line; the parser must look
    // past it to the preceding non-empty paragraph, which is a valid block.
    let parsed = parse_issue_trailers("subject\n\nIssue: abc\n\n   \n");
    let expected = vec![String::from("abc")];
    assert_eq!(parsed, expected);
}

/// Other trailer keys may share the block; only `Issue:` values are returned.
#[test]
fn parser_pure_block_with_mixed_keys() {
    let parsed =
        parse_issue_trailers("subject\n\nSigned-off-by: alice <a@example.com>\nIssue: abc");
    let expected = vec![String::from("abc")];
    assert_eq!(parsed, expected);
}

/// A value followed by extra words is rejected outright, not truncated.
#[test]
fn parser_rejects_value_with_trailing_garbage() {
    let parsed = parse_issue_trailers("subject\n\nIssue: abc fixes thing");
    let expected: Vec<String> = Vec::new();
    assert_eq!(parsed, expected);
}

/// An `Issue:` key with only whitespace after the colon yields nothing.
#[test]
fn parser_rejects_empty_value() {
    let parsed = parse_issue_trailers("subject\n\nIssue:   ");
    let expected: Vec<String> = Vec::new();
    assert_eq!(parsed, expected);
}