a73x

58dd2715

Add cached state layer to avoid re-walking DAGs

a73x   2026-03-21 16:40

Introduces a file-based cache at .git/collab/cache/ that stores
serialized IssueState/PatchState keyed by ref tip OID. On from_ref(),
if the cached tip matches the current ref, the cached state is returned
without walking the DAG. Cache misses, corruption, and missing cache
directories all fall back gracefully to the full DAG walk.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

diff --git a/src/cache.rs b/src/cache.rs
new file mode 100644
index 0000000..631c653
--- /dev/null
+++ b/src/cache.rs
@@ -0,0 +1,75 @@
use std::fs;
use std::path::PathBuf;

use git2::{Oid, Repository};
use serde::de::DeserializeOwned;
use serde::Serialize;

/// Return the cache directory path for a given repository.
fn cache_dir(repo: &Repository) -> PathBuf {
    repo.path().join("collab").join("cache")
}

/// Sanitize a ref name for use as a filename by replacing `/` with `_`.
fn sanitize_ref_name(ref_name: &str) -> String {
    ref_name.replace('/', "_")
}

/// Cache entry stored on disk: the tip OID at cache time + serialized state.
#[derive(serde::Serialize, serde::Deserialize)]
struct CacheEntry {
    tip_oid: String,
    state: serde_json::Value,
}

/// Try to read a cached state for the given ref.
///
/// Returns `Some(state)` if a cache file exists and its stored tip OID matches
/// the current ref tip. Returns `None` on any mismatch, missing file, or error.
pub fn get_cached_state<T: DeserializeOwned>(
    repo: &Repository,
    ref_name: &str,
) -> Option<T> {
    let current_tip = repo.refname_to_id(ref_name).ok()?;
    let path = cache_dir(repo).join(sanitize_ref_name(ref_name));
    let data = fs::read_to_string(&path).ok()?;
    let entry: CacheEntry = serde_json::from_str(&data).ok()?;
    if entry.tip_oid != current_tip.to_string() {
        return None;
    }
    serde_json::from_value(entry.state).ok()
}

/// Write a cached state for the given ref, keyed by the current tip OID.
///
/// Silently ignores errors (cache is best-effort).
pub fn set_cached_state<T: Serialize>(
    repo: &Repository,
    ref_name: &str,
    tip_oid: Oid,
    state: &T,
) {
    let dir = cache_dir(repo);
    if fs::create_dir_all(&dir).is_err() {
        return;
    }
    let entry = CacheEntry {
        tip_oid: tip_oid.to_string(),
        state: match serde_json::to_value(state) {
            Ok(v) => v,
            Err(_) => return,
        },
    };
    let json = match serde_json::to_string(&entry) {
        Ok(j) => j,
        Err(_) => return,
    };
    let path = dir.join(sanitize_ref_name(ref_name));
    let _ = fs::write(&path, json);
}

/// Invalidate (delete) the cache entry for a ref. Best-effort.
pub fn invalidate(repo: &Repository, ref_name: &str) {
    let path = cache_dir(repo).join(sanitize_ref_name(ref_name));
    let _ = fs::remove_file(path);
}
diff --git a/src/lib.rs b/src/lib.rs
index 7966229..9f37a92 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,3 +1,4 @@
pub mod cache;
pub mod cli;
pub mod dag;
pub mod editor;
diff --git a/src/state.rs b/src/state.rs
index a0dba56..9a6a4a8 100644
--- a/src/state.rs
+++ b/src/state.rs
@@ -1,6 +1,7 @@
use git2::{Oid, Repository};
use serde::Serialize;
use serde::{Deserialize, Serialize};

use crate::cache;
use crate::dag;
use crate::event::{Action, Author, ReviewVerdict};

@@ -15,6 +16,19 @@ fn serialize_oid_option<S: serde::Serializer>(oid: &Option<Oid>, s: S) -> Result
    }
}

fn deserialize_oid<'de, D: serde::Deserializer<'de>>(d: D) -> Result<Oid, D::Error> {
    let s = String::deserialize(d)?;
    Oid::from_str(&s).map_err(serde::de::Error::custom)
}

fn deserialize_oid_option<'de, D: serde::Deserializer<'de>>(d: D) -> Result<Option<Oid>, D::Error> {
    let opt: Option<String> = Option::deserialize(d)?;
    match opt {
        Some(s) => Oid::from_str(&s).map(Some).map_err(serde::de::Error::custom),
        None => Ok(None),
    }
}

fn serialize_verdict<S: serde::Serializer>(v: &ReviewVerdict, s: S) -> Result<S::Ok, S::Error> {
    let str_val = match v {
        ReviewVerdict::Approve => "approve",
@@ -24,31 +38,47 @@ fn serialize_verdict<S: serde::Serializer>(v: &ReviewVerdict, s: S) -> Result<S:
    s.serialize_str(str_val)
}

#[derive(Debug, Clone, PartialEq, Serialize)]
fn deserialize_verdict<'de, D: serde::Deserializer<'de>>(d: D) -> Result<ReviewVerdict, D::Error> {
    let s = String::deserialize(d)?;
    match s.as_str() {
        "approve" => Ok(ReviewVerdict::Approve),
        "request-changes" => Ok(ReviewVerdict::RequestChanges),
        "comment" => Ok(ReviewVerdict::Comment),
        other => Err(serde::de::Error::custom(format!(
            "unknown verdict: {}",
            other
        ))),
    }
}

#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum IssueStatus {
    Open,
    Closed,
}

#[derive(Debug, Clone, Serialize)]
#[derive(Debug, Clone, Serialize, Deserialize)]
#[allow(dead_code)]
pub struct Comment {
    pub author: Author,
    pub body: String,
    pub timestamp: String,
    #[serde(serialize_with = "serialize_oid")]
    #[serde(serialize_with = "serialize_oid", deserialize_with = "deserialize_oid")]
    pub commit_id: Oid,
}

#[derive(Debug, Clone, Serialize)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IssueState {
    pub id: String,
    pub title: String,
    pub body: String,
    pub status: IssueStatus,
    pub close_reason: Option<String>,
    #[serde(serialize_with = "serialize_oid_option")]
    #[serde(
        serialize_with = "serialize_oid_option",
        deserialize_with = "deserialize_oid_option"
    )]
    pub closed_by: Option<Oid>,
    pub labels: Vec<String>,
    pub assignees: Vec<String>,
@@ -57,16 +87,16 @@ pub struct IssueState {
    pub author: Author,
}

#[derive(Debug, Clone, Serialize)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Review {
    pub author: Author,
    #[serde(serialize_with = "serialize_verdict")]
    #[serde(serialize_with = "serialize_verdict", deserialize_with = "deserialize_verdict")]
    pub verdict: ReviewVerdict,
    pub body: String,
    pub timestamp: String,
}

#[derive(Debug, Clone, PartialEq, Serialize)]
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
#[allow(dead_code)]
pub enum PatchStatus {
@@ -75,7 +105,7 @@ pub enum PatchStatus {
    Merged,
}

#[derive(Debug, Clone, Serialize)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct InlineComment {
    pub author: Author,
    pub file: String,
@@ -84,7 +114,7 @@ pub struct InlineComment {
    pub timestamp: String,
}

#[derive(Debug, Clone, Serialize)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PatchState {
    pub id: String,
    pub title: String,
@@ -106,6 +136,27 @@ impl IssueState {
        ref_name: &str,
        id: &str,
    ) -> Result<Self, crate::error::Error> {
        // Check cache first
        if let Some(cached) = cache::get_cached_state::<IssueState>(repo, ref_name) {
            return Ok(cached);
        }

        let state = Self::from_ref_uncached(repo, ref_name, id)?;

        // Cache the result
        if let Ok(tip) = repo.refname_to_id(ref_name) {
            cache::set_cached_state(repo, ref_name, tip, &state);
        }

        Ok(state)
    }

    /// Walk the DAG and materialize state without consulting the cache.
    pub fn from_ref_uncached(
        repo: &Repository,
        ref_name: &str,
        id: &str,
    ) -> Result<Self, crate::error::Error> {
        let events = dag::walk_events(repo, ref_name)?;
        let mut state: Option<IssueState> = None;

@@ -240,6 +291,27 @@ impl PatchState {
        ref_name: &str,
        id: &str,
    ) -> Result<Self, crate::error::Error> {
        // Check cache first
        if let Some(cached) = cache::get_cached_state::<PatchState>(repo, ref_name) {
            return Ok(cached);
        }

        let state = Self::from_ref_uncached(repo, ref_name, id)?;

        // Cache the result
        if let Ok(tip) = repo.refname_to_id(ref_name) {
            cache::set_cached_state(repo, ref_name, tip, &state);
        }

        Ok(state)
    }

    /// Walk the DAG and materialize state without consulting the cache.
    pub fn from_ref_uncached(
        repo: &Repository,
        ref_name: &str,
        id: &str,
    ) -> Result<Self, crate::error::Error> {
        let events = dag::walk_events(repo, ref_name)?;
        let mut state: Option<PatchState> = None;

diff --git a/tests/cache_test.rs b/tests/cache_test.rs
new file mode 100644
index 0000000..2756d57
--- /dev/null
+++ b/tests/cache_test.rs
@@ -0,0 +1,191 @@
mod common;

use common::{alice, bob, add_comment, close_issue, init_repo, open_issue, create_patch, add_review};
use git_collab::cache;
use git_collab::event::ReviewVerdict;
use git_collab::state::{IssueState, IssueStatus, PatchState, PatchStatus};
use tempfile::TempDir;

#[test]
fn cache_miss_walks_dag_and_caches() {
    let dir = TempDir::new().unwrap();
    let repo = init_repo(dir.path(), &alice());
    let (ref_name, id) = open_issue(&repo, &alice(), "Cache test issue");

    // No cache exists yet — should return None
    let cached: Option<IssueState> = cache::get_cached_state(&repo, &ref_name);
    assert!(cached.is_none());

    // from_ref should walk the DAG and populate the cache
    let state = IssueState::from_ref(&repo, &ref_name, &id).unwrap();
    assert_eq!(state.title, "Cache test issue");
    assert_eq!(state.status, IssueStatus::Open);

    // Now the cache should have an entry
    let cached: Option<IssueState> = cache::get_cached_state(&repo, &ref_name);
    assert!(cached.is_some());
    let cached = cached.unwrap();
    assert_eq!(cached.title, "Cache test issue");
    assert_eq!(cached.id, id);
}

#[test]
fn cache_hit_returns_cached_state() {
    let dir = TempDir::new().unwrap();
    let repo = init_repo(dir.path(), &alice());
    let (ref_name, id) = open_issue(&repo, &alice(), "Cached issue");

    // Populate cache
    let state1 = IssueState::from_ref(&repo, &ref_name, &id).unwrap();

    // Second call should hit cache and return same result
    let state2 = IssueState::from_ref(&repo, &ref_name, &id).unwrap();
    assert_eq!(state1.title, state2.title);
    assert_eq!(state1.id, state2.id);
    assert_eq!(state1.status, state2.status);
}

#[test]
fn cache_invalidation_on_new_event() {
    let dir = TempDir::new().unwrap();
    let repo = init_repo(dir.path(), &alice());
    let (ref_name, id) = open_issue(&repo, &alice(), "Invalidation test");

    // Populate cache
    let state1 = IssueState::from_ref(&repo, &ref_name, &id).unwrap();
    assert_eq!(state1.comments.len(), 0);

    // Append a comment — this changes the ref tip OID
    add_comment(&repo, &ref_name, &bob(), "Hello!");

    // Cache should be invalidated (tip changed), DAG should be re-walked
    let state2 = IssueState::from_ref(&repo, &ref_name, &id).unwrap();
    assert_eq!(state2.comments.len(), 1);
    assert_eq!(state2.comments[0].body, "Hello!");
}

#[test]
fn cache_invalidation_on_close() {
    let dir = TempDir::new().unwrap();
    let repo = init_repo(dir.path(), &alice());
    let (ref_name, id) = open_issue(&repo, &alice(), "Close test");

    let state1 = IssueState::from_ref(&repo, &ref_name, &id).unwrap();
    assert_eq!(state1.status, IssueStatus::Open);

    close_issue(&repo, &ref_name, &alice());

    let state2 = IssueState::from_ref(&repo, &ref_name, &id).unwrap();
    assert_eq!(state2.status, IssueStatus::Closed);
}

#[test]
fn no_cache_dir_falls_back_gracefully() {
    let dir = TempDir::new().unwrap();
    let repo = init_repo(dir.path(), &alice());
    let (ref_name, id) = open_issue(&repo, &alice(), "No cache dir");

    // Ensure no cache directory exists (it shouldn't by default)
    let cache_path = repo.path().join("collab").join("cache");
    if cache_path.exists() {
        std::fs::remove_dir_all(&cache_path).unwrap();
    }

    // Should still work via DAG walk
    let state = IssueState::from_ref(&repo, &ref_name, &id).unwrap();
    assert_eq!(state.title, "No cache dir");
}

#[test]
fn corrupted_cache_falls_back_to_dag_walk() {
    let dir = TempDir::new().unwrap();
    let repo = init_repo(dir.path(), &alice());
    let (ref_name, id) = open_issue(&repo, &alice(), "Corrupted cache");

    // Populate cache
    IssueState::from_ref(&repo, &ref_name, &id).unwrap();

    // Corrupt the cache file
    let cache_dir = repo.path().join("collab").join("cache");
    let sanitized = ref_name.replace('/', "_");
    let cache_file = cache_dir.join(sanitized);
    std::fs::write(&cache_file, "not valid json").unwrap();

    // Should fall back gracefully
    let state = IssueState::from_ref(&repo, &ref_name, &id).unwrap();
    assert_eq!(state.title, "Corrupted cache");
}

#[test]
fn patch_cache_miss_and_hit() {
    let dir = TempDir::new().unwrap();
    let repo = init_repo(dir.path(), &alice());
    let (ref_name, id) = create_patch(&repo, &alice(), "Cache patch");

    // No cache
    let cached: Option<PatchState> = cache::get_cached_state(&repo, &ref_name);
    assert!(cached.is_none());

    // from_ref populates cache
    let state = PatchState::from_ref(&repo, &ref_name, &id).unwrap();
    assert_eq!(state.title, "Cache patch");
    assert_eq!(state.status, PatchStatus::Open);

    // Cache hit
    let cached: Option<PatchState> = cache::get_cached_state(&repo, &ref_name);
    assert!(cached.is_some());
    assert_eq!(cached.unwrap().title, "Cache patch");
}

#[test]
fn patch_cache_invalidation_on_review() {
    let dir = TempDir::new().unwrap();
    let repo = init_repo(dir.path(), &alice());
    let (ref_name, id) = create_patch(&repo, &alice(), "Review patch");

    let state1 = PatchState::from_ref(&repo, &ref_name, &id).unwrap();
    assert_eq!(state1.reviews.len(), 0);

    add_review(&repo, &ref_name, &bob(), ReviewVerdict::Approve);

    let state2 = PatchState::from_ref(&repo, &ref_name, &id).unwrap();
    assert_eq!(state2.reviews.len(), 1);
}

#[test]
fn cache_invalidate_removes_entry() {
    let dir = TempDir::new().unwrap();
    let repo = init_repo(dir.path(), &alice());
    let (ref_name, id) = open_issue(&repo, &alice(), "Invalidate test");

    // Populate cache
    IssueState::from_ref(&repo, &ref_name, &id).unwrap();
    assert!(cache::get_cached_state::<IssueState>(&repo, &ref_name).is_some());

    // Explicitly invalidate
    cache::invalidate(&repo, &ref_name);
    assert!(cache::get_cached_state::<IssueState>(&repo, &ref_name).is_none());
}

#[test]
fn list_issues_uses_cache() {
    let dir = TempDir::new().unwrap();
    let repo = init_repo(dir.path(), &alice());
    open_issue(&repo, &alice(), "List issue 1");
    open_issue(&repo, &alice(), "List issue 2");

    // First call walks DAGs and populates cache
    let issues1 = git_collab::state::list_issues(&repo).unwrap();
    assert_eq!(issues1.len(), 2);

    // Second call should use cache
    let issues2 = git_collab::state::list_issues(&repo).unwrap();
    assert_eq!(issues2.len(), 2);

    // Titles should match (order may differ)
    let mut titles1: Vec<_> = issues1.iter().map(|i| &i.title).collect();
    let mut titles2: Vec<_> = issues2.iter().map(|i| &i.title).collect();
    titles1.sort();
    titles2.sort();
    assert_eq!(titles1, titles2);
}