58dd2715
Add cached state layer to avoid re-walking DAGs
a73x 2026-03-21 16:40
Introduces a file-based cache at .git/collab/cache/ that stores serialized IssueState/PatchState keyed by ref tip OID. On from_ref(), if the cached tip matches the current ref, the cached state is returned without walking the DAG. Cache misses, corruption, and missing cache directories all fall back gracefully to the full DAG walk. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/src/cache.rs b/src/cache.rs new file mode 100644 index 0000000..631c653 --- /dev/null +++ b/src/cache.rs @@ -0,0 +1,75 @@ use std::fs; use std::path::PathBuf; use git2::{Oid, Repository}; use serde::de::DeserializeOwned; use serde::Serialize; /// Return the cache directory path for a given repository. fn cache_dir(repo: &Repository) -> PathBuf { repo.path().join("collab").join("cache") } /// Sanitize a ref name for use as a filename by replacing `/` with `_`. fn sanitize_ref_name(ref_name: &str) -> String { ref_name.replace('/', "_") } /// Cache entry stored on disk: the tip OID at cache time + serialized state. #[derive(serde::Serialize, serde::Deserialize)] struct CacheEntry { tip_oid: String, state: serde_json::Value, } /// Try to read a cached state for the given ref. /// /// Returns `Some(state)` if a cache file exists and its stored tip OID matches /// the current ref tip. Returns `None` on any mismatch, missing file, or error. pub fn get_cached_state<T: DeserializeOwned>( repo: &Repository, ref_name: &str, ) -> Option<T> { let current_tip = repo.refname_to_id(ref_name).ok()?; let path = cache_dir(repo).join(sanitize_ref_name(ref_name)); let data = fs::read_to_string(&path).ok()?; let entry: CacheEntry = serde_json::from_str(&data).ok()?; if entry.tip_oid != current_tip.to_string() { return None; } serde_json::from_value(entry.state).ok() } /// Write a cached state for the given ref, keyed by the current tip OID. /// /// Silently ignores errors (cache is best-effort). pub fn set_cached_state<T: Serialize>( repo: &Repository, ref_name: &str, tip_oid: Oid, state: &T, ) { let dir = cache_dir(repo); if fs::create_dir_all(&dir).is_err() { return; } let entry = CacheEntry { tip_oid: tip_oid.to_string(), state: match serde_json::to_value(state) { Ok(v) => v, Err(_) => return, }, }; let json = match serde_json::to_string(&entry) { Ok(j) => j, Err(_) => return, }; let path = dir.join(sanitize_ref_name(ref_name)); let _ = fs::write(&path, json); } /// Invalidate (delete) the cache entry for a ref. Best-effort. pub fn invalidate(repo: &Repository, ref_name: &str) { let path = cache_dir(repo).join(sanitize_ref_name(ref_name)); let _ = fs::remove_file(path); } diff --git a/src/lib.rs b/src/lib.rs index 7966229..9f37a92 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,4 @@ pub mod cache; pub mod cli; pub mod dag; pub mod editor; diff --git a/src/state.rs b/src/state.rs index a0dba56..9a6a4a8 100644 --- a/src/state.rs +++ b/src/state.rs @@ -1,6 +1,7 @@ use git2::{Oid, Repository}; use serde::Serialize; use serde::{Deserialize, Serialize}; use crate::cache; use crate::dag; use crate::event::{Action, Author, ReviewVerdict}; @@ -15,6 +16,19 @@ fn serialize_oid_option<S: serde::Serializer>(oid: &Option<Oid>, s: S) -> Result } } fn deserialize_oid<'de, D: serde::Deserializer<'de>>(d: D) -> Result<Oid, D::Error> { let s = String::deserialize(d)?; Oid::from_str(&s).map_err(serde::de::Error::custom) } fn deserialize_oid_option<'de, D: serde::Deserializer<'de>>(d: D) -> Result<Option<Oid>, D::Error> { let opt: Option<String> = Option::deserialize(d)?; match opt { Some(s) => Oid::from_str(&s).map(Some).map_err(serde::de::Error::custom), None => Ok(None), } } fn serialize_verdict<S: serde::Serializer>(v: &ReviewVerdict, s: S) -> Result<S::Ok, S::Error> { let str_val = match v { ReviewVerdict::Approve => "approve", @@ -24,31 +38,47 @@ fn serialize_verdict<S: serde::Serializer>(v: &ReviewVerdict, s: S) -> Result<S: s.serialize_str(str_val) } #[derive(Debug, Clone, PartialEq, Serialize)] fn deserialize_verdict<'de, D: serde::Deserializer<'de>>(d: D) -> Result<ReviewVerdict, D::Error> { let s = String::deserialize(d)?; match s.as_str() { "approve" => Ok(ReviewVerdict::Approve), "request-changes" => Ok(ReviewVerdict::RequestChanges), "comment" => Ok(ReviewVerdict::Comment), other => Err(serde::de::Error::custom(format!( "unknown verdict: {}", other ))), } } #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] #[serde(rename_all = "lowercase")] pub enum IssueStatus { Open, Closed, } #[derive(Debug, Clone, Serialize)] #[derive(Debug, Clone, Serialize, Deserialize)] #[allow(dead_code)] pub struct Comment { pub author: Author, pub body: String, pub timestamp: String, #[serde(serialize_with = "serialize_oid")] #[serde(serialize_with = "serialize_oid", deserialize_with = "deserialize_oid")] pub commit_id: Oid, } #[derive(Debug, Clone, Serialize)] #[derive(Debug, Clone, Serialize, Deserialize)] pub struct IssueState { pub id: String, pub title: String, pub body: String, pub status: IssueStatus, pub close_reason: Option<String>, #[serde(serialize_with = "serialize_oid_option")] #[serde( serialize_with = "serialize_oid_option", deserialize_with = "deserialize_oid_option" )] pub closed_by: Option<Oid>, pub labels: Vec<String>, pub assignees: Vec<String>, @@ -57,16 +87,16 @@ pub struct IssueState { pub author: Author, } #[derive(Debug, Clone, Serialize)] #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Review { pub author: Author, #[serde(serialize_with = "serialize_verdict")] #[serde(serialize_with = "serialize_verdict", deserialize_with = "deserialize_verdict")] pub verdict: ReviewVerdict, pub body: String, pub timestamp: String, } #[derive(Debug, Clone, PartialEq, Serialize)] #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] #[serde(rename_all = "lowercase")] #[allow(dead_code)] pub enum PatchStatus { @@ -75,7 +105,7 @@ pub enum PatchStatus { Merged, } #[derive(Debug, Clone, Serialize)] #[derive(Debug, Clone, Serialize, Deserialize)] pub struct InlineComment { pub author: Author, pub file: String, @@ -84,7 +114,7 @@ pub struct InlineComment { pub timestamp: String, } #[derive(Debug, Clone, Serialize)] #[derive(Debug, Clone, Serialize, Deserialize)] pub struct PatchState { pub id: String, pub title: String, @@ -106,6 +136,27 @@ impl IssueState { ref_name: &str, id: &str, ) -> Result<Self, crate::error::Error> { // Check cache first if let Some(cached) = cache::get_cached_state::<IssueState>(repo, ref_name) { return Ok(cached); } let state = Self::from_ref_uncached(repo, ref_name, id)?; // Cache the result if let Ok(tip) = repo.refname_to_id(ref_name) { cache::set_cached_state(repo, ref_name, tip, &state); } Ok(state) } /// Walk the DAG and materialize state without consulting the cache. pub fn from_ref_uncached( repo: &Repository, ref_name: &str, id: &str, ) -> Result<Self, crate::error::Error> { let events = dag::walk_events(repo, ref_name)?; let mut state: Option<IssueState> = None; @@ -240,6 +291,27 @@ impl PatchState { ref_name: &str, id: &str, ) -> Result<Self, crate::error::Error> { // Check cache first if let Some(cached) = cache::get_cached_state::<PatchState>(repo, ref_name) { return Ok(cached); } let state = Self::from_ref_uncached(repo, ref_name, id)?; // Cache the result if let Ok(tip) = repo.refname_to_id(ref_name) { cache::set_cached_state(repo, ref_name, tip, &state); } Ok(state) } /// Walk the DAG and materialize state without consulting the cache. pub fn from_ref_uncached( repo: &Repository, ref_name: &str, id: &str, ) -> Result<Self, crate::error::Error> { let events = dag::walk_events(repo, ref_name)?; let mut state: Option<PatchState> = None; diff --git a/tests/cache_test.rs b/tests/cache_test.rs new file mode 100644 index 0000000..2756d57 --- /dev/null +++ b/tests/cache_test.rs @@ -0,0 +1,191 @@ mod common; use common::{alice, bob, add_comment, close_issue, init_repo, open_issue, create_patch, add_review}; use git_collab::cache; use git_collab::event::ReviewVerdict; use git_collab::state::{IssueState, IssueStatus, PatchState, PatchStatus}; use tempfile::TempDir; #[test] fn cache_miss_walks_dag_and_caches() { let dir = TempDir::new().unwrap(); let repo = init_repo(dir.path(), &alice()); let (ref_name, id) = open_issue(&repo, &alice(), "Cache test issue"); // No cache exists yet — should return None let cached: Option<IssueState> = cache::get_cached_state(&repo, &ref_name); assert!(cached.is_none()); // from_ref should walk the DAG and populate the cache let state = IssueState::from_ref(&repo, &ref_name, &id).unwrap(); assert_eq!(state.title, "Cache test issue"); assert_eq!(state.status, IssueStatus::Open); // Now the cache should have an entry let cached: Option<IssueState> = cache::get_cached_state(&repo, &ref_name); assert!(cached.is_some()); let cached = cached.unwrap(); assert_eq!(cached.title, "Cache test issue"); assert_eq!(cached.id, id); } #[test] fn cache_hit_returns_cached_state() { let dir = TempDir::new().unwrap(); let repo = init_repo(dir.path(), &alice()); let (ref_name, id) = open_issue(&repo, &alice(), "Cached issue"); // Populate cache let state1 = IssueState::from_ref(&repo, &ref_name, &id).unwrap(); // Second call should hit cache and return same result let state2 = IssueState::from_ref(&repo, &ref_name, &id).unwrap(); assert_eq!(state1.title, state2.title); assert_eq!(state1.id, state2.id); assert_eq!(state1.status, state2.status); } #[test] fn cache_invalidation_on_new_event() { let dir = TempDir::new().unwrap(); let repo = init_repo(dir.path(), &alice()); let (ref_name, id) = open_issue(&repo, &alice(), "Invalidation test"); // Populate cache let state1 = IssueState::from_ref(&repo, &ref_name, &id).unwrap(); assert_eq!(state1.comments.len(), 0); // Append a comment — this changes the ref tip OID add_comment(&repo, &ref_name, &bob(), "Hello!"); // Cache should be invalidated (tip changed), DAG should be re-walked let state2 = IssueState::from_ref(&repo, &ref_name, &id).unwrap(); assert_eq!(state2.comments.len(), 1); assert_eq!(state2.comments[0].body, "Hello!"); } #[test] fn cache_invalidation_on_close() { let dir = TempDir::new().unwrap(); let repo = init_repo(dir.path(), &alice()); let (ref_name, id) = open_issue(&repo, &alice(), "Close test"); let state1 = IssueState::from_ref(&repo, &ref_name, &id).unwrap(); assert_eq!(state1.status, IssueStatus::Open); close_issue(&repo, &ref_name, &alice()); let state2 = IssueState::from_ref(&repo, &ref_name, &id).unwrap(); assert_eq!(state2.status, IssueStatus::Closed); } #[test] fn no_cache_dir_falls_back_gracefully() { let dir = TempDir::new().unwrap(); let repo = init_repo(dir.path(), &alice()); let (ref_name, id) = open_issue(&repo, &alice(), "No cache dir"); // Ensure no cache directory exists (it shouldn't by default) let cache_path = repo.path().join("collab").join("cache"); if cache_path.exists() { std::fs::remove_dir_all(&cache_path).unwrap(); } // Should still work via DAG walk let state = IssueState::from_ref(&repo, &ref_name, &id).unwrap(); assert_eq!(state.title, "No cache dir"); } #[test] fn corrupted_cache_falls_back_to_dag_walk() { let dir = TempDir::new().unwrap(); let repo = init_repo(dir.path(), &alice()); let (ref_name, id) = open_issue(&repo, &alice(), "Corrupted cache"); // Populate cache IssueState::from_ref(&repo, &ref_name, &id).unwrap(); // Corrupt the cache file let cache_dir = repo.path().join("collab").join("cache"); let sanitized = ref_name.replace('/', "_"); let cache_file = cache_dir.join(sanitized); std::fs::write(&cache_file, "not valid json").unwrap(); // Should fall back gracefully let state = IssueState::from_ref(&repo, &ref_name, &id).unwrap(); assert_eq!(state.title, "Corrupted cache"); } #[test] fn patch_cache_miss_and_hit() { let dir = TempDir::new().unwrap(); let repo = init_repo(dir.path(), &alice()); let (ref_name, id) = create_patch(&repo, &alice(), "Cache patch"); // No cache let cached: Option<PatchState> = cache::get_cached_state(&repo, &ref_name); assert!(cached.is_none()); // from_ref populates cache let state = PatchState::from_ref(&repo, &ref_name, &id).unwrap(); assert_eq!(state.title, "Cache patch"); assert_eq!(state.status, PatchStatus::Open); // Cache hit let cached: Option<PatchState> = cache::get_cached_state(&repo, &ref_name); assert!(cached.is_some()); assert_eq!(cached.unwrap().title, "Cache patch"); } #[test] fn patch_cache_invalidation_on_review() { let dir = TempDir::new().unwrap(); let repo = init_repo(dir.path(), &alice()); let (ref_name, id) = create_patch(&repo, &alice(), "Review patch"); let state1 = PatchState::from_ref(&repo, &ref_name, &id).unwrap(); assert_eq!(state1.reviews.len(), 0); add_review(&repo, &ref_name, &bob(), ReviewVerdict::Approve); let state2 = PatchState::from_ref(&repo, &ref_name, &id).unwrap(); assert_eq!(state2.reviews.len(), 1); } #[test] fn cache_invalidate_removes_entry() { let dir = TempDir::new().unwrap(); let repo = init_repo(dir.path(), &alice()); let (ref_name, id) = open_issue(&repo, &alice(), "Invalidate test"); // Populate cache IssueState::from_ref(&repo, &ref_name, &id).unwrap(); assert!(cache::get_cached_state::<IssueState>(&repo, &ref_name).is_some()); // Explicitly invalidate cache::invalidate(&repo, &ref_name); assert!(cache::get_cached_state::<IssueState>(&repo, &ref_name).is_none()); } #[test] fn list_issues_uses_cache() { let dir = TempDir::new().unwrap(); let repo = init_repo(dir.path(), &alice()); open_issue(&repo, &alice(), "List issue 1"); open_issue(&repo, &alice(), "List issue 2"); // First call walks DAGs and populates cache let issues1 = git_collab::state::list_issues(&repo).unwrap(); assert_eq!(issues1.len(), 2); // Second call should use cache let issues2 = git_collab::state::list_issues(&repo).unwrap(); assert_eq!(issues2.len(), 2); // Titles should match (order may differ) let mut titles1: Vec<_> = issues1.iter().map(|i| &i.title).collect(); let mut titles2: Vec<_> = issues2.iter().map(|i| &i.title).collect(); titles1.sort(); titles2.sort(); assert_eq!(titles1, titles2); }