diff --git a/nora-registry/src/gc.rs b/nora-registry/src/gc.rs index 0897026..649fd67 100644 --- a/nora-registry/src/gc.rs +++ b/nora-registry/src/gc.rs @@ -1,10 +1,14 @@ -//! Garbage Collection for orphaned blobs +//! Garbage Collection for orphaned Docker blobs //! //! Mark-and-sweep approach: -//! 1. List all blobs across registries -//! 2. Parse all manifests to find referenced blobs +//! 1. List all Docker blobs +//! 2. Parse Docker manifests to find referenced blobs //! 3. Blobs not referenced by any manifest = orphans //! 4. Delete orphans (with --dry-run support) +//! +//! Currently Docker-only. Other registries (npm, maven, cargo, pypi, go, +//! raw) are excluded because no reference resolver exists for their +//! metadata formats. use std::collections::HashSet; @@ -72,15 +76,15 @@ pub async fn run_gc(storage: &Storage, dry_run: bool) -> GcResult { async fn collect_all_blobs(storage: &Storage) -> Vec { let mut blobs = Vec::new(); - // Collect blobs from all registry types, not just Docker - for prefix in &[ - "docker/", "maven/", "npm/", "cargo/", "pypi/", "raw/", "go/", - ] { - let keys = storage.list(prefix).await; - for key in keys { - if key.contains("/blobs/") || key.contains("/tarballs/") { - blobs.push(key); - } + // Only collect Docker blobs. Other registries (npm, maven, cargo, pypi, + // go, raw) use storage key schemes that collect_referenced_digests does + // not understand, so their artifacts would appear as orphans and be + // deleted. Extending GC to non-Docker registries requires per-registry + // reference resolution. + let keys = storage.list("docker/").await; + for key in keys { + if key.contains("/blobs/") { + blobs.push(key); } } blobs @@ -304,18 +308,103 @@ mod tests { } #[tokio::test] - async fn test_gc_multi_registry_blobs() { + async fn test_gc_ignores_non_docker_registries() { let dir = tempfile::tempdir().unwrap(); let storage = Storage::new_local(dir.path().join("data").to_str().unwrap()); - // npm tarball (not referenced by Docker manifests => orphan candidate) + // Non-Docker artifacts must not be collected by GC, because + // collect_referenced_digests only understands Docker manifests. + // Without this guard, these would all appear as orphans and be deleted. + storage + .put("npm/lodash/tarballs/lodash-4.17.21.tgz", b"tarball-data") + .await + .unwrap(); + storage + .put("maven/com/example/lib/1.0/lib-1.0.jar", b"jar-data") + .await + .unwrap(); + storage + .put("cargo/serde/1.0.0/serde-1.0.0.crate", b"crate-data") + .await + .unwrap(); + + let result = run_gc(&storage, true).await; + assert_eq!(result.total_blobs, 0); + assert_eq!(result.orphaned_blobs, 0); + } + + #[tokio::test] + async fn test_gc_does_not_delete_npm_tarballs() { + let dir = tempfile::tempdir().unwrap(); + let storage = Storage::new_local(dir.path().join("data").to_str().unwrap()); + + // Regression test: npm tarballs were previously collected because + // their keys contain "/tarballs/", but no reference resolver existed + // for npm metadata, so they were all treated as orphans. storage .put("npm/lodash/tarballs/lodash-4.17.21.tgz", b"tarball-data") .await .unwrap(); - let result = run_gc(&storage, true).await; - // npm tarballs contain "tarballs/" which matches the filter - assert_eq!(result.total_blobs, 1); + let result = run_gc(&storage, false).await; + assert_eq!(result.deleted_blobs, 0); + // Verify tarball still exists + assert!(storage + .get("npm/lodash/tarballs/lodash-4.17.21.tgz") + .await + .is_ok()); + } + + #[tokio::test] + async fn test_gc_deletes_docker_orphan_but_preserves_npm() { + let dir = tempfile::tempdir().unwrap(); + let storage = Storage::new_local(dir.path().join("data").to_str().unwrap()); + + // Docker manifest referencing one blob + let manifest = serde_json::json!({ + "config": {"digest": "sha256:configabc"}, + "layers": [] + }); + storage + .put( + "docker/test/manifests/latest.json", + manifest.to_string().as_bytes(), + ) + .await + .unwrap(); + storage + .put("docker/test/blobs/sha256:configabc", b"config") + .await + .unwrap(); + // Orphan Docker blob + storage + .put("docker/test/blobs/sha256:orphan1", b"orphan") + .await + .unwrap(); + // npm tarball that must survive + storage + .put("npm/lodash/tarballs/lodash-4.17.21.tgz", b"tarball-data") + .await + .unwrap(); + + let result = run_gc(&storage, false).await; + assert_eq!(result.total_blobs, 2); // only Docker blobs counted + assert_eq!(result.orphaned_blobs, 1); + assert_eq!(result.deleted_blobs, 1); + // Docker orphan gone + assert!(storage + .get("docker/test/blobs/sha256:orphan1") + .await + .is_err()); + // Docker referenced blob still exists + assert!(storage + .get("docker/test/blobs/sha256:configabc") + .await + .is_ok()); + // npm tarball untouched + assert!(storage + .get("npm/lodash/tarballs/lodash-4.17.21.tgz") + .await + .is_ok()); } }