mirror of
https://github.com/getnora-io/nora.git
synced 2026-04-12 13:50:31 +00:00
fix: scope garbage collection to Docker-only blobs (#109)
`collect_all_blobs` scanned all seven registry prefixes for keys containing `/blobs/` or `/tarballs/`, but `collect_referenced_digests` only reads Docker manifests. Non-Docker artifacts (notably npm tarballs) were therefore collected but never marked as referenced, so running `gc` without `--dry-run` would delete them. Restrict blob collection to the `docker/` prefix until per-registry reference resolution exists for the other registry types. // ticktockbent
This commit is contained in:
@@ -1,10 +1,14 @@
|
||||
//! Garbage Collection for orphaned blobs
|
||||
//! Garbage Collection for orphaned Docker blobs
|
||||
//!
|
||||
//! Mark-and-sweep approach:
|
||||
//! 1. List all blobs across registries
|
||||
//! 2. Parse all manifests to find referenced blobs
|
||||
//! 1. List all Docker blobs
|
||||
//! 2. Parse Docker manifests to find referenced blobs
|
||||
//! 3. Blobs not referenced by any manifest = orphans
|
||||
//! 4. Delete orphans (with --dry-run support)
|
||||
//!
|
||||
//! Currently Docker-only. Other registries (npm, maven, cargo, pypi, go,
|
||||
//! raw) are excluded because no reference resolver exists for their
|
||||
//! metadata formats.
|
||||
|
||||
use std::collections::HashSet;
|
||||
|
||||
@@ -72,15 +76,15 @@ pub async fn run_gc(storage: &Storage, dry_run: bool) -> GcResult {
|
||||
|
||||
async fn collect_all_blobs(storage: &Storage) -> Vec<String> {
|
||||
let mut blobs = Vec::new();
|
||||
// Collect blobs from all registry types, not just Docker
|
||||
for prefix in &[
|
||||
"docker/", "maven/", "npm/", "cargo/", "pypi/", "raw/", "go/",
|
||||
] {
|
||||
let keys = storage.list(prefix).await;
|
||||
for key in keys {
|
||||
if key.contains("/blobs/") || key.contains("/tarballs/") {
|
||||
blobs.push(key);
|
||||
}
|
||||
// Only collect Docker blobs. Other registries (npm, maven, cargo, pypi,
|
||||
// go, raw) use storage key schemes that collect_referenced_digests does
|
||||
// not understand, so their artifacts would appear as orphans and be
|
||||
// deleted. Extending GC to non-Docker registries requires per-registry
|
||||
// reference resolution.
|
||||
let keys = storage.list("docker/").await;
|
||||
for key in keys {
|
||||
if key.contains("/blobs/") {
|
||||
blobs.push(key);
|
||||
}
|
||||
}
|
||||
blobs
|
||||
@@ -304,18 +308,103 @@ mod tests {
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_gc_multi_registry_blobs() {
|
||||
async fn test_gc_ignores_non_docker_registries() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let storage = Storage::new_local(dir.path().join("data").to_str().unwrap());
|
||||
|
||||
// npm tarball (not referenced by Docker manifests => orphan candidate)
|
||||
// Non-Docker artifacts must not be collected by GC, because
|
||||
// collect_referenced_digests only understands Docker manifests.
|
||||
// Without this guard, these would all appear as orphans and be deleted.
|
||||
storage
|
||||
.put("npm/lodash/tarballs/lodash-4.17.21.tgz", b"tarball-data")
|
||||
.await
|
||||
.unwrap();
|
||||
storage
|
||||
.put("maven/com/example/lib/1.0/lib-1.0.jar", b"jar-data")
|
||||
.await
|
||||
.unwrap();
|
||||
storage
|
||||
.put("cargo/serde/1.0.0/serde-1.0.0.crate", b"crate-data")
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let result = run_gc(&storage, true).await;
|
||||
assert_eq!(result.total_blobs, 0);
|
||||
assert_eq!(result.orphaned_blobs, 0);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_gc_does_not_delete_npm_tarballs() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let storage = Storage::new_local(dir.path().join("data").to_str().unwrap());
|
||||
|
||||
// Regression test: npm tarballs were previously collected because
|
||||
// their keys contain "/tarballs/", but no reference resolver existed
|
||||
// for npm metadata, so they were all treated as orphans.
|
||||
storage
|
||||
.put("npm/lodash/tarballs/lodash-4.17.21.tgz", b"tarball-data")
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let result = run_gc(&storage, true).await;
|
||||
// npm tarballs contain "tarballs/" which matches the filter
|
||||
assert_eq!(result.total_blobs, 1);
|
||||
let result = run_gc(&storage, false).await;
|
||||
assert_eq!(result.deleted_blobs, 0);
|
||||
// Verify tarball still exists
|
||||
assert!(storage
|
||||
.get("npm/lodash/tarballs/lodash-4.17.21.tgz")
|
||||
.await
|
||||
.is_ok());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_gc_deletes_docker_orphan_but_preserves_npm() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let storage = Storage::new_local(dir.path().join("data").to_str().unwrap());
|
||||
|
||||
// Docker manifest referencing one blob
|
||||
let manifest = serde_json::json!({
|
||||
"config": {"digest": "sha256:configabc"},
|
||||
"layers": []
|
||||
});
|
||||
storage
|
||||
.put(
|
||||
"docker/test/manifests/latest.json",
|
||||
manifest.to_string().as_bytes(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
storage
|
||||
.put("docker/test/blobs/sha256:configabc", b"config")
|
||||
.await
|
||||
.unwrap();
|
||||
// Orphan Docker blob
|
||||
storage
|
||||
.put("docker/test/blobs/sha256:orphan1", b"orphan")
|
||||
.await
|
||||
.unwrap();
|
||||
// npm tarball that must survive
|
||||
storage
|
||||
.put("npm/lodash/tarballs/lodash-4.17.21.tgz", b"tarball-data")
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let result = run_gc(&storage, false).await;
|
||||
assert_eq!(result.total_blobs, 2); // only Docker blobs counted
|
||||
assert_eq!(result.orphaned_blobs, 1);
|
||||
assert_eq!(result.deleted_blobs, 1);
|
||||
// Docker orphan gone
|
||||
assert!(storage
|
||||
.get("docker/test/blobs/sha256:orphan1")
|
||||
.await
|
||||
.is_err());
|
||||
// Docker referenced blob still exists
|
||||
assert!(storage
|
||||
.get("docker/test/blobs/sha256:configabc")
|
||||
.await
|
||||
.is_ok());
|
||||
// npm tarball untouched
|
||||
assert!(storage
|
||||
.get("npm/lodash/tarballs/lodash-4.17.21.tgz")
|
||||
.await
|
||||
.is_ok());
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user