mirror of
https://github.com/getnora-io/nora.git
synced 2026-04-12 13:50:31 +00:00
fix: scope garbage collection to Docker-only blobs (#109)
`collect_all_blobs` scanned all seven registry prefixes for keys containing `/blobs/` or `/tarballs/`, but `collect_referenced_digests` only reads Docker manifests. Non-Docker artifacts (notably npm tarballs) were therefore collected but never marked as referenced, so they were classified as orphans and running gc without `--dry-run` would delete them. Restrict blob collection to the `docker/` prefix until per-registry reference resolution exists for the other registry types. // ticktockbent
This commit is contained in:
@@ -1,10 +1,14 @@
|
|||||||
//! Garbage Collection for orphaned blobs
|
//! Garbage Collection for orphaned Docker blobs
|
||||||
//!
|
//!
|
||||||
//! Mark-and-sweep approach:
|
//! Mark-and-sweep approach:
|
||||||
//! 1. List all blobs across registries
|
//! 1. List all Docker blobs
|
||||||
//! 2. Parse all manifests to find referenced blobs
|
//! 2. Parse Docker manifests to find referenced blobs
|
||||||
//! 3. Blobs not referenced by any manifest = orphans
|
//! 3. Blobs not referenced by any manifest = orphans
|
||||||
//! 4. Delete orphans (with --dry-run support)
|
//! 4. Delete orphans (with --dry-run support)
|
||||||
|
//!
|
||||||
|
//! Currently Docker-only. Other registries (npm, maven, cargo, pypi, go,
|
||||||
|
//! raw) are excluded because no reference resolver exists for their
|
||||||
|
//! metadata formats.
|
||||||
|
|
||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
|
|
||||||
@@ -72,15 +76,15 @@ pub async fn run_gc(storage: &Storage, dry_run: bool) -> GcResult {
|
|||||||
|
|
||||||
async fn collect_all_blobs(storage: &Storage) -> Vec<String> {
|
async fn collect_all_blobs(storage: &Storage) -> Vec<String> {
|
||||||
let mut blobs = Vec::new();
|
let mut blobs = Vec::new();
|
||||||
// Collect blobs from all registry types, not just Docker
|
// Only collect Docker blobs. Other registries (npm, maven, cargo, pypi,
|
||||||
for prefix in &[
|
// go, raw) use storage key schemes that collect_referenced_digests does
|
||||||
"docker/", "maven/", "npm/", "cargo/", "pypi/", "raw/", "go/",
|
// not understand, so their artifacts would appear as orphans and be
|
||||||
] {
|
// deleted. Extending GC to non-Docker registries requires per-registry
|
||||||
let keys = storage.list(prefix).await;
|
// reference resolution.
|
||||||
for key in keys {
|
let keys = storage.list("docker/").await;
|
||||||
if key.contains("/blobs/") || key.contains("/tarballs/") {
|
for key in keys {
|
||||||
blobs.push(key);
|
if key.contains("/blobs/") {
|
||||||
}
|
blobs.push(key);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
blobs
|
blobs
|
||||||
@@ -304,18 +308,103 @@ mod tests {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn test_gc_multi_registry_blobs() {
|
async fn test_gc_ignores_non_docker_registries() {
|
||||||
let dir = tempfile::tempdir().unwrap();
|
let dir = tempfile::tempdir().unwrap();
|
||||||
let storage = Storage::new_local(dir.path().join("data").to_str().unwrap());
|
let storage = Storage::new_local(dir.path().join("data").to_str().unwrap());
|
||||||
|
|
||||||
// npm tarball (not referenced by Docker manifests => orphan candidate)
|
// Non-Docker artifacts must not be collected by GC, because
|
||||||
|
// collect_referenced_digests only understands Docker manifests.
|
||||||
|
// Without this guard, these would all appear as orphans and be deleted.
|
||||||
|
storage
|
||||||
|
.put("npm/lodash/tarballs/lodash-4.17.21.tgz", b"tarball-data")
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
storage
|
||||||
|
.put("maven/com/example/lib/1.0/lib-1.0.jar", b"jar-data")
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
storage
|
||||||
|
.put("cargo/serde/1.0.0/serde-1.0.0.crate", b"crate-data")
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let result = run_gc(&storage, true).await;
|
||||||
|
assert_eq!(result.total_blobs, 0);
|
||||||
|
assert_eq!(result.orphaned_blobs, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_gc_does_not_delete_npm_tarballs() {
|
||||||
|
let dir = tempfile::tempdir().unwrap();
|
||||||
|
let storage = Storage::new_local(dir.path().join("data").to_str().unwrap());
|
||||||
|
|
||||||
|
// Regression test: npm tarballs were previously collected because
|
||||||
|
// their keys contain "/tarballs/", but no reference resolver existed
|
||||||
|
// for npm metadata, so they were all treated as orphans.
|
||||||
storage
|
storage
|
||||||
.put("npm/lodash/tarballs/lodash-4.17.21.tgz", b"tarball-data")
|
.put("npm/lodash/tarballs/lodash-4.17.21.tgz", b"tarball-data")
|
||||||
.await
|
.await
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
let result = run_gc(&storage, true).await;
|
let result = run_gc(&storage, false).await;
|
||||||
// npm tarballs contain "tarballs/" which matches the filter
|
assert_eq!(result.deleted_blobs, 0);
|
||||||
assert_eq!(result.total_blobs, 1);
|
// Verify tarball still exists
|
||||||
|
assert!(storage
|
||||||
|
.get("npm/lodash/tarballs/lodash-4.17.21.tgz")
|
||||||
|
.await
|
||||||
|
.is_ok());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_gc_deletes_docker_orphan_but_preserves_npm() {
|
||||||
|
let dir = tempfile::tempdir().unwrap();
|
||||||
|
let storage = Storage::new_local(dir.path().join("data").to_str().unwrap());
|
||||||
|
|
||||||
|
// Docker manifest referencing one blob
|
||||||
|
let manifest = serde_json::json!({
|
||||||
|
"config": {"digest": "sha256:configabc"},
|
||||||
|
"layers": []
|
||||||
|
});
|
||||||
|
storage
|
||||||
|
.put(
|
||||||
|
"docker/test/manifests/latest.json",
|
||||||
|
manifest.to_string().as_bytes(),
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
storage
|
||||||
|
.put("docker/test/blobs/sha256:configabc", b"config")
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
// Orphan Docker blob
|
||||||
|
storage
|
||||||
|
.put("docker/test/blobs/sha256:orphan1", b"orphan")
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
// npm tarball that must survive
|
||||||
|
storage
|
||||||
|
.put("npm/lodash/tarballs/lodash-4.17.21.tgz", b"tarball-data")
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let result = run_gc(&storage, false).await;
|
||||||
|
assert_eq!(result.total_blobs, 2); // only Docker blobs counted
|
||||||
|
assert_eq!(result.orphaned_blobs, 1);
|
||||||
|
assert_eq!(result.deleted_blobs, 1);
|
||||||
|
// Docker orphan gone
|
||||||
|
assert!(storage
|
||||||
|
.get("docker/test/blobs/sha256:orphan1")
|
||||||
|
.await
|
||||||
|
.is_err());
|
||||||
|
// Docker referenced blob still exists
|
||||||
|
assert!(storage
|
||||||
|
.get("docker/test/blobs/sha256:configabc")
|
||||||
|
.await
|
||||||
|
.is_ok());
|
||||||
|
// npm tarball untouched
|
||||||
|
assert!(storage
|
||||||
|
.get("npm/lodash/tarballs/lodash-4.17.21.tgz")
|
||||||
|
.await
|
||||||
|
.is_ok());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user