mirror of
https://github.com/getnora-io/nora.git
synced 2026-04-12 16:10:31 +00:00
feat: nora mirror v0.4 — yarn.lock, --json, Docker images (#94)
* feat: add yarn.lock support and --json output for nora mirror (#43, #44) - Add yarn.lock v1 parser with scoped packages, multiple ranges, dedup - Add --json flag for machine-readable mirror output (CI pipelines) - Add MirrorFormat::Yarn variant - MirrorResult now implements Serialize - 12 new tests (10 yarn.lock + 2 json serialization) Closes #43, closes #44 * feat: add Docker image mirroring (nora mirror docker) (#41) Add nora mirror docker command to fetch images from upstream registries (Docker Hub, ghcr.io, etc.) and push them into NORA. Supports: - Image references: name:tag, name@digest, registry/name:tag - --images (comma-separated) and --images-file (one per line) - Manifest list resolution (picks linux/amd64) - Layer dedup via HEAD check before fetch - Monolithic blob upload to NORA Makes fetch_blob_from_upstream and fetch_manifest_from_upstream pub in registry/docker.rs for reuse from mirror module. 20 new unit tests for image ref parsing, manifest detection, blob extraction.
This commit is contained in:
610
nora-registry/src/mirror/docker.rs
Normal file
610
nora-registry/src/mirror/docker.rs
Normal file
@@ -0,0 +1,610 @@
|
||||
// Copyright (c) 2026 Volkov Pavel | DevITWay
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
//! Docker image mirroring — fetch images from upstream registries and push to NORA.
|
||||
|
||||
use super::{create_progress_bar, MirrorResult};
|
||||
use crate::registry::docker_auth::DockerAuth;
|
||||
use reqwest::Client;
|
||||
use std::time::Duration;
|
||||
|
||||
const DEFAULT_REGISTRY: &str = "https://registry-1.docker.io";
|
||||
const DEFAULT_TIMEOUT: u64 = 120;
|
||||
|
||||
/// Parsed Docker image reference
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct ImageRef {
|
||||
/// Upstream registry (e.g., "registry-1.docker.io", "ghcr.io")
|
||||
pub registry: String,
|
||||
/// Image name (e.g., "library/alpine", "grafana/grafana")
|
||||
pub name: String,
|
||||
/// Tag or digest reference (e.g., "3.20", "sha256:abc...")
|
||||
pub reference: String,
|
||||
}
|
||||
|
||||
/// Parse an image reference string into structured components.
|
||||
///
|
||||
/// Supports formats:
|
||||
/// - `alpine:3.20` → Docker Hub library/alpine:3.20
|
||||
/// - `grafana/grafana:latest` → Docker Hub grafana/grafana:latest
|
||||
/// - `ghcr.io/owner/repo:v1` → ghcr.io owner/repo:v1
|
||||
/// - `alpine@sha256:abc` → Docker Hub library/alpine@sha256:abc
|
||||
/// - `alpine` → Docker Hub library/alpine:latest
|
||||
pub fn parse_image_ref(input: &str) -> ImageRef {
|
||||
let input = input.trim();
|
||||
|
||||
// Split off @digest or :tag
|
||||
let (name_part, reference) = if let Some(idx) = input.rfind('@') {
|
||||
(&input[..idx], &input[idx + 1..])
|
||||
} else if let Some(idx) = input.rfind(':') {
|
||||
// Make sure colon is not part of a port (e.g., localhost:5000/image)
|
||||
let before_colon = &input[..idx];
|
||||
if let Some(last_slash) = before_colon.rfind('/') {
|
||||
let segment_after_slash = &input[last_slash + 1..];
|
||||
if segment_after_slash.contains(':') {
|
||||
// Colon in last segment — tag separator
|
||||
(&input[..idx], &input[idx + 1..])
|
||||
} else {
|
||||
// Colon in earlier segment (port) — no tag
|
||||
(input, "latest")
|
||||
}
|
||||
} else {
|
||||
(&input[..idx], &input[idx + 1..])
|
||||
}
|
||||
} else {
|
||||
(input, "latest")
|
||||
};
|
||||
|
||||
// Determine if first segment is a registry hostname
|
||||
let parts: Vec<&str> = name_part.splitn(2, '/').collect();
|
||||
|
||||
let (registry, name) = if parts.len() == 1 {
|
||||
// Simple name like "alpine" → Docker Hub library/
|
||||
(
|
||||
DEFAULT_REGISTRY.to_string(),
|
||||
format!("library/{}", parts[0]),
|
||||
)
|
||||
} else {
|
||||
let first = parts[0];
|
||||
// A segment is a registry if it contains a dot or colon (hostname/port)
|
||||
if first.contains('.') || first.contains(':') {
|
||||
let reg = if first.starts_with("http") {
|
||||
first.to_string()
|
||||
} else {
|
||||
format!("https://{}", first)
|
||||
};
|
||||
(reg, parts[1].to_string())
|
||||
} else {
|
||||
// Docker Hub with org, e.g., "grafana/grafana"
|
||||
(DEFAULT_REGISTRY.to_string(), name_part.to_string())
|
||||
}
|
||||
};
|
||||
|
||||
ImageRef {
|
||||
registry,
|
||||
name,
|
||||
reference: reference.to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse a list of image references from a newline-separated string.
|
||||
pub fn parse_images_file(content: &str) -> Vec<ImageRef> {
|
||||
content
|
||||
.lines()
|
||||
.map(|l| l.trim())
|
||||
.filter(|l| !l.is_empty() && !l.starts_with('#'))
|
||||
.map(parse_image_ref)
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Mirror Docker images from upstream registries into NORA.
|
||||
pub async fn run_docker_mirror(
|
||||
client: &Client,
|
||||
nora_url: &str,
|
||||
images: &[ImageRef],
|
||||
concurrency: usize,
|
||||
) -> Result<MirrorResult, String> {
|
||||
let docker_auth = DockerAuth::new(DEFAULT_TIMEOUT);
|
||||
let semaphore = std::sync::Arc::new(tokio::sync::Semaphore::new(concurrency));
|
||||
|
||||
let pb = create_progress_bar(images.len() as u64);
|
||||
let nora_base = nora_url.trim_end_matches('/');
|
||||
|
||||
let mut total_fetched = 0usize;
|
||||
let mut total_failed = 0usize;
|
||||
let mut total_bytes = 0u64;
|
||||
|
||||
for image in images {
|
||||
let _permit = semaphore.acquire().await.map_err(|e| e.to_string())?;
|
||||
pb.set_message(format!("{}:{}", image.name, image.reference));
|
||||
|
||||
match mirror_single_image(client, nora_base, image, &docker_auth).await {
|
||||
Ok(bytes) => {
|
||||
total_fetched += 1;
|
||||
total_bytes += bytes;
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!(
|
||||
image = %format!("{}/{}:{}", image.registry, image.name, image.reference),
|
||||
error = %e,
|
||||
"Failed to mirror image"
|
||||
);
|
||||
total_failed += 1;
|
||||
}
|
||||
}
|
||||
pb.inc(1);
|
||||
}
|
||||
|
||||
pb.finish_with_message("done");
|
||||
|
||||
Ok(MirrorResult {
|
||||
total: images.len(),
|
||||
fetched: total_fetched,
|
||||
failed: total_failed,
|
||||
bytes: total_bytes,
|
||||
})
|
||||
}
|
||||
|
||||
/// Mirror a single image: fetch manifest + blobs from upstream, push to NORA.
|
||||
async fn mirror_single_image(
|
||||
client: &Client,
|
||||
nora_base: &str,
|
||||
image: &ImageRef,
|
||||
docker_auth: &DockerAuth,
|
||||
) -> Result<u64, String> {
|
||||
let mut bytes = 0u64;
|
||||
|
||||
// 1. Fetch manifest from upstream
|
||||
let (manifest_bytes, content_type) = crate::registry::docker::fetch_manifest_from_upstream(
|
||||
client,
|
||||
&image.registry,
|
||||
&image.name,
|
||||
&image.reference,
|
||||
docker_auth,
|
||||
DEFAULT_TIMEOUT,
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.map_err(|()| format!("Failed to fetch manifest for {}", image.name))?;
|
||||
|
||||
bytes += manifest_bytes.len() as u64;
|
||||
|
||||
// 2. Parse manifest to find layer digests
|
||||
let manifest_json: serde_json::Value = serde_json::from_slice(&manifest_bytes)
|
||||
.map_err(|e| format!("Invalid manifest JSON: {}", e))?;
|
||||
|
||||
// Check if this is a manifest list / OCI index
|
||||
let manifests_to_process = if is_manifest_list(&content_type, &manifest_json) {
|
||||
// Pick linux/amd64 manifest from the list
|
||||
resolve_platform_manifest(
|
||||
client,
|
||||
&image.registry,
|
||||
&image.name,
|
||||
docker_auth,
|
||||
&manifest_json,
|
||||
)
|
||||
.await?
|
||||
} else {
|
||||
vec![(
|
||||
manifest_bytes.clone(),
|
||||
manifest_json.clone(),
|
||||
content_type.clone(),
|
||||
)]
|
||||
};
|
||||
|
||||
for (mf_bytes, mf_json, mf_ct) in &manifests_to_process {
|
||||
// 3. Get config digest and layer digests
|
||||
let blobs = extract_blob_digests(mf_json);
|
||||
|
||||
// 4. For each blob, check if NORA already has it, otherwise fetch and push
|
||||
for digest in &blobs {
|
||||
if blob_exists(client, nora_base, &image.name, digest).await {
|
||||
tracing::debug!(digest = %digest, "Blob already exists, skipping");
|
||||
continue;
|
||||
}
|
||||
|
||||
let blob_data = crate::registry::docker::fetch_blob_from_upstream(
|
||||
client,
|
||||
&image.registry,
|
||||
&image.name,
|
||||
digest,
|
||||
docker_auth,
|
||||
DEFAULT_TIMEOUT,
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.map_err(|()| format!("Failed to fetch blob {}", digest))?;
|
||||
|
||||
bytes += blob_data.len() as u64;
|
||||
push_blob(client, nora_base, &image.name, digest, &blob_data).await?;
|
||||
}
|
||||
|
||||
// 5. Push manifest to NORA
|
||||
push_manifest(
|
||||
client,
|
||||
nora_base,
|
||||
&image.name,
|
||||
&image.reference,
|
||||
mf_bytes,
|
||||
mf_ct,
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
|
||||
// If this was a manifest list, also push the list itself
|
||||
if manifests_to_process.len() > 1 || is_manifest_list(&content_type, &manifest_json) {
|
||||
push_manifest(
|
||||
client,
|
||||
nora_base,
|
||||
&image.name,
|
||||
&image.reference,
|
||||
&manifest_bytes,
|
||||
&content_type,
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
|
||||
Ok(bytes)
|
||||
}
|
||||
|
||||
/// Check if a manifest is a manifest list (fat manifest) or OCI index.
|
||||
fn is_manifest_list(content_type: &str, json: &serde_json::Value) -> bool {
|
||||
content_type.contains("manifest.list")
|
||||
|| content_type.contains("image.index")
|
||||
|| json.get("manifests").is_some()
|
||||
}
|
||||
|
||||
/// From a manifest list, resolve the linux/amd64 platform manifest.
|
||||
async fn resolve_platform_manifest(
|
||||
client: &Client,
|
||||
upstream_url: &str,
|
||||
name: &str,
|
||||
docker_auth: &DockerAuth,
|
||||
list_json: &serde_json::Value,
|
||||
) -> Result<Vec<(Vec<u8>, serde_json::Value, String)>, String> {
|
||||
let manifests = list_json
|
||||
.get("manifests")
|
||||
.and_then(|m| m.as_array())
|
||||
.ok_or("Manifest list has no manifests array")?;
|
||||
|
||||
// Find linux/amd64 manifest
|
||||
let target = manifests
|
||||
.iter()
|
||||
.find(|m| {
|
||||
let platform = m.get("platform");
|
||||
let os = platform
|
||||
.and_then(|p| p.get("os"))
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("");
|
||||
let arch = platform
|
||||
.and_then(|p| p.get("architecture"))
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("");
|
||||
os == "linux" && arch == "amd64"
|
||||
})
|
||||
.or_else(|| manifests.first())
|
||||
.ok_or("No suitable platform manifest found")?;
|
||||
|
||||
let digest = target
|
||||
.get("digest")
|
||||
.and_then(|d| d.as_str())
|
||||
.ok_or("Manifest entry missing digest")?;
|
||||
|
||||
let (mf_bytes, mf_ct) = crate::registry::docker::fetch_manifest_from_upstream(
|
||||
client,
|
||||
upstream_url,
|
||||
name,
|
||||
digest,
|
||||
docker_auth,
|
||||
DEFAULT_TIMEOUT,
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.map_err(|()| format!("Failed to fetch platform manifest {}", digest))?;
|
||||
|
||||
let mf_json: serde_json::Value = serde_json::from_slice(&mf_bytes)
|
||||
.map_err(|e| format!("Invalid platform manifest: {}", e))?;
|
||||
|
||||
Ok(vec![(mf_bytes, mf_json, mf_ct)])
|
||||
}
|
||||
|
||||
/// Extract all blob digests from a manifest (config + layers).
|
||||
fn extract_blob_digests(manifest: &serde_json::Value) -> Vec<String> {
|
||||
let mut digests = Vec::new();
|
||||
|
||||
// Config blob
|
||||
if let Some(digest) = manifest
|
||||
.get("config")
|
||||
.and_then(|c| c.get("digest"))
|
||||
.and_then(|d| d.as_str())
|
||||
{
|
||||
digests.push(digest.to_string());
|
||||
}
|
||||
|
||||
// Layer blobs
|
||||
if let Some(layers) = manifest.get("layers").and_then(|l| l.as_array()) {
|
||||
for layer in layers {
|
||||
if let Some(digest) = layer.get("digest").and_then(|d| d.as_str()) {
|
||||
digests.push(digest.to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
digests
|
||||
}
|
||||
|
||||
/// Check if NORA already has a blob via HEAD request.
|
||||
async fn blob_exists(client: &Client, nora_base: &str, name: &str, digest: &str) -> bool {
|
||||
let url = format!("{}/v2/{}/blobs/{}", nora_base, name, digest);
|
||||
matches!(
|
||||
client
|
||||
.head(&url)
|
||||
.timeout(Duration::from_secs(10))
|
||||
.send()
|
||||
.await,
|
||||
Ok(r) if r.status().is_success()
|
||||
)
|
||||
}
|
||||
|
||||
/// Push a blob to NORA via monolithic upload.
|
||||
async fn push_blob(
|
||||
client: &Client,
|
||||
nora_base: &str,
|
||||
name: &str,
|
||||
digest: &str,
|
||||
data: &[u8],
|
||||
) -> Result<(), String> {
|
||||
// Start upload session
|
||||
let start_url = format!("{}/v2/{}/blobs/uploads/", nora_base, name);
|
||||
let response = client
|
||||
.post(&start_url)
|
||||
.timeout(Duration::from_secs(30))
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| format!("Failed to start blob upload: {}", e))?;
|
||||
|
||||
let location = response
|
||||
.headers()
|
||||
.get("location")
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.ok_or("Missing Location header from upload start")?
|
||||
.to_string();
|
||||
|
||||
// Complete upload with digest
|
||||
let upload_url = if location.contains('?') {
|
||||
format!("{}&digest={}", location, digest)
|
||||
} else {
|
||||
format!("{}?digest={}", location, digest)
|
||||
};
|
||||
|
||||
// Make absolute URL if relative
|
||||
let upload_url = if upload_url.starts_with('/') {
|
||||
format!("{}{}", nora_base, upload_url)
|
||||
} else {
|
||||
upload_url
|
||||
};
|
||||
|
||||
let resp = client
|
||||
.put(&upload_url)
|
||||
.header("Content-Type", "application/octet-stream")
|
||||
.body(data.to_vec())
|
||||
.timeout(Duration::from_secs(DEFAULT_TIMEOUT))
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| format!("Failed to upload blob: {}", e))?;
|
||||
|
||||
if !resp.status().is_success() && resp.status().as_u16() != 201 {
|
||||
return Err(format!("Blob upload failed with status {}", resp.status()));
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Push a manifest to NORA.
|
||||
async fn push_manifest(
|
||||
client: &Client,
|
||||
nora_base: &str,
|
||||
name: &str,
|
||||
reference: &str,
|
||||
data: &[u8],
|
||||
content_type: &str,
|
||||
) -> Result<(), String> {
|
||||
let url = format!("{}/v2/{}/manifests/{}", nora_base, name, reference);
|
||||
let resp = client
|
||||
.put(&url)
|
||||
.header("Content-Type", content_type)
|
||||
.body(data.to_vec())
|
||||
.timeout(Duration::from_secs(30))
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| format!("Failed to push manifest: {}", e))?;
|
||||
|
||||
if !resp.status().is_success() && resp.status().as_u16() != 201 {
|
||||
return Err(format!(
|
||||
"Manifest push failed with status {}",
|
||||
resp.status()
|
||||
));
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[allow(clippy::unwrap_used)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
// --- parse_image_ref tests ---
|
||||
|
||||
#[test]
|
||||
fn test_parse_simple_name() {
|
||||
let r = parse_image_ref("alpine");
|
||||
assert_eq!(r.registry, DEFAULT_REGISTRY);
|
||||
assert_eq!(r.name, "library/alpine");
|
||||
assert_eq!(r.reference, "latest");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_name_with_tag() {
|
||||
let r = parse_image_ref("alpine:3.20");
|
||||
assert_eq!(r.registry, DEFAULT_REGISTRY);
|
||||
assert_eq!(r.name, "library/alpine");
|
||||
assert_eq!(r.reference, "3.20");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_org_image() {
|
||||
let r = parse_image_ref("grafana/grafana:latest");
|
||||
assert_eq!(r.registry, DEFAULT_REGISTRY);
|
||||
assert_eq!(r.name, "grafana/grafana");
|
||||
assert_eq!(r.reference, "latest");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_org_image_no_tag() {
|
||||
let r = parse_image_ref("grafana/grafana");
|
||||
assert_eq!(r.registry, DEFAULT_REGISTRY);
|
||||
assert_eq!(r.name, "grafana/grafana");
|
||||
assert_eq!(r.reference, "latest");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_custom_registry() {
|
||||
let r = parse_image_ref("ghcr.io/owner/repo:v1.0");
|
||||
assert_eq!(r.registry, "https://ghcr.io");
|
||||
assert_eq!(r.name, "owner/repo");
|
||||
assert_eq!(r.reference, "v1.0");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_digest_reference() {
|
||||
let r = parse_image_ref("alpine@sha256:abcdef1234567890");
|
||||
assert_eq!(r.registry, DEFAULT_REGISTRY);
|
||||
assert_eq!(r.name, "library/alpine");
|
||||
assert_eq!(r.reference, "sha256:abcdef1234567890");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_registry_with_port() {
|
||||
let r = parse_image_ref("localhost:5000/myimage:v1");
|
||||
assert_eq!(r.registry, "https://localhost:5000");
|
||||
assert_eq!(r.name, "myimage");
|
||||
assert_eq!(r.reference, "v1");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_deep_path() {
|
||||
let r = parse_image_ref("ghcr.io/org/sub/image:latest");
|
||||
assert_eq!(r.registry, "https://ghcr.io");
|
||||
assert_eq!(r.name, "org/sub/image");
|
||||
assert_eq!(r.reference, "latest");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_trimmed() {
|
||||
let r = parse_image_ref(" alpine:3.20 ");
|
||||
assert_eq!(r.name, "library/alpine");
|
||||
assert_eq!(r.reference, "3.20");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_images_file() {
|
||||
let content = "alpine:3.20\n# comment\npostgres:15\n\nnginx:1.25\n";
|
||||
let images = parse_images_file(content);
|
||||
assert_eq!(images.len(), 3);
|
||||
assert_eq!(images[0].name, "library/alpine");
|
||||
assert_eq!(images[1].name, "library/postgres");
|
||||
assert_eq!(images[2].name, "library/nginx");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_images_file_empty() {
|
||||
let images = parse_images_file("");
|
||||
assert!(images.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_images_file_comments_only() {
|
||||
let images = parse_images_file("# comment\n# another\n");
|
||||
assert!(images.is_empty());
|
||||
}
|
||||
|
||||
// --- extract_blob_digests tests ---
|
||||
|
||||
#[test]
|
||||
fn test_extract_blob_digests_full_manifest() {
|
||||
let manifest = serde_json::json!({
|
||||
"config": {
|
||||
"digest": "sha256:config111"
|
||||
},
|
||||
"layers": [
|
||||
{"digest": "sha256:layer111"},
|
||||
{"digest": "sha256:layer222"}
|
||||
]
|
||||
});
|
||||
let digests = extract_blob_digests(&manifest);
|
||||
assert_eq!(digests.len(), 3);
|
||||
assert_eq!(digests[0], "sha256:config111");
|
||||
assert_eq!(digests[1], "sha256:layer111");
|
||||
assert_eq!(digests[2], "sha256:layer222");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_extract_blob_digests_no_layers() {
|
||||
let manifest = serde_json::json!({
|
||||
"config": { "digest": "sha256:config111" }
|
||||
});
|
||||
let digests = extract_blob_digests(&manifest);
|
||||
assert_eq!(digests.len(), 1);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_extract_blob_digests_empty() {
|
||||
let manifest = serde_json::json!({});
|
||||
let digests = extract_blob_digests(&manifest);
|
||||
assert!(digests.is_empty());
|
||||
}
|
||||
|
||||
// --- is_manifest_list tests ---
|
||||
|
||||
#[test]
|
||||
fn test_is_manifest_list_by_content_type() {
|
||||
let json = serde_json::json!({});
|
||||
assert!(is_manifest_list(
|
||||
"application/vnd.docker.distribution.manifest.list.v2+json",
|
||||
&json
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_is_manifest_list_oci_index() {
|
||||
let json = serde_json::json!({});
|
||||
assert!(is_manifest_list(
|
||||
"application/vnd.oci.image.index.v1+json",
|
||||
&json
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_is_manifest_list_by_manifests_key() {
|
||||
let json = serde_json::json!({
|
||||
"manifests": [{"digest": "sha256:abc"}]
|
||||
});
|
||||
assert!(is_manifest_list(
|
||||
"application/vnd.docker.distribution.manifest.v2+json",
|
||||
&json
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_is_not_manifest_list() {
|
||||
let json = serde_json::json!({
|
||||
"config": {},
|
||||
"layers": []
|
||||
});
|
||||
assert!(!is_manifest_list(
|
||||
"application/vnd.docker.distribution.manifest.v2+json",
|
||||
&json
|
||||
));
|
||||
}
|
||||
}
|
||||
@@ -3,6 +3,7 @@
|
||||
|
||||
//! `nora mirror` — pre-fetch dependencies through NORA proxy cache.
|
||||
|
||||
mod docker;
|
||||
mod npm;
|
||||
|
||||
use clap::Subcommand;
|
||||
@@ -24,6 +25,12 @@ pub enum MirrorFormat {
|
||||
#[arg(long)]
|
||||
all_versions: bool,
|
||||
},
|
||||
/// Mirror npm packages from yarn.lock
|
||||
Yarn {
|
||||
/// Path to yarn.lock
|
||||
#[arg(long)]
|
||||
lockfile: PathBuf,
|
||||
},
|
||||
/// Mirror Python packages
|
||||
Pip {
|
||||
/// Path to requirements.txt
|
||||
@@ -42,6 +49,15 @@ pub enum MirrorFormat {
|
||||
#[arg(long)]
|
||||
lockfile: PathBuf,
|
||||
},
|
||||
/// Mirror Docker images from upstream registries
|
||||
Docker {
|
||||
/// Comma-separated image references (e.g., alpine:3.20,postgres:15)
|
||||
#[arg(long, conflicts_with = "images_file", value_delimiter = ',')]
|
||||
images: Option<Vec<String>>,
|
||||
/// Path to file with image references (one per line)
|
||||
#[arg(long, conflicts_with = "images")]
|
||||
images_file: Option<PathBuf>,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Hash, Eq, PartialEq)]
|
||||
@@ -50,6 +66,7 @@ pub struct MirrorTarget {
|
||||
pub version: String,
|
||||
}
|
||||
|
||||
#[derive(serde::Serialize)]
|
||||
pub struct MirrorResult {
|
||||
pub total: usize,
|
||||
pub fetched: usize,
|
||||
@@ -74,6 +91,7 @@ pub async fn run_mirror(
|
||||
format: MirrorFormat,
|
||||
registry: &str,
|
||||
concurrency: usize,
|
||||
json_output: bool,
|
||||
) -> Result<(), String> {
|
||||
let client = reqwest::Client::builder()
|
||||
.timeout(std::time::Duration::from_secs(120))
|
||||
@@ -110,6 +128,27 @@ pub async fn run_mirror(
|
||||
)
|
||||
.await?
|
||||
}
|
||||
MirrorFormat::Yarn { lockfile } => {
|
||||
let content = std::fs::read_to_string(&lockfile)
|
||||
.map_err(|e| format!("Cannot read {}: {}", lockfile.display(), e))?;
|
||||
let targets = npm::parse_yarn_lock(&content);
|
||||
if targets.is_empty() {
|
||||
println!("No packages found in {}", lockfile.display());
|
||||
MirrorResult {
|
||||
total: 0,
|
||||
fetched: 0,
|
||||
failed: 0,
|
||||
bytes: 0,
|
||||
}
|
||||
} else {
|
||||
println!(
|
||||
"Mirroring {} npm packages from yarn.lock via {}...",
|
||||
targets.len(),
|
||||
registry
|
||||
);
|
||||
npm::mirror_npm_packages(&client, registry, &targets, concurrency).await?
|
||||
}
|
||||
}
|
||||
MirrorFormat::Pip { lockfile } => {
|
||||
mirror_lockfile(&client, registry, "pip", &lockfile).await?
|
||||
}
|
||||
@@ -119,15 +158,46 @@ pub async fn run_mirror(
|
||||
MirrorFormat::Maven { lockfile } => {
|
||||
mirror_lockfile(&client, registry, "maven", &lockfile).await?
|
||||
}
|
||||
MirrorFormat::Docker {
|
||||
images,
|
||||
images_file,
|
||||
} => {
|
||||
let image_refs = if let Some(file) = images_file {
|
||||
let content = std::fs::read_to_string(&file)
|
||||
.map_err(|e| format!("Cannot read {}: {}", file.display(), e))?;
|
||||
docker::parse_images_file(&content)
|
||||
} else if let Some(imgs) = images {
|
||||
imgs.iter().map(|s| docker::parse_image_ref(s)).collect()
|
||||
} else {
|
||||
return Err("Either --images or --images-file is required".to_string());
|
||||
};
|
||||
if image_refs.is_empty() {
|
||||
return Err("No images specified".to_string());
|
||||
}
|
||||
println!(
|
||||
"Mirroring {} Docker images via {}...",
|
||||
image_refs.len(),
|
||||
registry
|
||||
);
|
||||
docker::run_docker_mirror(&client, registry, &image_refs, concurrency).await?
|
||||
}
|
||||
};
|
||||
|
||||
let elapsed = start.elapsed();
|
||||
println!("\nMirror complete:");
|
||||
println!(" Total: {}", result.total);
|
||||
println!(" Fetched: {}", result.fetched);
|
||||
println!(" Failed: {}", result.failed);
|
||||
println!(" Size: {:.1} MB", result.bytes as f64 / 1_048_576.0);
|
||||
println!(" Time: {:.1}s", elapsed.as_secs_f64());
|
||||
|
||||
if json_output {
|
||||
println!(
|
||||
"{}",
|
||||
serde_json::to_string_pretty(&result).unwrap_or_default()
|
||||
);
|
||||
} else {
|
||||
println!("\nMirror complete:");
|
||||
println!(" Total: {}", result.total);
|
||||
println!(" Fetched: {}", result.fetched);
|
||||
println!(" Failed: {}", result.failed);
|
||||
println!(" Size: {:.1} MB", result.bytes as f64 / 1_048_576.0);
|
||||
println!(" Time: {:.1}s", elapsed.as_secs_f64());
|
||||
}
|
||||
|
||||
if result.failed > 0 {
|
||||
Err(format!("{} packages failed to mirror", result.failed))
|
||||
@@ -440,4 +510,32 @@ version = "0.1.0"
|
||||
let pb = create_progress_bar(100);
|
||||
assert_eq!(pb.length(), Some(100));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_mirror_result_json_serialization() {
|
||||
let result = MirrorResult {
|
||||
total: 10,
|
||||
fetched: 8,
|
||||
failed: 2,
|
||||
bytes: 1048576,
|
||||
};
|
||||
let json = serde_json::to_string_pretty(&result).unwrap();
|
||||
let parsed: serde_json::Value = serde_json::from_str(&json).unwrap();
|
||||
assert_eq!(parsed["total"], 10);
|
||||
assert_eq!(parsed["fetched"], 8);
|
||||
assert_eq!(parsed["failed"], 2);
|
||||
assert_eq!(parsed["bytes"], 1048576);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_mirror_result_json_zero_values() {
|
||||
let result = MirrorResult {
|
||||
total: 0,
|
||||
fetched: 0,
|
||||
failed: 0,
|
||||
bytes: 0,
|
||||
};
|
||||
let json = serde_json::to_string(&result).unwrap();
|
||||
assert!(json.contains("\"total\":0"));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -174,7 +174,7 @@ async fn resolve_npm_packages(
|
||||
}
|
||||
|
||||
/// Fetch packages through NORA (triggers proxy cache)
|
||||
async fn mirror_npm_packages(
|
||||
pub async fn mirror_npm_packages(
|
||||
client: &reqwest::Client,
|
||||
registry: &str,
|
||||
targets: &[MirrorTarget],
|
||||
@@ -250,6 +250,73 @@ async fn mirror_npm_packages(
|
||||
})
|
||||
}
|
||||
|
||||
/// Parse yarn.lock v1 format
|
||||
/// Format: "package@version:\n version \"X.Y.Z\"\n resolved \"url\""
|
||||
pub fn parse_yarn_lock(content: &str) -> Vec<MirrorTarget> {
|
||||
let mut targets = Vec::new();
|
||||
let mut seen = HashSet::new();
|
||||
let mut current_name: Option<String> = None;
|
||||
|
||||
for line in content.lines() {
|
||||
let trimmed = line.trim();
|
||||
|
||||
// Skip comments and empty lines
|
||||
if trimmed.starts_with('#') || trimmed.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Package header: "lodash@^4.17.21:" or "@babel/core@^7.0.0, @babel/core@^7.26.0:"
|
||||
if !line.starts_with(' ') && !line.starts_with('\t') && trimmed.ends_with(':') {
|
||||
let header = trimmed.trim_end_matches(':');
|
||||
// Take first entry before comma (all resolve to same version)
|
||||
let first = header.split(',').next().unwrap_or(header).trim();
|
||||
// Remove quotes if present
|
||||
let first = first.trim_matches('"');
|
||||
// Extract package name: everything before last @
|
||||
if let Some(name) = extract_yarn_package_name(first) {
|
||||
current_name = Some(name.to_string());
|
||||
} else {
|
||||
current_name = None;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// Version line: " version "4.17.21""
|
||||
if let Some(ref name) = current_name {
|
||||
if trimmed.starts_with("version ") {
|
||||
let ver = trimmed.trim_start_matches("version ").trim_matches('"');
|
||||
let pair = (name.clone(), ver.to_string());
|
||||
if seen.insert(pair.clone()) {
|
||||
targets.push(MirrorTarget {
|
||||
name: pair.0,
|
||||
version: pair.1,
|
||||
});
|
||||
}
|
||||
current_name = None;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
targets
|
||||
}
|
||||
|
||||
/// Extract package name from yarn.lock entry like "@babel/core@^7.0.0"
|
||||
fn extract_yarn_package_name(entry: &str) -> Option<&str> {
|
||||
if let Some(rest) = entry.strip_prefix('@') {
|
||||
// Scoped: @babel/core@^7.0.0 → find second @
|
||||
let after_scope = rest.find('@')?;
|
||||
Some(&entry[..after_scope + 1])
|
||||
} else {
|
||||
// Regular: lodash@^4.17.21 → find first @
|
||||
let at = entry.find('@')?;
|
||||
if at == 0 {
|
||||
None
|
||||
} else {
|
||||
Some(&entry[..at])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[allow(clippy::unwrap_used)]
|
||||
mod tests {
|
||||
@@ -429,4 +496,119 @@ mod tests {
|
||||
let targets = parse_npm_lockfile(&lockfile.to_string()).unwrap();
|
||||
assert!(targets.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_yarn_lock_basic() {
|
||||
let content = r#"# yarn lockfile v1
|
||||
|
||||
lodash@^4.17.21:
|
||||
version "4.17.21"
|
||||
resolved "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz"
|
||||
|
||||
express@^4.18.0:
|
||||
version "4.18.2"
|
||||
resolved "https://registry.npmjs.org/express/-/express-4.18.2.tgz"
|
||||
"#;
|
||||
let targets = parse_yarn_lock(content);
|
||||
assert_eq!(targets.len(), 2);
|
||||
assert_eq!(targets[0].name, "lodash");
|
||||
assert_eq!(targets[0].version, "4.17.21");
|
||||
assert_eq!(targets[1].name, "express");
|
||||
assert_eq!(targets[1].version, "4.18.2");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_yarn_lock_scoped() {
|
||||
let content = r#"
|
||||
"@babel/core@^7.26.0":
|
||||
version "7.26.0"
|
||||
resolved "https://registry.npmjs.org/@babel/core/-/core-7.26.0.tgz"
|
||||
"#;
|
||||
let targets = parse_yarn_lock(content);
|
||||
assert_eq!(targets.len(), 1);
|
||||
assert_eq!(targets[0].name, "@babel/core");
|
||||
assert_eq!(targets[0].version, "7.26.0");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_yarn_lock_multiple_ranges() {
|
||||
let content = r#"
|
||||
debug@2.6.9, debug@^2.2.0:
|
||||
version "2.6.9"
|
||||
resolved "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz"
|
||||
|
||||
debug@^4.1.0, debug@^4.3.4:
|
||||
version "4.3.7"
|
||||
resolved "https://registry.npmjs.org/debug/-/debug-4.3.7.tgz"
|
||||
"#;
|
||||
let targets = parse_yarn_lock(content);
|
||||
assert_eq!(targets.len(), 2);
|
||||
assert_eq!(targets[0].name, "debug");
|
||||
assert_eq!(targets[0].version, "2.6.9");
|
||||
assert_eq!(targets[1].name, "debug");
|
||||
assert_eq!(targets[1].version, "4.3.7");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_yarn_lock_dedup() {
|
||||
let content = r#"
|
||||
lodash@^4.0.0:
|
||||
version "4.17.21"
|
||||
|
||||
lodash@^4.17.0:
|
||||
version "4.17.21"
|
||||
"#;
|
||||
let targets = parse_yarn_lock(content);
|
||||
assert_eq!(targets.len(), 1); // same name+version deduped
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_yarn_lock_empty() {
|
||||
let targets = parse_yarn_lock(
|
||||
"# yarn lockfile v1
|
||||
|
||||
",
|
||||
);
|
||||
assert!(targets.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_yarn_lock_comments_only() {
|
||||
let content = "# yarn lockfile v1
|
||||
# comment
|
||||
";
|
||||
let targets = parse_yarn_lock(content);
|
||||
assert!(targets.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_extract_yarn_package_name_simple() {
|
||||
assert_eq!(extract_yarn_package_name("lodash@^4.17.21"), Some("lodash"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_extract_yarn_package_name_scoped() {
|
||||
assert_eq!(
|
||||
extract_yarn_package_name("@babel/core@^7.0.0"),
|
||||
Some("@babel/core")
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_extract_yarn_package_name_no_at() {
|
||||
assert_eq!(extract_yarn_package_name("lodash"), None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_yarn_lock_quoted_headers() {
|
||||
let content = r#"
|
||||
"@types/node@^20.0.0":
|
||||
version "20.11.5"
|
||||
resolved "https://registry.npmjs.org/@types/node/-/node-20.11.5.tgz"
|
||||
"#;
|
||||
let targets = parse_yarn_lock(content);
|
||||
assert_eq!(targets.len(), 1);
|
||||
assert_eq!(targets[0].name, "@types/node");
|
||||
assert_eq!(targets[0].version, "20.11.5");
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user