From 8c525bb5c29b9f1f48602ce27043b4c3402fb05b Mon Sep 17 00:00:00 2001 From: DevITWay Date: Fri, 30 Jan 2026 15:52:29 +0000 Subject: [PATCH] feat: add Docker image metadata support - Store metadata (.meta.json) alongside manifests with: - push_timestamp, last_pulled, downloads counter - size_bytes, os, arch, variant - layers list with digest and size - Update metadata on manifest pull (increment downloads, update last_pulled) - Extract OS/arch from config blob on push - Extend UI API TagInfo with metadata fields - Add public_url config option for pull commands - Add Docker upstream proxy with auth support - Add raw repository support - Bump version to 0.2.12 --- Cargo.lock | 6 +- Cargo.toml | 2 +- nora-registry/src/config.rs | 127 +++++- nora-registry/src/dashboard_metrics.rs | 8 + nora-registry/src/main.rs | 7 + nora-registry/src/registry/docker.rs | 500 +++++++++++++++++++++- nora-registry/src/registry/docker_auth.rs | 189 ++++++++ nora-registry/src/registry/mod.rs | 6 +- nora-registry/src/registry/pypi.rs | 28 +- nora-registry/src/registry/raw.rs | 133 ++++++ nora-registry/src/secrets/env.rs | 4 +- nora-registry/src/storage/local.rs | 14 + nora-registry/src/storage/mod.rs | 6 + nora-registry/src/storage/s3.rs | 21 + nora-registry/src/ui/api.rs | 80 +++- nora-registry/src/ui/mod.rs | 2 +- 16 files changed, 1090 insertions(+), 43 deletions(-) create mode 100644 nora-registry/src/registry/docker_auth.rs create mode 100644 nora-registry/src/registry/raw.rs diff --git a/Cargo.lock b/Cargo.lock index 1487014..7ff0244 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1185,7 +1185,7 @@ checksum = "38bf9645c8b145698bb0b18a4637dcacbc421ea49bef2317e4fd8065a387cf21" [[package]] name = "nora-cli" -version = "0.2.11" +version = "0.2.12" dependencies = [ "clap", "flate2", @@ -1199,7 +1199,7 @@ dependencies = [ [[package]] name = "nora-registry" -version = "0.2.11" +version = "0.2.12" dependencies = [ "async-trait", "axum", @@ -1235,7 +1235,7 @@ dependencies = [ [[package]] name = "nora-storage" -version = "0.2.11" +version = "0.2.12" dependencies = [ "axum", "base64", diff --git a/Cargo.toml b/Cargo.toml index 6187fde..bcd58d0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,7 +7,7 @@ members = [ ] [workspace.package] -version = "0.2.11" +version = "0.2.12" edition = "2021" license = "MIT" authors = ["DevITWay "] diff --git a/nora-registry/src/config.rs b/nora-registry/src/config.rs index a774302..031eee9 100644 --- a/nora-registry/src/config.rs +++ b/nora-registry/src/config.rs @@ -15,6 +15,10 @@ pub struct Config { #[serde(default)] pub pypi: PypiConfig, #[serde(default)] + pub docker: DockerConfig, + #[serde(default)] + pub raw: RawConfig, + #[serde(default)] pub auth: AuthConfig, #[serde(default)] pub rate_limit: RateLimitConfig, @@ -26,6 +30,9 @@ pub struct Config { pub struct ServerConfig { pub host: String, pub port: u16, + /// Public URL for generating pull commands (e.g., "registry.example.com") + #[serde(default)] + pub public_url: Option, } #[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)] @@ -84,6 +91,44 @@ pub struct PypiConfig { pub proxy_timeout: u64, } +/// Docker registry configuration with upstream proxy support +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct DockerConfig { + #[serde(default = "default_docker_timeout")] + pub proxy_timeout: u64, + #[serde(default)] + pub upstreams: Vec, +} + +/// Docker upstream registry configuration +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct DockerUpstream { + pub url: String, + #[serde(default)] + pub auth: Option, // "user:pass" for basic auth +} + +/// Raw repository configuration for simple file storage +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct RawConfig { + #[serde(default = "default_raw_enabled")] + pub enabled: bool, + #[serde(default = "default_max_file_size")] + pub max_file_size: u64, // in bytes +} + +fn default_docker_timeout() -> u64 { + 60 +} + +fn default_raw_enabled() -> bool { + true +} + +fn default_max_file_size() -> u64 { + 104_857_600 // 100MB +} + #[derive(Debug, Clone, Serialize, Deserialize)] pub struct AuthConfig { #[serde(default)] @@ -133,6 +178,27 @@ impl Default for PypiConfig { } } +impl Default for DockerConfig { + fn default() -> Self { + Self { + proxy_timeout: 60, + upstreams: vec![DockerUpstream { + url: "https://registry-1.docker.io".to_string(), + auth: None, + }], + } + } +} + +impl Default for RawConfig { + fn default() -> Self { + Self { + enabled: true, + max_file_size: 104_857_600, // 100MB + } + } +} + impl Default for AuthConfig { fn default() -> Self { Self { @@ -181,12 +247,24 @@ pub struct RateLimitConfig { pub general_burst: u32, } -fn default_auth_rps() -> u64 { 1 } -fn default_auth_burst() -> u32 { 5 } -fn default_upload_rps() -> u64 { 200 } -fn default_upload_burst() -> u32 { 500 } -fn default_general_rps() -> u64 { 100 } -fn default_general_burst() -> u32 { 200 } +fn default_auth_rps() -> u64 { + 1 +} +fn default_auth_burst() -> u32 { + 5 +} +fn default_upload_rps() -> u64 { + 200 +} +fn default_upload_burst() -> u32 { + 500 +} +fn default_general_rps() -> u64 { + 100 +} +fn default_general_burst() -> u32 { + 200 +} impl Default for RateLimitConfig { fn default() -> Self { @@ -227,6 +305,9 @@ impl Config { self.server.port = port; } } + if let Ok(val) = env::var("NORA_PUBLIC_URL") { + self.server.public_url = if val.is_empty() { None } else { Some(val) }; + } // Storage config if let Ok(val) = env::var("NORA_STORAGE_MODE") { @@ -283,6 +364,37 @@ impl Config { } } + // Docker config + if let Ok(val) = env::var("NORA_DOCKER_PROXY_TIMEOUT") { + if let Ok(timeout) = val.parse() { + self.docker.proxy_timeout = timeout; + } + } + // NORA_DOCKER_UPSTREAMS format: "url1,url2" or "url1|auth1,url2|auth2" + if let Ok(val) = env::var("NORA_DOCKER_UPSTREAMS") { + self.docker.upstreams = val + .split(',') + .filter(|s| !s.is_empty()) + .map(|s| { + let parts: Vec<&str> = s.trim().splitn(2, '|').collect(); + DockerUpstream { + url: parts[0].to_string(), + auth: parts.get(1).map(|a| a.to_string()), + } + }) + .collect(); + } + + // Raw config + if let Ok(val) = env::var("NORA_RAW_ENABLED") { + self.raw.enabled = val.to_lowercase() == "true" || val == "1"; + } + if let Ok(val) = env::var("NORA_RAW_MAX_FILE_SIZE") { + if let Ok(size) = val.parse() { + self.raw.max_file_size = size; + } + } + // Token storage if let Ok(val) = env::var("NORA_AUTH_TOKEN_STORAGE") { self.auth.token_storage = val; @@ -336,6 +448,7 @@ impl Default for Config { server: ServerConfig { host: String::from("127.0.0.1"), port: 4000, + public_url: None, }, storage: StorageConfig { mode: StorageMode::Local, @@ -346,6 +459,8 @@ impl Default for Config { maven: MavenConfig::default(), npm: NpmConfig::default(), pypi: PypiConfig::default(), + docker: DockerConfig::default(), + raw: RawConfig::default(), auth: AuthConfig::default(), rate_limit: RateLimitConfig::default(), secrets: SecretsConfig::default(), diff --git a/nora-registry/src/dashboard_metrics.rs b/nora-registry/src/dashboard_metrics.rs index 85bd0e9..292f55e 100644 --- a/nora-registry/src/dashboard_metrics.rs +++ b/nora-registry/src/dashboard_metrics.rs @@ -18,6 +18,8 @@ pub struct DashboardMetrics { pub maven_uploads: AtomicU64, pub cargo_downloads: AtomicU64, pub pypi_downloads: AtomicU64, + pub raw_downloads: AtomicU64, + pub raw_uploads: AtomicU64, pub start_time: Instant, } @@ -36,6 +38,8 @@ impl DashboardMetrics { maven_uploads: AtomicU64::new(0), cargo_downloads: AtomicU64::new(0), pypi_downloads: AtomicU64::new(0), + raw_downloads: AtomicU64::new(0), + raw_uploads: AtomicU64::new(0), start_time: Instant::now(), } } @@ -49,6 +53,7 @@ impl DashboardMetrics { "maven" => self.maven_downloads.fetch_add(1, Ordering::Relaxed), "cargo" => self.cargo_downloads.fetch_add(1, Ordering::Relaxed), "pypi" => self.pypi_downloads.fetch_add(1, Ordering::Relaxed), + "raw" => self.raw_downloads.fetch_add(1, Ordering::Relaxed), _ => 0, }; } @@ -59,6 +64,7 @@ impl DashboardMetrics { match registry { "docker" => self.docker_uploads.fetch_add(1, Ordering::Relaxed), "maven" => self.maven_uploads.fetch_add(1, Ordering::Relaxed), + "raw" => self.raw_uploads.fetch_add(1, Ordering::Relaxed), _ => 0, }; } @@ -93,6 +99,7 @@ impl DashboardMetrics { "maven" => self.maven_downloads.load(Ordering::Relaxed), "cargo" => self.cargo_downloads.load(Ordering::Relaxed), "pypi" => self.pypi_downloads.load(Ordering::Relaxed), + "raw" => self.raw_downloads.load(Ordering::Relaxed), _ => 0, } } @@ -102,6 +109,7 @@ impl DashboardMetrics { match registry { "docker" => self.docker_uploads.load(Ordering::Relaxed), "maven" => self.maven_uploads.load(Ordering::Relaxed), + "raw" => self.raw_uploads.load(Ordering::Relaxed), _ => 0, } } diff --git a/nora-registry/src/main.rs b/nora-registry/src/main.rs index 64ac340..61a02a5 100644 --- a/nora-registry/src/main.rs +++ b/nora-registry/src/main.rs @@ -78,6 +78,7 @@ pub struct AppState { pub tokens: Option, pub metrics: DashboardMetrics, pub activity: ActivityLog, + pub docker_auth: registry::DockerAuth, } #[tokio::main] @@ -241,6 +242,9 @@ async fn run_server(config: Config, storage: Storage) { let upload_limiter = rate_limit::upload_rate_limiter(&config.rate_limit); let general_limiter = rate_limit::general_rate_limiter(&config.rate_limit); + // Initialize Docker auth with proxy timeout + let docker_auth = registry::DockerAuth::new(config.docker.proxy_timeout); + let state = Arc::new(AppState { storage, config, @@ -249,6 +253,7 @@ async fn run_server(config: Config, storage: Storage) { tokens, metrics: DashboardMetrics::new(), activity: ActivityLog::new(50), + docker_auth, }); // Token routes with strict rate limiting (brute-force protection) @@ -261,6 +266,7 @@ async fn run_server(config: Config, storage: Storage) { .merge(registry::npm_routes()) .merge(registry::cargo_routes()) .merge(registry::pypi_routes()) + .merge(registry::raw_routes()) .layer(upload_limiter); // Routes WITHOUT rate limiting (health, metrics, UI) @@ -312,6 +318,7 @@ async fn run_server(config: Config, storage: Storage) { npm = "/npm/", cargo = "/cargo/", pypi = "/simple/", + raw = "/raw/", "Available endpoints" ); diff --git a/nora-registry/src/registry/docker.rs b/nora-registry/src/registry/docker.rs index 8673ca9..e3c55f3 100644 --- a/nora-registry/src/registry/docker.rs +++ b/nora-registry/src/registry/docker.rs @@ -1,4 +1,6 @@ use crate::activity_log::{ActionType, ActivityEntry}; +use crate::registry::docker_auth::DockerAuth; +use crate::storage::Storage; use crate::validation::{validate_digest, validate_docker_name, validate_docker_reference}; use crate::AppState; use axum::{ @@ -10,9 +12,32 @@ use axum::{ Json, Router, }; use parking_lot::RwLock; +use serde::{Deserialize, Serialize}; use serde_json::{json, Value}; use std::collections::HashMap; use std::sync::Arc; +use std::time::{Duration, SystemTime, UNIX_EPOCH}; + +/// Metadata for a Docker image stored alongside manifests +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +pub struct ImageMetadata { + pub push_timestamp: u64, + pub last_pulled: u64, + pub downloads: u64, + pub size_bytes: u64, + pub os: String, + pub arch: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub variant: Option, + pub layers: Vec, +} + +/// Information about a single layer in a Docker image +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct LayerInfo { + pub digest: String, + pub size: u64, +} /// In-progress upload sessions for chunked uploads /// Maps UUID -> accumulated data @@ -75,25 +100,63 @@ async fn download_blob( } let key = format!("docker/{}/blobs/{}", name, digest); - match state.storage.get(&key).await { - Ok(data) => { + + // Try local storage first + if let Ok(data) = state.storage.get(&key).await { + state.metrics.record_download("docker"); + state.metrics.record_cache_hit(); + state.activity.push(ActivityEntry::new( + ActionType::Pull, + format!("{}@{}", name, &digest[..19.min(digest.len())]), + "docker", + "LOCAL", + )); + return ( + StatusCode::OK, + [(header::CONTENT_TYPE, "application/octet-stream")], + data, + ) + .into_response(); + } + + // Try upstream proxies + for upstream in &state.config.docker.upstreams { + if let Ok(data) = fetch_blob_from_upstream( + &upstream.url, + &name, + &digest, + &state.docker_auth, + state.config.docker.proxy_timeout, + ) + .await + { state.metrics.record_download("docker"); - state.metrics.record_cache_hit(); + state.metrics.record_cache_miss(); state.activity.push(ActivityEntry::new( - ActionType::Pull, + ActionType::ProxyFetch, format!("{}@{}", name, &digest[..19.min(digest.len())]), "docker", - "LOCAL", + "PROXY", )); - ( + + // Cache in storage (fire and forget) + let storage = state.storage.clone(); + let key_clone = key.clone(); + let data_clone = data.clone(); + tokio::spawn(async move { + let _ = storage.put(&key_clone, &data_clone).await; + }); + + return ( StatusCode::OK, [(header::CONTENT_TYPE, "application/octet-stream")], - data, + Bytes::from(data), ) - .into_response() + .into_response(); } - Err(_) => StatusCode::NOT_FOUND.into_response(), } + + StatusCode::NOT_FOUND.into_response() } async fn start_upload(Path(name): Path) -> Response { @@ -213,35 +276,106 @@ async fn get_manifest( } let key = format!("docker/{}/manifests/{}.json", name, reference); - match state.storage.get(&key).await { - Ok(data) => { + + // Try local storage first + if let Ok(data) = state.storage.get(&key).await { + state.metrics.record_download("docker"); + state.metrics.record_cache_hit(); + state.activity.push(ActivityEntry::new( + ActionType::Pull, + format!("{}:{}", name, reference), + "docker", + "LOCAL", + )); + + // Calculate digest for Docker-Content-Digest header + use sha2::Digest; + let digest = format!("sha256:{:x}", sha2::Sha256::digest(&data)); + + // Detect manifest media type from content + let content_type = detect_manifest_media_type(&data); + + // Update metadata (downloads, last_pulled) in background + let meta_key = format!("docker/{}/manifests/{}.meta.json", name, reference); + let storage_clone = state.storage.clone(); + tokio::spawn(update_metadata_on_pull(storage_clone, meta_key)); + + return ( + StatusCode::OK, + [ + (header::CONTENT_TYPE, content_type), + (HeaderName::from_static("docker-content-digest"), digest), + ], + data, + ) + .into_response(); + } + + // Try upstream proxies + for upstream in &state.config.docker.upstreams { + if let Ok((data, content_type)) = fetch_manifest_from_upstream( + &upstream.url, + &name, + &reference, + &state.docker_auth, + state.config.docker.proxy_timeout, + ) + .await + { state.metrics.record_download("docker"); - state.metrics.record_cache_hit(); + state.metrics.record_cache_miss(); state.activity.push(ActivityEntry::new( - ActionType::Pull, + ActionType::ProxyFetch, format!("{}:{}", name, reference), "docker", - "LOCAL", + "PROXY", )); // Calculate digest for Docker-Content-Digest header use sha2::Digest; let digest = format!("sha256:{:x}", sha2::Sha256::digest(&data)); - ( + + // Cache manifest and create metadata (fire and forget) + let storage = state.storage.clone(); + let key_clone = key.clone(); + let data_clone = data.clone(); + let name_clone = name.clone(); + let reference_clone = reference.clone(); + let digest_clone = digest.clone(); + tokio::spawn(async move { + // Store manifest by tag and digest + let _ = storage.put(&key_clone, &data_clone).await; + let digest_key = format!("docker/{}/manifests/{}.json", name_clone, digest_clone); + let _ = storage.put(&digest_key, &data_clone).await; + + // Extract and save metadata + let metadata = extract_metadata(&data_clone, &storage, &name_clone).await; + if let Ok(meta_json) = serde_json::to_vec(&metadata) { + let meta_key = format!( + "docker/{}/manifests/{}.meta.json", + name_clone, reference_clone + ); + let _ = storage.put(&meta_key, &meta_json).await; + + let digest_meta_key = + format!("docker/{}/manifests/{}.meta.json", name_clone, digest_clone); + let _ = storage.put(&digest_meta_key, &meta_json).await; + } + }); + + return ( StatusCode::OK, [ - ( - header::CONTENT_TYPE, - "application/vnd.docker.distribution.manifest.v2+json".to_string(), - ), + (header::CONTENT_TYPE, content_type), (HeaderName::from_static("docker-content-digest"), digest), ], - data, + Bytes::from(data), ) - .into_response() + .into_response(); } - Err(_) => StatusCode::NOT_FOUND.into_response(), } + + StatusCode::NOT_FOUND.into_response() } async fn put_manifest( @@ -272,6 +406,17 @@ async fn put_manifest( return StatusCode::INTERNAL_SERVER_ERROR.into_response(); } + // Extract and save metadata + let metadata = extract_metadata(&body, &state.storage, &name).await; + let meta_key = format!("docker/{}/manifests/{}.meta.json", name, reference); + if let Ok(meta_json) = serde_json::to_vec(&metadata) { + let _ = state.storage.put(&meta_key, &meta_json).await; + + // Also save metadata by digest + let digest_meta_key = format!("docker/{}/manifests/{}.meta.json", name, digest); + let _ = state.storage.put(&digest_meta_key, &meta_json).await; + } + state.metrics.record_upload("docker"); state.activity.push(ActivityEntry::new( ActionType::Push, @@ -308,3 +453,314 @@ async fn list_tags(State(state): State>, Path(name): Path) .collect(); (StatusCode::OK, Json(json!({"name": name, "tags": tags}))).into_response() } + +/// Fetch a blob from an upstream Docker registry +async fn fetch_blob_from_upstream( + upstream_url: &str, + name: &str, + digest: &str, + docker_auth: &DockerAuth, + timeout: u64, +) -> Result, ()> { + let url = format!( + "{}/v2/{}/blobs/{}", + upstream_url.trim_end_matches('/'), + name, + digest + ); + + let client = reqwest::Client::builder() + .timeout(Duration::from_secs(timeout)) + .build() + .map_err(|_| ())?; + + // First try without auth + let response = client.get(&url).send().await.map_err(|_| ())?; + + let response = if response.status() == reqwest::StatusCode::UNAUTHORIZED { + // Get Www-Authenticate header and fetch token + let www_auth = response + .headers() + .get("www-authenticate") + .and_then(|v| v.to_str().ok()) + .map(String::from); + + if let Some(token) = docker_auth + .get_token(upstream_url, name, www_auth.as_deref()) + .await + { + client + .get(&url) + .header("Authorization", format!("Bearer {}", token)) + .send() + .await + .map_err(|_| ())? + } else { + return Err(()); + } + } else { + response + }; + + if !response.status().is_success() { + return Err(()); + } + + response.bytes().await.map(|b| b.to_vec()).map_err(|_| ()) +} + +/// Fetch a manifest from an upstream Docker registry +/// Returns (manifest_bytes, content_type) +async fn fetch_manifest_from_upstream( + upstream_url: &str, + name: &str, + reference: &str, + docker_auth: &DockerAuth, + timeout: u64, +) -> Result<(Vec, String), ()> { + let url = format!( + "{}/v2/{}/manifests/{}", + upstream_url.trim_end_matches('/'), + name, + reference + ); + + let client = reqwest::Client::builder() + .timeout(Duration::from_secs(timeout)) + .build() + .map_err(|_| ())?; + + // Request with Accept header for manifest types + let accept_header = "application/vnd.docker.distribution.manifest.v2+json, \ + application/vnd.docker.distribution.manifest.list.v2+json, \ + application/vnd.oci.image.manifest.v1+json, \ + application/vnd.oci.image.index.v1+json"; + + // First try without auth + let response = client + .get(&url) + .header("Accept", accept_header) + .send() + .await + .map_err(|_| ())?; + + let response = if response.status() == reqwest::StatusCode::UNAUTHORIZED { + // Get Www-Authenticate header and fetch token + let www_auth = response + .headers() + .get("www-authenticate") + .and_then(|v| v.to_str().ok()) + .map(String::from); + + if let Some(token) = docker_auth + .get_token(upstream_url, name, www_auth.as_deref()) + .await + { + client + .get(&url) + .header("Accept", accept_header) + .header("Authorization", format!("Bearer {}", token)) + .send() + .await + .map_err(|_| ())? + } else { + return Err(()); + } + } else { + response + }; + + if !response.status().is_success() { + return Err(()); + } + + let content_type = response + .headers() + .get("content-type") + .and_then(|v| v.to_str().ok()) + .unwrap_or("application/vnd.docker.distribution.manifest.v2+json") + .to_string(); + + let bytes = response.bytes().await.map_err(|_| ())?; + + Ok((bytes.to_vec(), content_type)) +} + +/// Detect manifest media type from its JSON content +fn detect_manifest_media_type(data: &[u8]) -> String { + // Try to parse as JSON and extract mediaType + if let Ok(json) = serde_json::from_slice::(data) { + if let Some(media_type) = json.get("mediaType").and_then(|v| v.as_str()) { + return media_type.to_string(); + } + + // Check schemaVersion for older manifests + if let Some(schema_version) = json.get("schemaVersion").and_then(|v| v.as_u64()) { + if schema_version == 1 { + return "application/vnd.docker.distribution.manifest.v1+json".to_string(); + } + // schemaVersion 2 without mediaType is likely docker manifest v2 + if json.get("config").is_some() { + return "application/vnd.docker.distribution.manifest.v2+json".to_string(); + } + // If it has "manifests" array, it's an index/list + if json.get("manifests").is_some() { + return "application/vnd.oci.image.index.v1+json".to_string(); + } + } + } + + // Default fallback + "application/vnd.docker.distribution.manifest.v2+json".to_string() +} + +/// Extract metadata from a Docker manifest +/// Handles both single-arch manifests and multi-arch indexes +async fn extract_metadata(manifest: &[u8], storage: &Storage, name: &str) -> ImageMetadata { + let now = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_secs(); + + let mut metadata = ImageMetadata { + push_timestamp: now, + last_pulled: 0, + downloads: 0, + ..Default::default() + }; + + let Ok(json) = serde_json::from_slice::(manifest) else { + return metadata; + }; + + // Check if this is a manifest list/index (multi-arch) + if json.get("manifests").is_some() { + // For multi-arch, extract info from the first platform manifest + if let Some(manifests) = json.get("manifests").and_then(|m| m.as_array()) { + // Sum sizes from all platform manifests + let total_size: u64 = manifests + .iter() + .filter_map(|m| m.get("size").and_then(|s| s.as_u64())) + .sum(); + metadata.size_bytes = total_size; + + // Get OS/arch from first platform (usually linux/amd64) + if let Some(first) = manifests.first() { + if let Some(platform) = first.get("platform") { + metadata.os = platform + .get("os") + .and_then(|v| v.as_str()) + .unwrap_or("multi-arch") + .to_string(); + metadata.arch = platform + .get("architecture") + .and_then(|v| v.as_str()) + .unwrap_or("multi") + .to_string(); + metadata.variant = platform + .get("variant") + .and_then(|v| v.as_str()) + .map(String::from); + } + } + } + return metadata; + } + + // Single-arch manifest - extract layers + if let Some(layers) = json.get("layers").and_then(|l| l.as_array()) { + let mut total_size: u64 = 0; + for layer in layers { + let digest = layer + .get("digest") + .and_then(|d| d.as_str()) + .unwrap_or("") + .to_string(); + let size = layer.get("size").and_then(|s| s.as_u64()).unwrap_or(0); + total_size += size; + metadata.layers.push(LayerInfo { digest, size }); + } + metadata.size_bytes = total_size; + } + + // Try to get OS/arch from config blob + if let Some(config) = json.get("config") { + if let Some(config_digest) = config.get("digest").and_then(|d| d.as_str()) { + let (os, arch, variant) = get_config_info(storage, name, config_digest).await; + metadata.os = os; + metadata.arch = arch; + metadata.variant = variant; + } + } + + // If we couldn't get OS/arch, set defaults + if metadata.os.is_empty() { + metadata.os = "unknown".to_string(); + } + if metadata.arch.is_empty() { + metadata.arch = "unknown".to_string(); + } + + metadata +} + +/// Get OS/arch information from a config blob +async fn get_config_info( + storage: &Storage, + name: &str, + config_digest: &str, +) -> (String, String, Option) { + let key = format!("docker/{}/blobs/{}", name, config_digest); + + let Ok(data) = storage.get(&key).await else { + return ("unknown".to_string(), "unknown".to_string(), None); + }; + + let Ok(config) = serde_json::from_slice::(&data) else { + return ("unknown".to_string(), "unknown".to_string(), None); + }; + + let os = config + .get("os") + .and_then(|v| v.as_str()) + .unwrap_or("unknown") + .to_string(); + + let arch = config + .get("architecture") + .and_then(|v| v.as_str()) + .unwrap_or("unknown") + .to_string(); + + let variant = config + .get("variant") + .and_then(|v| v.as_str()) + .map(String::from); + + (os, arch, variant) +} + +/// Update metadata when a manifest is pulled +/// Increments download counter and updates last_pulled timestamp +async fn update_metadata_on_pull(storage: Storage, meta_key: String) { + let now = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_secs(); + + // Try to read existing metadata + let mut metadata = if let Ok(data) = storage.get(&meta_key).await { + serde_json::from_slice::(&data).unwrap_or_default() + } else { + ImageMetadata::default() + }; + + // Update pull stats + metadata.downloads += 1; + metadata.last_pulled = now; + + // Save back + if let Ok(json) = serde_json::to_vec(&metadata) { + let _ = storage.put(&meta_key, &json).await; + } +} diff --git a/nora-registry/src/registry/docker_auth.rs b/nora-registry/src/registry/docker_auth.rs new file mode 100644 index 0000000..6340bf9 --- /dev/null +++ b/nora-registry/src/registry/docker_auth.rs @@ -0,0 +1,189 @@ +use parking_lot::RwLock; +use std::collections::HashMap; +use std::time::{Duration, Instant}; + +/// Cached Docker registry token +struct CachedToken { + token: String, + expires_at: Instant, +} + +/// Docker registry authentication handler +/// Manages Bearer token acquisition and caching for upstream registries +pub struct DockerAuth { + tokens: RwLock>, + client: reqwest::Client, +} + +impl DockerAuth { + pub fn new(timeout: u64) -> Self { + Self { + tokens: RwLock::new(HashMap::new()), + client: reqwest::Client::builder() + .timeout(Duration::from_secs(timeout)) + .build() + .unwrap_or_default(), + } + } + + /// Get a valid token for the given registry and repository scope + /// Returns cached token if still valid, otherwise fetches a new one + pub async fn get_token( + &self, + registry_url: &str, + name: &str, + www_authenticate: Option<&str>, + ) -> Option { + let cache_key = format!("{}:{}", registry_url, name); + + // Check cache first + { + let tokens = self.tokens.read(); + if let Some(cached) = tokens.get(&cache_key) { + if cached.expires_at > Instant::now() { + return Some(cached.token.clone()); + } + } + } + + // Need to fetch a new token + let www_auth = www_authenticate?; + let token = self.fetch_token(www_auth, name).await?; + + // Cache the token (default 5 minute expiry) + { + let mut tokens = self.tokens.write(); + tokens.insert( + cache_key, + CachedToken { + token: token.clone(), + expires_at: Instant::now() + Duration::from_secs(300), + }, + ); + } + + Some(token) + } + + /// Parse Www-Authenticate header and fetch token from auth server + /// Format: Bearer realm="https://auth.docker.io/token",service="registry.docker.io",scope="repository:library/alpine:pull" + async fn fetch_token(&self, www_authenticate: &str, name: &str) -> Option { + let params = parse_www_authenticate(www_authenticate)?; + + let realm = params.get("realm")?; + let service = params.get("service").map(|s| s.as_str()).unwrap_or(""); + + // Build token request URL + let scope = format!("repository:{}:pull", name); + let url = format!("{}?service={}&scope={}", realm, service, scope); + + let response = self.client.get(&url).send().await.ok()?; + + if !response.status().is_success() { + return None; + } + + let json: serde_json::Value = response.json().await.ok()?; + + // Docker Hub returns "token", some registries return "access_token" + json.get("token") + .or_else(|| json.get("access_token")) + .and_then(|v| v.as_str()) + .map(String::from) + } + + /// Make an authenticated request to an upstream registry + pub async fn fetch_with_auth( + &self, + url: &str, + registry_url: &str, + name: &str, + ) -> Result { + // First try without auth + let response = self.client.get(url).send().await.map_err(|_| ())?; + + if response.status() == reqwest::StatusCode::UNAUTHORIZED { + // Extract Www-Authenticate header + let www_auth = response + .headers() + .get("www-authenticate") + .and_then(|v| v.to_str().ok()) + .map(String::from); + + // Get token and retry + if let Some(token) = self + .get_token(registry_url, name, www_auth.as_deref()) + .await + { + return self + .client + .get(url) + .header("Authorization", format!("Bearer {}", token)) + .send() + .await + .map_err(|_| ()); + } + + return Err(()); + } + + Ok(response) + } +} + +impl Default for DockerAuth { + fn default() -> Self { + Self::new(60) + } +} + +/// Parse Www-Authenticate header into key-value pairs +/// Example: Bearer realm="https://auth.docker.io/token",service="registry.docker.io" +fn parse_www_authenticate(header: &str) -> Option> { + let header = header + .strip_prefix("Bearer ") + .or_else(|| header.strip_prefix("bearer "))?; + + let mut params = HashMap::new(); + + for part in header.split(',') { + let part = part.trim(); + if let Some((key, value)) = part.split_once('=') { + let value = value.trim_matches('"'); + params.insert(key.to_string(), value.to_string()); + } + } + + Some(params) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_www_authenticate() { + let header = r#"Bearer realm="https://auth.docker.io/token",service="registry.docker.io",scope="repository:library/alpine:pull""#; + let params = parse_www_authenticate(header).unwrap(); + + assert_eq!( + params.get("realm"), + Some(&"https://auth.docker.io/token".to_string()) + ); + assert_eq!( + params.get("service"), + Some(&"registry.docker.io".to_string()) + ); + } + + #[test] + fn test_parse_www_authenticate_lowercase() { + let header = r#"bearer realm="https://ghcr.io/token",service="ghcr.io""#; + let params = parse_www_authenticate(header).unwrap(); + + assert_eq!( + params.get("realm"), + Some(&"https://ghcr.io/token".to_string()) + ); + } +} diff --git a/nora-registry/src/registry/mod.rs b/nora-registry/src/registry/mod.rs index 78584d5..43377cd 100644 --- a/nora-registry/src/registry/mod.rs +++ b/nora-registry/src/registry/mod.rs @@ -1,11 +1,15 @@ mod cargo_registry; -mod docker; +pub mod docker; +pub mod docker_auth; mod maven; mod npm; mod pypi; +mod raw; pub use cargo_registry::routes as cargo_routes; pub use docker::routes as docker_routes; +pub use docker_auth::DockerAuth; pub use maven::routes as maven_routes; pub use npm::routes as npm_routes; pub use pypi::routes as pypi_routes; +pub use raw::routes as raw_routes; diff --git a/nora-registry/src/registry/pypi.rs b/nora-registry/src/registry/pypi.rs index dd6d06b..cd75b30 100644 --- a/nora-registry/src/registry/pypi.rs +++ b/nora-registry/src/registry/pypi.rs @@ -80,7 +80,7 @@ async fn package_versions( // Try proxy if configured if let Some(proxy_url) = &state.config.pypi.proxy { - let url = format!("{}{}/", proxy_url.trim_end_matches('/'), normalized); + let url = format!("{}/{}/", proxy_url.trim_end_matches('/'), normalized); if let Ok(html) = fetch_package_page(&url, state.config.pypi.proxy_timeout).await { // Rewrite URLs in the HTML to point to our registry @@ -125,7 +125,7 @@ async fn download_file( // Try proxy if configured if let Some(proxy_url) = &state.config.pypi.proxy { // First, fetch the package page to find the actual download URL - let page_url = format!("{}{}/", proxy_url.trim_end_matches('/'), normalized); + let page_url = format!("{}/{}/", proxy_url.trim_end_matches('/'), normalized); if let Ok(html) = fetch_package_page(&page_url, state.config.pypi.proxy_timeout).await { // Find the URL for this specific file @@ -233,6 +233,30 @@ fn rewrite_pypi_links(html: &str, package_name: &str) -> String { } } result.push_str(remaining); + + // Remove data-core-metadata and data-dist-info-metadata attributes + // as we don't serve .metadata files (PEP 658) + let result = remove_attribute(&result, "data-core-metadata"); + let result = remove_attribute(&result, "data-dist-info-metadata"); + result +} + +/// Remove an HTML attribute from all tags +fn remove_attribute(html: &str, attr_name: &str) -> String { + let mut result = String::with_capacity(html.len()); + let mut remaining = html; + let pattern = format!(" {}=\"", attr_name); + + while let Some(attr_start) = remaining.find(&pattern) { + result.push_str(&remaining[..attr_start]); + remaining = &remaining[attr_start + pattern.len()..]; + + // Skip the attribute value + if let Some(attr_end) = remaining.find('"') { + remaining = &remaining[attr_end + 1..]; + } + } + result.push_str(remaining); result } diff --git a/nora-registry/src/registry/raw.rs b/nora-registry/src/registry/raw.rs new file mode 100644 index 0000000..0afc10f --- /dev/null +++ b/nora-registry/src/registry/raw.rs @@ -0,0 +1,133 @@ +use crate::activity_log::{ActionType, ActivityEntry}; +use crate::AppState; +use axum::{ + body::Bytes, + extract::{Path, State}, + http::{header, StatusCode}, + response::{IntoResponse, Response}, + routing::get, + Router, +}; +use std::sync::Arc; + +pub fn routes() -> Router> { + Router::new().route( + "/raw/{*path}", + get(download) + .put(upload) + .delete(delete_file) + .head(check_exists), + ) +} + +async fn download(State(state): State>, Path(path): Path) -> Response { + if !state.config.raw.enabled { + return StatusCode::NOT_FOUND.into_response(); + } + + let key = format!("raw/{}", path); + match state.storage.get(&key).await { + Ok(data) => { + state.metrics.record_download("raw"); + state + .activity + .push(ActivityEntry::new(ActionType::Pull, path, "raw", "LOCAL")); + + // Guess content type from extension + let content_type = guess_content_type(&key); + (StatusCode::OK, [(header::CONTENT_TYPE, content_type)], data).into_response() + } + Err(_) => StatusCode::NOT_FOUND.into_response(), + } +} + +async fn upload( + State(state): State>, + Path(path): Path, + body: Bytes, +) -> Response { + if !state.config.raw.enabled { + return StatusCode::NOT_FOUND.into_response(); + } + + // Check file size limit + if body.len() as u64 > state.config.raw.max_file_size { + return ( + StatusCode::PAYLOAD_TOO_LARGE, + format!( + "File too large. Max size: {} bytes", + state.config.raw.max_file_size + ), + ) + .into_response(); + } + + let key = format!("raw/{}", path); + match state.storage.put(&key, &body).await { + Ok(()) => { + state.metrics.record_upload("raw"); + state + .activity + .push(ActivityEntry::new(ActionType::Push, path, "raw", "LOCAL")); + StatusCode::CREATED.into_response() + } + Err(_) => StatusCode::INTERNAL_SERVER_ERROR.into_response(), + } +} + +async fn delete_file(State(state): State>, Path(path): Path) -> Response { + if !state.config.raw.enabled { + return StatusCode::NOT_FOUND.into_response(); + } + + let key = format!("raw/{}", path); + match state.storage.delete(&key).await { + Ok(()) => StatusCode::NO_CONTENT.into_response(), + Err(crate::storage::StorageError::NotFound) => StatusCode::NOT_FOUND.into_response(), + Err(_) => StatusCode::INTERNAL_SERVER_ERROR.into_response(), + } +} + +async fn check_exists(State(state): State>, Path(path): Path) -> Response { + if !state.config.raw.enabled { + return StatusCode::NOT_FOUND.into_response(); + } + + let key = format!("raw/{}", path); + match state.storage.stat(&key).await { + Some(meta) => ( + StatusCode::OK, + [ + (header::CONTENT_LENGTH, meta.size.to_string()), + (header::CONTENT_TYPE, guess_content_type(&key).to_string()), + ], + ) + .into_response(), + None => StatusCode::NOT_FOUND.into_response(), + } +} + +fn guess_content_type(path: &str) -> &'static str { + let ext = path.rsplit('.').next().unwrap_or(""); + match ext.to_lowercase().as_str() { + "json" => "application/json", + "xml" => "application/xml", + "html" | "htm" => "text/html", + "css" => "text/css", + "js" => "application/javascript", + "txt" => "text/plain", + "md" => "text/markdown", + "yaml" | "yml" => "application/x-yaml", + "toml" => "application/toml", + "tar" => "application/x-tar", + "gz" | "gzip" => "application/gzip", + "zip" => "application/zip", + "png" => "image/png", + "jpg" | "jpeg" => "image/jpeg", + "gif" => "image/gif", + "svg" => "image/svg+xml", + "pdf" => "application/pdf", + "wasm" => "application/wasm", + _ => "application/octet-stream", + } +} diff --git a/nora-registry/src/secrets/env.rs b/nora-registry/src/secrets/env.rs index 3af3ce3..05be59e 100644 --- a/nora-registry/src/secrets/env.rs +++ b/nora-registry/src/secrets/env.rs @@ -101,7 +101,9 @@ mod tests { #[tokio::test] async fn test_get_secret_optional_not_found() { let provider = EnvProvider::new(); - let secret = provider.get_secret_optional("NONEXISTENT_OPTIONAL_XYZ").await; + let secret = provider + .get_secret_optional("NONEXISTENT_OPTIONAL_XYZ") + .await; assert!(secret.is_none()); } diff --git a/nora-registry/src/storage/local.rs b/nora-registry/src/storage/local.rs index 86e884f..03434d5 100644 --- a/nora-registry/src/storage/local.rs +++ b/nora-registry/src/storage/local.rs @@ -85,6 +85,20 @@ impl StorageBackend for LocalStorage { Ok(Bytes::from(buffer)) } + async fn delete(&self, key: &str) -> Result<()> { + let path = self.key_to_path(key); + + if !path.exists() { + return Err(StorageError::NotFound); + } + + fs::remove_file(&path) + .await + .map_err(|e| StorageError::Io(e.to_string()))?; + + Ok(()) + } + async fn list(&self, prefix: &str) -> Vec { let base = self.base_path.clone(); let prefix = prefix.to_string(); diff --git a/nora-registry/src/storage/mod.rs b/nora-registry/src/storage/mod.rs index 74a36be..02abfe4 100644 --- a/nora-registry/src/storage/mod.rs +++ b/nora-registry/src/storage/mod.rs @@ -39,6 +39,7 @@ pub type Result = std::result::Result; pub trait StorageBackend: Send + Sync { async fn put(&self, key: &str, data: &[u8]) -> Result<()>; async fn get(&self, key: &str) -> Result; + async fn delete(&self, key: &str) -> Result<()>; async fn list(&self, prefix: &str) -> Vec; async fn stat(&self, key: &str) -> Option; async fn health_check(&self) -> bool; @@ -74,6 +75,11 @@ impl Storage { self.inner.get(key).await } + pub async fn delete(&self, key: &str) -> Result<()> { + validate_storage_key(key)?; + self.inner.delete(key).await + } + pub async fn list(&self, prefix: &str) -> Vec { // Empty prefix is valid for listing all if !prefix.is_empty() && validate_storage_key(prefix).is_err() { diff --git a/nora-registry/src/storage/s3.rs b/nora-registry/src/storage/s3.rs index 5057d2b..a4b418a 100644 --- a/nora-registry/src/storage/s3.rs +++ b/nora-registry/src/storage/s3.rs @@ -74,6 +74,27 @@ impl StorageBackend for S3Storage { } } + async fn delete(&self, key: &str) -> Result<()> { + let url = format!("{}/{}/{}", self.s3_url, self.bucket, key); + let response = self + .client + .delete(&url) + .send() + .await + .map_err(|e| StorageError::Network(e.to_string()))?; + + if response.status().is_success() || response.status().as_u16() == 204 { + Ok(()) + } else if response.status().as_u16() == 404 { + Err(StorageError::NotFound) + } else { + Err(StorageError::Network(format!( + "DELETE failed: {}", + response.status() + ))) + } + } + async fn list(&self, prefix: &str) -> Vec { let url = format!("{}/{}", self.s3_url, self.bucket); match self.client.get(&url).send().await { diff --git a/nora-registry/src/ui/api.rs b/nora-registry/src/ui/api.rs index 7b13b01..b665f7e 100644 --- a/nora-registry/src/ui/api.rs +++ b/nora-registry/src/ui/api.rs @@ -34,6 +34,12 @@ pub struct TagInfo { pub name: String, pub size: u64, pub created: String, + pub downloads: u64, + pub last_pulled: Option, + pub os: String, + pub arch: String, + pub layers_count: usize, + pub pull_command: String, } #[derive(Serialize)] @@ -252,7 +258,7 @@ pub async fn api_detail( ) -> Json { match registry_type.as_str() { "docker" => { - let detail = get_docker_detail(&state.storage, &name).await; + let detail = get_docker_detail(&state, &name).await; Json(serde_json::to_value(detail).unwrap_or_default()) } "npm" => { @@ -425,25 +431,87 @@ pub async fn get_docker_repos(storage: &Storage) -> Vec { result } -pub async fn get_docker_detail(storage: &Storage, name: &str) -> DockerDetail { +pub async fn get_docker_detail(state: &AppState, name: &str) -> DockerDetail { let prefix = format!("docker/{}/manifests/", name); - let keys = storage.list(&prefix).await; + let keys = state.storage.list(&prefix).await; + + // Build public URL for pull commands + let registry_host = state + .config + .server + .public_url + .clone() + .unwrap_or_else(|| format!("{}:{}", state.config.server.host, state.config.server.port)); let mut tags = Vec::new(); for key in &keys { + // Skip .meta.json files + if key.ends_with(".meta.json") { + continue; + } + if let Some(tag_name) = key .strip_prefix(&prefix) .and_then(|s| s.strip_suffix(".json")) { - let (size, created) = if let Some(meta) = storage.stat(key).await { - (meta.size, format_timestamp(meta.modified)) + // Load metadata from .meta.json file + let meta_key = format!("{}.meta.json", key.trim_end_matches(".json")); + let metadata = if let Ok(meta_data) = state.storage.get(&meta_key).await { + serde_json::from_slice::(&meta_data) + .unwrap_or_default() } else { - (0, "N/A".to_string()) + crate::registry::docker::ImageMetadata::default() }; + + // Get file stats for created timestamp if metadata doesn't have push_timestamp + let created = if metadata.push_timestamp > 0 { + format_timestamp(metadata.push_timestamp) + } else if let Some(file_meta) = state.storage.stat(key).await { + format_timestamp(file_meta.modified) + } else { + "N/A".to_string() + }; + + // Use size from metadata if available, otherwise from file + let size = if metadata.size_bytes > 0 { + metadata.size_bytes + } else { + state + .storage + .stat(key) + .await + .map(|m| m.size) + .unwrap_or(0) + }; + + // Format last_pulled + let last_pulled = if metadata.last_pulled > 0 { + Some(format_timestamp(metadata.last_pulled)) + } else { + None + }; + + // Build pull command + let pull_command = format!("docker pull {}/{}:{}", registry_host, name, tag_name); + tags.push(TagInfo { name: tag_name.to_string(), size, created, + downloads: metadata.downloads, + last_pulled, + os: if metadata.os.is_empty() { + "unknown".to_string() + } else { + metadata.os + }, + arch: if metadata.arch.is_empty() { + "unknown".to_string() + } else { + metadata.arch + }, + layers_count: metadata.layers.len(), + pull_command, }); } } diff --git a/nora-registry/src/ui/mod.rs b/nora-registry/src/ui/mod.rs index b770074..58239fd 100644 --- a/nora-registry/src/ui/mod.rs +++ b/nora-registry/src/ui/mod.rs @@ -108,7 +108,7 @@ async fn docker_detail( &Query(query), headers.get("cookie").and_then(|v| v.to_str().ok()), ); - let detail = get_docker_detail(&state.storage, &name).await; + let detail = get_docker_detail(&state, &name).await; Html(render_docker_detail(&name, &detail, lang)) }