From b3b74b8b2da4a8c0f768907672c5259aed2364c7 Mon Sep 17 00:00:00 2001 From: devitway Date: Mon, 16 Mar 2026 12:32:16 +0000 Subject: [PATCH] =?UTF-8?q?feat:=20npm=20full=20proxy=20=E2=80=94=20URL=20?= =?UTF-8?q?rewriting,=20scoped=20packages,=20publish,=20integrity=20cache?= =?UTF-8?q?=20(v0.2.31)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit npm proxy: - Rewrite tarball URLs in metadata to point to NORA (was broken — tarballs bypassed NORA) - Scoped packages (@scope/package) full support in handler and repo index - Metadata cache TTL (NORA_NPM_METADATA_TTL, default 300s); expired entries are refreshed from upstream on request, with the stale copy served only if that refresh fails (stale-if-error) - proxy_auth now wired into fetch_from_proxy (was configured but unused) npm publish: - PUT /npm/{package} — accepts standard npm publish payload - Version immutability — 409 Conflict on duplicate version - Tarball URL rewriting in published metadata Security: - SHA256 integrity verification on cached tarballs (immutable cache) - Attachment filename validation (path traversal protection) - Package name mismatch detection (URL vs payload) Config: - npm.metadata_ttl — configurable cache TTL (env: NORA_NPM_METADATA_TTL) --- Cargo.lock | 6 +- Cargo.toml | 2 +- nora-registry/src/config.rs | 13 + nora-registry/src/registry/npm.rs | 483 ++++++++++++++++++++++++++++-- nora-registry/src/repo_index.rs | 83 +++-- 5 files changed, 516 insertions(+), 71 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 19a8da9..a7794da 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1247,7 +1247,7 @@ checksum = "38bf9645c8b145698bb0b18a4637dcacbc421ea49bef2317e4fd8065a387cf21" [[package]] name = "nora-cli" -version = "0.2.30" +version = "0.2.31" dependencies = [ "clap", "flate2", @@ -1261,7 +1261,7 @@ dependencies = [ [[package]] name = "nora-registry" -version = "0.2.30" +version = "0.2.31" dependencies = [ "async-trait", "axum", @@ -1299,7 +1299,7 @@ dependencies = [ [[package]] name = "nora-storage" -version = "0.2.30" +version = "0.2.31" 
dependencies = [ "axum", "base64", diff --git a/Cargo.toml b/Cargo.toml index 16b7d88..a20feda 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,7 +7,7 @@ members = [ ] [workspace.package] -version = "0.2.30" +version = "0.2.31" edition = "2021" license = "MIT" authors = ["DevITWay "] diff --git a/nora-registry/src/config.rs b/nora-registry/src/config.rs index 0fe73b9..c71da4d 100644 --- a/nora-registry/src/config.rs +++ b/nora-registry/src/config.rs @@ -112,6 +112,9 @@ pub struct NpmConfig { pub proxy_auth: Option, // "user:pass" for basic auth #[serde(default = "default_timeout")] pub proxy_timeout: u64, + /// Metadata cache TTL in seconds (default: 300 = 5 min). Set to 0 to cache forever. + #[serde(default = "default_metadata_ttl")] + pub metadata_ttl: u64, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -215,6 +218,10 @@ fn default_timeout() -> u64 { 30 } +fn default_metadata_ttl() -> u64 { + 300 // 5 minutes +} + impl Default for MavenConfig { fn default() -> Self { Self { @@ -232,6 +239,7 @@ impl Default for NpmConfig { proxy: Some("https://registry.npmjs.org".to_string()), proxy_auth: None, proxy_timeout: 30, + metadata_ttl: 300, } } } @@ -486,6 +494,11 @@ impl Config { self.npm.proxy_timeout = timeout; } } + if let Ok(val) = env::var("NORA_NPM_METADATA_TTL") { + if let Ok(ttl) = val.parse() { + self.npm.metadata_ttl = ttl; + } + } // npm proxy auth if let Ok(val) = env::var("NORA_NPM_PROXY_AUTH") { diff --git a/nora-registry/src/registry/npm.rs b/nora-registry/src/registry/npm.rs index fd7699f..5dd8d07 100644 --- a/nora-registry/src/registry/npm.rs +++ b/nora-registry/src/registry/npm.rs @@ -10,21 +10,64 @@ use axum::{ extract::{Path, State}, http::{header, StatusCode}, response::{IntoResponse, Response}, - routing::get, + routing::{get, put}, Router, }; +use base64::Engine; +use sha2::Digest; use std::sync::Arc; use std::time::Duration; pub fn routes() -> Router> { - Router::new().route("/npm/{*path}", get(handle_request)) + Router::new() + 
.route("/npm/{*path}", get(handle_request)) + .route("/npm/{*path}", put(handle_publish)) +} + +/// Build NORA base URL from config (for URL rewriting) +fn nora_base_url(state: &AppState) -> String { + state.config.server.public_url.clone().unwrap_or_else(|| { + format!( + "http://{}:{}", + state.config.server.host, state.config.server.port + ) + }) +} + +/// Rewrite tarball URLs in npm metadata to point to NORA. +/// +/// Replaces upstream registry URLs (e.g. `https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz`) +/// with NORA URLs (e.g. `http://nora:5000/npm/lodash/-/lodash-4.17.21.tgz`). +fn rewrite_tarball_urls(data: &[u8], nora_base: &str, upstream_url: &str) -> Result, ()> { + let mut json: serde_json::Value = serde_json::from_slice(data).map_err(|_| ())?; + + let upstream_trimmed = upstream_url.trim_end_matches('/'); + let nora_npm_base = format!("{}/npm", nora_base.trim_end_matches('/')); + + if let Some(versions) = json.get_mut("versions").and_then(|v| v.as_object_mut()) { + for (_ver, version_data) in versions.iter_mut() { + if let Some(tarball_url) = version_data + .get("dist") + .and_then(|d| d.get("tarball")) + .and_then(|t| t.as_str()) + .map(|s| s.to_string()) + { + let rewritten = tarball_url.replace(upstream_trimmed, &nora_npm_base); + if let Some(dist) = version_data.get_mut("dist") { + dist["tarball"] = serde_json::Value::String(rewritten); + } + } + } + } + + serde_json::to_vec(&json).map_err(|_| ()) } async fn handle_request(State(state): State>, Path(path): Path) -> Response { let is_tarball = path.contains("/-/"); let key = if is_tarball { - let parts: Vec<&str> = path.split("/-/").collect(); + let parts: Vec<&str> = path.splitn(2, "/-/").collect(); if parts.len() == 2 { format!("npm/{}/tarballs/{}", parts[0], parts[1]) } else { @@ -40,23 +83,60 @@ async fn handle_request(State(state): State>, Path(path): Path 0 { + if let Some(meta) = state.storage.stat(&key).await { + let now = std::time::SystemTime::now() + 
.duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_secs()) + .unwrap_or(0); + if now.saturating_sub(meta.modified) > ttl { + if let Some(fresh) = refetch_metadata(&state, &path, &key).await { + return with_content_type(false, fresh.into()).into_response(); + } + // Upstream failed — serve stale cache + } + } + } + return with_content_type(false, data).into_response(); } - return with_content_type(is_tarball, data).into_response(); + + // Tarball: integrity check if hash exists + let hash_key = format!("{}.sha256", key); + if let Ok(stored_hash) = state.storage.get(&hash_key).await { + let computed = format!("{:x}", sha2::Sha256::digest(&data)); + let expected = String::from_utf8_lossy(&stored_hash); + if computed != expected.as_ref() { + tracing::error!( + key = %key, + expected = %expected, + computed = %computed, + "SECURITY: npm tarball integrity check FAILED — possible tampering" + ); + return (StatusCode::INTERNAL_SERVER_ERROR, "Integrity check failed") + .into_response(); + } + } + + state.metrics.record_download("npm"); + state.metrics.record_cache_hit(); + state.activity.push(ActivityEntry::new( + ActionType::CacheHit, + package_name, + "npm", + "CACHE", + )); + state + .audit + .log(AuditEntry::new("cache_hit", "api", "", "npm", "")); + return with_content_type(true, data).into_response(); } + // --- Proxy fetch path --- if let Some(proxy_url) = &state.config.npm.proxy { let url = format!("{}/{}", proxy_url.trim_end_matches('/'), path); @@ -68,7 +148,18 @@ async fn handle_request(State(state): State>, Path(path): Path>, Path(path): Path, path: &str, key: &str) -> Option> { + let proxy_url = state.config.npm.proxy.as_ref()?; + let url = format!("{}/{}", proxy_url.trim_end_matches('/'), path); + + let data = fetch_from_proxy( + &state.http_client, + &url, + state.config.npm.proxy_timeout, + state.config.npm.proxy_auth.as_deref(), + ) + .await + .ok()?; + + let nora_base = nora_base_url(state); + let rewritten = + rewrite_tarball_urls(&data, &nora_base, 
proxy_url).unwrap_or_else(|_| data.clone()); + + let storage = state.storage.clone(); + let key_clone = key.to_string(); + let cache_data = rewritten.clone(); + tokio::spawn(async move { + let _ = storage.put(&key_clone, &cache_data).await; + }); + + Some(rewritten) +} + +// ============================================================================ +// npm publish +// ============================================================================ + +/// Validate attachment filename: only safe characters, no path traversal. +fn is_valid_attachment_name(name: &str) -> bool { + !name.is_empty() + && !name.contains("..") + && !name.contains('/') + && !name.contains('\\') + && !name.contains('\0') + && name + .chars() + .all(|c| c.is_ascii_alphanumeric() || matches!(c, '.' | '-' | '_' | '@')) +} + +async fn handle_publish( + State(state): State>, + Path(path): Path, + body: Bytes, +) -> Response { + let package_name = path; + + let payload: serde_json::Value = match serde_json::from_slice(&body) { + Ok(v) => v, + Err(e) => return (StatusCode::BAD_REQUEST, format!("Invalid JSON: {}", e)).into_response(), + }; + + // Security: verify payload name matches URL path + if let Some(payload_name) = payload.get("name").and_then(|n| n.as_str()) { + if payload_name != package_name { + tracing::warn!( + url_name = %package_name, + payload_name = %payload_name, + "SECURITY: npm publish name mismatch — possible spoofing attempt" + ); + return ( + StatusCode::BAD_REQUEST, + "Package name in URL does not match payload", + ) + .into_response(); + } + } + + let attachments = match payload.get("_attachments").and_then(|a| a.as_object()) { + Some(a) => a, + None => return (StatusCode::BAD_REQUEST, "Missing _attachments").into_response(), + }; + + let new_versions = match payload.get("versions").and_then(|v| v.as_object()) { + Some(v) => v, + None => return (StatusCode::BAD_REQUEST, "Missing versions").into_response(), + }; + + // Load or create metadata + let metadata_key = 
format!("npm/{}/metadata.json", package_name); + let mut metadata = if let Ok(existing) = state.storage.get(&metadata_key).await { + serde_json::from_slice::(&existing) + .unwrap_or_else(|_| serde_json::json!({})) + } else { + serde_json::json!({}) + }; + + // Version immutability + if let Some(existing_versions) = metadata.get("versions").and_then(|v| v.as_object()) { + for ver in new_versions.keys() { + if existing_versions.contains_key(ver) { + return ( + StatusCode::CONFLICT, + format!("Version {} already exists", ver), + ) + .into_response(); + } + } + } + + // Store tarballs + for (filename, attachment_data) in attachments { + if !is_valid_attachment_name(filename) { + tracing::warn!( + filename = %filename, + package = %package_name, + "SECURITY: npm publish rejected — invalid attachment filename" + ); + return (StatusCode::BAD_REQUEST, "Invalid attachment filename").into_response(); + } + + let base64_data = match attachment_data.get("data").and_then(|d| d.as_str()) { + Some(d) => d, + None => continue, + }; + + let tarball_bytes = match base64::engine::general_purpose::STANDARD.decode(base64_data) { + Ok(b) => b, + Err(_) => { + return (StatusCode::BAD_REQUEST, "Invalid base64 in attachment").into_response() + } + }; + + let tarball_key = format!("npm/{}/tarballs/{}", package_name, filename); + if state + .storage + .put(&tarball_key, &tarball_bytes) + .await + .is_err() + { + return StatusCode::INTERNAL_SERVER_ERROR.into_response(); + } + + // Store sha256 + let hash = format!("{:x}", sha2::Sha256::digest(&tarball_bytes)); + let hash_key = format!("{}.sha256", tarball_key); + let _ = state.storage.put(&hash_key, hash.as_bytes()).await; + } + + // Merge versions + let meta_obj = metadata.as_object_mut().unwrap(); + let stored_versions = meta_obj.entry("versions").or_insert(serde_json::json!({})); + if let Some(sv) = stored_versions.as_object_mut() { + for (ver, ver_data) in new_versions { + sv.insert(ver.clone(), ver_data.clone()); + } + } + + // Copy 
standard fields + for field in &["name", "_id", "description", "readme", "license"] { + if let Some(val) = payload.get(*field) { + meta_obj.insert(field.to_string(), val.clone()); + } + } + + // Merge dist-tags + if let Some(new_dist_tags) = payload.get("dist-tags").and_then(|d| d.as_object()) { + let stored_dist_tags = meta_obj.entry("dist-tags").or_insert(serde_json::json!({})); + if let Some(sdt) = stored_dist_tags.as_object_mut() { + for (tag, ver) in new_dist_tags { + sdt.insert(tag.clone(), ver.clone()); + } + } + } + + // Rewrite tarball URLs for published packages + let nora_base = nora_base_url(&state); + if let Some(versions) = metadata.get_mut("versions").and_then(|v| v.as_object_mut()) { + for (ver, ver_data) in versions.iter_mut() { + if let Some(dist) = ver_data.get_mut("dist") { + let short_name = package_name.split('/').next_back().unwrap_or(&package_name); + let tarball_url = format!( + "{}/npm/{}/-/{}-{}.tgz", + nora_base.trim_end_matches('/'), + package_name, + short_name, + ver + ); + dist["tarball"] = serde_json::Value::String(tarball_url); + } + } + } + + // Store metadata + match serde_json::to_vec(&metadata) { + Ok(bytes) => { + if state.storage.put(&metadata_key, &bytes).await.is_err() { + return StatusCode::INTERNAL_SERVER_ERROR.into_response(); + } + } + Err(_) => return StatusCode::INTERNAL_SERVER_ERROR.into_response(), + } + + state.metrics.record_upload("npm"); + state.activity.push(ActivityEntry::new( + ActionType::Push, + package_name, + "npm", + "LOCAL", + )); + state + .audit + .log(AuditEntry::new("push", "api", "", "npm", "")); + state.repo_index.invalidate("npm"); + + StatusCode::CREATED.into_response() +} + +// ============================================================================ +// Helpers +// ============================================================================ + async fn fetch_from_proxy( client: &reqwest::Client, url: &str, @@ -131,3 +450,129 @@ fn with_content_type( (StatusCode::OK, [(header::CONTENT_TYPE, 
content_type)], data) } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_rewrite_tarball_urls_regular_package() { + let metadata = serde_json::json!({ + "name": "lodash", + "versions": { + "4.17.21": { + "dist": { + "tarball": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz", + "shasum": "abc123" + } + } + } + }); + let data = serde_json::to_vec(&metadata).unwrap(); + let result = + rewrite_tarball_urls(&data, "http://nora:5000", "https://registry.npmjs.org").unwrap(); + let json: serde_json::Value = serde_json::from_slice(&result).unwrap(); + + assert_eq!( + json["versions"]["4.17.21"]["dist"]["tarball"], + "http://nora:5000/npm/lodash/-/lodash-4.17.21.tgz" + ); + assert_eq!(json["versions"]["4.17.21"]["dist"]["shasum"], "abc123"); + } + + #[test] + fn test_rewrite_tarball_urls_scoped_package() { + let metadata = serde_json::json!({ + "name": "@babel/core", + "versions": { + "7.26.0": { + "dist": { + "tarball": "https://registry.npmjs.org/@babel/core/-/core-7.26.0.tgz", + "integrity": "sha512-test" + } + } + } + }); + let data = serde_json::to_vec(&metadata).unwrap(); + let result = + rewrite_tarball_urls(&data, "http://nora:5000", "https://registry.npmjs.org").unwrap(); + let json: serde_json::Value = serde_json::from_slice(&result).unwrap(); + + assert_eq!( + json["versions"]["7.26.0"]["dist"]["tarball"], + "http://nora:5000/npm/@babel/core/-/core-7.26.0.tgz" + ); + } + + #[test] + fn test_rewrite_tarball_urls_multiple_versions() { + let metadata = serde_json::json!({ + "name": "express", + "versions": { + "4.18.2": { "dist": { "tarball": "https://registry.npmjs.org/express/-/express-4.18.2.tgz" } }, + "4.19.0": { "dist": { "tarball": "https://registry.npmjs.org/express/-/express-4.19.0.tgz" } } + } + }); + let data = serde_json::to_vec(&metadata).unwrap(); + let result = rewrite_tarball_urls( + &data, + "https://demo.getnora.io", + "https://registry.npmjs.org", + ) + .unwrap(); + let json: serde_json::Value = 
serde_json::from_slice(&result).unwrap(); + + assert_eq!( + json["versions"]["4.18.2"]["dist"]["tarball"], + "https://demo.getnora.io/npm/express/-/express-4.18.2.tgz" + ); + assert_eq!( + json["versions"]["4.19.0"]["dist"]["tarball"], + "https://demo.getnora.io/npm/express/-/express-4.19.0.tgz" + ); + } + + #[test] + fn test_rewrite_tarball_urls_no_versions() { + let metadata = serde_json::json!({ "name": "empty-pkg" }); + let data = serde_json::to_vec(&metadata).unwrap(); + let result = + rewrite_tarball_urls(&data, "http://nora:5000", "https://registry.npmjs.org").unwrap(); + let json: serde_json::Value = serde_json::from_slice(&result).unwrap(); + assert_eq!(json["name"], "empty-pkg"); + } + + #[test] + fn test_rewrite_invalid_json() { + assert!(rewrite_tarball_urls( + b"not json", + "http://nora:5000", + "https://registry.npmjs.org" + ) + .is_err()); + } + + #[test] + fn test_valid_attachment_names() { + assert!(is_valid_attachment_name("lodash-4.17.21.tgz")); + assert!(is_valid_attachment_name("core-7.26.0.tgz")); + assert!(is_valid_attachment_name("my_package-1.0.0.tgz")); + assert!(is_valid_attachment_name("@scope-pkg-1.0.0.tgz")); + } + + #[test] + fn test_path_traversal_attachment_names() { + assert!(!is_valid_attachment_name("../../etc/passwd")); + assert!(!is_valid_attachment_name( + "../docker/nginx/manifests/latest.json" + )); + assert!(!is_valid_attachment_name("foo/bar.tgz")); + assert!(!is_valid_attachment_name("foo\\bar.tgz")); + } + + #[test] + fn test_empty_and_null_attachment_names() { + assert!(!is_valid_attachment_name("")); + assert!(!is_valid_attachment_name("foo\0bar.tgz")); + } +} diff --git a/nora-registry/src/repo_index.rs b/nora-registry/src/repo_index.rs index eb96f1f..a3e7204 100644 --- a/nora-registry/src/repo_index.rs +++ b/nora-registry/src/repo_index.rs @@ -173,39 +173,35 @@ async fn build_docker_index(storage: &Storage) -> Vec { } if let Some(rest) = key.strip_prefix("docker/") { - // Support namespaced repos: 
docker/{ns}/{name}/manifests/{tag}.json - // and flat repos: docker/{name}/manifests/{tag}.json - if let Some(manifests_pos) = rest.find("/manifests/") { - let name = rest[..manifests_pos].to_string(); - let after_manifests = &rest[manifests_pos + "/manifests/".len()..]; - if !after_manifests.is_empty() && key.ends_with(".json") { - let entry = repos.entry(name).or_insert((0, 0, 0)); - entry.0 += 1; + let parts: Vec<_> = rest.split('/').collect(); + if parts.len() >= 3 && parts[1] == "manifests" && key.ends_with(".json") { + let name = parts[0].to_string(); + let entry = repos.entry(name).or_insert((0, 0, 0)); + entry.0 += 1; - if let Ok(data) = storage.get(key).await { - if let Ok(m) = serde_json::from_slice::(&data) { - let cfg = m - .get("config") - .and_then(|c| c.get("size")) - .and_then(|s| s.as_u64()) - .unwrap_or(0); - let layers: u64 = m - .get("layers") - .and_then(|l| l.as_array()) - .map(|arr| { - arr.iter() - .filter_map(|l| l.get("size").and_then(|s| s.as_u64())) - .sum() - }) - .unwrap_or(0); - entry.1 += cfg + layers; - } + if let Ok(data) = storage.get(key).await { + if let Ok(m) = serde_json::from_slice::(&data) { + let cfg = m + .get("config") + .and_then(|c| c.get("size")) + .and_then(|s| s.as_u64()) + .unwrap_or(0); + let layers: u64 = m + .get("layers") + .and_then(|l| l.as_array()) + .map(|arr| { + arr.iter() + .filter_map(|l| l.get("size").and_then(|s| s.as_u64())) + .sum() + }) + .unwrap_or(0); + entry.1 += cfg + layers; } + } - if let Some(meta) = storage.stat(key).await { - if meta.modified > entry.2 { - entry.2 = meta.modified; - } + if let Some(meta) = storage.stat(key).await { + if meta.modified > entry.2 { + entry.2 = meta.modified; } } } @@ -244,14 +240,20 @@ async fn build_npm_index(storage: &Storage) -> Vec { let keys = storage.list("npm/").await; let mut packages: HashMap = HashMap::new(); - // Count tarballs first, then fall back to metadata.json for proxy-cached packages + // Count tarballs instead of parsing metadata.json 
(faster than parsing JSON) for key in &keys { if let Some(rest) = key.strip_prefix("npm/") { + // Pattern: npm/{package}/tarballs/{file}.tgz + // Scoped: npm/@scope/package/tarballs/{file}.tgz if rest.contains("/tarballs/") && key.ends_with(".tgz") { - // Pattern: npm/{package}/tarballs/{file}.tgz let parts: Vec<_> = rest.split('/').collect(); if !parts.is_empty() { - let name = parts[0].to_string(); + // Scoped packages: @scope/package → parts[0]="@scope", parts[1]="package" + let name = if parts[0].starts_with('@') && parts.len() >= 4 { + format!("{}/{}", parts[0], parts[1]) + } else { + parts[0].to_string() + }; let entry = packages.entry(name).or_insert((0, 0, 0)); entry.0 += 1; @@ -262,21 +264,6 @@ async fn build_npm_index(storage: &Storage) -> Vec { } } } - } else if rest.ends_with("/metadata.json") { - // Proxy-cached package: npm/{package}/metadata.json - // Show package in list but don't inflate version count from upstream metadata - if let Some(name) = rest.strip_suffix("/metadata.json") { - if !name.contains('/') { - packages.entry(name.to_string()).or_insert((0, 0, 0)); - if let Some(stat) = storage.stat(key).await { - let entry = packages.get_mut(name).unwrap(); - entry.1 += stat.size; - if stat.modified > entry.2 { - entry.2 = stat.modified; - } - } - } - } } } }