From 35a9e34a3ec4e71ecd01108f2067763f919dfaad Mon Sep 17 00:00:00 2001
From: DevITWay | Pavel Volkov
Date: Thu, 2 Apr 2026 15:56:54 +0300
Subject: [PATCH] fix: proxy dedup, multi-registry GC, TOCTOU and credential
 hygiene (#83)

- Deduplicate proxy_fetch/proxy_fetch_text into generic proxy_fetch_core
  with response extractor closure (removes ~50 lines of copy-paste)
- GC now scans all registry prefixes, not just docker/
- Add tracing::warn to fire-and-forget cache writes in docker proxy
- Mark S3 credentials as skip_serializing to prevent accidental leaks
- Remove TOCTOU race in LocalStorage get/delete (redundant exists check)
---
 nora-registry/src/config.rs          |  4 +-
 nora-registry/src/gc.rs              | 13 ++--
 nora-registry/src/registry/docker.rs |  8 ++-
 nora-registry/src/registry/mod.rs    | 90 ++++++++++------------------
 nora-registry/src/storage/local.rs   | 18 +++---
 5 files changed, 57 insertions(+), 76 deletions(-)

diff --git a/nora-registry/src/config.rs b/nora-registry/src/config.rs
index 5d5dbb2..aebaa66 100644
--- a/nora-registry/src/config.rs
+++ b/nora-registry/src/config.rs
@@ -72,10 +72,10 @@ pub struct StorageConfig {
     #[serde(default = "default_bucket")]
     pub bucket: String,
     /// S3 access key (optional, uses anonymous access if not set)
-    #[serde(default)]
+    #[serde(default, skip_serializing)]
     pub s3_access_key: Option<String>,
     /// S3 secret key (optional, uses anonymous access if not set)
-    #[serde(default)]
+    #[serde(default, skip_serializing)]
     pub s3_secret_key: Option<String>,
     /// S3 region (default: us-east-1)
     #[serde(default = "default_s3_region")]
diff --git a/nora-registry/src/gc.rs b/nora-registry/src/gc.rs
index 6bb9aef..649466c 100644
--- a/nora-registry/src/gc.rs
+++ b/nora-registry/src/gc.rs
@@ -72,10 +72,15 @@ pub async fn run_gc(storage: &Storage, dry_run: bool) -> GcResult {
 
 async fn collect_all_blobs(storage: &Storage) -> Vec<String> {
     let mut blobs = Vec::new();
-    let docker_blobs = storage.list("docker/").await;
-    for key in docker_blobs {
-        if key.contains("/blobs/") {
-            blobs.push(key);
+    // Collect blobs from all registry types, not just Docker
+    for prefix in &[
+        "docker/", "maven/", "npm/", "cargo/", "pypi/", "raw/", "go/",
+    ] {
+        let keys = storage.list(prefix).await;
+        for key in keys {
+            if key.contains("/blobs/") || key.contains("/tarballs/") {
+                blobs.push(key);
+            }
         }
     }
     blobs
diff --git a/nora-registry/src/registry/docker.rs b/nora-registry/src/registry/docker.rs
index a1d0579..81c856c 100644
--- a/nora-registry/src/registry/docker.rs
+++ b/nora-registry/src/registry/docker.rs
@@ -284,7 +284,9 @@ async fn download_blob(
     let key_clone = key.clone();
     let data_clone = data.clone();
     tokio::spawn(async move {
-        let _ = storage.put(&key_clone, &data_clone).await;
+        if let Err(e) = storage.put(&key_clone, &data_clone).await {
+            tracing::warn!(key = %key_clone, error = %e, "Failed to cache blob in storage");
+        }
     });
 
     return (
@@ -687,7 +689,9 @@ async fn get_manifest(
     let key_clone = key.clone();
     let data_clone = data.clone();
     tokio::spawn(async move {
-        let _ = storage.put(&key_clone, &data_clone).await;
+        if let Err(e) = storage.put(&key_clone, &data_clone).await {
+            tracing::warn!(key = %key_clone, error = %e, "Failed to cache blob in storage");
+        }
     });
 
     state.repo_index.invalidate("docker");
diff --git a/nora-registry/src/registry/mod.rs b/nora-registry/src/registry/mod.rs
index 414f6f4..e365be1 100644
--- a/nora-registry/src/registry/mod.rs
+++ b/nora-registry/src/registry/mod.rs
@@ -22,57 +22,6 @@ pub use raw::routes as raw_routes;
 use crate::config::basic_auth_header;
 use std::time::Duration;
 
-/// Fetch from upstream proxy with timeout and 1 retry.
-///
-/// On transient errors (timeout, connection reset), retries once after a short delay.
-/// Non-retryable errors (4xx) fail immediately.
-pub(crate) async fn proxy_fetch(
-    client: &reqwest::Client,
-    url: &str,
-    timeout_secs: u64,
-    auth: Option<&str>,
-) -> Result<Vec<u8>, ProxyError> {
-    for attempt in 0..2 {
-        let mut request = client.get(url).timeout(Duration::from_secs(timeout_secs));
-        if let Some(credentials) = auth {
-            request = request.header("Authorization", basic_auth_header(credentials));
-        }
-
-        match request.send().await {
-            Ok(response) => {
-                if response.status().is_success() {
-                    return response
-                        .bytes()
-                        .await
-                        .map(|b| b.to_vec())
-                        .map_err(|e| ProxyError::Network(e.to_string()));
-                }
-                let status = response.status().as_u16();
-                // Don't retry client errors (4xx)
-                if (400..500).contains(&status) {
-                    return Err(ProxyError::NotFound);
-                }
-                // Server error (5xx) — retry
-                if attempt == 0 {
-                    tracing::debug!(url, status, "upstream 5xx, retrying in 1s");
-                    tokio::time::sleep(Duration::from_secs(1)).await;
-                    continue;
-                }
-                return Err(ProxyError::Upstream(status));
-            }
-            Err(e) => {
-                if attempt == 0 {
-                    tracing::debug!(url, error = %e, "upstream error, retrying in 1s");
-                    tokio::time::sleep(Duration::from_secs(1)).await;
-                    continue;
-                }
-                return Err(ProxyError::Network(e.to_string()));
-            }
-        }
-    }
-    Err(ProxyError::Network("max retries exceeded".into()))
-}
-
 #[derive(Debug)]
 #[allow(dead_code)]
 pub(crate) enum ProxyError {
@@ -81,15 +30,19 @@ pub(crate) enum ProxyError {
     Network(String),
 }
 
-/// Fetch text content from upstream proxy with timeout and 1 retry.
-/// Same as proxy_fetch but returns String (for HTML pages like PyPI simple index).
-pub(crate) async fn proxy_fetch_text(
+/// Core fetch logic with retry. Callers provide a response extractor.
+async fn proxy_fetch_core<T, F, Fut>(
     client: &reqwest::Client,
     url: &str,
     timeout_secs: u64,
     auth: Option<&str>,
     extra_headers: Option<(&str, &str)>,
-) -> Result<String, ProxyError> {
+    extract: F,
+) -> Result<T, ProxyError>
+where
+    F: Fn(reqwest::Response) -> Fut + Copy,
+    Fut: std::future::Future<Output = Result<T, reqwest::Error>>,
+{
     for attempt in 0..2 {
         let mut request = client.get(url).timeout(Duration::from_secs(timeout_secs));
         if let Some(credentials) = auth {
@@ -102,8 +55,7 @@ pub(crate) async fn proxy_fetch_text(
         match request.send().await {
             Ok(response) => {
                 if response.status().is_success() {
-                    return response
-                        .text()
+                    return extract(response)
                         .await
                         .map_err(|e| ProxyError::Network(e.to_string()));
                 }
@@ -131,6 +83,30 @@
     Err(ProxyError::Network("max retries exceeded".into()))
 }
 
+/// Fetch binary content from upstream proxy with timeout and 1 retry.
+pub(crate) async fn proxy_fetch(
+    client: &reqwest::Client,
+    url: &str,
+    timeout_secs: u64,
+    auth: Option<&str>,
+) -> Result<Vec<u8>, ProxyError> {
+    proxy_fetch_core(client, url, timeout_secs, auth, None, |r| async {
+        r.bytes().await.map(|b| b.to_vec())
+    })
+    .await
+}
+
+/// Fetch text content from upstream proxy with timeout and 1 retry.
+pub(crate) async fn proxy_fetch_text(
+    client: &reqwest::Client,
+    url: &str,
+    timeout_secs: u64,
+    auth: Option<&str>,
+    extra_headers: Option<(&str, &str)>,
+) -> Result<String, ProxyError> {
+    proxy_fetch_core(client, url, timeout_secs, auth, extra_headers, |r| r.text()).await
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/nora-registry/src/storage/local.rs b/nora-registry/src/storage/local.rs
index 2302585..49406f4 100644
--- a/nora-registry/src/storage/local.rs
+++ b/nora-registry/src/storage/local.rs
@@ -68,10 +68,6 @@ impl StorageBackend for LocalStorage {
     async fn get(&self, key: &str) -> Result {
         let path = self.key_to_path(key);
 
-        if !path.exists() {
-            return Err(StorageError::NotFound);
-        }
-
         let mut file = fs::File::open(&path).await.map_err(|e| {
             if e.kind() == std::io::ErrorKind::NotFound {
                 StorageError::NotFound
@@ -91,13 +87,13 @@ async fn delete(&self, key: &str) -> Result<()> {
         let path = self.key_to_path(key);
 
-        if !path.exists() {
-            return Err(StorageError::NotFound);
-        }
-
-        fs::remove_file(&path)
-            .await
-            .map_err(|e| StorageError::Io(e.to_string()))?;
+        fs::remove_file(&path).await.map_err(|e| {
+            if e.kind() == std::io::ErrorKind::NotFound {
+                StorageError::NotFound
+            } else {
+                StorageError::Io(e.to_string())
+            }
+        })?;
 
         Ok(())
     }