diff --git a/README.md b/README.md index dedd057..7554b97 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ Open [http://localhost:4000/ui/](http://localhost:4000/ui/) — your registry is | Registry | Mount Point | Upstream Proxy | Auth | |----------|------------|----------------|------| | Docker Registry v2 | `/v2/` | Docker Hub, GHCR, any OCI | ✓ | -| Maven | `/maven2/` | Maven Central, custom | ✓ | +| Maven | `/maven2/` | Maven Central, custom | proxy-only | | npm | `/npm/` | npmjs.org, custom | ✓ | | Cargo | `/cargo/` | — | ✓ | | PyPI | `/simple/` | pypi.org, custom | ✓ | diff --git a/nora-registry/src/registry/maven.rs b/nora-registry/src/registry/maven.rs index 53b4442..311e398 100644 --- a/nora-registry/src/registry/maven.rs +++ b/nora-registry/src/registry/maven.rs @@ -3,7 +3,7 @@ use crate::activity_log::{ActionType, ActivityEntry}; use crate::audit::AuditEntry; -use crate::config::basic_auth_header; +use crate::registry::proxy_fetch; use crate::AppState; use axum::{ body::Bytes, @@ -14,7 +14,6 @@ use axum::{ Router, }; use std::sync::Arc; -use std::time::Duration; pub fn routes() -> Router> { Router::new() @@ -53,7 +52,7 @@ async fn download(State(state): State>, Path(path): Path) for proxy in &state.config.maven.proxies { let url = format!("{}/{}", proxy.url().trim_end_matches('/'), path); - match fetch_from_proxy( + match proxy_fetch( &state.http_client, &url, state.config.maven.proxy_timeout, @@ -128,25 +127,6 @@ async fn upload( } } -async fn fetch_from_proxy( - client: &reqwest::Client, - url: &str, - timeout_secs: u64, - auth: Option<&str>, -) -> Result, ()> { - let mut request = client.get(url).timeout(Duration::from_secs(timeout_secs)); - if let Some(credentials) = auth { - request = request.header("Authorization", basic_auth_header(credentials)); - } - let response = request.send().await.map_err(|_| ())?; - - if !response.status().is_success() { - return Err(()); - } - - response.bytes().await.map(|b| b.to_vec()).map_err(|_| ()) -} - fn with_content_type( path: &str, data: Bytes, diff --git a/nora-registry/src/registry/mod.rs b/nora-registry/src/registry/mod.rs index 7b9da01..e5e257b 100644 --- a/nora-registry/src/registry/mod.rs +++ b/nora-registry/src/registry/mod.rs @@ -16,3 +16,127 @@ pub use maven::routes as maven_routes; pub use npm::routes as npm_routes; pub use pypi::routes as pypi_routes; pub use raw::routes as raw_routes; + +use crate::config::basic_auth_header; +use std::time::Duration; + +/// Fetch from upstream proxy with timeout and 1 retry. +/// +/// On transient errors (timeout, connection reset), retries once after a short delay. +/// Non-retryable errors (4xx) fail immediately. +pub(crate) async fn proxy_fetch( + client: &reqwest::Client, + url: &str, + timeout_secs: u64, + auth: Option<&str>, +) -> Result, ProxyError> { + for attempt in 0..2 { + let mut request = client.get(url).timeout(Duration::from_secs(timeout_secs)); + if let Some(credentials) = auth { + request = request.header("Authorization", basic_auth_header(credentials)); + } + + match request.send().await { + Ok(response) => { + if response.status().is_success() { + return response + .bytes() + .await + .map(|b| b.to_vec()) + .map_err(|e| ProxyError::Network(e.to_string())); + } + let status = response.status().as_u16(); + // Don't retry client errors (4xx) + if (400..500).contains(&status) { + return Err(ProxyError::NotFound); + } + // Server error (5xx) — retry + if attempt == 0 { + tracing::debug!(url, status, "upstream 5xx, retrying in 1s"); + tokio::time::sleep(Duration::from_secs(1)).await; + continue; + } + return Err(ProxyError::Upstream(status)); + } + Err(e) => { + if attempt == 0 { + tracing::debug!(url, error = %e, "upstream error, retrying in 1s"); + tokio::time::sleep(Duration::from_secs(1)).await; + continue; + } + return Err(ProxyError::Network(e.to_string())); + } + } + } + Err(ProxyError::Network("max retries exceeded".into())) +} + +#[derive(Debug)] +#[allow(dead_code)] +pub(crate) enum ProxyError { + NotFound, + Upstream(u16), + Network(String), +} + +/// Fetch text content from upstream proxy with timeout and 1 retry. +/// Same as proxy_fetch but returns String (for HTML pages like PyPI simple index). +pub(crate) async fn proxy_fetch_text( + client: &reqwest::Client, + url: &str, + timeout_secs: u64, + auth: Option<&str>, + extra_headers: Option<(&str, &str)>, +) -> Result { + for attempt in 0..2 { + let mut request = client.get(url).timeout(Duration::from_secs(timeout_secs)); + if let Some(credentials) = auth { + request = request.header("Authorization", basic_auth_header(credentials)); + } + if let Some((key, val)) = extra_headers { + request = request.header(key, val); + } + + match request.send().await { + Ok(response) => { + if response.status().is_success() { + return response + .text() + .await + .map_err(|e| ProxyError::Network(e.to_string())); + } + let status = response.status().as_u16(); + if (400..500).contains(&status) { + return Err(ProxyError::NotFound); + } + if attempt == 0 { + tracing::debug!(url, status, "upstream 5xx, retrying in 1s"); + tokio::time::sleep(Duration::from_secs(1)).await; + continue; + } + return Err(ProxyError::Upstream(status)); + } + Err(e) => { + if attempt == 0 { + tracing::debug!(url, error = %e, "upstream error, retrying in 1s"); + tokio::time::sleep(Duration::from_secs(1)).await; + continue; + } + return Err(ProxyError::Network(e.to_string())); + } + } + } + Err(ProxyError::Network("max retries exceeded".into())) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn test_proxy_fetch_invalid_url() { + let client = reqwest::Client::new(); + let result = proxy_fetch(&client, "http://127.0.0.1:1/nonexistent", 2, None).await; + assert!(matches!(result, Err(ProxyError::Network(_)))); + } +} diff --git a/nora-registry/src/registry/npm.rs b/nora-registry/src/registry/npm.rs index 5dd8d07..a58ab99 100644 --- a/nora-registry/src/registry/npm.rs +++ b/nora-registry/src/registry/npm.rs @@ -3,7 +3,7 @@ use crate::activity_log::{ActionType, ActivityEntry}; use crate::audit::AuditEntry; -use crate::config::basic_auth_header; +use crate::registry::proxy_fetch; use crate::AppState; use axum::{ body::Bytes, @@ -16,7 +16,6 @@ use axum::{ use base64::Engine; use sha2::Digest; use std::sync::Arc; -use std::time::Duration; pub fn routes() -> Router> { Router::new() @@ -140,7 +139,7 @@ async fn handle_request(State(state): State>, Path(path): Path, path: &str, key: &str) -> Optio let proxy_url = state.config.npm.proxy.as_ref()?; let url = format!("{}/{}", proxy_url.trim_end_matches('/'), path); - let data = fetch_from_proxy( + let data = proxy_fetch( &state.http_client, &url, state.config.npm.proxy_timeout, @@ -419,25 +418,6 @@ async fn handle_publish( // Helpers // ============================================================================ -async fn fetch_from_proxy( - client: &reqwest::Client, - url: &str, - timeout_secs: u64, - auth: Option<&str>, -) -> Result, ()> { - let mut request = client.get(url).timeout(Duration::from_secs(timeout_secs)); - if let Some(credentials) = auth { - request = request.header("Authorization", basic_auth_header(credentials)); - } - let response = request.send().await.map_err(|_| ())?; - - if !response.status().is_success() { - return Err(()); - } - - response.bytes().await.map(|b| b.to_vec()).map_err(|_| ()) -} - fn with_content_type( is_tarball: bool, data: Bytes, diff --git a/nora-registry/src/registry/pypi.rs b/nora-registry/src/registry/pypi.rs index f1978ee..6c0ae04 100644 --- a/nora-registry/src/registry/pypi.rs +++ b/nora-registry/src/registry/pypi.rs @@ -3,7 +3,7 @@ use crate::activity_log::{ActionType, ActivityEntry}; use crate::audit::AuditEntry; -use crate::config::basic_auth_header; +use crate::registry::{proxy_fetch, proxy_fetch_text}; use crate::AppState; use axum::{ extract::{Path, State}, @@ -13,7 +13,6 @@ use axum::{ Router, }; use std::sync::Arc; -use std::time::Duration; pub fn routes() -> Router> { Router::new() @@ -87,11 +86,12 @@ async fn package_versions( if let Some(proxy_url) = &state.config.pypi.proxy { let url = format!("{}/{}/", proxy_url.trim_end_matches('/'), normalized); - if let Ok(html) = fetch_package_page( + if let Ok(html) = proxy_fetch_text( &state.http_client, &url, state.config.pypi.proxy_timeout, state.config.pypi.proxy_auth.as_deref(), + Some(("Accept", "text/html")), ) .await { @@ -142,17 +142,18 @@ async fn download_file( // First, fetch the package page to find the actual download URL let page_url = format!("{}/{}/", proxy_url.trim_end_matches('/'), normalized); - if let Ok(html) = fetch_package_page( + if let Ok(html) = proxy_fetch_text( &state.http_client, &page_url, state.config.pypi.proxy_timeout, state.config.pypi.proxy_auth.as_deref(), + Some(("Accept", "text/html")), ) .await { // Find the URL for this specific file if let Some(file_url) = find_file_url(&html, &filename) { - if let Ok(data) = fetch_file( + if let Ok(data) = proxy_fetch( &state.http_client, &file_url, state.config.pypi.proxy_timeout, @@ -205,49 +206,6 @@ fn normalize_name(name: &str) -> String { name.to_lowercase().replace(['-', '_', '.'], "-") } -/// Fetch package page from upstream -async fn fetch_package_page( - client: &reqwest::Client, - url: &str, - timeout_secs: u64, - auth: Option<&str>, -) -> Result { - let mut request = client - .get(url) - .timeout(Duration::from_secs(timeout_secs)) - .header("Accept", "text/html"); - if let Some(credentials) = auth { - request = request.header("Authorization", basic_auth_header(credentials)); - } - let response = request.send().await.map_err(|_| ())?; - - if !response.status().is_success() { - return Err(()); - } - - response.text().await.map_err(|_| ()) -} - -/// Fetch file from upstream -async fn fetch_file( - client: &reqwest::Client, - url: &str, - timeout_secs: u64, - auth: Option<&str>, -) -> Result, ()> { - let mut request = client.get(url).timeout(Duration::from_secs(timeout_secs)); - if let Some(credentials) = auth { - request = request.header("Authorization", basic_auth_header(credentials)); - } - let response = request.send().await.map_err(|_| ())?; - - if !response.status().is_success() { - return Err(()); - } - - response.bytes().await.map(|b| b.to_vec()).map_err(|_| ()) -} - /// Rewrite PyPI links to point to our registry fn rewrite_pypi_links(html: &str, package_name: &str) -> String { // Simple regex-free approach: find href="..." and rewrite