perf(tokens): add in-memory verification cache with 5min TTL

Token verification previously ran Argon2id + disk read on every
authenticated request. Under load this becomes the bottleneck
(~100ms per Argon2 verify on a single core).

Changes:
- Add in-memory cache (SHA256 -> user/role/expiry) with 5 minute TTL
- Defer last_used timestamp writes to batch flush every 30 seconds
- Invalidate cache entry on token revoke
- Background task flushes pending last_used alongside metrics persist

First verify_token call per token: full Argon2 + disk (unchanged).
Subsequent calls within TTL: HashMap lookup only (sub-microsecond).
This commit is contained in:
2026-04-02 11:51:59 +00:00
parent f09cceb326
commit 0cd79e680f
2 changed files with 157 additions and 12 deletions

View File

@@ -430,13 +430,16 @@ async fn run_server(config: Config, storage: Storage) {
"Available endpoints"
);
// Background task: persist metrics every 30 seconds
// Background task: persist metrics and flush token last_used every 30 seconds
let metrics_state = state.clone();
tokio::spawn(async move {
let mut interval = tokio::time::interval(std::time::Duration::from_secs(30));
loop {
interval.tick().await;
metrics_state.metrics.save();
if let Some(ref token_store) = metrics_state.tokens {
token_store.flush_last_used();
}
}
});

View File

@@ -14,6 +14,23 @@ use std::time::{SystemTime, UNIX_EPOCH};
use thiserror::Error;
use uuid::Uuid;
use parking_lot::RwLock;
use std::collections::HashMap;
use std::sync::Arc;
use std::time::{Duration, Instant};
/// TTL for cached token verifications (avoids Argon2 per request)
const CACHE_TTL: Duration = Duration::from_secs(300);
/// Cached verification result
#[derive(Clone)]
struct CachedToken {
user: String,
role: Role,
expires_at: u64,
cached_at: Instant,
}
const TOKEN_PREFIX: &str = "nra_";
/// Access role for API tokens
@@ -66,6 +83,10 @@ fn default_role() -> Role {
#[derive(Clone)]
pub struct TokenStore {
storage_path: PathBuf,
/// In-memory cache: SHA256(token) -> verified result (avoids Argon2 per request)
cache: Arc<RwLock<HashMap<String, CachedToken>>>,
/// Pending last_used updates: file_id_prefix -> timestamp (flushed periodically)
pending_last_used: Arc<RwLock<HashMap<String, u64>>>,
}
impl TokenStore {
@@ -79,6 +100,8 @@ impl TokenStore {
}
Self {
storage_path: storage_path.to_path_buf(),
cache: Arc::new(RwLock::new(HashMap::new())),
pending_last_used: Arc::new(RwLock::new(HashMap::new())),
}
}
@@ -127,16 +150,41 @@ impl TokenStore {
Ok(raw_token)
}
/// Verify a token and return user info if valid
/// Verify a token and return user info if valid.
///
/// Uses an in-memory cache to avoid Argon2 verification on every request.
/// The `last_used` timestamp is updated in batch via `flush_last_used()`.
pub fn verify_token(&self, token: &str) -> Result<(String, Role), TokenError> {
if !token.starts_with(TOKEN_PREFIX) {
return Err(TokenError::InvalidFormat);
}
let file_id = sha256_hex(token);
let file_path = self.storage_path.join(format!("{}.json", &file_id[..16]));
let cache_key = sha256_hex(token);
// Fast path: check in-memory cache
{
let cache = self.cache.read();
if let Some(cached) = cache.get(&cache_key) {
if cached.cached_at.elapsed() < CACHE_TTL {
let now = SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap_or_default()
.as_secs();
if now > cached.expires_at {
return Err(TokenError::Expired);
}
// Schedule deferred last_used update
self.pending_last_used
.write()
.insert(cache_key[..16].to_string(), now);
return Ok((cached.user.clone(), cached.role.clone()));
}
}
}
// Slow path: read from disk and verify Argon2
let file_path = self.storage_path.join(format!("{}.json", &cache_key[..16]));
// TOCTOU fix: read directly, handle NotFound from IO error
let content = match fs::read_to_string(&file_path) {
Ok(c) => c,
Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
@@ -183,12 +231,21 @@ impl TokenStore {
return Err(TokenError::Expired);
}
// Update last_used
info.last_used = Some(now);
if let Ok(json) = serde_json::to_string_pretty(&info) {
let _ = fs::write(&file_path, &json);
set_file_permissions_600(&file_path);
}
// Populate cache
self.cache.write().insert(
cache_key[..16].to_string(),
CachedToken {
user: info.user.clone(),
role: info.role.clone(),
expires_at: info.expires_at,
cached_at: Instant::now(),
},
);
// Schedule deferred last_used update
self.pending_last_used
.write()
.insert(cache_key[..16].to_string(), now);
Ok((info.user, info.role))
}
@@ -213,13 +270,53 @@ impl TokenStore {
tokens
}
/// Flush pending last_used timestamps to disk.
/// Called periodically by background task (every 30s).
pub fn flush_last_used(&self) {
let pending: HashMap<String, u64> = {
let mut map = self.pending_last_used.write();
std::mem::take(&mut *map)
};
if pending.is_empty() {
return;
}
for (file_prefix, timestamp) in &pending {
let file_path = self.storage_path.join(format!("{}.json", file_prefix));
let content = match fs::read_to_string(&file_path) {
Ok(c) => c,
Err(_) => continue,
};
let mut info: TokenInfo = match serde_json::from_str(&content) {
Ok(i) => i,
Err(_) => continue,
};
info.last_used = Some(*timestamp);
if let Ok(json) = serde_json::to_string_pretty(&info) {
let _ = fs::write(&file_path, &json);
set_file_permissions_600(&file_path);
}
}
tracing::debug!(count = pending.len(), "Flushed pending last_used updates");
}
/// Remove a token from the in-memory cache (called on revoke)
fn invalidate_cache(&self, hash_prefix: &str) {
self.cache.write().remove(hash_prefix);
}
/// Revoke a token by its hash prefix
pub fn revoke_token(&self, hash_prefix: &str) -> Result<(), TokenError> {
let file_path = self.storage_path.join(format!("{}.json", hash_prefix));
// TOCTOU fix: try remove directly
match fs::remove_file(&file_path) {
Ok(()) => Ok(()),
Ok(()) => {
self.invalidate_cache(hash_prefix);
Ok(())
}
Err(e) if e.kind() == std::io::ErrorKind::NotFound => Err(TokenError::NotFound),
Err(e) => Err(TokenError::Storage(e.to_string())),
}
@@ -511,10 +608,55 @@ mod tests {
store.verify_token(&token).unwrap();
// last_used is deferred — flush to persist
store.flush_last_used();
let tokens = store.list_tokens("testuser");
assert!(tokens[0].last_used.is_some());
}
#[test]
fn test_verify_cache_hit() {
let temp_dir = TempDir::new().unwrap();
let store = TokenStore::new(temp_dir.path());
let token = store
.create_token("testuser", 30, None, Role::Write)
.unwrap();
// First call: cold (disk + Argon2)
let (user1, role1) = store.verify_token(&token).unwrap();
// Second call: should hit cache (no Argon2)
let (user2, role2) = store.verify_token(&token).unwrap();
assert_eq!(user1, user2);
assert_eq!(role1, role2);
assert_eq!(user1, "testuser");
assert_eq!(role1, Role::Write);
}
#[test]
fn test_revoke_invalidates_cache() {
let temp_dir = TempDir::new().unwrap();
let store = TokenStore::new(temp_dir.path());
let token = store
.create_token("testuser", 30, None, Role::Write)
.unwrap();
let file_id = sha256_hex(&token);
let hash_prefix = &file_id[..16];
// Populate cache
assert!(store.verify_token(&token).is_ok());
// Revoke
store.revoke_token(hash_prefix).unwrap();
// Cache should be invalidated
let result = store.verify_token(&token);
assert!(matches!(result, Err(TokenError::NotFound)));
}
#[test]
fn test_token_with_description() {
let temp_dir = TempDir::new().unwrap();