diff --git a/Cargo.toml b/Cargo.toml
index f8ced64..372142c 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,5 +1,5 @@
 [workspace]
-members = ["crates/tirith-core", "crates/tirith", "tools/sign-license", "tools/license-server"]
+members = ["crates/tirith-core", "crates/tirith", "tools/sign-license", "tools/license-server", "crates/slopsquatscan"]
 resolver = "2"
 
 [workspace.package]
diff --git a/crates/slopsquatscan/Cargo.toml b/crates/slopsquatscan/Cargo.toml
new file mode 100644
index 0000000..e17e7aa
--- /dev/null
+++ b/crates/slopsquatscan/Cargo.toml
@@ -0,0 +1,19 @@
+[package]
+name = "slopsquatscan"
+version.workspace = true
+edition.workspace = true
+license.workspace = true
+rust-version.workspace = true
+description = "Scan installed packages for slopsquatting — hallucinated, typosquatted, or suspicious dependencies"
+repository = "https://github.com/sheeki03/tirith"
+
+[[bin]]
+name = "slopsquatscan"
+path = "src/main.rs"
+
+[dependencies]
+clap = { workspace = true }
+reqwest = { version = "0.12", default-features = false, features = ["blocking", "rustls-tls", "json"] }
+serde = { workspace = true }
+serde_json = { workspace = true }
+chrono = { workspace = true }
diff --git a/crates/slopsquatscan/src/main.rs b/crates/slopsquatscan/src/main.rs
new file mode 100644
index 0000000..f1f6299
--- /dev/null
+++ b/crates/slopsquatscan/src/main.rs
@@ -0,0 +1,264 @@
+//! `slopsquatscan` checks locally installed npm, pip and AUR packages against
+//! their upstream registries and reports names that are missing, very new, or
+//! rarely used.
+
+mod output;
+mod registry;
+
+use std::io::Write;
+
+use clap::Parser;
+use registry::{PackageResult, PackageStatus};
+use reqwest::blocking::Client;
+
+// Command-line flags. Plain `//` comment on purpose: a `///` doc comment on the
+// struct would be picked up by clap as help text.
+#[derive(Parser)]
+#[command(
+    name = "slopsquatscan",
+    version,
+    about = "Scan installed packages for potential slopsquatting"
+)]
+struct Cli {
+    /// Scan pip packages only
+    #[arg(long)]
+    pip: bool,
+
+    /// Scan npm global packages only
+    #[arg(long)]
+    npm: bool,
+
+    /// Scan AUR packages only
+    #[arg(long)]
+    aur: bool,
+
+    /// Scan everything (default if no flags)
+    #[arg(long)]
+    all: bool,
+
+    /// Show clean packages too
+    #[arg(long)]
+    verbose: bool,
+
+    /// Output as JSON
+    #[arg(long)]
+    json: bool,
+}
+
+/// Runs one registry `scanner` and returns its results.
+///
+/// Unless `json` is set, the section `label` and one line per package are
+/// written to stderr (`verbose` also lists clean packages).
+fn run_scan(
+    client: &Client,
+    label: &str,
+    scanner: fn(&Client) -> Vec<PackageResult>,
+    json: bool,
+    verbose: bool,
+) -> Vec<PackageResult> {
+    if !json {
+        eprintln!("\n{}{}{}", output::BOLD, label, output::RST);
+    }
+    let results = scanner(client);
+    if !json {
+        print_results(&results, verbose);
+    }
+    results
+}
+
+/// Scans the selected registries, prints the report (text on stderr, or JSON
+/// on stdout), and exits with status 1 if any package is suspicious.
+fn main() {
+    let cli = Cli::parse();
+
+    // No registry flag at all means "scan everything".
+    let scan_all = cli.all || (!cli.pip && !cli.npm && !cli.aur);
+    let scan_npm = scan_all || cli.npm;
+    let scan_pip = scan_all || cli.pip;
+    let scan_aur = scan_all || cli.aur;
+
+    if !cli.json {
+        output::banner();
+        eprintln!();
+        output::thresholds(
+            registry::npm_threshold(),
+            registry::pypi_threshold(),
+            registry::days_threshold(),
+        );
+    }
+
+    let client = Client::new();
+    let mut all_results: Vec<PackageResult> = Vec::new();
+
+    if scan_npm {
+        all_results.extend(run_scan(
+            &client,
+            "npm (global)",
+            registry::scan_npm,
+            cli.json,
+            cli.verbose,
+        ));
+    }
+    if scan_pip {
+        all_results.extend(run_scan(
+            &client,
+            "pip",
+            registry::scan_pip,
+            cli.json,
+            cli.verbose,
+        ));
+    }
+    if scan_aur {
+        all_results.extend(run_scan(
+            &client,
+            "AUR (foreign packages)",
+            registry::scan_aur,
+            cli.json,
+            cli.verbose,
+        ));
+    }
+
+    let clean = all_results
+        .iter()
+        .filter(|r| matches!(r.status, PackageStatus::Clean { .. }))
+        .count();
+    let warnings = all_results
+        .iter()
+        .filter(|r| matches!(r.status, PackageStatus::Warning { .. }))
+        .count();
+    let suspicious: Vec<_> = all_results
+        .iter()
+        .filter(|r| matches!(r.status, PackageStatus::Suspicious { .. }))
+        .collect();
+
+    if cli.json {
+        print_json(&all_results, clean, warnings, suspicious.len());
+    } else {
+        print_summary(clean, warnings, &suspicious);
+    }
+
+    if !suspicious.is_empty() {
+        std::process::exit(1);
+    }
+}
+
+/// Prints the totals to stderr, followed by the list of suspicious packages or
+/// a short closing note.
+fn print_summary(clean: usize, warnings: usize, suspicious: &[&PackageResult]) {
+    eprintln!("\n{}Summary{}", output::BOLD, output::RST);
+    eprintln!(" {}Clean:{} {clean}", output::GRN, output::RST);
+    eprintln!(" {}Warnings:{} {warnings}", output::YLW, output::RST);
+    eprintln!(
+        " {}Suspicious:{} {}",
+        output::RED,
+        output::RST,
+        suspicious.len()
+    );
+
+    if !suspicious.is_empty() {
+        eprintln!();
+        eprintln!(
+            "{}{}Action required:{} these packages were NOT FOUND on their registry:",
+            output::RED,
+            output::BOLD,
+            output::RST
+        );
+        for s in suspicious {
+            eprintln!(
+                " {}→{} {}:{}",
+                output::RED,
+                output::RST,
+                s.registry,
+                s.name
+            );
+        }
+        eprintln!();
+        eprintln!("This could mean: typosquatted name, removed package, or private package.");
+        eprintln!("Investigate before continuing to use them.");
+    } else if warnings > 0 {
+        eprintln!();
+        eprintln!(
+            "{}Some packages have low popularity or are very new — worth a quick check.{}",
+            output::YLW,
+            output::RST
+        );
+    } else {
+        eprintln!("\n{}All clear.{}", output::GRN, output::RST);
+    }
+}
+
+/// Prints one stderr line per package; clean packages are shown only when
+/// `verbose` is set.
+fn print_results(results: &[PackageResult], verbose: bool) {
+    if results.is_empty() {
+        eprintln!(" {}no packages found{}", output::DIM, output::RST);
+        return;
+    }
+    for r in results {
+        match &r.status {
+            PackageStatus::Suspicious { reason } => output::log_sus(&r.name, reason),
+            PackageStatus::Warning { reason } => output::log_warn(&r.name, reason),
+            PackageStatus::Clean { detail } => output::log_ok(&r.name, detail, verbose),
+        }
+    }
+}
+
+/// Writes the full report to stdout as pretty-printed JSON.
+///
+/// A failed write (for example a closed pipe) is reported on stderr instead of
+/// being silently dropped.
+fn print_json(results: &[PackageResult], clean: usize, warnings: usize, suspicious: usize) {
+    #[derive(serde::Serialize)]
+    struct JsonOutput {
+        summary: JsonSummary,
+        packages: Vec<JsonPackage>,
+    }
+    #[derive(serde::Serialize)]
+    struct JsonSummary {
+        clean: usize,
+        warnings: usize,
+        suspicious: usize,
+    }
+    #[derive(serde::Serialize)]
+    struct JsonPackage {
+        registry: String,
+        name: String,
+        status: String,
+        detail: String,
+    }
+
+    let packages: Vec<JsonPackage> = results
+        .iter()
+        .map(|r| {
+            let (status, detail) = match &r.status {
+                PackageStatus::Clean { detail } => ("clean", detail.clone()),
+                PackageStatus::Warning { reason } => ("warning", reason.clone()),
+                PackageStatus::Suspicious { reason } => ("suspicious", reason.clone()),
+            };
+            JsonPackage {
+                registry: r.registry.to_string(),
+                name: r.name.clone(),
+                status: status.to_string(),
+                detail,
+            }
+        })
+        .collect();
+
+    let out = JsonOutput {
+        summary: JsonSummary {
+            clean,
+            warnings,
+            suspicious,
+        },
+        packages,
+    };
+
+    let stdout = std::io::stdout();
+    let mut handle = stdout.lock();
+    let written = serde_json::to_writer_pretty(&mut handle, &out)
+        .map_err(std::io::Error::from)
+        .and_then(|()| writeln!(handle));
+    if let Err(err) = written {
+        eprintln!("failed to write JSON output: {err}");
+    }
+}
diff --git a/crates/slopsquatscan/src/output.rs b/crates/slopsquatscan/src/output.rs
new file mode 100644
index 0000000..165c580
--- /dev/null
+++ b/crates/slopsquatscan/src/output.rs
@@ -0,0 +1,46 @@
+//! ANSI colour codes and stderr formatting helpers for the text report.
+
+pub const RED: &str = "\x1b[0;31m";
+pub const YLW: &str = "\x1b[0;33m";
+pub const GRN: &str = "\x1b[0;32m";
+pub const DIM: &str = "\x1b[0;90m";
+pub const RST: &str = "\x1b[0m";
+pub const BOLD: &str = "\x1b[1m";
+
+/// Prints a red "suspicious" line for `name` with the given `reason`.
+pub fn log_sus(name: &str, reason: &str) {
+    eprintln!(" {RED}\u{2717}{RST} {name:<30} {RED}{reason}{RST}");
+}
+
+/// Prints a yellow "warning" line for `name` with the given `reason`.
+pub fn log_warn(name: &str, reason: &str) {
+    eprintln!(" {YLW}!{RST} {name:<30} {YLW}{reason}{RST}");
+}
+
+/// Prints a green "clean" line for `name`, but only when `verbose` is set.
+pub fn log_ok(name: &str, detail: &str, verbose: bool) {
+    if verbose {
+        eprintln!(" {GRN}\u{2713}{RST} {name:<30} {DIM}{detail}{RST}");
+    }
+}
+
+/// Prints the red ASCII-art program banner to stderr.
+pub fn banner() {
+    // The first art row sits directly after `{RED}`: a trailing `\` line
+    // continuation would strip that row's leading space and skew the logo.
+    eprintln!(
+        "{RED} _____ _             _____                   _   _____
+/  ___| |           /  ___|                 | | /  ___|
+\\ `--.| | ___  _ __ \\ `--.  __ _ _   _  __ _| |_\\ `--.  ___ __ _ _ __
+ `--. \\ |/ _ \\| '_ \\ `--. \\/ _` | | | |/ _` | __|`--. \\/ __/ _` | '_ \\
+/\\__/ / | (_) | |_) /\\__/ / (_| | |_| | (_| | |_/\\__/ / (_| (_| | | | |
+\\____/|_|\\___/| .__/\\____/ \\__, |\\__,_|\\__,_|\\__\\____/ \\___\\__,_|_| |_|
+              | |             | |
+              |_|             |_|{RST}"
+    );
+}
+
+/// Prints the warning thresholds in effect as a single dimmed line.
+pub fn thresholds(npm_weekly: u64, pypi_weekly: u64, days_new: i64) {
+    eprintln!("{DIM}thresholds: <{npm_weekly} dl/week (npm), <{pypi_weekly} dl/week (pypi), <{days_new}d old = warning{RST}");
+}
diff --git a/crates/slopsquatscan/src/registry.rs b/crates/slopsquatscan/src/registry.rs
new file mode 100644
index 0000000..1a5a798
--- /dev/null
+++ b/crates/slopsquatscan/src/registry.rs
@@ -0,0 +1,330 @@
+//! Registry lookups: lists locally installed packages and classifies each one
+//! against npm, PyPI or the AUR.
+
+use std::process::Command;
+use std::time::Duration;
+
+use chrono::Utc;
+use reqwest::blocking::{Client, Response};
+use serde_json::Value;
+
+const NPM_WEEKLY_THRESHOLD: u64 = 100;
+const PYPI_WEEKLY_THRESHOLD: u64 = 100;
+const DAYS_NEW_THRESHOLD: i64 = 30;
+const AUR_VOTES_THRESHOLD: u64 = 5;
+/// Timeout applied to every registry request.
+const REQUEST_TIMEOUT: Duration = Duration::from_secs(10);
+
+/// Weekly npm downloads below which a package is reported as a warning.
+pub const fn npm_threshold() -> u64 {
+    NPM_WEEKLY_THRESHOLD
+}
+/// Weekly PyPI downloads below which a package is reported as a warning.
+pub const fn pypi_threshold() -> u64 {
+    PYPI_WEEKLY_THRESHOLD
+}
+/// Package age in days below which a package is reported as a warning.
+pub const fn days_threshold() -> i64 {
+    DAYS_NEW_THRESHOLD
+}
+
+/// Verdict for one package, with the human-readable text shown in the report.
+#[derive(Debug, Clone)]
+pub enum PackageStatus {
+    Clean { detail: String },
+    Warning { reason: String },
+    Suspicious { reason: String },
+}
+
+/// Result of checking one installed package against its registry.
+#[derive(Debug, Clone)]
+pub struct PackageResult {
+    pub registry: &'static str,
+    pub name: String,
+    pub status: PackageStatus,
+}
+
+impl PackageResult {
+    /// Builds a `Clean` result for `name`.
+    fn clean(registry: &'static str, name: &str, detail: impl Into<String>) -> Self {
+        Self {
+            registry,
+            name: name.to_string(),
+            status: PackageStatus::Clean {
+                detail: detail.into(),
+            },
+        }
+    }
+
+    /// Builds a `Warning` result for `name`.
+    fn warning(registry: &'static str, name: &str, reason: impl Into<String>) -> Self {
+        Self {
+            registry,
+            name: name.to_string(),
+            status: PackageStatus::Warning {
+                reason: reason.into(),
+            },
+        }
+    }
+
+    /// Builds a `Suspicious` result for `name`.
+    fn suspicious(registry: &'static str, name: &str, reason: impl Into<String>) -> Self {
+        Self {
+            registry,
+            name: name.to_string(),
+            status: PackageStatus::Suspicious {
+                reason: reason.into(),
+            },
+        }
+    }
+}
+
+/// Days elapsed since `date_str`, or `None` if it is not a recognised timestamp.
+fn days_since_iso(date_str: &str) -> Option<i64> {
+    // Try RFC 3339 first (npm uses this), then naive datetime (PyPI's upload_time has no tz)
+    if let Ok(dt) = chrono::DateTime::parse_from_rfc3339(date_str) {
+        return Some((Utc::now() - dt.with_timezone(&Utc)).num_days());
+    }
+    let ndt = chrono::NaiveDateTime::parse_from_str(date_str, "%Y-%m-%dT%H:%M:%S").ok()?;
+    Some((Utc::now() - ndt.and_utc()).num_days())
+}
+
+/// Whole days elapsed since the Unix timestamp `epoch` (in seconds).
+fn days_since_epoch(epoch: i64) -> i64 {
+    (Utc::now().timestamp() - epoch) / 86400
+}
+
+/// Sends a GET request to `url` with the shared request timeout.
+fn http_get(client: &Client, url: &str) -> reqwest::Result<Response> {
+    client.get(url).timeout(REQUEST_TIMEOUT).send()
+}
+
+/// Fetches `url` and reads the unsigned integer at JSON `pointer` in the body.
+///
+/// Returns `None` when the request, the JSON parse, or the lookup fails, so a
+/// failed stats call is never mistaken for "0 downloads".
+fn fetch_count(client: &Client, url: &str, pointer: &str) -> Option<u64> {
+    http_get(client, url)
+        .ok()?
+        .json::<Value>()
+        .ok()?
+        .pointer(pointer)?
+        .as_u64()
+}
+
+/// Turns a weekly download count into the final verdict for `pkg`.
+fn download_verdict(
+    registry: &'static str,
+    pkg: &str,
+    weekly: Option<u64>,
+    threshold: u64,
+) -> PackageResult {
+    match weekly {
+        None => PackageResult::warning(registry, pkg, "download stats unavailable"),
+        Some(n) if n < threshold => {
+            PackageResult::warning(registry, pkg, format!("only {n} downloads/week"))
+        }
+        Some(n) => PackageResult::clean(registry, pkg, format!("{n} dl/week")),
+    }
+}
+
+/// Checks one globally installed npm package against the npm registry.
+fn check_npm_package(client: &Client, pkg: &str) -> PackageResult {
+    const REGISTRY: &str = "npm";
+
+    // An unreachable registry says nothing about the package, so it is only a
+    // warning; "suspicious" is reserved for names the registry does not know.
+    let resp = match http_get(client, &format!("https://registry.npmjs.org/{pkg}")) {
+        Ok(resp) => resp,
+        Err(_) => return PackageResult::warning(REGISTRY, pkg, "registry unreachable"),
+    };
+    let body: Value = match resp.json() {
+        Ok(body) => body,
+        Err(_) => {
+            return PackageResult::warning(REGISTRY, pkg, "failed to parse registry response")
+        }
+    };
+    if body.get("error").is_some() {
+        return PackageResult::suspicious(REGISTRY, pkg, "NOT FOUND on npm registry");
+    }
+
+    // Check age
+    let age = body
+        .pointer("/time/created")
+        .and_then(Value::as_str)
+        .and_then(days_since_iso);
+    if let Some(age) = age.filter(|&days| days < DAYS_NEW_THRESHOLD) {
+        return PackageResult::warning(REGISTRY, pkg, format!("created {age}d ago"));
+    }
+
+    // Check weekly downloads
+    let stats_url = format!("https://api.npmjs.org/downloads/point/last-week/{pkg}");
+    let weekly = fetch_count(client, &stats_url, "/downloads");
+    download_verdict(REGISTRY, pkg, weekly, NPM_WEEKLY_THRESHOLD)
+}
+
+/// Checks one installed pip package against PyPI.
+fn check_pip_package(client: &Client, pkg: &str) -> PackageResult {
+    const REGISTRY: &str = "pip";
+
+    let resp = match http_get(client, &format!("https://pypi.org/pypi/{pkg}/json")) {
+        Ok(resp) => resp,
+        Err(_) => return PackageResult::warning(REGISTRY, pkg, "registry unreachable"),
+    };
+    if resp.status() == 404 {
+        return PackageResult::suspicious(REGISTRY, pkg, "NOT FOUND on PyPI");
+    }
+    let body: Value = match resp.json() {
+        Ok(body) => body,
+        Err(_) => {
+            return PackageResult::warning(REGISTRY, pkg, "failed to parse registry response")
+        }
+    };
+
+    // Check age — find earliest upload_time across all releases
+    let age = body
+        .get("releases")
+        .and_then(Value::as_object)
+        .and_then(|releases| {
+            releases
+                .values()
+                .filter_map(Value::as_array)
+                .flatten()
+                .filter_map(|file| file.get("upload_time").and_then(Value::as_str))
+                .min()
+        })
+        .and_then(days_since_iso);
+    if let Some(age) = age.filter(|&days| days < DAYS_NEW_THRESHOLD) {
+        return PackageResult::warning(REGISTRY, pkg, format!("first upload {age}d ago"));
+    }
+
+    // Check weekly downloads via pypistats
+    let stats_url = format!("https://pypistats.org/api/packages/{pkg}/recent");
+    let weekly = fetch_count(client, &stats_url, "/data/last_week");
+    download_verdict(REGISTRY, pkg, weekly, PYPI_WEEKLY_THRESHOLD)
+}
+
+/// Checks one foreign (AUR) pacman package against the AUR RPC interface.
+fn check_aur_package(client: &Client, pkg: &str) -> PackageResult {
+    const REGISTRY: &str = "aur";
+
+    // Pass the name through `query` so it is percent-encoded: in a hand-built
+    // query string the `+` of names such as `libc++` would be read as a space
+    // and the package wrongly reported as missing.
+    let resp = client
+        .get("https://aur.archlinux.org/rpc/v5/info")
+        .query(&[("arg[]", pkg)])
+        .timeout(REQUEST_TIMEOUT)
+        .send();
+    let resp = match resp {
+        Ok(resp) => resp,
+        Err(_) => return PackageResult::warning(REGISTRY, pkg, "AUR unreachable"),
+    };
+    let body: Value = match resp.json() {
+        Ok(body) => body,
+        Err(_) => {
+            return PackageResult::warning(REGISTRY, pkg, "failed to parse registry response")
+        }
+    };
+
+    let count = body
+        .get("resultcount")
+        .and_then(Value::as_u64)
+        .unwrap_or(0);
+    if count == 0 {
+        return PackageResult::suspicious(REGISTRY, pkg, "NOT FOUND on AUR");
+    }
+
+    let votes = body
+        .pointer("/results/0/NumVotes")
+        .and_then(Value::as_u64)
+        .unwrap_or(0);
+    // A missing or non-positive timestamp means the age is unknown; skip the check.
+    let age = body
+        .pointer("/results/0/FirstSubmitted")
+        .and_then(Value::as_i64)
+        .filter(|&epoch| epoch > 0)
+        .map(days_since_epoch);
+    if let Some(age) = age.filter(|&days| days < DAYS_NEW_THRESHOLD) {
+        let reason = format!("submitted {age}d ago, {votes} votes");
+        return PackageResult::warning(REGISTRY, pkg, reason);
+    }
+
+    if votes < AUR_VOTES_THRESHOLD {
+        return PackageResult::warning(REGISTRY, pkg, format!("only {votes} AUR votes"));
+    }
+    PackageResult::clean(REGISTRY, pkg, format!("{votes} votes"))
+}
+
+/// Checks every globally installed npm package (npm itself is skipped).
+///
+/// Returns an empty list when `npm` cannot be run or its output is not the
+/// expected JSON.
+pub fn scan_npm(client: &Client) -> Vec<PackageResult> {
+    let output = match Command::new("npm")
+        .args(["list", "-g", "--depth=0", "--json"])
+        .output()
+    {
+        Ok(output) => output,
+        Err(_) => return Vec::new(),
+    };
+    let listing: Value = match serde_json::from_slice(&output.stdout) {
+        Ok(listing) => listing,
+        Err(_) => return Vec::new(),
+    };
+    let deps = match listing.get("dependencies").and_then(Value::as_object) {
+        Some(deps) => deps,
+        None => return Vec::new(),
+    };
+
+    deps.keys()
+        .filter(|pkg| pkg.as_str() != "npm")
+        .map(|pkg| check_npm_package(client, pkg))
+        .collect()
+}
+
+/// Checks every package reported by `pip3 list` (or `pip list` as a fallback).
+///
+/// Returns an empty list when neither command can be run or the output is not
+/// the expected JSON.
+pub fn scan_pip(client: &Client) -> Vec<PackageResult> {
+    // Spawn the candidates directly instead of probing with `which`, which is
+    // not available on every platform; a spawn error means "not installed".
+    let output = ["pip3", "pip"].iter().find_map(|cmd| {
+        Command::new(cmd)
+            .args(["list", "--format=json"])
+            .output()
+            .ok()
+    });
+    let output = match output {
+        Some(output) => output,
+        None => return Vec::new(),
+    };
+    let pkgs: Vec<Value> = match serde_json::from_slice(&output.stdout) {
+        Ok(pkgs) => pkgs,
+        Err(_) => return Vec::new(),
+    };
+
+    pkgs.iter()
+        .filter_map(|entry| entry.get("name").and_then(Value::as_str))
+        .map(|pkg| check_pip_package(client, pkg))
+        .collect()
+}
+
+/// Checks every foreign package listed by `pacman -Qm` against the AUR.
+///
+/// Returns an empty list when `pacman` cannot be run.
+pub fn scan_aur(client: &Client) -> Vec<PackageResult> {
+    let output = match Command::new("pacman").args(["-Qm"]).output() {
+        Ok(output) => output,
+        Err(_) => return Vec::new(),
+    };
+
+    // Only the first whitespace-separated token of each line is looked up.
+    let stdout = String::from_utf8_lossy(&output.stdout);
+    stdout
+        .lines()
+        .filter_map(|line| line.split_whitespace().next())
+        .map(|pkg| check_aur_package(client, pkg))
+        .collect()
+}