Skip to content

Commit 7608eab

Browse files
authored
Merge pull request #7206 from LawnGnome/typomania
Wire up typosquatting checks when new packages are published
2 parents c46f914 + 1705535 commit 7608eab

File tree

17 files changed

+760
-11
lines changed

17 files changed

+760
-11
lines changed

Cargo.lock

Lines changed: 19 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ tower = "=0.4.13"
9797
tower-http = { version = "=0.4.4", features = ["add-extension", "fs", "catch-panic", "timeout", "compression-full"] }
9898
tracing = "=0.1.40"
9999
tracing-subscriber = { version = "=0.3.18", features = ["env-filter"] }
100+
typomania = { version = "0.1.2", default-features = false }
100101
url = "=2.4.1"
101102

102103
[dev-dependencies]

src/admin/enqueue_job.rs

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use crate::db;
2-
use crate::schema::background_jobs;
2+
use crate::schema::{background_jobs, crates};
33
use crate::worker::jobs;
44
use crate::worker::swirl::BackgroundJob;
55
use anyhow::Result;
@@ -26,6 +26,10 @@ pub enum Command {
2626
#[arg(long = "dry-run")]
2727
dry_run: bool,
2828
},
29+
CheckTyposquat {
30+
#[arg()]
31+
name: String,
32+
},
2933
}
3034

3135
pub fn run(command: Command) -> Result<()> {
@@ -60,6 +64,21 @@ pub fn run(command: Command) -> Result<()> {
6064
Command::NormalizeIndex { dry_run } => {
6165
jobs::NormalizeIndex::new(dry_run).enqueue(conn)?;
6266
}
67+
Command::CheckTyposquat { name } => {
68+
// The job will fail if the crate doesn't actually exist, so let's check that up front.
69+
if crates::table
70+
.filter(crates::name.eq(&name))
71+
.count()
72+
.get_result::<i64>(conn)?
73+
== 0
74+
{
75+
anyhow::bail!(
76+
"cannot enqueue a typosquat check for a crate that doesn't exist: {name}"
77+
);
78+
}
79+
80+
jobs::CheckTyposquat::new(&name).enqueue(conn)?;
81+
}
6382
};
6483

6584
Ok(())

src/bin/background-worker.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,12 @@
1616
extern crate tracing;
1717

1818
use crates_io::cloudfront::CloudFront;
19-
use crates_io::config;
2019
use crates_io::db::DieselPool;
2120
use crates_io::fastly::Fastly;
2221
use crates_io::storage::Storage;
2322
use crates_io::worker::swirl::Runner;
2423
use crates_io::worker::{Environment, RunnerExt};
24+
use crates_io::{config, Emails};
2525
use crates_io::{db, ssh};
2626
use crates_io_env_vars::{var, var_parsed};
2727
use crates_io_index::RepositoryConfig;
@@ -73,6 +73,7 @@ fn main() -> anyhow::Result<()> {
7373
.build()
7474
.expect("Couldn't build client");
7575

76+
let emails = Emails::from_environment(&config);
7677
let fastly = Fastly::from_environment(client);
7778

7879
let connection_pool = r2d2::Pool::builder()
@@ -88,6 +89,7 @@ fn main() -> anyhow::Result<()> {
8889
fastly,
8990
storage,
9091
connection_pool.clone(),
92+
emails,
9193
);
9294

9395
let environment = Arc::new(environment);

src/controllers/krate/publish.rs

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
//! Functionality related to publishing a new crate or version of a crate.
22
33
use crate::auth::AuthCheck;
4-
use crate::worker::jobs;
4+
use crate::worker::jobs::{self, CheckTyposquat};
55
use crate::worker::swirl::BackgroundJob;
66
use axum::body::Bytes;
77
use cargo_manifest::{Dependency, DepsSet, TargetDepsSet};
@@ -85,7 +85,7 @@ pub async fn publish(app: AppState, req: BytesRequest) -> AppResult<Json<GoodCra
8585
// this query should only be used for the endpoint scope calculation
8686
// since a race condition there would only cause `publish-new` instead of
8787
// `publish-update` to be used.
88-
let existing_crate = Crate::by_name(&metadata.name)
88+
let existing_crate: Option<Crate> = Crate::by_name(&metadata.name)
8989
.first::<Crate>(conn)
9090
.optional()?;
9191

@@ -222,7 +222,7 @@ pub async fn publish(app: AppState, req: BytesRequest) -> AppResult<Json<GoodCra
222222
return Err(cargo_err("expected at most 5 categories per crate"));
223223
}
224224

225-
let max_features = existing_crate
225+
let max_features = existing_crate.as_ref()
226226
.and_then(|c| c.max_features.map(|mf| mf as usize))
227227
.unwrap_or(app.config.max_features);
228228

@@ -393,6 +393,11 @@ pub async fn publish(app: AppState, req: BytesRequest) -> AppResult<Json<GoodCra
393393

394394
jobs::enqueue_sync_to_index(&krate.name, conn)?;
395395

396+
// Experiment: check new crates for potential typosquatting.
397+
if existing_crate.is_none() {
398+
CheckTyposquat::new(&krate.name).enqueue(conn)?;
399+
}
400+
396401
// The `other` field on `PublishWarnings` was introduced to handle a temporary warning
397402
// that is no longer needed. As such, crates.io currently does not return any `other`
398403
// warnings at this time, but if we need to, the field is available.

src/email.rs

Lines changed: 34 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use std::path::PathBuf;
2-
use std::sync::Mutex;
2+
use std::sync::{Arc, Mutex};
33

44
use crate::util::errors::{server_error, AppResult};
55

@@ -12,7 +12,7 @@ use lettre::transport::smtp::SmtpTransport;
1212
use lettre::{Message, Transport};
1313
use rand::distributions::{Alphanumeric, DistString};
1414

15-
#[derive(Debug)]
15+
#[derive(Debug, Clone)]
1616
pub struct Emails {
1717
backend: EmailBackend,
1818
}
@@ -48,7 +48,7 @@ impl Emails {
4848
pub fn new_in_memory() -> Self {
4949
Self {
5050
backend: EmailBackend::Memory {
51-
mails: Mutex::new(Vec::new()),
51+
mails: Arc::new(Mutex::new(Vec::new())),
5252
},
5353
}
5454
}
@@ -91,6 +91,35 @@ or go to https://{domain}/me/pending-invites to manage all of your crate ownersh
9191
self.send(email, subject, &body)
9292
}
9393

94+
/// Attempts to send a notification that a new crate may be typosquatting another crate.
95+
pub fn send_possible_typosquat_notification(
96+
&self,
97+
email: &str,
98+
crate_name: &str,
99+
squats: &[typomania::checks::Squat],
100+
) -> AppResult<()> {
101+
let domain = crate::config::domain_name();
102+
let subject = "Possible typosquatting in new crate";
103+
let body = format!(
104+
"New crate {crate_name} may be typosquatting one or more other crates.\n
105+
Visit https://{domain}/crates/{crate_name} to see the offending crate.\n
106+
\n
107+
Specific squat checks that triggered:\n
108+
\n
109+
{squats}",
110+
squats = squats
111+
.iter()
112+
.map(|squat| format!(
113+
"- {squat} (https://{domain}/crates/{crate_name})\n",
114+
crate_name = squat.package()
115+
))
116+
.collect::<Vec<_>>()
117+
.join(""),
118+
);
119+
120+
self.send(email, subject, &body)
121+
}
122+
94123
/// Attempts to send an API token exposure notification email
95124
pub fn send_token_exposed_notification(
96125
&self,
@@ -204,6 +233,7 @@ Source type: {source}\n",
204233
}
205234
}
206235

236+
#[derive(Clone)]
207237
enum EmailBackend {
208238
/// Backend used in production to send mails using SMTP.
209239
Smtp {
@@ -214,7 +244,7 @@ enum EmailBackend {
214244
/// Backend used locally during development, will store the emails in the provided directory.
215245
FileSystem { path: PathBuf },
216246
/// Backend used during tests, will keep messages in memory to allow tests to retrieve them.
217-
Memory { mails: Mutex<Vec<StoredEmail>> },
247+
Memory { mails: Arc<Mutex<Vec<StoredEmail>>> },
218248
}
219249

220250
// Custom Debug implementation to avoid showing the SMTP password.

src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ pub mod sql;
6060
pub mod ssh;
6161
pub mod storage;
6262
mod test_util;
63+
pub mod typosquat;
6364
pub mod util;
6465
pub mod views;
6566
pub mod worker;

src/tests/util/test_app.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,7 @@ impl TestAppBuilder {
269269
None,
270270
app.storage.clone(),
271271
app.primary_database.clone(),
272+
app.emails.clone(),
272273
);
273274

274275
let runner = Runner::new(app.primary_database.clone(), Arc::new(environment))

src/typosquat/cache.rs

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
use std::sync::Arc;
2+
3+
use diesel::PgConnection;
4+
use thiserror::Error;
5+
use typomania::{
6+
checks::{Bitflips, Omitted, SwappedWords, Typos},
7+
Harness,
8+
};
9+
10+
use super::{config, database::TopCrates};
11+
12+
/// Name of the environment variable that holds the comma-separated list of e-mail addresses
/// to notify about potential typosquatting.
static NOTIFICATION_EMAILS_ENV: &str = "TYPOSQUAT_NOTIFICATION_EMAILS";
13+
14+
/// A cache containing everything we need to run typosquatting checks.
15+
///
16+
/// Specifically, this includes a corpus of popular crates attached to a typomania harness, and a
17+
/// list of e-mail addresses that we'll send notifications to if potential typosquatting is
18+
/// discovered.
19+
pub struct Cache {
20+
emails: Vec<String>,
21+
harness: Option<Harness<TopCrates>>,
22+
}
23+
24+
impl Cache {
25+
/// Instantiates a new [`Cache`] from the environment.
26+
///
27+
/// This reads the `NOTIFICATION_EMAILS_ENV` environment variable to get the list of e-mail
28+
/// addresses to send notifications to, then invokes [`Cache::new`] to read popular crates from
29+
/// the database.
30+
#[instrument(skip_all, err)]
31+
pub fn from_env(conn: &mut PgConnection) -> Result<Self, Error> {
32+
let emails: Vec<String> = crates_io_env_vars::var(NOTIFICATION_EMAILS_ENV)
33+
.map_err(|e| Error::Environment {
34+
name: NOTIFICATION_EMAILS_ENV.into(),
35+
source: Arc::new(e),
36+
})?
37+
.unwrap_or_default()
38+
.split(',')
39+
.map(|s| s.trim().to_owned())
40+
.filter(|s| !s.is_empty())
41+
.collect();
42+
43+
if emails.is_empty() {
44+
// If we're not notifying anyone, then there's really not much to do here.
45+
warn!("$TYPOSQUAT_NOTIFICATION_EMAILS is not set; no typosquatting notifications will be sent");
46+
Ok(Self {
47+
emails,
48+
harness: None,
49+
})
50+
} else {
51+
// Otherwise, let's go get the top crates and build a corpus.
52+
Self::new(emails, conn)
53+
}
54+
}
55+
56+
/// Instantiates a cache by querying popular crates and building them into a typomania harness.
57+
///
58+
/// This relies on configuration in the `super::config` module.
59+
pub fn new(emails: Vec<String>, conn: &mut PgConnection) -> Result<Self, Error> {
60+
let top = TopCrates::new(conn, config::TOP_CRATES)?;
61+
62+
Ok(Self {
63+
emails,
64+
harness: Some(
65+
Harness::builder()
66+
.with_check(Bitflips::new(
67+
config::CRATE_NAME_ALPHABET,
68+
top.crates.keys().map(String::as_str),
69+
))
70+
.with_check(Omitted::new(config::CRATE_NAME_ALPHABET))
71+
.with_check(SwappedWords::new("-_"))
72+
.with_check(Typos::new(config::TYPOS.iter().map(|(c, typos)| {
73+
(*c, typos.iter().map(|ss| ss.to_string()).collect())
74+
})))
75+
.build(top),
76+
),
77+
})
78+
}
79+
80+
pub fn get_harness(&self) -> Option<&Harness<TopCrates>> {
81+
self.harness.as_ref()
82+
}
83+
84+
pub fn iter_emails(&self) -> impl Iterator<Item = &str> {
85+
self.emails.iter().map(String::as_str)
86+
}
87+
}
88+
89+
// Because the error returned from Cache::new() gets memoised in the environment, we either need to
90+
// return it by reference from Environment::typosquat_cache() or we need to be able to clone it.
91+
// We'll do some Arc wrapping in the variants below to ensure that everything is clonable while not
92+
// destroying the source metadata.
93+
#[derive(Error, Debug, Clone)]
94+
pub enum Error {
95+
#[error("error reading environment variable {name}: {source:?}")]
96+
Environment {
97+
name: String,
98+
#[source]
99+
source: Arc<anyhow::Error>,
100+
},
101+
102+
#[error("error getting top crates: {0:?}")]
103+
TopCrates(#[source] Arc<diesel::result::Error>),
104+
}
105+
106+
impl From<diesel::result::Error> for Error {
107+
fn from(value: diesel::result::Error) -> Self {
108+
Self::TopCrates(Arc::new(value))
109+
}
110+
}

0 commit comments

Comments
 (0)