Skip to content

Commit 5590f33

Browse files
authored
Create new tool to manage updating the list in chromium src (#278)
The new scripts/updatelist tool is an intended replacement for scripts/roll_preload_list.py. This updatelist tool also incorporates the functionality of the `hstspreload scan-pending` command that has to be run as a prerequisite to roll_preload_list.py. It additionally performs revalidation checks on the domains from the /api/v2/pending-automated-removal list.
1 parent 2cc8d54 commit 5590f33

File tree

1 file changed

+320
-0
lines changed

1 file changed

+320
-0
lines changed

scripts/updatelist/main.go

Lines changed: 320 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,320 @@
1+
package main
2+
3+
import (
4+
"encoding/json"
5+
"flag"
6+
"fmt"
7+
"io"
8+
"log"
9+
"net/http"
10+
"os"
11+
"regexp"
12+
"slices"
13+
"strings"
14+
"sync"
15+
"time"
16+
17+
"github.com/chromium/hstspreload"
18+
"github.com/chromium/hstspreload/chromium/preloadlist"
19+
"golang.org/x/sync/errgroup"
20+
)
21+
22+
// PendingChanges holds the set of modifications that hstspreload.org wants
// applied to the preload list.
type PendingChanges struct {
	// pendingAdditions holds the domain names reported by /api/v2/pending,
	// pendingRemovals the names reported by /api/v2/pending-removal, and
	// pendingAutomatedRemovals the names reported by
	// /api/v2/pending-automated-removal.
	pendingAdditions, pendingRemovals, pendingAutomatedRemovals []string

	// removals is a lookup set built by updateRemovals from pendingRemovals
	// and pendingAutomatedRemovals; every key present maps to true.
	removals map[string]bool
}
28+
29+
func fetchPendingChanges() (*PendingChanges, error) {
30+
changes := new(PendingChanges)
31+
g := new(errgroup.Group)
32+
g.Go(func() error {
33+
log.Println("Fetching pending additions...")
34+
resp, err := http.Get("https://hstspreload.org/api/v2/pending")
35+
if err != nil {
36+
return err
37+
}
38+
defer resp.Body.Close()
39+
pendingReader := json.NewDecoder(resp.Body)
40+
pendingEntries := []preloadlist.Entry{}
41+
if err := pendingReader.Decode(&pendingEntries); err != nil {
42+
return err
43+
}
44+
for _, entry := range pendingEntries {
45+
changes.pendingAdditions = append(changes.pendingAdditions, entry.Name)
46+
}
47+
slices.Sort(changes.pendingAdditions)
48+
return nil
49+
})
50+
g.Go(func() error {
51+
log.Println("Fetching pending removals...")
52+
resp, err := http.Get("https://hstspreload.org/api/v2/pending-removal")
53+
if err != nil {
54+
return err
55+
}
56+
defer resp.Body.Close()
57+
pendingReader := json.NewDecoder(resp.Body)
58+
if err := pendingReader.Decode(&changes.pendingRemovals); err != nil {
59+
return err
60+
}
61+
return nil
62+
})
63+
g.Go(func() error {
64+
log.Println("Fetching pending automated removals...")
65+
resp, err := http.Get("https://hstspreload.org/api/v2/pending-automated-removal")
66+
if err != nil {
67+
return err
68+
}
69+
defer resp.Body.Close()
70+
pendingReader := json.NewDecoder(resp.Body)
71+
if err := pendingReader.Decode(&changes.pendingAutomatedRemovals); err != nil {
72+
return err
73+
}
74+
return nil
75+
})
76+
77+
if err := g.Wait(); err != nil {
78+
return nil, err
79+
}
80+
log.Println("... all fetches complete")
81+
changes.updateRemovals()
82+
return changes, nil
83+
}
84+
85+
func (pc *PendingChanges) updateRemovals() {
86+
pc.removals = make(map[string]bool)
87+
for _, r := range pc.pendingRemovals {
88+
pc.removals[r] = true
89+
}
90+
for _, r := range pc.pendingAutomatedRemovals {
91+
pc.removals[r] = true
92+
}
93+
}
94+
95+
// Filter modifies the list of pending changes to include only domains that
96+
// still meet the criteria for that domain's proposed state.
97+
func (pc *PendingChanges) Filter() {
98+
log.Print("Verifying pending additions...")
99+
pc.pendingAdditions = filterParallel(pc.pendingAdditions, func(domain string) bool {
100+
// A pending addition to the list is still valid to add to the list if scanning the domain indicates no errors.
101+
_, issues := hstspreload.EligibleDomain(domain, preloadlist.Bulk1Year)
102+
return len(issues.Errors) == 0
103+
})
104+
log.Print("Verifying pending automated removals...")
105+
pc.pendingAutomatedRemovals = filterParallel(pc.pendingAutomatedRemovals, func(domain string) bool {
106+
// TODO: the call to EligibleDomain should be made with the policy that
107+
// the domain was originally preloaded with. The pending-automated-removal
108+
// endpoint does not expose that policy information. To prevent from
109+
// incorrectly removing old entries added with the 18-week policy, this
110+
// check always uses the 18-week policy.
111+
_, issues := hstspreload.EligibleDomain(domain, preloadlist.Bulk18Weeks)
112+
113+
// A pending automated removal is eligible for removal if it continues
114+
// to not meet the preload requirements, i.e. it has errors.
115+
return len(issues.Errors) > 0
116+
})
117+
log.Print("... done verifying domains")
118+
pc.updateRemovals()
119+
}
120+
121+
// tickLogger prints the most recently recorded progress message at a fixed
// interval. Create one with newTickLogger, record progress with Logf, and call
// Stop exactly once when finished.
type tickLogger struct {
	ticker *time.Ticker
	done   chan bool

	// mu guards logLine, which is written by Logf and read by the background
	// goroutine started in newTickLogger.
	mu      sync.Mutex
	logLine string
}

// Logf records the message to print on the next tick, replacing any message
// recorded earlier. Nothing is printed immediately.
func (t *tickLogger) Logf(format string, v ...any) {
	line := fmt.Sprintf(format, v...)
	t.mu.Lock()
	t.logLine = line
	t.mu.Unlock()
}

// Stop stops the ticker and blocks until the background goroutine has been
// told to exit. It must be called exactly once; a second call would block
// forever because nothing receives from done any more.
func (t *tickLogger) Stop() {
	t.ticker.Stop()
	t.done <- true
}

// newTickLogger returns a tickLogger that, every d, prints through the
// standard logger the latest message recorded with Logf. Ticks that fire
// before any message has been recorded print nothing.
func newTickLogger(d time.Duration) *tickLogger {
	t := &tickLogger{
		ticker: time.NewTicker(d),
		done:   make(chan bool),
	}
	go func() {
		for {
			select {
			case <-t.done:
				return
			case <-t.ticker.C:
				t.mu.Lock()
				line := t.logLine
				t.mu.Unlock()
				if line == "" {
					// Nothing to report yet. Keep looping rather than
					// returning: Stop sends on the unbuffered done channel
					// and would block forever if this goroutine had exited.
					continue
				}
				log.Print(line)
			}
		}
	}()
	return t
}
155+
156+
func filterParallel(domains []string, predicate func(domain string) bool) []string {
157+
mu := sync.Mutex{}
158+
filtered := make([]string, 0)
159+
160+
parallelism := 500
161+
sem := make(chan any, parallelism) // Use a buffered channel to limit the amount of parallelism
162+
wg := sync.WaitGroup{}
163+
l := newTickLogger(5 * time.Second)
164+
defer l.Stop()
165+
for i, domain := range domains {
166+
l.Logf("started processing %d domains", i)
167+
sem <- nil // Acquire a slot
168+
wg.Add(1)
169+
go func(domain string) {
170+
defer func() {
171+
wg.Done()
172+
<-sem // Release the slot
173+
}()
174+
if predicate(domain) {
175+
mu.Lock()
176+
filtered = append(filtered, domain)
177+
mu.Unlock()
178+
}
179+
}(domain)
180+
}
181+
wg.Wait()
182+
return filtered
183+
}
184+
185+
// PendingAdditions returns a sorted list of domain names that are pending
186+
// addition to the HSTS preload list.
187+
func (pc *PendingChanges) PendingAdditions() []string {
188+
return pc.pendingAdditions
189+
}
190+
191+
func (pc *PendingChanges) Removes(domain string) bool {
192+
return pc.removals[domain]
193+
}
194+
195+
// dupeTracker counts how often each domain name is observed so that names
// occurring more than once can be reported. The zero value is ready to use.
type dupeTracker struct {
	// seenDomains maps a domain name to the number of times it was observed.
	// It is allocated on the first call to Observe.
	seenDomains map[string]int
}

// Observe records one more occurrence of domain.
func (d *dupeTracker) Observe(domain string) {
	if d.seenDomains != nil {
		d.seenDomains[domain]++
		return
	}
	d.seenDomains = map[string]int{domain: 1}
}

// Dupes returns, in sorted order, every domain that was observed at least
// twice. The result is empty but non-nil when there are no duplicates.
func (d *dupeTracker) Dupes() []string {
	repeated := make([]string, 0)
	for name, occurrences := range d.seenDomains {
		if occurrences >= 2 {
			repeated = append(repeated, name)
		}
	}
	slices.Sort(repeated)
	return repeated
}
217+
218+
func updateList(listContents []byte, changes *PendingChanges) (string, []string, error) {
219+
listString := strings.TrimSuffix(string(listContents), "\n")
220+
log.Print("Removing and adding entries...")
221+
commentRe := regexp.MustCompile("^ *//.*")
222+
listEntryRe := regexp.MustCompile(`^ \{.*\},`)
223+
output := strings.Builder{}
224+
dupes := dupeTracker{}
225+
for _, line := range strings.Split(listString, "\n") {
226+
if commentRe.MatchString(line) {
227+
if line != " // END OF 1-YEAR BULK HSTS ENTRIES" {
228+
output.WriteString(line)
229+
output.WriteByte('\n')
230+
continue
231+
}
232+
for _, domain := range changes.PendingAdditions() {
233+
dupes.Observe(domain)
234+
fmt.Fprintf(&output, ` { "name": "%s", "policy": "bulk-1-year", "mode": "force-https", "include_subdomains": true },`, domain)
235+
fmt.Fprintln(&output)
236+
}
237+
output.WriteString(line)
238+
output.WriteByte('\n')
239+
continue
240+
}
241+
if !listEntryRe.MatchString(line) {
242+
output.WriteString(line)
243+
output.WriteByte('\n')
244+
continue
245+
}
246+
entry := preloadlist.Entry{}
247+
if err := json.Unmarshal([]byte(strings.TrimSuffix(line, ",")), &entry); err != nil {
248+
return "", nil, err
249+
}
250+
if !changes.Removes(entry.Name) {
251+
dupes.Observe(entry.Name)
252+
output.WriteString(line)
253+
output.WriteByte('\n')
254+
}
255+
}
256+
return output.String(), dupes.Dupes(), nil
257+
}
258+
259+
// overwriteFile replaces the entire contents of f with contents. It rewinds f
// to the start, truncates it to zero length and then writes the new data,
// returning the first error encountered. f must be open for writing.
func overwriteFile(f *os.File, contents string) error {
	_, err := f.Seek(0, io.SeekStart)
	if err == nil {
		err = f.Truncate(0)
	}
	if err == nil {
		_, err = f.WriteString(contents)
	}
	return err
}
271+
272+
func main() {
273+
listPath := flag.String("list_path", "", "Path to the file containing the HSTS preload list")
274+
flag.Parse()
275+
if *listPath == "" {
276+
log.Fatal("list_path not specified")
277+
}
278+
279+
// Open the JSON file containing the HSTS preload list.
280+
log.Print("Fetching preload list from Chromium source...")
281+
listFile, err := os.OpenFile(*listPath, os.O_RDWR, 0)
282+
if err != nil {
283+
log.Fatalf("Failed to open HSTS preload list in %q: %v", *listPath, err)
284+
}
285+
defer listFile.Close()
286+
287+
listContents, err := io.ReadAll(listFile)
288+
if err != nil {
289+
log.Fatalf("Failed to read HSTS preload list: %v", err)
290+
}
291+
292+
// fetch pending changes from hstspreload.org
293+
changes, err := fetchPendingChanges()
294+
if err != nil {
295+
log.Fatalf("Error fetching pending changes from hstspreload.org: %v", err)
296+
}
297+
298+
// filter changes to only ones that are still valid
299+
changes.Filter()
300+
301+
// apply the changes to the JSON file in the chromium source
302+
updatedList, dupes, err := updateList(listContents, changes)
303+
if err != nil {
304+
log.Fatalf("Failed to update list: %v", err)
305+
}
306+
if err := overwriteFile(listFile, updatedList); err != nil {
307+
log.Fatalf("Error writing HSTS preload list file: %v", err)
308+
}
309+
310+
if len(dupes) > 0 {
311+
fmt.Println("WARNING\nDuplicate entries:")
312+
for _, dupe := range dupes {
313+
fmt.Printf("- %s\n", dupe)
314+
}
315+
fmt.Println("You'll need to manually deduplicate entries before commiting them to Chromium")
316+
fmt.Println("Note: if there are a lot of duplicate entries, you may have accidentally run this program twice. Reset your checkout and try again.")
317+
} else {
318+
fmt.Println("SUCCESS")
319+
}
320+
}

0 commit comments

Comments
 (0)