diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 5253163..724560c 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -210,8 +210,8 @@ Aggregate of [https_crawl](#https_crawl) that creates latest crawl sessions base |requests|Number of comparison requests actually made during the crawl session|integer|| |session_request_limit|The number of comparisons wanted for the session|integer|| |is_redirect|Whether the domain was actually crawled or is a redirect from another host in the table that was crawled|boolean|| -|max_https_crawl_id|https_crawl.id of last comparison made during crawl session|bigint|| |redirect_hosts|key/value pairs of hosts and the number of redirects to it|jsonb|| +|updated|When last updated|timestamp with time zone|| #### https_upgrade_metrics diff --git a/https_crawl.pl b/https_crawl.pl index f134662..4d43638 100755 --- a/https_crawl.pl +++ b/https_crawl.pl @@ -333,7 +333,6 @@ sub crawl_sites{ mixed_requests max_ss_diff redirects - max_id requests is_redirect redirect_hosts' @@ -412,18 +411,18 @@ sub prep_db { domain, https, http_and_https, - https_errs, http, + https_errs, + http, unknown, autoupgrade, mixed_requests, max_screenshot_diff, redirects, - max_https_crawl_id, requests, is_redirect, redirect_hosts, session_request_limit) - values (?,?,?,?,?,?,?,?,?,?,?,?,?,?,$CC{URLS_PER_SITE}) + values (?,?,?,?,?,?,?,?,?,?,?,?,?,$CC{URLS_PER_SITE}) on conflict (domain) do update set ( https, http_and_https, @@ -434,11 +433,11 @@ sub prep_db { mixed_requests, max_screenshot_diff, redirects, - max_https_crawl_id, requests, is_redirect, redirect_hosts, - session_request_limit + session_request_limit, + updated ) = ( EXCLUDED.https, EXCLUDED.http_and_https, @@ -449,11 +448,12 @@ sub prep_db { EXCLUDED.mixed_requests, EXCLUDED.max_screenshot_diff, EXCLUDED.redirects, - EXCLUDED.max_https_crawl_id, EXCLUDED.requests, EXCLUDED.is_redirect, EXCLUDED.redirect_hosts, - EXCLUDED.session_request_limit) + EXCLUDED.session_request_limit, + now() + ) where EXCLUDED.is_redirect = false or https_crawl_aggregate.is_redirect = true diff --git a/sql/https_crawl_aggregate.sql b/sql/https_crawl_aggregate.sql index 354edf3..5157a42 100644 --- a/sql/https_crawl_aggregate.sql +++ b/sql/https_crawl_aggregate.sql @@ -37,8 +37,8 @@ CREATE TABLE https_crawl_aggregate ( requests integer NOT NULL, session_request_limit integer NOT NULL, is_redirect boolean DEFAULT false NOT NULL, - max_https_crawl_id bigint NOT NULL, - redirect_hosts jsonb + redirect_hosts jsonb, + updated timestamp with time zone DEFAULT now() NOT NULL );