Commit f878215

Author: Felipe Molina
Commit message: Version 1.0 - SecTor edition
1 parent: 083412a

22 files changed: +1989 −1188 lines changed

ATTRIBUTION.txt

Lines changed: 2 additions & 0 deletions

@@ -0,0 +1,2 @@
+The following third-party repositories have been used for this project:
+* Plotly: https://github.com/plotly - For the library and geographical datasets - MIT License - It can be read here: https://github.com/plotly/datasets/blob/master/LICENSE

Pipfile

Lines changed: 1 addition & 0 deletions

@@ -18,6 +18,7 @@ dnspython = "*"
 pyyaml = "*"
 geoip2 = "*"
 brotli = "*"
+mongoengine = "*"
 
 [dev-packages]
 

Pipfile.lock

Lines changed: 620 additions & 605 deletions
Some generated files are not rendered by default.

async_poll_headers.py

Lines changed: 184 additions & 35 deletions
Large diffs are not rendered by default.

async_poll_whois_data.py

Lines changed: 6 additions & 2 deletions
@@ -7,7 +7,6 @@
 from datetime import datetime
 from optparse import OptionParser
 import pandas as pd
-from tldextract import tldextract
 from dashboard.utils.utils import get_config,get_headers_collection
 import asyncio
 from time import time
@@ -68,7 +67,12 @@ def update_whois(db_document,whois_result,options,collection,mapping_df):
     if ("country" in whois_result["whois"].keys() and whois_result["whois"]['country'] is not None):
         try:
             logging.debug("Assigning the country %s to this record." % whois_result["whois"]["country"])
-            collection.update_one({'_id': db_document["_id"]}, { '$set': {'whois': {'IPv4': options.ip, 'whois_data': whois_result['whois']} , 'country': whois_result["whois"]['country'], 'continent': mapping_df[mapping_df["TLD"]==whois_result['whois']['country'].lower()].Continent.values[0]} })
+            collection.update_one({'_id': db_document["_id"]},
+                { '$set': {'whois': {'IPv4': options.ip, 'whois_data': whois_result['whois']} ,
+                           'country': whois_result["whois"]['country'],
+                           'continent': mapping_df[mapping_df["TLD"]==whois_result['whois']['country'].lower()].Continent.values[0]
+                          }
+                })
             updated_whois+=1
         except Exception as ex:
             logging.error("Error updating this record in the DB with country and continent information: %s" % ex)
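The reformatted `update_one` call above still keys the continent on a pandas lookup, `mapping_df[...].Continent.values[0]`, which raises `IndexError` for any country code missing from the mapping (one reason the call sits inside a try/except). A minimal sketch of that lookup, using made-up mapping rows but the same column names as the diff:

```python
import pandas as pd

# Hypothetical miniature of the TLD-to-continent mapping_df the script loads;
# the column names ("TLD", "Continent") match the diff, the rows are invented.
mapping_df = pd.DataFrame({
    "TLD": ["es", "de", "jp"],
    "Continent": ["Europe", "Europe", "Asia"],
})

def continent_for(country_code):
    # Same lookup as mapping_df[...].Continent.values[0] in the diff, but it
    # returns None for unmapped codes instead of raising IndexError (the
    # original relies on the surrounding try/except for that case).
    matches = mapping_df.loc[mapping_df["TLD"] == country_code.lower(), "Continent"]
    return matches.iloc[0] if not matches.empty else None

print(continent_for("ES"))  # Europe
print(continent_for("xx"))  # None
```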

config.yml

Lines changed: 26 additions & 28 deletions
@@ -1,22 +1,14 @@
 db_environments:
-  majestic:
+  majestic_snapshots:
     user: root
     password: Headers123!
     host: 127.0.0.1
     port: 27017
     database: headers
-    headers_coll: headers_get
+    headers_coll: header_scans
     orphans_coll: orphan
-  umbrella:
-    user: root
-    password: Headers123!
-    host: 127.0.0.1
-    port: 27017
-    database: headers
-    headers_coll: headers_umbrella
-    orphans_coll: orphans
 general:
-  scrapeops: <your-scrapeops-api-key>
+  scrapeops: 855523ae-8c95-4594-91a7-8b01de2a028f
   abusable_domains:
   - '*.amazonaws.com':
     - exfil
@@ -28,18 +20,24 @@ general:
     - exec
   - 'cdn.jsdelivr.net':
     - exec
+  - 'www.facebook.com':
+    - exfil
   - '*.facebook.com':
     - exfil
   - '*.hotjar.com':
     - exfil
-  - 'ask.hotjar.com':
+  - 'ask.hotjar.io':
     - exfil
   - '*.herokuapp.com':
     - exfil
     - exec
   - '*.firebaseapp.com':
     - exfil
     - exec
+  # It is possible to exfiltrate to googletagmanager.com by using custom events as well
+  # https://www.analyticsmania.com/post/google-tag-manager-custom-event-trigger/
+  # Integrate the custom event submission to Hotjar following this:
+  # https://help.hotjar.com/hc/en-us/articles/4412561401111-How-to-Send-Events-with-Google-Tag-Manager
   - '*.google-analytics.com':
     - exfil
   - '*.azurestaticapps.net':
@@ -51,19 +49,19 @@ general:
   vulns_explanation:
     UNDEFINED: Undefined vulnerability
     NOCSP: No CSP policy was defined
-    UNSAFEINLINE: The value 'unsafe-inline' was found in the directive {}.
-    UNSAFEEVAL: The value 'unsafe-eval' was found in the directive {}.
-    LENIENTSCHEME: The policy contained a lenient scheme handler, such as https:// or http:// within the directive {}. This could allow attackers to include any external resource as a source of the affected directive, as long as it follows the protocol indicated in the handler.
-    CSPRO: Only the header 'Content-Security-Policy-Report-Only' was found. No Content Security Policy is enforced in this case.
-    THIRDPARTYABUSE: 'Third-party domains that could be abused were found in the directive {}: {}'
-    DEFAULTSRC: The directive 'default-src' was not found. This is a critical fallback directive for cases where specific directives are not defined, such as script-src, object-src, or font-src
-    FRAMEANCESTORS: The directive 'frame-ancestors' was not found. This would allow an attacker to embed this page into another one with <frame> and similar elements for clickjacking attacks.
-    REPORTTO: The directive 'report-to' was not found. It is recommended to report all CSP errors to centralised infrastructure for early detection of XSS attempts.
-    BASEURI: The directive 'base-uri' was not found. This would allow an attacker to inject a malicious <base> element to cause all relative paths to point at the malicious base URI.
-    UPGRIR: The directive 'upgrade-insecure-request' was not found. This directive indicates the browser to upgrade all resources included in the site from http:// to https://.
-    NDSCRIPTSRC: The directives 'script-src' and 'default-src' were not found.
-    NDCONNECTSRC: The directives 'connect-src' and 'default-src' were not found.
-    NDFRAMESRC: The directives 'frame-src' and 'default-src' were not found.
-    NDCHILDSRC: The directives 'child-src' and 'default-src' were not found.
-    NDOBJECTSRC: The directives 'object-src' and 'default-src' were not found.
-    ORPHANDOMAIN: The domain '{}', present in the directive {}, is not registered.
+    UNSAFEINLINE: The value 'unsafe-inline' found in '{}'.
+    UNSAFEEVAL: The value 'unsafe-eval' found in '{}'.
+    LENIENTSCHEME: The policy contained a lenient handler in '{}'.
+    CSPRO: Header 'Content-Security-Policy-Report-Only' was found, but 'Content-Security-Policy' was not.
+    THIRDPARTYABUSE: Detected in '{}' - {}
+    NODEFAULTSRC: The directive 'default-src' was not found.
+    NOFRAMEANCESTORS: The directive 'frame-ancestors' was not found.
+    NOREPORTTO: Neither 'report-to' nor 'report-uri' were found.
+    NOBASEURI: The directive 'base-uri' was not found.
+    NOUPGRIR: The directive 'upgrade-insecure-request' was not found.
+    NOSCRIPTSRC: The directives 'script-src' and 'default-src' were not found.
+    NOCONNECTSRC: The directives 'connect-src' and 'default-src' were not found.
+    NOFRAMESRC: The directives 'frame-src' and 'default-src' were not found.
+    NOCHILDSRC: The directives 'child-src' and 'default-src' were not found.
+    NOOBJECTSRC: The directives 'object-src' and 'default-src' were not found.
+    ORPHANDOMAIN: Domain '{}', in '{}' of the '{}' header, is not found (NXDOMAIN and no WHOIS)
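The config.yml change renames the only remaining environment to `majestic_snapshots`, which the dashboard code then passes as `get_config(environment=...)`. The real `get_config()` lives in `dashboard/utils/utils.py` and is not part of this diff, so the following is only a sketch of how such a loader might select an environment from the parsed YAML; the merge behaviour is an assumption:

```python
# Parsed form of the new config.yml (what yaml.safe_load would return);
# the values are abbreviated and the key names come from the diff above.
raw = {
    "db_environments": {
        "majestic_snapshots": {
            "host": "127.0.0.1",
            "port": 27017,
            "database": "headers",
            "headers_coll": "header_scans",
        },
    },
    "general": {"abusable_domains": []},
}

def get_config(environment="majestic_snapshots"):
    # Sketch only: pick one db environment and attach the general settings.
    cfg = dict(raw["db_environments"][environment])
    cfg["general"] = raw["general"]
    return cfg

print(get_config()["headers_coll"])  # header_scans
```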

dashboard/app.py

Lines changed: 2 additions & 2 deletions
@@ -88,7 +88,7 @@ def update_target_collection(collection_name):
         children=[
             dcc.Dropdown(
                 options=get_environments(),
-                value="majestic",
+                value="majestic_snapshots",
                 id="collection-dropdown"
             ),
             html.H2("Limit of data to load from database"),
@@ -115,4 +115,4 @@ def update_target_collection(collection_name):
 ])
 
 if __name__ == '__main__':
-    app.run(debug=True,host="0.0.0.0")
+    app.run(debug=False,host="0.0.0.0")

dashboard/data/countries.continents.csv

Lines changed: 1 addition & 1 deletion
@@ -194,7 +194,7 @@ No,Country or Area,ISO-3,M49 Code,Region 1,Region 2,Continent
 193,Samoa,WSM,882,Polynesia,,Oceania
 194,San Marino,SMR,674,Southern Europe,,Europe
 195,Sao Tome and Principe,STP,678,Middle Africa,Sub-Saharan Africa,Africa
-196,Sark,,680,Northern Europe,Channel Islands,Europe
+196,Sark,CRQ,680,Northern Europe,Channel Islands,Europe
 197,Saudi Arabia,SAU,682,Western Asia,,Asia
 198,Senegal,SEN,686,Western Africa,Sub-Saharan Africa,Africa
 199,Serbia,SRB,688,Southern Europe,,Europe

dashboard/pages/csp_directives.py

Lines changed: 15 additions & 4 deletions
@@ -17,7 +17,7 @@
 find_limit=10000
 data=pd.DataFrame()
 
-config=get_config()
+config=get_config(environment="majestic_snapshots")
 collection = get_headers_collection(config)
 total_documents = collection.count_documents({})
 n_top_directive_values = 15
@@ -61,10 +61,21 @@ def reload_all_graphs(stored_data,selected_header,selected_directive,n_limit_dir
     find_limit=stored_data["find_limit"]
     print("Values - showing %s documents in graphs" % find_limit)
     # data = pd.DataFrame(list(collection.aggregate([{'$limit': find_limit},{'$match': {'headers.{}'.format(selected_header) : {'$exists': 1}}},{'$project': {"url":1, "headers":1 }}])))
-    data_df = pd.DataFrame(list(collection.find({},{"url":1,"headers":1,"csp":1}).limit(find_limit)))
+    data_df = pd.DataFrame(list(collection.aggregate([
+        { '$sort': { "scans.globalRank": 1 } },
+        { '$limit': find_limit },
+        { '$addFields': { 'last_scan': { '$first': { '$sortArray': { 'input': "$scans", 'sortBy': { 'date': -1 } } } } } },
+        { '$addFields': { 'headers_kv': { '$objectToArray': "$last_scan.headers" }, "csp_kv": {'$objectToArray': "$last_scan.csp"} } },
+        { '$match': {
+            "headers_kv.k": { '$regex': '^{}$'.format(selected_header), '$options': "i" },
+            "csp_kv.k": {'$regex': "^{}$".format(selected_directive), '$options': "i" }
+          }
+        },
+        {'$project': {"url": 1, "headers": "$last_scan.headers", "csp": "$last_scan.csp" }}])))
 
     # Beautify the "headers" Series
     # data_df["headers"] = data_df["headers"].map(lambda x: array_to_dict(x,tolower=True))
-    data_df["headers_lower"]=data_df["headers"].map(lambda x: dict((k.lower(), v.lower()) for k,v in x.items()) if x is not None else None)
+    data_df["headers_lower"]=data_df["headers"].map(lambda x: dict((k.lower(), v.lower()) for k,v in x.items()) if x is not None else {})
     # Prepare CSP data to visualise sites and directives, etc
     # csp_cspro=data_df["headers_lower"].map(lambda x: parse_csp(x,lower=True))
     # csp_columns_df=pd.DataFrame(csp_cspro.tolist(),columns=["csp","cspro"])
@@ -75,7 +86,7 @@ def reload_all_graphs(stored_data,selected_header,selected_directive,n_limit_dir
     csp_data=data_df[data_df["csp"].notnull()]
     # csp_data=csp_data[csp_data["csp"].map(lambda x: "" not in x.keys())]
     # Freeing up some memory
-    csp_columns_df=None
+    # csp_columns_df=None
 
     # List unique header names
     header_names,counts_headers = np.unique(np.hstack(data_df["headers_lower"].map(lambda x: list(x.keys())).values),return_counts=True)
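The key change in this pipeline is selecting the newest entry of each document's `scans` array via `$sortArray` sorted by `date` descending plus `$first`. A minimal Python equivalent of that stage, using one invented document shaped like the ones the pipeline reads (field names from the diff, values made up):

```python
from datetime import datetime

# A hypothetical document shaped like those the new pipeline reads: one site
# holding an array of scan snapshots.
doc = {
    "url": "example.com",
    "scans": [
        {"date": datetime(2023, 1, 1), "headers": {"Server": "old"}},
        {"date": datetime(2023, 6, 1), "headers": {"Server": "new"}},
    ],
}

# Python equivalent of the $addFields stage
#   {'$first': {'$sortArray': {'input': "$scans", 'sortBy': {'date': -1}}}}
# i.e. sort the scans newest-first and keep the first element.
last_scan = max(doc["scans"], key=lambda s: s["date"])
print(last_scan["headers"]["Server"])  # new
```

Doing this inside the aggregation keeps only the latest snapshot per site in the DataFrame, instead of pulling whole documents with `find()` as before.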

dashboard/pages/header_names.py

Lines changed: 10 additions & 5 deletions
@@ -17,7 +17,7 @@
 find_limit=10000
 data_df=pd.DataFrame()
 
-config=get_config()
+config=get_config(environment="majestic_snapshots")
 collection = get_headers_collection(config)
 total_documents = collection.count_documents({})
 n_top_header_names = 20
@@ -35,18 +35,23 @@
 )
 def reload_all_graphs(stored_data,n_limit_header_names,n_limit_directive_names,collection_data):
     print("collection_data: "+collection_data)
-    config=get_config(collection_data)
+    config=get_config(environment=collection_data)
     collection = get_headers_collection(config)
     find_limit=stored_data["find_limit"]
 
     print("Names - showing %s documents in graphs" % find_limit)
-    data_df = pd.DataFrame(list(collection.find({},{"url":1,"headers":1,"csp":1}).limit(find_limit)))
+    data_df = pd.DataFrame(list(collection.aggregate([
+        { '$sort': { "scans.globalRank": 1 } },
+        {'$limit': find_limit},
+        {'$addFields': { 'last_scan': { '$first': { '$sortArray': { 'input': "$scans", 'sortBy': { 'date': -1 } } } } } },
+        {'$project': {"url":1,"headers":"$last_scan.headers","csp": "$last_scan.csp"}}
+        ])))
     print("Data pulled from DB. Shaping it with Pandas")
     # Beautify the "headers" Series
     # Make all the headers lowercase
     print("Beautifying headers")
     # data_df["headers"] = data_df["headers"].map(lambda x: array_to_dict(x,tolower=True))
-    data_df["headers_lower"]=data_df["headers"].map(lambda x: dict((k.lower(), v.lower()) for k,v in x.items()) if x is not None else None)
+    data_df["headers_lower"]=data_df["headers"].map(lambda x: dict((k.lower(), v.lower()) for k,v in x.items()) if x is not None else {})
     # Prepare CSP data to visualise sites and directives, etc. Indicate that all headers have been changed to lowercase
     # print("Parsing CSP headers")
     # csp_cspro=data_df["headers_lower"].map(lambda x: parse_csp(x,lower=True))
@@ -58,7 +63,7 @@ def reload_all_graphs(stored_data,n_limit_header_names,n_limit_directive_names,c
     csp_data=data_df[data_df["csp"].notnull()]
     # csp_data=csp_data[csp_data["csp"].map(lambda x: "" not in x.keys())]
     # Freeing up some memory
-    csp_columns_df=None
+    # csp_columns_df=None
 
     # In case I needed to look at the csp ro stats, here's how I would get them
     # print("Filtering null CSP-RO headers and empty directives")
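Both dashboard pages change the `headers_lower` fallback from `None` to `{}`. The reason is visible a few lines later: downstream code calls `.keys()` on every row, and an empty dict keeps rows with no recorded headers from raising `AttributeError`. A small sketch with invented data:

```python
import pandas as pd

# One row per site; the second site's scan recorded no headers at all.
headers = pd.Series([{"Content-Type": "TEXT/HTML; Charset=UTF-8"}, None])

# Same lowercasing map as in the diff, with the new `{}` fallback so that
# list(x.keys()) works for every row where the old `None` fallback would fail.
headers_lower = headers.map(
    lambda x: {k.lower(): v.lower() for k, v in x.items()} if x is not None else {}
)
print(headers_lower[0])  # {'content-type': 'text/html; charset=utf-8'}
print(list(headers_lower[1].keys()))  # []
```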
