Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions statvar_imports/norway_census/Norway_metadata.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
parameter,value
#mapped_rows,3
mapped_columns,5
header_rows,1
#skip_rows,2
#input_rows,1000
place_type,Country
place_within,country/NOR
#word_delimiter,""""""
output_columns,"observationDate,observationAbout,value,variableMeasured"
90 changes: 90 additions & 0 deletions statvar_imports/norway_census/Norway_pvmap.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
key,p1,v1,p2,v2,p3,v3,p4,v4,p5,v5
Value,populationType,Person,measuredProperty,count,value,{Number}

Males,gender,Male
Females,gender,Female
0-9 years,age,[0 9 Years]
10-19 years,age,[10 19 Years]
20-29 years,age,[20 29 Years]
30-39 years,age,[30 39 Years]
40-49 years,age,[40 49 Years]
50-59 years,age,[50 59 Years]
60-69 years,age,[60 69 Years]
70-79 years,age,[70 79 Years]
80-89 years,age,[80 89 Years]
90-99 years,age,[90 99 Years]
100 years or older,age,[100 - Years]

The whole country,observationAbout,country/NOR
Oslo - Oslove,observationAbout,nuts/NO011
Rogaland,observationAbout,nuts/NO012
Møre og Romsdal,observationAbout,nuts/NO031
Nordland - Nordlánnda,observationAbout,nuts/NO032
Østfold,observationAbout,nuts/NO033
Akershus,observationAbout,nuts/NO034
Buskerud,observationAbout,nuts/NO020
Innlandet,observationAbout,nuts/NO042
Vestfold,observationAbout,nuts/NO043
Telemark,observationAbout,wikidataId/Q56407177
Agder,observationAbout,nuts/NO053
Vestland,observationAbout,nuts/NO060
Trøndelag - Trööndelage,observationAbout,nuts/NO071
Troms - Romsa - Tromssa,observationAbout,nuts/NO072
Finnmark - Finnmárku - Finmarkku,observationAbout,nuts/NO073


1986,observationDate,1986
1987,observationDate,1987
1988,observationDate,1988
1989,observationDate,1989
1990,observationDate,1990
1991,observationDate,1991
1992,observationDate,1992
1993,observationDate,1993
1994,observationDate,1994
1995,observationDate,1995
1996,observationDate,1996
1997,observationDate,1997
1998,observationDate,1998
1999,observationDate,1999
2000,observationDate,2000
2001,observationDate,2001
2002,observationDate,2002
2003,observationDate,2003
2004,observationDate,2004
2005,observationDate,2005
2006,observationDate,2006
2007,observationDate,2007
2008,observationDate,2008
2009,observationDate,2009
2010,observationDate,2010
2011,observationDate,2011
2012,observationDate,2012
2013,observationDate,2013
2014,observationDate,2014
2015,observationDate,2015
2016,observationDate,2016
2017,observationDate,2017
2018,observationDate,2018
2019,observationDate,2019
2020,observationDate,2020
2021,observationDate,2021
2022,observationDate,2022
2023,observationDate,2023
2024,observationDate,2024
2025,observationDate,2025
2026,observationDate,2026
2027,observationDate,2027
2028,observationDate,2028
2029,observationDate,2029
2030,observationDate,2030
2031,observationDate,2031
2032,observationDate,2032
2033,observationDate,2033
2034,observationDate,2034
2035,observationDate,2035
2036,observationDate,2036
2037,observationDate,2037
2038,observationDate,2038
2039,observationDate,2039
2040,observationDate,2040
45 changes: 45 additions & 0 deletions statvar_imports/norway_census/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Norway Demographics Dataset
## Overview
This dataset provides foundational demographic and socio-economic statistics for Norway, sourced directly from official national datasets.

## Data Source

**Source URL:**
https://www.ssb.no/en/statbank/table/07459/

The data comes from Statistics Norway (SSB) and includes comprehensive demographic variables such as population counts, age distributions, and other census-related metrics.

## How To Download Input Data
To download the data, run the provided download script `data_download.py`. It fetches live data from the SSB API and writes `Norway_input.csv`, which is the input file for this import.

Type of place: Country and AdministrativeArea1.

StatVars: Demographics.

Years: 1986 to 2025.

## Processing Instructions
To process the Norway Census data and generate statistical variables, run the following commands from the "data" directory:

**Download Input File**
```bash
python3 statvar_imports/norway_census/data_download.py
```
**For Test Data Run**
```bash
python3 tools/statvar_importer/stat_var_processor.py \
--input_data=statvar_imports/norway_census/test/Norway_input.csv \
--pv_map=statvar_imports/norway_census/Norway_pvmap.csv \
--output_path=statvar_imports/norway_census/test/Norway_output \
--config_file=statvar_imports/norway_census/Norway_metadata.csv \
--existing_statvar_mcf=gs://unresolved_mcf/scripts/statvar/stat_vars.mcf
```
**For Main Data Run**
```bash
python3 tools/statvar_importer/stat_var_processor.py \
--input_data=statvar_imports/norway_census/Norway_input.csv \
--pv_map=statvar_imports/norway_census/Norway_pvmap.csv \
--output_path=statvar_imports/norway_census/Norway_output \
--config_file=statvar_imports/norway_census/Norway_metadata.csv \
--existing_statvar_mcf=gs://unresolved_mcf/scripts/statvar/stat_vars.mcf
```
87 changes: 87 additions & 0 deletions statvar_imports/norway_census/data_download.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
import requests
import pandas as pd
from pyjstat import pyjstat
import os
import logging
import sys

# Emit records at INFO and above, rendered as "LEVEL: message".
logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.INFO)

# --- Paths and endpoints ---
# Directory containing this script; the generated CSV is written next to it.
BASE_PATH = os.path.dirname(os.path.abspath(__file__))
# Data endpoint for SSB table 07459, requesting English labels and JSON-stat2.
POST_URL = (
    "https://data.ssb.no/api/pxwebapi/v2/tables/07459/data"
    "?lang=en&outputFormat=json-stat2"
)
# Destination of the combined download.
OUTPUT_FILE = os.path.join(BASE_PATH, "Norway_input.csv")

# Region labels whose rows are dropped from the downloaded data.
EXCLUDE_REGIONS = [
    "Svalbard",
    "Jan Mayen",
    "Continental shelf",
    "Unknown region",
    "Uoppgitt bostedskommune",
]

def get_query(region_code, codelist=None):
    """Build the JSON body for a request against the SSB table endpoint.

    Every dimension except Region is requested with the "*" wildcard; the
    age dimension ("Alder") additionally uses the "agg_TiAarigGruppering"
    codelist.

    Args:
        region_code: Value code placed in the Region filter.
        codelist: Optional codelist name attached to the Region filter.
            Falsy values (None, "") leave the Region filter without one.

    Returns:
        A dict with a "selection" list (one filter per dimension) and a
        "response" entry requesting the "json-stat2" format.
    """

    def _all_values(variable_code):
        # Fresh dict and list per call so no filter shares state with another.
        return {"variableCode": variable_code, "valueCodes": ["*"]}

    age_filter = _all_values("Alder")
    age_filter["codelist"] = "agg_TiAarigGruppering"

    region_filter = {"variableCode": "Region", "valueCodes": [region_code]}
    if codelist:
        region_filter["codelist"] = codelist

    return {
        "selection": [
            _all_values("ContentsCode"),
            _all_values("Tid"),
            age_filter,
            _all_values("Kjonn"),
            region_filter,
        ],
        "response": {"format": "json-stat2"},
    }

def _fetch_dataframe(region_code, codelist=None, timeout=120):
    """POST one query to the SSB endpoint and parse the reply into a DataFrame.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if the server does not respond within `timeout`.
    """
    response = requests.post(
        POST_URL,
        json=get_query(region_code, codelist),
        # Without a timeout, requests waits forever on a stalled connection.
        timeout=timeout,
    )
    response.raise_for_status()
    return pyjstat.Dataset.read(response.text).write('dataframe')


def _clean_dataframe(df):
    """Return a copy of `df` with renamed columns, filtered regions, 4-digit years."""
    df = df.copy()

    # Lower-case the headers first so the rename map matches regardless of case.
    # NOTE(review): columns not listed here keep their lower-cased names, and
    # the 'Region' / 'Year' steps below are skipped when those columns are
    # absent - confirm the keys against the headers of a live response.
    df.columns = [c.lower() for c in df.columns]
    rename_map = {
        'region': 'Region',
        'contents': 'Contents',
        'tid': 'Year',
        'alder': 'Age',
        'kjonn': 'Sex',
        'value': 'Value'
    }
    df = df.rename(columns=rename_map)

    if 'Region' in df.columns:
        # Drop exact label matches, then any label containing these keywords.
        df = df[~df['Region'].isin(EXCLUDE_REGIONS)]
        df = df[~df['Region'].str.contains('shelf|Unknown|Svalbard', case=False, na=False)]

    if 'Year' in df.columns:
        # Keep only the first 4-digit run; no date-range filter is applied.
        # assign() avoids writing into a filtered slice (SettingWithCopyWarning).
        df = df.assign(Year=df['Year'].astype(str).str.extract(r'(\d{4})')[0])

    return df


def fetch_norway_data(timeout=120):
    """Download national and regional data from SSB and write OUTPUT_FILE.

    Fetches the national series (region code "0") and the regional series
    (codelist "agg_KommFylker"), concatenates them, cleans the result and
    saves it as UTF-8 CSV without the index.

    Args:
        timeout: Seconds to wait for each HTTP request before giving up.

    On any failure the error is logged with its traceback and the process
    exits with status 1.
    """
    logging.info("Starting combined data fetch (National + Regional) for all years...")

    try:
        logging.info("Fetching National data...")
        df_nat = _fetch_dataframe("0", timeout=timeout)

        logging.info("Fetching Regional data...")
        df_reg = _fetch_dataframe("*", "agg_KommFylker", timeout=timeout)

        df = _clean_dataframe(pd.concat([df_nat, df_reg], ignore_index=True))

        df.to_csv(OUTPUT_FILE, index=False, encoding='utf-8')
        logging.info(f"SUCCESS: Combined data ({len(df)} rows) saved to {OUTPUT_FILE}")

        # Only report the range when at least one year was parsed.
        if 'Year' in df.columns and not df['Year'].isnull().all():
            logging.info(f"Year range in file: {df['Year'].min()} to {df['Year'].max()}")

    except Exception as e:
        # Top-level boundary of the script: record the traceback, then signal
        # failure to the caller through the exit status.
        logging.exception("An error occurred: %s", e)
        sys.exit(1)


if __name__ == "__main__":
    fetch_norway_data()
27 changes: 27 additions & 0 deletions statvar_imports/norway_census/manifest.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
{
"import_specifications": [
{
"import_name": "norway_census",
"curator_emails": [
"[email protected]"
],
"provenance_url": "https://www.ssb.no/en/statbank/table/07459/",
"provenance_description": "Population data for demographic variables such as population counts, age distributions, and other census-related metrics in Norway",
"scripts": [
"data_download.py",
"../../tools/statvar_importer/stat_var_processor.py --input_data=Norway_input.csv --pv_map=Norway_pvmap.csv --config_file=Norway_metadata.csv --output_path=Norway_output --existing_statvar_mcf=gs://unresolved_mcf/scripts/statvar/stat_vars.mcf"
],
"source_files": [
"Norway_input.csv"
],
"import_inputs": [
{
"template_mcf": "Norway_output.tmcf",
"cleaned_csv": "Norway_output.csv",
"stat_var_mcf": "Norway_output_stat_vars.mcf"
}
],
"cron_schedule": "0 0 1 1,4,7,10 *"
}
]
}
Loading
Loading