-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathExploratory Data Analysis.sql
More file actions
101 lines (76 loc) · 2.53 KB
/
Exploratory Data Analysis.sql
File metadata and controls
101 lines (76 loc) · 2.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
-- Exploratory Data Analysis --
-- Starting point: pull every row and every column of the staging table --
SELECT staged.*
FROM layoffs_staging2 AS staged;
-- MAX of the total_laid_off column and MAX of the percentage_laid_off column, over the whole table --
SELECT
    MAX(total_laid_off),
    MAX(percentage_laid_off)
FROM layoffs_staging2;
-- Rows where percentage_laid_off = 1 (i.e. 100%), listed from highest to lowest funds_raised_millions --
SELECT staged.*
FROM layoffs_staging2 AS staged
WHERE staged.percentage_laid_off = 1
ORDER BY staged.funds_raised_millions DESC;
-- SUM(total_laid_off) per company, largest total first --
-- Sorts on the aggregate expression rather than a column position, and breaks ties on company so the row order is repeatable --
SELECT
    company,
    SUM(total_laid_off)
FROM layoffs_staging2
GROUP BY company
ORDER BY
    SUM(total_laid_off) DESC,
    company ASC;
-- Smallest and largest value of the `date` column, i.e. the date range of the data --
SELECT
    MIN(`date`),
    MAX(`date`)
FROM layoffs_staging2;
-- SUM(total_laid_off) per industry, largest total first --
-- Sorts on the aggregate expression rather than a column position, and breaks ties on industry so the row order is repeatable --
SELECT
    industry,
    SUM(total_laid_off)
FROM layoffs_staging2
GROUP BY industry
ORDER BY
    SUM(total_laid_off) DESC,
    industry ASC;
-- SUM(total_laid_off) per country, largest total first --
-- Sorts on the aggregate expression rather than a column position, and breaks ties on country so the row order is repeatable --
SELECT
    country,
    SUM(total_laid_off)
FROM layoffs_staging2
GROUP BY country
ORDER BY
    SUM(total_laid_off) DESC,
    country ASC;
-- SUM(total_laid_off) per YEAR(`date`), latest year first --
SELECT
    YEAR(`date`),
    SUM(total_laid_off)
FROM layoffs_staging2
GROUP BY YEAR(`date`)
ORDER BY YEAR(`date`) DESC;
-- SUM(total_laid_off) per company stage, largest total first --
-- Sorts on the aggregate expression rather than a column position, and breaks ties on stage so the row order is repeatable --
SELECT
    stage,
    SUM(total_laid_off)
FROM layoffs_staging2
GROUP BY stage
ORDER BY
    SUM(total_laid_off) DESC,
    stage ASC;
-- SUM(total_laid_off) per month, oldest month first --
-- The month key is the first 7 characters of `date` (YYYY-MM, assuming `date` renders as YYYY-MM-DD -- TODO confirm) --
-- Rows with a NULL `date` are skipped: SUBSTRING only yields NULL for a NULL input, so filtering the column directly is equivalent --
-- The alias is quoted as an identifier (backticks), not as a string literal, so it can be referenced safely in ORDER BY --
SELECT
    SUBSTRING(`date`, 1, 7) AS `MONTH`,
    SUM(total_laid_off)
FROM layoffs_staging2
WHERE `date` IS NOT NULL
GROUP BY SUBSTRING(`date`, 1, 7)
ORDER BY `MONTH` ASC;
-- Rolling (cumulative) total of total_laid_off by month, using a CTE --
-- monthly_totals: one row per month key (first 7 characters of `date`) with that month's SUM(total_laid_off); NULL dates are skipped --
-- The sort lives on the outer query: an ORDER BY inside the CTE does not guarantee the order of the final result --
-- The window frame is spelled out so the running sum is row-by-row in `MONTH` order --
WITH monthly_totals AS
(
    SELECT
        SUBSTRING(`date`, 1, 7) AS `MONTH`,
        SUM(total_laid_off) AS total_off
    FROM layoffs_staging2
    WHERE `date` IS NOT NULL
    GROUP BY SUBSTRING(`date`, 1, 7)
)
SELECT
    `MONTH`,
    total_off,
    SUM(total_off) OVER (
        ORDER BY `MONTH`
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS rolling_total
FROM monthly_totals
ORDER BY `MONTH` ASC;
-- Ranking companies per year by total_laid_off: first a preview of SUM(total_laid_off) per company and year (largest first); the two-CTE ranking query follows --
SELECT
    company,
    YEAR(`date`),
    SUM(total_laid_off)
FROM layoffs_staging2
GROUP BY
    company,
    YEAR(`date`)
ORDER BY SUM(total_laid_off) DESC;
-- Top-ranked companies per year by total_laid_off, built from two CTEs --
-- Company_Year: SUM(total_laid_off) per company and YEAR(`date`) --
-- Company_Year_Rank: DENSE_RANK within each year, largest total first; rows with a NULL year are excluded --
-- Ties share a rank under DENSE_RANK, so a year can return more than 5 rows for Ranking <= 5 --
-- The final ORDER BY makes the output order repeatable; without it the row order is unspecified --
WITH Company_Year (company, years, total_laid_off) AS
(
    SELECT
        company,
        YEAR(`date`),
        SUM(total_laid_off)
    FROM layoffs_staging2
    GROUP BY
        company,
        YEAR(`date`)
),
Company_Year_Rank AS
(
    SELECT
        Company_Year.*,
        DENSE_RANK() OVER (PARTITION BY years ORDER BY total_laid_off DESC) AS Ranking
    FROM Company_Year
    WHERE years IS NOT NULL
)
SELECT *
FROM Company_Year_Rank
WHERE Ranking <= 5
ORDER BY
    years ASC,
    Ranking ASC,
    company ASC;