Covid-19 Data Exploration with SQL
Dataset: https://ourworldindata.org
Software: SQL Server Management Studio (SSMS)
SELECT *
FROM CovidProject..CovidDeaths
WHERE continent IS NOT NULL
--Select necessary data
SELECT Location, date, total_cases, new_cases, total_deaths, population
FROM CovidProject..CovidDeaths
WHERE continent IS NOT NULL
ORDER BY 1,2
--Look at total cases vs total deaths
SELECT Location, date, total_cases, total_deaths, (CAST(total_deaths AS numeric)/CAST(total_cases AS numeric))*100 AS death_percentage
FROM CovidProject..CovidDeaths
WHERE continent IS NOT NULL
ORDER BY 1,2
--Show likelihood of death if you contract Covid in Romania
SELECT Location, date, total_cases, total_deaths, (CAST(total_deaths AS numeric)/CAST(total_cases AS numeric))*100 AS death_percentage
FROM CovidProject..CovidDeaths
WHERE Location = 'Romania'
ORDER BY 1,2
--Show what percentage of population contracted Covid in Romania
SELECT Location, date, population, total_cases, (CAST(total_cases AS numeric)/population)*100 AS infection_percentage
FROM CovidProject..CovidDeaths
WHERE Location = 'Romania'
ORDER BY 1,2
--Look at countries with highest infection rate compared to population
SELECT Location, population, MAX(CAST(total_cases AS numeric)) AS high_infection_count, MAX((CAST(total_cases AS numeric)/population))*100 AS infection_percentage
FROM CovidProject..CovidDeaths
WHERE continent IS NOT NULL
GROUP BY Location, population
ORDER BY 4 DESC
--Show countries with highest death count per population
SELECT Location, MAX(CAST(total_deaths AS numeric)) AS total_death_count
FROM CovidProject..CovidDeaths
WHERE continent IS NOT NULL
GROUP BY Location
ORDER BY 2 DESC
--Show continents with highest death count
SELECT Location, MAX(CAST(total_deaths AS numeric)) AS total_death_count
FROM CovidProject..CovidDeaths
WHERE continent IS NULL AND Location NOT LIKE '%income%'
GROUP BY Location
ORDER BY 2 DESC
--Show global death percentage
SELECT SUM(new_cases) AS global_cases, SUM(new_deaths) AS global_deaths, SUM(new_deaths)/SUM(new_cases)*100
AS global_death_percentage
FROM CovidProject..CovidDeaths
WHERE continent IS NOT NULL
--Join CovidDeaths with CovidVaccinations
SELECT * FROM CovidProject..CovidDeaths AS dea
JOIN CovidProject..CovidVaccinations AS vac
ON dea.location = vac.location
AND dea.date = vac.date
--Look at total population vs total vaccinations
SELECT dea.continent, dea.location, dea.date, dea.population, vac.new_vaccinations
FROM CovidProject..CovidDeaths AS dea
JOIN CovidProject..CovidVaccinations AS vac
ON dea.location = vac.location
AND dea.date = vac.date
WHERE dea.continent IS NOT NULL
ORDER BY 2,3
--Show increase in vaccinations
SELECT dea.continent, dea.location, dea.date, dea.population, vac.new_vaccinations,
SUM(CAST(vac.new_vaccinations AS numeric)) OVER(PARTITION BY dea.location ORDER BY dea.location, dea.date) AS people_vaccinated
FROM CovidProject..CovidDeaths AS dea
JOIN CovidProject..CovidVaccinations AS vac
ON dea.location = vac.location
AND dea.date = vac.date
WHERE dea.continent IS NOT NULL
ORDER BY 2,3
--Show percentage of people vaccinated using a CTE
WITH PopulationVaccinated (continent, location, date, population, new_vaccinations, people_vaccinated)
AS
(
SELECT dea.continent, dea.location, dea.date, dea.population, vac.new_vaccinations,
SUM(CONVERT(numeric, vac.new_vaccinations)) OVER(PARTITION BY dea.location ORDER BY dea.location, dea.date) AS people_vaccinated
FROM CovidProject..CovidDeaths AS dea
JOIN CovidProject..CovidVaccinations AS vac
ON dea.location = vac.location
AND dea.date = vac.date
WHERE dea.continent IS NOT NULL
)
SELECT *,(CONVERT(numeric, people_vaccinated)/population)*100 AS vaccination_percentage
FROM PopulationVaccinated
--Show maximum number of vaccinated people from each country using CTE
WITH MaximumVaccination (continent, location, date, population, new_vaccinations, people_vaccinated)
AS
(
SELECT dea.continent, dea.location, dea.date, dea.population, vac.new_vaccinations,
SUM(CONVERT(numeric, vac.new_vaccinations)) OVER (PARTITION BY dea.location ORDER BY dea.location, dea.date) AS people_vaccinated
FROM CovidProject..CovidDeaths AS dea
JOIN CovidProject..CovidVaccinations AS vac
ON dea.location = vac.location
AND dea.date = vac.date
WHERE dea.continent IS NOT NULL
)
SELECT Location, MAX(CONVERT(numeric, people_vaccinated)) AS max_people_vaccinated
FROM MaximumVaccination
GROUP BY Location
--Create a temp table
DROP TABLE IF EXISTS #VaccinatedPopulationPercentage
CREATE TABLE #VaccinatedPopulationPercentage
(
continent nvarchar(255),
location nvarchar(255),
date datetime,
population numeric,
new_vaccinations numeric,
people_vaccinated numeric
)
INSERT INTO #VaccinatedPopulationPercentage
SELECT dea.continent, dea.location, dea.date, dea.population, vac.new_vaccinations,
SUM(CONVERT(numeric, vac.new_vaccinations)) OVER(PARTITION BY dea.location ORDER BY dea.location, dea.date) AS people_vaccinated
FROM CovidProject..CovidDeaths AS dea
JOIN CovidProject..CovidVaccinations AS vac
ON dea.location = vac.location
AND dea.date = vac.date
WHERE dea.continent IS NOT NULL
SELECT *,(CONVERT(numeric, people_vaccinated)/population)*100 AS vaccination_percentage
FROM #VaccinatedPopulationPercentage
--Create a view to store data for future visualizations
CREATE VIEW VaccinatedPopulationPercentage AS
SELECT dea.continent, dea.location, dea.date, dea.population, vac.new_vaccinations,
SUM(CONVERT(numeric, vac.new_vaccinations)) OVER(PARTITION BY dea.location ORDER BY dea.location, dea.date) AS people_vaccinated
FROM CovidProject..CovidDeaths AS dea
JOIN CovidProject..CovidVaccinations AS vac
ON dea.location = vac.location
AND dea.date = vac.date
WHERE dea.continent IS NOT NULL
SELECT * FROM VaccinatedPopulationPercentage