top of page

Covid-19 Data Exploration with SQL

Dataset: https://ourworldindata.org

Software: SQL Server Management Studio (SSMS)

SELECT
FROM CovidProject..CovidDeaths
WHERE continent IS NOT NULL

--Select necessary data

SELECT Location, date, total_cases, new_cases, total_deaths, population 
FROM CovidProject..CovidDeaths 
WHERE continent IS NOT NULL
ORDER BY 1,2

--Look at total cases vs total deaths

SELECT Location, date, total_cases, total_deaths, (CAST(total_deaths AS numeric)/CAST(total_cases AS numeric))*100 AS death_percentage
FROM CovidProject..CovidDeaths 
WHERE continent IS NOT NULL
ORDER BY 1,2

--Show likelihood of death if you contract Covid in Romania

SELECT Location, date, total_cases, total_deaths, (CAST(total_deaths AS numeric)/CAST(total_cases AS numeric))*100 AS death_percentage
FROM CovidProject..CovidDeaths 
WHERE Location = 'Romania'
ORDER BY 1,2

--Show what percentage of population contracted Covid in Romania

SELECT Location, date, population, total_cases, (CAST(total_cases AS numeric)/population)*100 AS infection_percentage
FROM CovidProject..CovidDeaths 
WHERE Location = 'Romania'
ORDER BY 1,2

--Look at countries with highest infection rate compared to population

SELECT Location, population, MAX(CAST(total_cases AS numeric)) AS high_infection_count, MAX((CAST(total_cases AS numeric)/population))*100 AS infection_percentage
FROM CovidProject..CovidDeaths 
WHERE continent IS NOT NULL
GROUP BY Location, population
ORDER BY 4 DESC

--Show countries with highest death count per population

SELECT Location, MAX(CAST(total_deaths AS numeric)) AS total_death_count
FROM CovidProject..CovidDeaths 
WHERE continent IS NOT NULL
GROUP BY Location
ORDER BY 2 DESC

--Show continents with highest death count

SELECT Location, MAX(CAST(total_deaths AS numeric)) AS total_death_count
FROM CovidProject..CovidDeaths 
WHERE continent IS NULL AND Location NOT LIKE '%income%'
GROUP BY Location
ORDER BY 2 DESC

--Show global death percentage 

SELECT SUM(new_cases) AS global_cases, SUM(new_deaths) AS global_deaths, SUM(new_deaths)/SUM(new_cases)*100 

AS global_death_percentage
FROM CovidProject..CovidDeaths
WHERE continent IS NOT NULL

--Join CovidDeaths with CovidVaccinations

SELECT * FROM CovidProject..CovidDeaths AS dea
JOIN CovidProject..CovidVaccinations AS vac 

          ON dea.location = vac.location
         
AND dea.date = vac.date

--Look at total population vs total vaccinations

SELECT dea.continent, dea.location, dea.date, dea.population, vac.new_vaccinations
FROM CovidProject..CovidDeaths AS dea
JOIN CovidProject..CovidVaccinations AS vac
         
ON dea.location = vac.location
         
AND dea.date = vac.date
WHERE dea.continent IS NOT NULL
ORDER BY 2,3

--Show increase in vaccinations 

SELECT dea.continent, dea.location, dea.date, dea.population, vac.new_vaccinations,
SUM(CAST(vac.new_vaccinations AS numeric)) OVER(PARTITION BY dea.location ORDER BY dea.location, dea.date) AS people_vaccinated
FROM CovidProject..CovidDeaths AS dea
JOIN CovidProject..CovidVaccinations AS vac
         
ON dea.location = vac.location
         
AND dea.date = vac.date
WHERE dea.continent IS NOT NULL
ORDER BY 2,3

--Show percentage of people vaccinated using a CTE

WITH PopulationVaccinated (continent, location, date, population, new_vaccinations, people_vaccinated)
AS
(
SELECT dea.continent, dea.location, dea.date, dea.population, vac.new_vaccinations,
SUM(CONVERT(numeric, vac.new_vaccinations)) OVER(PARTITION BY dea.location ORDER BY dea.location, dea.date) AS people_vaccinated
FROM CovidProject..CovidDeaths AS dea

JOIN CovidProject..CovidVaccinations AS vac
         
ON dea.location = vac.location
         
AND dea.date = vac.date
WHERE dea.continent IS NOT NULL
)

SELECT *,(CONVERT(numeric, people_vaccinated)/population)*100 AS vaccination_percentage 
FROM PopulationVaccinated

--Show maximum number of vaccinated people from each country using CTE

WITH MaximumVaccination (continent, location, date, population, new_vaccinations, people_vaccinated)
AS
(
SELECT dea.continent, dea.location, dea.date, dea.population, vac.new_vaccinations,
SUM(CONVERT(numeric, vac.new_vaccinations)) OVER (PARTITION BY dea.location ORDER BY dea.location, dea.date) AS people_vaccinated
FROM CovidProject..CovidDeaths AS dea
JOIN CovidProject..CovidVaccinations AS vac
         
ON dea.location = vac.location
         
AND dea.date = vac.date
WHERE dea.continent IS NOT NULL
)

SELECT Location, MAX(CONVERT(numeric, people_vaccinated)) AS max_people_vaccinated
FROM MaximumVaccination
GROUP BY Location

--Create a temp table

DROP TABLE IF EXISTS #VaccinatedPopulationPercentage

CREATE TABLE #VaccinatedPopulationPercentage
(
continent
nvarchar(255), 
location
nvarchar(255), 
date
datetime
population
numeric,
new_vaccinations
numeric,
people_vaccinated
numeric
)


INSERT INTO #VaccinatedPopulationPercentage
SELECT dea.continent, dea.location, dea.date, dea.population, vac.new_vaccinations, 
SUM(CONVERT(numeric, vac.new_vaccinations)) OVER(PARTITION BY dea.location ORDER BY dea.location, dea.date) AS people_vaccinated
FROM CovidProject..CovidDeaths AS dea
JOIN CovidProject..CovidVaccinations AS vac
         
ON dea.location = vac.location
         
AND dea.date = vac.date
WHERE dea.continent IS NOT NULL

SELECT *,(CONVERT(numeric, people_vaccinated)/population)*100 AS vaccination_percentage 
FROM #VaccinatedPopulationPercentage

--Create a view to store data for future visualizations

CREATE VIEW VaccinatedPopulationPercentage AS
SELECT dea.continent, dea.location, dea.date, dea.population, vac.new_vaccinations, 
SUM(CONVERT(numeric, vac.new_vaccinations)) OVER(PARTITION BY dea.location ORDER BY dea.location, dea.date) AS people_vaccinated
FROM CovidProject..CovidDeaths AS dea
JOIN CovidProject..CovidVaccinations AS vac
         
ON dea.location = vac.location
         
AND dea.date = vac.date
WHERE dea.continent IS NOT NULL

SELECT * FROM VaccinatedPopulationPercentage

© 2023 by Cristina Miron.

bottom of page