Health Metrics Dataset
This dataset was generated using synthetic data created with the Python faker library. It simulates health metrics for 1,000 individuals, including information on blood pressure, cholesterol levels, BMI, smoking status, and diabetes status. The data was generated randomly, with certain constraints to mimic real-world distributions. (source file location www.kaggle.com)
An age band column was also created (not in the original dataset) and classified patients from Young Adult to Old.
The purpose of this analysis is to do a cohort analysis of the patients and come up with insights. The analysis will look at patient’s height, BMI index and whether being a smoker affects one’s health. Also, patients' diabetics status will be analyzed.
Using SQL, I was able to come up with the following findings.
Select *
FROM enhanced_health_data limit 10;
/** to take a look at the Table **/
Select COUNT (*)
from enhanced_health_data;
Total patient count 1000
Select COUNT (*)
from enhanced_health_data
WHERE gender = 'Female'
ORDER BY gender;
494
Select COUNT (*)
from enhanced_health_data
WHERE gender = 'Male'
ORDER BY gender;
506
Select
count (case when gender='Male' then 1 end) as male_cnt,
count (case when gender='Female' then 1 end) as female_cnt,
count (*) as total_cnt
from enhanced_health_data;
Male 506
Female 494
Total Count 1000
Select Avg (age), min (age),max(age)
from enhanced_health_data;
avg age 48.4 yrs
min age 18 yrs
max age 80 yrs
Select Round (avg(age))
FROM enhanced_health_data as Avg_male_age
where gender ='Male';
48
Select Round (avg(age))
FROM enhanced_health_data as Avg_female_age
where gender ='Female';
50
Select name, age, systolic_bp
from enhanced_health_data
where age>
(select Round (avg(age)) FROM enhanced_health_data);
/** Gives a list of patient names, ages and Systolic_bp for those
patients above Average age **/
Select COUNT (systolic_bp)
from enhanced_health_data
where age>
(select Round (avg(age)) FROM enhanced_health_data);
481
Select COUNT (systolic_bp)
from enhanced_health_data
where age<
(select Round (avg(age))
FROM enhanced_health_data);
497
Select COUNT (systolic_bp)
from enhanced_health_data
where age<=
(select Round (avg(age))
FROM enhanced_health_data);
519
Select COUNT (cholesterol)
from enhanced_health_data
where cholesterol>
(select Round (avg(cholesterol))
FROM enhanced_health_data);
470
Select COUNT (cholesterol)
from enhanced_health_data
where cholesterol<
(select Round (avg(cholesterol))
FROM enhanced_health_data);
513
Select COUNT (smoker)
from enhanced_health_data
where cholesterol>
(select Round (avg(cholesterol))
FROM enhanced_health_data);
470
Select
count (case when gender='Male' then 1 end) as male_cnt,
count (case when gender='Female' then 1 end) as female_cnt,
count (*) as total_cnt
from enhanced_health_data
ORDER by smoker;
Female 494
Male 506
Total Count 1000
Select COUNT (gender)
from enhanced_health_data
where smoker is 'FALSE' and gender is 'Female';
273
Select COUNT (gender)
from enhanced_health_data
where smoker is 'False' and gender is 'Male';
265
Select COUNT (gender)
from enhanced_health_data
where smoker is 'FALSE' and diabetes is 'TRUE';
251
Select COUNT (gender)
from enhanced_health_data
where smoker is 'FALSE' and diabetes is 'FALSE';
287
Select COUNT (gender)
from enhanced_health_data
where smoker is 'TRUE' and diabetes is 'TRUE';
229
Select COUNT (*) AS High_Bp
FROM enhanced_health_data
WHERE systolic_bp >
(SELECT AVG (systolic_bp)
FROM enhanced_health_data);
514
Select COUNT (*) AS Low_Bp
FROM enhanced_health_data
WHERE systolic_bp <
(SELECT AVG (systolic_bp)
FROM enhanced_health_data);
486
Select COUNT(*)AS Non_smoker
FROM enhanced_health_data
WHERE smoker is 'FALSE'
and systolic_bp <
(SELECT AVG(systolic_bp)
FROM enhanced_health_data);
264
Select COUNT (*) AS Smoke
FROM enhanced_health_data
WHERE smoker is 'TRUE'
and systolic_bp >
(SELECT AVG(systolic_bp)
FROM enhanced_health_data);
240
Select COUNT(*) AS NonSmoker
FROM enhanced_health_data
WHERE smoker = 'False'
AND systolic_bp >
(SELECT AVG(systolic_bp)
FROM enhanced_health_data);
274
Select gender,COUNT (*) as Health_Status
from enhanced_health_data
where health is 'Good' or smoker is 'True' and diabetes is 'True'
GROUP by gender;
Female 65
Male 45
Select gender,COUNT (*) as Health_Status
from enhanced_health_data
where health is 'Fair' or smoker is 'True' and diabetes is 'True'
GROUP by gender;
Female 327
Male 335
Select gender, COUNT(*) AS Health_Status
FROM enhanced_health_data
WHERE health = 'Poor'
OR (smoker = 'True' AND diabetes = 'True')
GROUP BY gender;
Select gender, COUNT(*) AS Health_Status
FROM enhanced_health_data
WHERE (health = 'Good' OR (smoker = 'True' AND diabetes = 'True'))
GROUP BY gender;
Female 65
Male 45
Select gender,round (avg (bmi)) from
enhanced_health_data
GROUP by gender
Female 25
Male 25
Select gender,round(avg (height))
as Average_height
from enhanced_health_data
GROUP by gender;
Female 174
Male 176
Select gender, count (gender)
from enhanced_health_data
WHERE age > 40 and smoker is 'TRUE'and diabetes is 'TRUE'
GROUP by gender
Female 74
Male 76
Select gender, count (gender)
from enhanced_health_data
WHERE age > 40 and smoker is 'FALSE'and diabetes is 'TRUE'
GROUP by gender
Female 77
Male 72
Select gender, COUNT (gender) as Overweight
FROM enhanced_health_data
where bmi > 30
Group by gender
Female 112
Male 89
Select gender, COUNT (gender) as Healthly
FROM enhanced_health_data
where bmi BETWEEN 18 and 25
Group by gender
Female 199
Male 218
Select COUNT (age_band_) ,age_band_ AS "Age Band"
FROM enhanced_health_data
GROUP BY age_band_;
Adult 255
Child 13
Old 87
Senior 380
Young Adult 265
WITH cte AS
(SELECT age, gender,age_band_ ,diabetes
FROM enhanced_health_data)
SELECT avg (age)
FROM cte
where gender is 'Female';
49.8 years
WITH cte AS
(SELECT age, gender,age_band_ ,diabetes
FROM enhanced_health_data)
SELECT avg (age)
FROM cte
where gender is 'Male';
47.8 years
WITH cte AS
(SELECT age ,gender,diabetes,bmi,health
FROM enhanced_health_data )
SELECT gender,COUNT (*)
FROM
cte
GROUP BY gender;
Female 494
Male 506