Healthcare Insurance
This dataset contains information on the relationship between personal attributes (age, gender, BMI, family size, smoking habits), geographic factors, and their impact on medical insurance charges. It can be used to study how these features influence insurance costs and develop predictive models for estimating healthcare expenses. The dataset has the following columns and was coined insurance_data.
Age: The insured person’s age.
Sex: Gender (male or female) of the insured.
BMI (Body Mass Index): A measure of body fat based on height and weight.
Children: The number of dependents covered.
Smoker: Whether the insured is a smoker (yes or no).
Region: The geographic area of coverage.
Charges: The medical insurance costs incurred by the insured person.
Patient ID: Added the patient _column to the dataset
In addition, a second data filtered from a Hypertension Risk Dataset called Smokers Health by Md Raihan Kahn was added to the report to look at health effects of smokers versus non-smokers. The Dataset had eight columns and a new curated column member_id was added to the dataset and table is named smoking_health_data_final.
SELECT * FROM insurance_data
LIMIT 20
/*to take a look at the table Columns*/
SELECT age,sex,
CASE
WHEN bmi > 30 then 'Overweight'
WHEN bmi between 25 and 30 then 'Normal'
ELSE 'Underweight'
END AS Patient_Weight
FROM insurance_data
GROUP BY bmi,age;
/*to divide patients into different weight categories*/
SELECT
CASE
WHEN bmi > 30 then 'Overweight'
WHEN bmi between 25 and 30 then 'Normal'
ELSE 'Underweight'
END as Patient_Weight,
COUNT(*) as Patient_Weight_Category_Count
FROM insurance_data
GROUP by
CASE
WHEN bmi > 30 then 'Overweight'
WHEN bmi between 25 and 30 then 'Normal'
ELSE 'Underweight'
END;
Overweight 705 Normal Weight 388 Underweight 245
/*Count number of patients in the differenet weight categories*/
SELECT
sex,
ROUND(AVG(bmi), 2) AS avg_bmi,
Round (AVG(age),2) AS avg_age
FROM
insurance_data
GROUP BY
sex;
Male avg (age) 30.94; avg (bmi)39.50
Female avg (age) 30.38; avg (bmi)38.92
SELECT
age,
sex,
bmi,
smoker,
CASE
WHEN smoker = 'yes' AND bmi > 30
THEN 'Serious Safety Concern'
WHEN smoker = 'yes' AND bmi BETWEEN 20 AND 25
THEN 'Potential Safety Concern'
ELSE 'No Safety Concern'
END AS 'Safety Concern'
FROM insurance_data;
/*categorises patients into different risk levels
based on bmi and smoking*/
SELECT
smoker,
CASE
WHEN smoker = 'yes' AND bmi > 30
THEN 'Serious Safety Concern'
WHEN smoker = 'yes' AND bmi BETWEEN 20 AND 25
THEN 'Potential Safety Concern'
ELSE 'No Safety Concern'
END AS Safety_Concern,
COUNT(*) as Patient_Safety_Count
FROM insurance_data
GROUP by
CASE
WHEN smoker = 'yes' AND bmi > 30
THEN 'Serious Safety Concern'
WHEN smoker = 'yes' AND bmi BETWEEN 20 AND 25
THEN 'Potential Safety Concern'
ELSE 'No Safety Concern'
END;
No Safety Concern ;1148
Potential Safefy Concern ;46
Seriuos Safety Concern ;144
SELECT COUNT (*) as Number_of_patients,
Round(avg(age),2),
region
FROM insurance_data
GROUP by region
Northeast ; 39.27 324
Northwest ; 39.2 325
Southeast ; 38.94 364
Southwest ; 39.46 325
SELECT COUNT (*)
FROM insurance_data
WHERE age>
(SELECT Round(avg(age),2)
FROM insurance_data);
664
/*count of patients above avg(age)*/
SELECT smoker,
sex,
COUNT(*)
as patient_count
FROM
insurance_data
GROUP BY
smoker, sex;
No Smoker Female 547 Male 517
Yes Smoker Female 115 Male 159
SELECT
sex,
CASE
WHEN age BETWEEN 0 and 18 then 'Pediatic'
WHEN age BETWEEN 18 and 60 then 'Adults'
ELSE 'Seniors'
END AS Age_Band,
COUNT(*) as patient_count
FROM insurance_data
GROUP by sex,
Case
WHEN age BETWEEN 0 and 18 then 'Pediatic'
WHEN age BETWEEN 18 and 60 then 'Adults'
ELSE 'Seniors'
END
Female Adults 582 ; Pediatic 33 ; Seniors 47
Male Adults 596 ;Pediatic 36 ; Seniors 44
SELECT ROUND (avg(charges), 2)
AS average_charge,
ROUND( max (charges) ,2)
AS Maximum_charge,
ROUND( min(charges),2)
as minimum_charge
FROM insurance_data
Average Charge $ 13,270.42
Maximum Charge $ 63,770.43
Minimum Charge $ 1,121.87
SELECT sex, bmi,region,
SUM(charges)
AS Total_Charges
FROM insurance_data
GROUP BY sex, bmi
HAVING SUM(charges) >
(SELECT AVG(charges)
FROM insurance_data)
ORDER BY Total_Charges and region
DESC
LIMIT 10;
/* list of top 10 patients whose total charges exceed the avg (Charges)*/
Select
CASE
WHEN smoker = 'yes' and age > 50 then 'High Risk Patient'
WHEN smoker ='yes' and age BETWEEN 30 and 50 then 'Medium Risk Patient'
ELSE 'Low Risk'
END AS risk_category,
Count (sex) as number_of_patients
FROM insurance_data
GROUP BY
Case
WHEN smoker = 'yes' and age > 50 then 'High Risk Patient'
WHEN smoker ='yes' and age BETWEEN 30 and 50 then 'Medium Risk Patient'
ELSE 'Low Risk'
END
High Risk Patients ; 64
Medium Risk Patients ;1150
Low Risk Patients;125
SELECT
current_smoker,
CASE
WHEN current_smoker = 'yes' AND chol > 190
THEN 'High Risk for Heart Disease'
WHEN current_smoker = 'yes' AND chol BETWEEN 130 AND 159
THEN 'Medium Risk for Heart Disease'
ELSE 'Low Risk for Heart Disease'
END AS Safety_Concern,
COUNT(*) AS Risk_for_Cardiovascular_disease
FROM
smoking_health_data_final
GROUP BY
current_smoker,
CASE
WHEN current_smoker = 'yes' AND chol > 190
THEN 'High Risk for Heart Disease'
WHEN current_smoker = 'yes' AND chol BETWEEN 130 AND 159
THEN 'Medium Risk for Heart Disease'
ELSE 'Low Risk for Heart Disease'
END;
Smoker Safety Concern RisK for Risk for Cardiovascular Disease
No Low Risk for Heart Disease 1968
Yes High Risk for Heart Disease 1631
Yes Low Risk for Heart Disease 254
Yes Medium Risk for Heart Disease 47
SELECT
sex,
Round(avg(chol),2)
AS Average_chol
FROM
smoking_health_data_final
GROUP by sex
Male average Chol ; 233.11
Female average Chol ; 239.63
SELECT
sex, Round (avg(cigs_per_day),2)
FROM
smoking_health_data_final
GROUP BY sex
Male averarge cigs per day 13.37
Female average cigs per day 5.53
SELECT
sex,
COUNT(member_id)
AS Smokers_Chol_Above_AVG
FROM smoking_health_data_final
WHERE chol >
(SELECT AVG(chol)
FROM smoking_health_data_final)
AND cigs_per_day >
(SELECT AVG(cigs_per_day)
FROM smoking_health_data_final)
GROUP by sex
Female 239
MAle 439
/*patients who smoke more than the average cigs per day and also have
chol levels above average*/
SELECT
CASE
WHEN age BETWEEN 57 AND 75 THEN 'Baby Boomer'
WHEN age BETWEEN 41 AND 56 THEN 'Generation X'
ELSE 'Millennials'
END AS age_range,
COUNT (*)
FROM smoking_health_data_final
GROUP by
CASE
WHEN age BETWEEN 57 AND 75 THEN 'Baby Boomer'
WHEN age BETWEEN 41 AND 56 THEN 'Generation X'
ELSE 'Millennials'
END
Millenials 690
Generation X 2255
Baby Boomers 955
SELECT
CASE
WHEN age BETWEEN 57 AND 75 THEN 'Baby Boomer'
WHEN age BETWEEN 41 AND 56 THEN 'Generation X'
ELSE 'Millennial'
END AS age_group,
COUNT(*) AS smokers
FROM
smoking_health_data_final
WHERE
current_smoker = 'yes'
GROUP BY
age_group;
Millenial 325
Generation X 1190
BabyBoomer 417