Barplot and Scatterplot Comparing Sex Differences in Height
Author
Anthony Steven Dick
Load libraries
Define means and standard errors
Code
set.seed(123)mean_male <-175.3# for all groups 20+mean_female <-161.3# for all groups 20+se_male <-0.19# n = 5,092se_female <-0.19# n = 5,510sd_male <-13.6sd_female <-14.1n_male <-5092# sample sizen_female <-5510# sample sizesamp_n <-30
Generate sample height data for males in cm
Code
# Generate sample height data for males in cmheights_male <-rnorm(n = samp_n, mean = mean_male, sd = sd_male) # use estimates from larger sample, pull n = samp_n# Generate sample height data for females in cmheights_female <-rnorm(n = samp_n, mean = mean_female, sd = sd_female) # use estimates from larger sample, pull n = samp_n# Combine data into a data frameheight_data <-data.frame(Sex =factor(c(rep(0, samp_n), rep(1, samp_n))), # Convert to numeric: 1 for Male, 0 for FemaleHeight =c(heights_female, heights_male))# Generate sample height data for males for larger sample in cmheights_male_larger <-rnorm(n = n_male, mean = mean_male, sd = sd_male) #use estimates from larger sample# Generate sample height data for females in cmheights_female_larger <-rnorm(n = n_female, mean = mean_female, sd = sd_female) #use estimates from larger sample# Combine data into a data frameheight_data_larger <-data.frame(Sex =factor(c(rep(0, n_female), rep(1, n_male))), # Convert to numeric: 1 for Male, 0 for FemaleHeight =c(heights_female_larger, heights_male_larger))
Perform the t-test and compute Cohen’s d
Code
# Perform the t-testt_test_result <-t.test(Height ~ Sex, data = height_data, var.equal =TRUE)# Compute Cohen's dcohen_d <-cohens_d(Height ~ Sex, data = height_data, var.equal =TRUE)# Perform the t-testt_test_result_larger <-t.test(Height ~ Sex, data = height_data_larger, var.equal =TRUE)# Compute Cohen's dcohen_d_larger<-cohens_d(Height ~ Sex, data = height_data_larger, var.equal =TRUE)
Prepare data for plotting
Code
# Extract the means and standard deviations from the t-test resultmeans_male <- t_test_result$estimate[1]means_female <- t_test_result$estimate[2]# Subset data for males and femalesmale_heights <- height_data$Height[height_data$Sex ==1]female_heights <- height_data$Height[height_data$Sex ==0]# Reverse the order of levels in the Species variableheight_data$Sex <-factor(height_data$Sex, levels =levels(height_data$Sex))# Define Ravenclaw inspired colors#ravenclaw_colors <- c("#B0B7BC", "#222F5B") # Silver and Blue (if you want to go with the movie colors)ravenclaw_colors <-c("#B08D57", "#222F5B") # Bronze and Blue (if you want to go with the book colors)
Plot the data for the barplot.
Code
# Plot the barplot with error barsp_bar <-ggplot(height_data, aes(x = Sex, y = Height, fill = Sex)) +geom_bar(stat ="summary", fun ="mean", color ="black", width =0.4) +# Adjust width heregeom_errorbar(stat ="summary", fun.data ="mean_cl_boot", width =0.1, color ="black") +geom_text(x =0.5, y =max(height_data$Height) +3, label =paste("t(", t_test_result$parameter, ") = ", abs(round(t_test_result$statistic, 2)), ", p = ", round(t_test_result$p.value, 3), sep =""), size =6, color ="black", vjust =0, hjust =-.55) +labs(x ="Sex", y ="Mean Height (cm)", title ="Mean Height by Sex") +scale_fill_manual(values = ravenclaw_colors, labels =c("Female", "Male")) +# Use Ravenclaw colorsscale_x_discrete(labels =c("Female", "Male")) +scale_y_continuous(expand =c(0,0),limits =c(0,max(height_data$Height) +15)) +theme_bw() +# Set background theme to black and whitetheme(text =element_text(size =18), # Increase text sizepanel.border =element_blank(), # Remove panel borderpanel.grid.minor =element_blank(), # Remove major grid linesaxis.line.x =element_line(), axis.line.y =element_line())print(p_bar)
Code
# Save the barplot as a TIFF file with dimensions 9 x 6 inchesggsave(filename ="barplot_height_by_sex.tiff", plot = p_bar, width =9, height =6, dpi =600)
Conduct a regression and comput correlation
Code
# Perform linear regressionlm_model <-lm(Height ~ Sex, data = height_data)print("\nResults of linear regression:")
[1] "\nResults of linear regression:"
Code
summary(lm_model)
Call:
lm(formula = Height ~ Sex, data = height_data)
Residuals:
Min 1Q Median 3Q Max
-26.105 -7.865 -1.546 7.679 28.068
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 163.815 2.297 71.306 < 2e-16 ***
Sex1 10.845 3.249 3.338 0.00148 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 12.58 on 58 degrees of freedom
Multiple R-squared: 0.1611, Adjusted R-squared: 0.1467
F-statistic: 11.14 on 1 and 58 DF, p-value: 0.001478
Code
# Compute correlation coefficients and p-valuescorr_result <-cor.test(height_data$Height, as.numeric(height_data$Sex))print("Results of correlation test:")
[1] "Results of correlation test:"
Code
print(corr_result)
Pearson's product-moment correlation
data: x and y
t = 3.338, df = 58, p-value = 0.001478
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.1642482 0.5947323
sample estimates:
cor
0.4014306
Plot the scatterplot with linear regression line
Code
p_scatter <-ggplot(height_data, aes(x =as.numeric(Sex), y = Height, color = Sex)) +geom_point(alpha =0.6, position =position_jitter(width =0.02, height =0), size =3) +# Larger dotsgeom_smooth(method ="lm", se =TRUE, color ="black") +geom_text(x =0.5, y =max(height_data$Height) +3,label =paste("β = ", round(lm.beta(lm_model)$standardized.coefficients[2], 2), ", t(", lm_model$df.residual, ") = ", round(summary(lm_model)$coefficients[2, 3], 2),", p = ", round(summary(lm_model)$coefficients[2, 4], 3), sep =""),vjust =-0.5, hjust =-0.25, size =6, color ="black") +labs(x ="Sex", y ="Height (cm)", title ="Height by Sex") +scale_color_manual(values = ravenclaw_colors, labels =c("Female", "Male")) +# Use Ravenclaw colorsscale_x_discrete(labels =c("Female", "Male")) +coord_cartesian(ylim =c(150, max(height_data$Height) +10)) +# Adjust y-axis limitstheme_bw() +# Set background theme to black and whitetheme(text =element_text(size =18), # Increase text sizepanel.border =element_blank(), # Remove panel borderpanel.grid.minor =element_blank(), # Remove major grid linesaxis.line.x =element_line(), axis.line.y =element_line())print(p_scatter)
`geom_smooth()` using formula = 'y ~ x'
Code
# Save the scatterplot as a TIFF fileggsave(filename ="scatterplot_height_by_sex.tiff", plot = p_scatter, width =9, height =6, dpi =600)