```{r} #step 1. call the required package library(ggplot2) #step 2. define a function named penetration_cutpoint, which is used for finding the cutoff value for penetration rate ( how the function works:generate 100 cutoff value, divide the dataset into two parts according to different cutoff values.Run a linear regression model for the second part of data (where penetration_rate>cutoff value), save the β coefficient into the vector "slope",find the optimal cutoff value, which is the one minimize the β coefficient of the linear regression model) penetration_cutpoint <- function(df){ N = 100 slope = rep(-99, N-1) # use the loop to get the value for the β coefficient, save it into the vector "slope" for (i in 1:N-1) { df_check = df[df$penetration_rate>=i, ] fit.lm.check = lm(prevalence~penetration_rate, data = df_check) slope[i] = fit.lm.check$coefficients[2] } # get the absolute value for the β coefficient slope_abs = abs(slope) row = 1:length(slope_abs) slope_matrix = cbind(row, slope_abs) slope_matrix = data.frame(slope_matrix) # choose the minimum β coefficient and get corresponding cutoff value of penetration rate cut = slope_matrix[which.min(slope_matrix$slope),1] beta = slope_matrix[which.min(slope_matrix$slope),2] my_list <- data.frame(cut, beta) return(my_list) } #step 3: use the above-mentioned function "penetration_cutpoint" to deal with a real-world dataset #(1)load the dataset df<-read.csv("2019_T1D.csv") #you can add your own dataset here #(2) use the defined function to get the optimal cutoff value and corresponding β coefficient result = penetration_cutpoint(df) result # show the optimal cutoff value and β coefficient #step 4: define a function named "plot_penetration", which is used for drawing the regression line for the dataset (divide the dataset into two parts:(1)penetration_rate=cut_point, draw two regression lines for the two datasets, add the cutoff line, save the picture as PNG) cut_point = result[1,1] plot_penetration <-function(df,cut_point){ df_before_cut = df[df$penetration_rate=cut_point, ] p <- ggplot(df, aes( y = prevalence,x = penetration_rate)) + geom_point()+labs(x = "Penetration rate (%)", y = "Prevalence of type 1 diabetes (%)") pic<-p + stat_smooth(data = df_before_cut, mapping = aes(x = penetration_rate,y = prevalence),method = "gam", formula = y ~ x ) + stat_smooth(data = df_after_cut, mapping = aes(x = penetration_rate,y = prevalence),method = "gam", formula = y ~ x ) + geom_vline(xintercept = cut_point, linetype = 2, color = "red") ggsave(file="penetration_cutoff.png", plot=pic, width=10, height=8) return(pic) } # step 5: use the function "plot_penetration" to draw the graph plot_penetration(df,cut_point) ```