library(car) df=read.csv(file="C:\\data01.csv", header=TRUE, sep=",") summary(df) a_df <-aggregate(df, by=list(df$Year,df$Sex), FUN=mean, na.rm=TRUE)#Take the average times of medalists a_df$y2=a_df$Year^2#add a squared term since the change in race times is probably non-linear a_df$gender=recode(a_df$Group.2, "c('M')=1; else=0")#recode Sex into an integer (so I can test the interaction between year and Sex) fm=lm(Result ~ Year + y2 + gender + gender*y2, data=a_df)#run the linear model a_df$pred=predict(fm) plot(y=a_df$pred, x=a_df$Year, main="100m Olympic final race times", col=c("blue","red")[a_df$Group.2], xlab="Year", ylab="Result(s)")#add a plot legend(x="topright", legend = levels(a_df$Group.2), col=c("blue","red"), pch=1)#add a legend to plot