dropboxDir = "~/Documents/Dropbox/" datadir = paste(dropboxDir,"Research/trevor/stats",sep="") chartdir = paste(dropboxDir,"Research/trevor/paper",sep="") simdir = paste(dropboxDir,"Research/trevor/simulation",sep="") # load consonant harmony file setwd(datadir) lex = read.csv("trevor_harmony.csv") lex$token_no = as.factor(lex$token_no) lex$left = as.factor(substr(lex$places,1,1)) lex$right = as.factor(substr(lex$places,3,3)) lex = merge(lex, read.csv("types.csv")) #summary(lex) # load complex onset file newcc = read.csv("trevor_ccons.txt",sep="\t") newcc$cluster.type = gsub("[rlmnwj]","R", newcc$cluster); newcc$cluster.type = gsub("[pfbv]","P", newcc$cluster.type); newcc$cluster.type = gsub("[tdT]","T", newcc$cluster.type); newcc$cluster.type = gsub("[kg]","K", newcc$cluster.type); newcc$cluster.type = gsub("s.+","sC", newcc$cluster.type); #summary(newcc) #nrow(newcc) complete = read.csv("trevor_complete_corpus.txt",sep="\t") complete = complete[,-which(colnames(complete) %in% c("Age_year","Age_month","Age_day"))] #summary(complete) ################# # prepare for making charts setwd(chartdir) chartmode = "pdf" chartmode = "screen" myfont = "Doulos SIL"; pointsize=10 myfont = "Linux Libertine"; pointsize=12 quartz.fnc = function(cwidth,cheight,cname) { if(chartmode=="screen") { quartz(width=cwidth,height=cheight) } if(chartmode=="pdf") { quartz(width=cwidth,height=cheight,type="pdf",file=cname) } } trevor.axes = function() { s = seq(365.25,365.25*3,60.875); axis(1,at=s,paste(floor(s/30.4375/12),(s/30.4375)%%12,sep=";"),cex.axis=1.5); axis(2,at=seq(0,1,.25),paste(seq(0,1,.25)*100,"%",sep=""),cex.axis=1.5); } # age -> age in days #with(lex,aggregate(days_old,list(age),mean));x[order(x[,2]),] ################### ### DONE LOADING DATA ################### #################################### ######### ############## ######### C H A R T S ############## ######### ############## #################################### ################## # ONSETS ################## quartz.fnc(cwidth=8,cheight=8,cname="n.onsets.pdf") par(family=myfont,ps=pointsize,mar=c(4,5,5,1),mfrow=c(3,2)) for(clust in c("PR","TR","KR","sC")) { x=xtabs(~ faithful + Age_in_days, data=subset(newcc, cluster.type == clust)); plot(x[2,]/colSums(x) ~ as.numeric(colnames(x)),cex=log(colSums(x)),xlim=c(360,1130),type="p", xlab="", ylab="faithful tokens", main= clust, col="gray50", lwd=.3, xaxt="n", yaxt="n", cex.axis=1.6, cex.lab=1.6, cex.main=2); trevor.axes() lex2=data.frame(age=as.numeric(colnames(x)),faith=as.numeric(x[2,]),unfaith=as.numeric(x[1,]), row.names=NULL); lex2.glm = glm(cbind(faith, unfaith) ~ poly(age,2) , family = "binomial", data = lex2);summary(lex2.glm); lex2$fittedbase = fitted(lex2.glm) lines(lex2$fittedbase ~ age, data=lex2, lwd=3) } if (chartmode=="pdf") {dev.off()} ########### # Harmony: all places ########### library(Design) quartz.fnc(cwidth=8,cheight=8,cname="n.harmony.by.places.pdf") par(family=myfont,ps=pointsize,mar=c(4,5,5,1),mfrow=c(3,2)) for(place in c("TVK","KVT","PVK","KVP","PVT","TVP")) { x = xtabs(~ production + I(days_old), data=subset(lex, places==place )) lex2=data.frame(age=as.numeric(colnames(x)),faith=as.numeric(x[1,]),LTR=as.numeric(x[2,]), RTL=as.numeric(x[3,]), row.names=NULL) lex2$total = lex2$faith + lex2$LTR ;lex2$unfaith = lex2$LTR lex2$total = lex2$faith + lex2$RTL;lex2$unfaith = lex2$RTL lex2$total = lex2$faith + lex2$LTR + lex2$RTL;lex2$unfaith = lex2$LTR + lex2$RTL lex2.glm = glm(cbind(faith, unfaith) ~ age , family = "binomial", data = lex2);summary(lex2.glm) lex2.glm8 = glm(cbind(faith, unfaith) ~ poly(age,2), family = "binomial", data = lex2);summary(lex2.glm8) lex2$p = lex2$faith/lex2$total lex2$fittedbase = fitted(lex2.glm) lex2$fitted8 = fitted(lex2.glm8) anv = anova(lex2.glm,lex2.glm8,test="Chisq") # round(anv[2,5],4) lex2 = subset(lex2,age<1130) plot(p ~ age, data=lex2, type="n", xlab="", ylab="faithful tokens", cex.lab=1.6, ylim=c(0,1), xlim=c(360,1130), main=paste(place), yaxt="n", xaxt="n", cex.main=2); trevor.axes(); lines(faith/(faith+unfaith) ~ age, data=subset(lex2,age<1730), type="p", cex=log(lex2$faith+lex2$unfaith+.2), col="gray50", lwd=.3); lines(fitted8 ~ age, data=lex2, lwd=3, lty="solid"); # lines(fittedbase ~ age, data=lex2, lwd=2, lty="dotted"); } if (chartmode=="pdf") {dev.off()} ####################### ## KVT/TVK by direction ###################### quartz.fnc(cwidth=9,cheight=9,cname="n.KVTTVK.pdf") par(family=myfont,ps=pointsize,mar=c(5,5,3,1),mfrow=c(2,1)) x = xtabs(~ production + I(days_old), data=subset(lex,places=="KVT")) lex2=data.frame(age=as.numeric(colnames(x)), faith=as.numeric(x[1,]), LTR=as.numeric(x[2,]), RTL=as.numeric(x[3,]), row.names=NULL) lex2$total = lex2$faith + lex2$LTR + lex2$RTL lex2$unfaith = lex2$LTR + lex2$RTL plot(unfaith/faith ~ age, data=lex2, type="n", xlab="", ylab="tokens", cex.lab=1.6, ylim=c(0,1), xlim=c(360,1130), yaxt="n", xaxt="n", cex.main=2, main="KVT", cex.main=2);trevor.axes(); lines(lex2$faith/lex2$total ~ lex2$age,type="p", cex=log(lex2$faith), col="gray50", lwd=.3); lex2.glm8 = glm(cbind(faith, unfaith) ~ poly(age,2), family = "binomial", data = lex2);summary(lex2.glm8);lex2$fittedL = fitted(lex2.glm8); lines(fittedL ~ age, data=lex2, lwd=3, lty="solid") lines(lex2$RTL/lex2$total ~ lex2$age,type="p", pch="T", cex=log(lex2$RTL)+.6, col="#CC0000") lines(lex2$LTR/lex2$total ~ lex2$age,type="p", pch="K", cex=log(lex2$LTR)+.6, col="blue") # x = xtabs(~ production + I(days_old), data=subset(lex,places=="TVK")) lex2=data.frame(age=as.numeric(colnames(x)), faith=as.numeric(x[1,]), LTR=as.numeric(x[2,]), RTL=as.numeric(x[3,]), row.names=NULL) lex2$total = lex2$faith + lex2$LTR + lex2$RTL lex2$unfaith = lex2$LTR + lex2$RTL plot(unfaith/total ~ age, data=lex2, type="n", xlab="", ylab="tokens", cex.lab=1.6, ylim=c(0,1), xlim=c(360,1130), yaxt="n", xaxt="n", cex.main=2, main="TVK", cex.main=2);trevor.axes(); lines(lex2$faith/lex2$total ~ lex2$age,type="p", cex=log(lex2$total), col="gray50", lwd=.3) lex2.glm8 = glm(cbind(faith, unfaith) ~ poly(age,2), family = "binomial", data = lex2);summary(lex2.glm8);lex2$fittedL = fitted(lex2.glm8); lines(fittedL ~ age, data=lex2, lwd=3, lty="solid") lines(lex2$RTL/lex2$total ~ lex2$age,type="p", pch="K", cex=log(lex2$RTL)+.6, col="blue") lines(lex2$LTR/lex2$total ~ lex2$age,type="p", pch="T", cex=log(lex2$LTR)+.8, col="#CC0000") if (chartmode=="pdf") {dev.off()} # KVT tokens vs. TVK tokens: basic vs. quadratic quartz.fnc(cwidth=8,cheight=8,cname="n.TVK.vs.KVT.pdf") par(family=myfont,ps=pointsize,mar=c(4,5,5,1),mfrow=c(3,2)) for(place in c("TVK","KVT")) { x = xtabs(~ production + I(days_old), data=subset(lex, places==place)) lex2=data.frame(age=as.numeric(colnames(x)), faith=as.numeric(x[1,]), LTR=as.numeric(x[2,]), RTL=as.numeric(x[3,]), row.names=NULL) lex2$total = lex2$faith + lex2$LTR + lex2$RTL lex2$unfaith = lex2$LTR + lex2$RTL plot(unfaith/faith ~ age, data=lex2, type="n", xlab="", ylab="faithful tokens", cex.lab=1.6, ylim=c(0,1), xlim=c(360,1130), main= place, yaxt="n", xaxt="n", cex.main=2);trevor.axes(); lines(faith/total ~ age, data=subset(lex2,age<1730), type="p", lwd=.3, cex=log(lex2$faith+lex2$unfaith+.2), col="gray50"); lex2.glm8 = glm(cbind(faith, unfaith) ~ age + I(age^2), family = "binomial", data = lex2);summary(lex2.glm8);lex2$fittedL = fitted(lex2.glm8); lines(fittedL ~ age, data=lex2, lwd=3, lty="solid") lex2.glm = glm(cbind(faith, unfaith) ~ age , family = "binomial", data = lex2);summary(lex2.glm);lex2$fittedR = fitted(lex2.glm); lines(fittedR ~ age, data=lex2, lwd=2, lty="dashed") print(anova(lex2.glm8, lex2.glm, test="Chisq")) } if (chartmode=="pdf") {dev.off()} # KVT tokens: linear vs. quadratic library(Design) quartz.fnc(cwidth=9,cheight=4,cname="n.KVT.Kside.pdf") par(family=myfont,ps=pointsize,mar=c(4,5,1,1),mfrow=c(1,1)) x = xtabs(~ production + I(days_old), data=subset(lex,places=="KVT")) lex2=data.frame(age=as.numeric(colnames(x)), faith=as.numeric(x[1,]), LTR=as.numeric(x[2,]), RTL=as.numeric(x[3,]), row.names=NULL) lex2$total = lex2$faith + lex2$LTR + lex2$RTL lex2$unfaith = lex2$LTR + lex2$RTL plot(unfaith/faith ~ age, data=lex2, type="n", xlab="", ylab="faithful tokens", cex.lab=1.6, ylim=c(0,1), xlim=c(360,1130), main="", yaxt="n", xaxt="n", cex.main=2);trevor.axes(); lines(faith/(faith+LTR) ~ age, data=subset(lex2,age<1730), type="p", lwd=.3, cex=log(lex2$faith+lex2$unfaith+.2), col="gray50"); lex2.glm8 = glm(cbind(faith, LTR) ~ poly(age,2), family = "binomial", data = lex2);summary(lex2.glm8);lex2$fittedL = fitted(lex2.glm8); lines(fittedL ~ age, data=lex2, lwd=4, lty="solid") lex2.glm = glm(cbind(faith, LTR) ~ age, family = "binomial", data = lex2);summary(lex2.glm);lex2$fittedL = fitted(lex2.glm); lines(fittedL ~ age, data=lex2, lwd=3, lty="dashed") if (chartmode=="pdf") {dev.off()} # KVT lmer library(languageR) quartz.fnc(cwidth=9,cheight=4,cname="n.KVT.lmer.all.pdf") par(family=myfont,ps=pointsize,mar=c(4,5,1,1),mfrow=c(1,1)) kvt = subset(lex,places=="KVT") kvt = kvt[order(kvt$days_old),] pl = poly(kvt$days_old,2) lmer = lmer(production=="faithful" ~ poly(days_old,2) + (1|type), data=kvt, family="binomial") lmer.b = lmer(production=="faithful" ~ days_old + (1|type), data=kvt, family="binomial");anova(lmer,lmer.b) a1 = fixef(lmer)[1] + fixef(lmer)[2]*pl[,1] + fixef(lmer)[3]*pl[,2] + mean(ranef(lmer)$type[,1]) plot(fitted(lmer) ~ kvt$days_old, type="p", pch=19, cex=.4, col="black", ylim=c(0,1), yaxt="n", xaxt="n", xlab="", ylab="faithful tokens", cex.lab=1.6);trevor.axes(); lines(plogis(a1) ~ kvt$days_old, type="l", lwd=4, ylim=c(0,1)) if (chartmode=="pdf") {dev.off()} # LMER by direction # first LTR library(languageR) quartz.fnc(cwidth=9,cheight=4,cname="n.KVT.lmer.both.pdf") par(family=myfont,ps=pointsize,mar=c(4,5,1,1),mfrow=c(1,1)) plot(0~0, type="n", ylim=c(0,1), xlim=c(360,1130), yaxt="n", xaxt="n", xlab="", ylab="faithful tokens", cex.lab=1.6);trevor.axes(); kvt = subset(lex,places=="KVT" & production!="RTL") kvt = kvt[order(kvt$days_old),] pl = poly(kvt$days_old,2) lmer = lmer(production=="faithful" ~ poly(days_old,2) + (1|type), data=kvt, family="binomial") lines(fitted(lmer) ~ kvt$days_old, type="p", pch=4, cex=.8-fitted(lmer)/3, col="blue"); # pch=4 is an x (~K) # K = 75 ltr = fixef(lmer)[1] + fixef(lmer)[2]*pl[,1] + fixef(lmer)[3]*pl[,2] + mean(ranef(lmer)$type[,1]); ltr.days = kvt$days_old # now RTL kvt = subset(lex,places=="KVT" & production!="LTR") kvt = kvt[order(kvt$days_old),] pl = poly(kvt$days_old,2) lmer = lmer(production=="faithful" ~ poly(days_old,2) + (1|type), data=kvt, family="binomial") rtl = fixef(lmer)[1] + fixef(lmer)[2]*pl[,1] + fixef(lmer)[3]*pl[,2] + mean(ranef(lmer)$type[,1]);rtl.days = kvt$days_old lines(fitted(lmer) ~ kvt$days_old, type="p", pch=3, col="#CC0000", cex=.8-fitted(lmer)/3); # pch=3 is a cross (~T) # T=84 lines(plogis(rtl) ~ rtl.days, type="l", lwd=3, ylim=c(0,1), col="#CC0000");rtl.days[which(min(plogis(rtl))==plogis(rtl))] lines(plogis(ltr) ~ ltr.days, type="l", lwd=3, ylim=c(0,1), col="blue");ltr.days[which(min(plogis(ltr))==plogis(ltr))] text(480,.58,"K", cex=2, col="blue") # "progressive" text(738,.82,"T", cex=2, col="#CC0000") # "regressive" if (chartmode=="pdf") {dev.off()} ############################################## ######### ############## ######### S I M U L A T I O N S ############## ######### ############## ############################################## ## complex onsets quartz.fnc(cwidth=9,cheight=4,cname="r.sim.onsets.pdf") library(Design);library(languageR) setwd(simdir) #system("perl trevor_complexonset.pl") setwd(chartdir) sim3 = read.csv("sim.onsets.csv",header=F) colnames(sim3) = c("sim","age","grammar","lexicon","type","production") sim3$tage = sort(subset(newcc, cluster.type != "sC")$Age_in_days) summary(sim3) par(family=myfont,ps=pointsize,mar=c(4,5,1,1),mfrow=c(1,1)) plot(0~0, type="n", xlim=c(350,1150),ylim=c(0,1), ylab="tokens", yaxt="n", xlab="simulated time", cex.lab=1.6, cex.axis=1.5, xaxt="n");trevor.axes(); for(s in 0:max(sim3$sim)) { l = lrm(production=="faithful" ~ poly(tage,2), data=subset(sim3,sim==s)); lines(plogis(predict(l))~tage, type="l", lwd=1, data=subset(sim3,sim==s), col="gray80"); } l = lrm(production=="faithful" ~ poly(tage,2), data=subset(sim3,sim==s)); lines(plogis(predict(l))~tage, type="l", lwd=2, data=subset(sim3,sim==s)) if (chartmode=="pdf") {dev.off()} # KVT + TVK library(Design);library(languageR) setwd(simdir) #system("perl trevor_consonantharmonyTVKKVT.pl") setwd(chartdir) quartz.fnc(cwidth=9,cheight=9,cname="n.sim.harmony.pdf") sim2 = read.csv("sim.harmony.TVK.csv",header=F) colnames(sim2) = c("sim","age","grammar","lexicon","type","place","production") #summary(sim2) sim2K = subset(sim2,place==1);sim2K$tage = sort(subset(lex, places=="KVT")$days_old);summary(sim2K) sim2T = subset(sim2,place==0);sim2T$tage = sort(subset(lex, places=="TVK")$days_old);summary(sim2T) par(family=myfont,ps=pointsize,mar=c(5,5,3,1),mfrow=c(2,1)) plot(0~0, type="n", xlim=c(350,1150),ylim=c(0,1), ylab="tokens", yaxt="n", xlab="simulated time", cex.lab=1.6, cex.axis=1.5, xaxt="n",main="KVT", cex.main=2);trevor.axes(); K.sd = rep(NA,20); for(s in 0:19) { l = lrm(production=="faithful" ~ poly(tage,2), data=subset(sim2K,sim==s)); lines(plogis(predict(l))~tage, type="l", lwd=1, col="gray80", data=subset(sim2K,sim==s)) } s = sample(0:19, size=1) l = lrm(production=="faithful" ~ poly(tage,2), data=subset(sim2K,sim==s)); lines(plogis(predict(l))~tage, type="l", lwd=2, data=subset(sim2K,sim==s)) x = xtabs(~ production + I(tage), data=subset(sim2K,sim==s)) lex2=data.frame(age=as.numeric(colnames(x)), faith=as.numeric(x[1,]), LTR=as.numeric(x[2,]), RTL=as.numeric(x[3,]), row.names=NULL) lex2$total = lex2$faith + lex2$LTR + lex2$RTL lines(lex2$LTR/lex2$total ~ lex2$age,type="p", pch="T", cex=log(lex2$LTR)*.8, col="#CC0000") lines(lex2$RTL/lex2$total ~ lex2$age,type="p", pch="K", cex=log(lex2$RTL)*.8, col="blue") # plot(0~0, type="n", xlim=c(350,1150),ylim=c(0,1), ylab="tokens", yaxt="n", xlab="simulated time", cex.lab=1.6, cex.axis=1.5, xaxt="n",main="TVK", cex.main=2);trevor.axes(); T.sd = rep(NA,20); for(s in 0:19) { l = lrm(production=="faithful" ~ poly(tage,2), data=subset(sim2T,sim==s)); lines(plogis(predict(l))~tage, type="l", lwd=1, col="gray80", data=subset(sim2T,sim==s)) } s = sample(0:19, size=1) l = lrm(production=="faithful" ~ poly(tage,2), data=subset(sim2T,sim==s)); lines(plogis(predict(l))~tage, type="l", lwd=2, data=subset(sim2T,sim==s)) x = xtabs(~ production + I(tage), data=subset(sim2T,sim==s)) lex2=data.frame(age=as.numeric(colnames(x)), faith=as.numeric(x[1,]), LTR=as.numeric(x[2,]), RTL=as.numeric(x[3,]), row.names=NULL) lex2$total = lex2$faith + lex2$LTR + lex2$RTL lines(lex2$LTR/lex2$total~lex2$age,type="p", pch="T", cex=log(lex2$LTR)*1.1, col="#CC0000") lines(lex2$RTL/lex2$total~lex2$age,type="p", pch="K", cex=log(lex2$RTL)*1.1, col="blue") if (chartmode=="pdf") {dev.off()}