library(mvtnorm)

# Grammar functions
# Inputs: a tableau, tab, and a vector of constraint weights, w
# Each returns a vector of candidate probabilities

# sample tableau
# candidates are rows, constraints are columns
c1 = c(-1,0,0)
c2 = c(0,-2,0)
c3 = c(0,-1,-1)
tab = rbind(c1, c2, c3)
# vector of constraint weights in the same order as the columns of tab
w = c(15,8,8)

# MaxEnt probabilities
# calculate probabilities of candidates given a MaxEnt grammar
ME <- function(tab,w){
  # calculate harmony vector
  h = w %*% t(tab)
  # calculate probabilities of candidates
  p = exp(h)/sum(exp(h))
  return(p)
}

# calculate probabilities of candidates given a Normal MaxEnt grammar
NME <- function(tab,w){
  # calculate harmony vector
  h = w %*% t(tab)
  # initialize vector of candidate probabilities
  p = rep(NA,nrow(tab))
  # ncands is the number of candidates
  ncands = nrow(tab)
  # set standard deviation of noise terms
  sd = 1
  # sigma is the covariance matrix of the joint distribution of noise differences,
  # i.e. an (ncands-1)*(ncands-1) matrix with 2*sd^2 on the leading diagonal and sd^2 elsewhere
  sigma = sd^2*(diag(ncands-1)+matrix(1,nrow=ncands-1, ncol=ncands-1))
  # calculate probability of each candidate
  for (c in 1:ncands){
    # calculate harmony differences between candidate c and each other candidate
    hdiff = h[c]-h[-c]
    # calculate probability of candidate c from the multivariate normal distribution function
    p[c] = pmvnorm(upper=hdiff, sigma=sigma)
  }
  return(p)
}

# NHG probabilities
# calculate the probabilities of candidates given an NHG
NHG <- function(tab,w){
  # calculate harmony vector
  h = w %*% t(tab)
  # initialize vector of candidate probabilities
  p = rep(NA,nrow(tab))
  # set standard deviation of noise terms
  sd = 1
  # calculate probability of each candidate
  for (c in 1:nrow(tab)){
    # calculate vector of harmony differences between candidate c and each other candidate
    hdiff = h[c]-h[-c]
    # calculate matrix of differences in violations between candidate c and each other candidate
    difftab = tab[-c,]-matrix(rep(tab[c,],nrow(tab)-1),nrow=nrow(tab)-1,byrow=TRUE)
    # calculate probability of candidate c from the multivariate normal distribution function,
    # using the violation difference matrix to calculate the covariance among noise terms, sigma
    p[c] = pmvnorm(upper=hdiff, sigma=(sd^2)*(difftab %*% t(difftab)))
  }
  return(p)
}

# estimate probabilities of candidates given a censored NHG
cNHG <- function(tab,w){
  # initialize vector of candidate probabilities
  p_hat = rep(NA,nrow(tab))
  # number of constraints
  nconstraints = ncol(tab)
  # number of samples to use in simulation
  N = 10^6
  # generate N weight vectors, each perturbed by censored noise, and combine them into a matrix
  wnoise = matrix(pmax(rnorm(nconstraints*N, mean=w, sd=1),0),ncol=nconstraints, byrow=TRUE)
  # calculate matrix of harmonies of candidates for each perturbed weight vector
  hs = wnoise %*% t(tab)
  # calculate observed probabilities of each candidate
  for(c in 1:nrow(tab)){
    # count the number of times candidate c wins and divide by N
    p_hat[c] = sum(apply(hs,1,which.max)==c)/N
  }
  return(p_hat)
}

# apply the functions to the sample tableau
# MaxEnt
ME(tab,w)
# Normal MaxEnt
NME(tab,w)
# NHG
NHG(tab,w)
# censored NHG
cNHG(tab,w)
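# Optional sanity check (not part of the original script): approximate the
# analytic NHG probabilities by direct simulation, perturbing each constraint
# weight with uncensored N(0,1) noise and tallying winners. This mirrors the
# simulation used in cNHG(), minus the censoring step, so the simulated
# proportions should match NHG(tab,w) up to Monte Carlo error.
N = 10^5
wnoise = matrix(rnorm(ncol(tab)*N, mean=w, sd=1), ncol=ncol(tab), byrow=TRUE)
hs = wnoise %*% t(tab)
table(factor(apply(hs,1,which.max), levels=1:nrow(tab)))/N
# compare with the analytic values
NHG(tab,w)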
########################################
# Fit grammars to Smith & Pater schwa data

# Observed rates of schwa realization in Smith & Pater (2020)
# order of contexts: clitic: CC_s, CC_ss, C_s, C_ss, word: CC_s, CC_ss, C_s, C_ss
p = c(0.938,0.914,0.648,0.562,0.833,0.683,0.122,0.09)
# convert probabilities to counts
# 27 subjects * 6 items per context = 162
schwa_count = round(162*p)

# Fit MaxEnt grammars to schwa data
# Constraints - differences in violations in each context, ordered as above:
# schwa violations - non-schwa violations
# NoSchwa
c1 = rep(-1, 8)
# *CCC
c2 = c(1,1,0,0,1,1,0,0)
# *Clash
c3 = c(1,0,1,0,1,0,1,0)
# Max
c4 = c(rep(1,4),rep(0,4))
# Dep
c5 = c(rep(0,4),rep(-1,4))
# *Cluster
c6 = c(0,0,1,1,0,0,1,1)
# *CCC/iP
c7 = c(rep(0,4),1,1,0,0)

#######################
# Fit MaxEnt grammar with constraints 1-4 using logistic regression
# (weight of constraint 1 = negation of the intercept)
maxent = glm(cbind(schwa_count, 162-schwa_count)~c2+c3+c4, family=binomial(link="logit"))
summary(maxent)
# extract fitted schwa probabilities
ppmaxent = fitted(maxent)

# Test for significant differences in the weights of constraints between subsets of contexts
# by adding two-way interactions to the logistic regression MaxEnt model
twoway = glm(cbind(schwa_count, 162-schwa_count)~(c2+c3+c4)^2, family=binomial(link="logit"))
summary(twoway)

# Fit normal MaxEnt grammar with constraints 1-4 using probit regression.
# In probit regression the noise follows a normal distribution with sd = 1, whereas in normal MaxEnt
# the sd of the noise difference is sqrt(2) if the noise added to candidate harmonies has sd = 1.
# So to convert the coefficients of the probit regression to normal MaxEnt weights, they must be multiplied by sqrt(2)
# (weight of constraint 1 = -intercept*sqrt(2))
norm_maxent = glm(cbind(schwa_count, 162-schwa_count)~c2+c3+c4, family=binomial(link="probit"))
summary(norm_maxent)
# convert coefficients to constraint weights
w = coef(norm_maxent)*sqrt(2)
w[1] = -w[1]
# extract fitted schwa probabilities
ppnormmaxent = fitted(norm_maxent)

# Fit models with a revised constraint set, adding *CCC/iP and collapsing Max/Dep into a single constraint
# create Max/Dep constraint
c45 = c4+c5
# Fit MaxEnt grammar with the revised constraint set using logistic regression
# (weight of constraint 1 = negation of the intercept)
maxent2 = glm(cbind(schwa_count, 162-schwa_count)~c2+c3+c45+c7, family=binomial(link="logit"))
summary(maxent2)
# extract fitted schwa probabilities
ppmaxent2 = fitted(maxent2)

# Fit normal MaxEnt grammar with the revised constraint set using probit regression
norm_maxent2 = glm(cbind(schwa_count, 162-schwa_count)~c2+c3+c45+c7, family=binomial(link="probit"))
summary(norm_maxent2)
# convert coefficients to constraint weights
w = coef(norm_maxent2)*sqrt(2)
w[1] = -w[1]

############################
# Fit NHG grammars to Smith & Pater schwa data

# Tableaux
# violation vectors for schwa and non-schwa candidates in each context
# order of constraints: NoSchwa, *CCC, *Clash, Max, Dep, *CC (= *Cluster above), *CCC/iP
#CC_, _s, /schwa/
c1 = c(0,-1,-1,-1,0,-1,0)
c2 = c(-1,0,0,0,0,-1,0)
#CC_, _ss, /schwa/
c3 = c(0,-1,0,-1,0,-1,0)
c4 = c(-1,0,0,0,0,-1,0)
#C_, _s, /schwa/
c5 = c(0,0,-1,-1,0,-1,0)
c6 = c(-1,0,0,0,0,0,0)
#C_, _ss, /schwa/
c7 = c(0,0,0,-1,0,-1,0)
c8 = c(-1,0,0,0,0,0,0)
#CC_, _s, /0/
c9 = c(0,-1,-1,0,0,-1,-1)
c10 = c(-1,0,0,0,-1,-1,0)
#CC_, _ss, /0/
c11 = c(0,-1,0,0,0,-1,-1)
c12 = c(-1,0,0,0,-1,-1,0)
#C_, _s, /0/
c13 = c(0,0,-1,0,0,-1,0)
c14 = c(-1,0,0,0,-1,0,0)
#C_, _ss, /0/
c15 = c(0,0,0,0,0,-1,0)
c16 = c(-1,0,0,0,-1,0,0)
# combine vectors into a matrix of tableaux
tabs = rbind(c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16)
# exclude *CCC/iP
tabs = tabs[,1:6]
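# Optional check (not part of the original script): for a two-candidate
# tableau, the NHG probability has a closed form, since the difference
# between the candidates' noisy harmonies is univariate normal with
# variance sd^2 * sum((v1-v2)^2), where v1 and v2 are the candidates'
# violation vectors. Illustrated here with the first tableau and arbitrary
# test weights (hypothetical values, not fitted ones).
tab2 = tabs[1:2,]
wtest = rep(1, ncol(tabs))
h = as.vector(wtest %*% t(tab2))
vdiff = tab2[1,]-tab2[2,]
pnorm((h[2]-h[1])/sqrt(sum(vdiff^2)))
# compare with the analytic NHG value
NHG(tab2, wtest)[2]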
# function to calculate the residual deviance of an NHG with vector of constraint weights, w,
# given a set of tableaux, tabs, and observed counts of schwa (y1) and non-schwa (y0) candidates
devhg <- function(w,tabs,y1,y0){
  p_hat = rep(NA,nrow(tabs)/2)
  for(input in 1:8){
    tab = tabs[c((2*input)-1,2*input),]
    p_hat[input] = NHG(tab,w)[2]
  }
  resdev = 2*sum(y1*log((y1/(y1+y0))/p_hat)+y0*log((y0/(y1+y0))/(1-p_hat)))
  return(resdev)
}

# version of devhg that fixes the weights of Dep and *CC at 0, to eliminate the redundancy
# that is present if all constraint weights are allowed to vary
devhg4 <- function(w,tabs,y1,y0){
  p_hat = rep(NA,nrow(tabs)/2)
  for(input in 1:8){
    tab = tabs[c((2*input)-1,2*input),]
    p_hat[input] = NHG(tab,c(w,0,0))[2]
  }
  resdev = 2*sum(y1*log((y1/(y1+y0))/p_hat)+y0*log((y0/(y1+y0))/(1-p_hat)))
  return(resdev)
}

# Find the best-fitting constraint weights for the NHG
# set initial weights to 1 (use the version matching the number of free weights in the model being fitted)
# 4 constraint weights
initial_w = rep(1,4)
# 6 constraints
initial_w = rep(1,6)
# 7 constraints
initial_w = rep(1,7)

# Use the L-BFGS-B algorithm to search for constraint weights that minimize the deviance,
# constraining the weights to be non-negative.
# All weights free to vary
soln = optim(initial_w,devhg,tabs=tabs,y1=schwa_count,y0=162-schwa_count, lower=0, method="L-BFGS-B")
# Use the Nelder-Mead algorithm to search for constraint weights that minimize the deviance,
# with no restrictions on the constraint weights
soln = optim(initial_w,devhg,tabs=tabs,y1=schwa_count,y0=162-schwa_count)
# set w to the optimal constraint weights
w = soln$par

# First four constraint weights free to vary, weights 5 and 6 fixed at 0
soln = optim(initial_w,devhg4, tabs=tabs,y1=schwa_count,y0=162-schwa_count)
# set w to the optimal constraint weights, with weights 5 and 6 set to 0
w = c(soln$par, 0, 0)

# calculate fitted probabilities, p_hat, given the weight vector, w, and tableaux, tabs
p_hat = rep(NA,nrow(tabs)/2)
for(input in 1:8){
  tab = tabs[c((2*input)-1,2*input),]
  p_hat[input] = NHG(tab,w)[2]
}

# fit the constraint set with *CCC/iP, but collapsing Max and Dep into a single constraint
# rebuild the full 7-constraint tableaux first (tabs was truncated to 6 columns above)
tabs = rbind(c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16)
# collapse Max and Dep into a single constraint
tabs[,4] = tabs[,4]+tabs[,5]
tabs = tabs[,-5]

# version of devhg that fixes the weight of *CC at 0, avoiding redundancy in the constraint weights
devhg5 <- function(w,tabs,y1,y0){
  p_hat = rep(NA,nrow(tabs)/2)
  for(input in 1:8){
    tab = tabs[c((2*input)-1,2*input),]
    p_hat[input] = NHG(tab,c(w[1:4],0,w[5]))[2]
  }
  resdev = 2*sum(y1*log((y1/(y1+y0))/p_hat)+y0*log((y0/(y1+y0))/(1-p_hat)))
  return(resdev)
}

# set initial weights to 1
initial_w = rep(1,5)
# Use the Nelder-Mead algorithm to search for constraint weights that minimize the deviance,
# with the weight of *CC fixed at 0.
soln = optim(initial_w,devhg5, tabs=tabs,y1=schwa_count,y0=162-schwa_count, control=list(maxit=1000))
# set w to the optimal constraint weights
w = c(soln$par[1:4], 0, soln$par[5])

# calculate fitted probabilities, p_hat, given the weight vector, w, and tableaux, tabs
p_hat = rep(NA,nrow(tabs)/2)
for(input in 1:8){
  tab = tabs[c((2*input)-1,2*input),]
  p_hat[input] = NHG(tab,w)[2]
}
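# Optional diagnostic (not part of the original script): compare the observed
# schwa rates with the fitted NHG probabilities side by side. The residual
# deviance at the optimum is also available directly as soln$value.
round(cbind(observed = p, NHG_fitted = p_hat), 3)
soln$value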
#########################
# Fit censored NHG

# function to estimate the residual deviance of a censored NHG with vector of constraint weights, w,
# given a set of tableaux, tabs, and observed counts of schwa (y1) and non-schwa (y0) candidates
devcNHG = function(w,tabs,y1,y0){
  p_hat = rep(NA,nrow(tabs)/2)
  for(input in 1:8){
    tab = tabs[c((2*input)-1,2*input),]
    p_hat[input] = cNHG(tab,w)[2]
  }
  resdev = 2*sum(y1*log((y1/(y1+y0))/p_hat)+y0*log((y0/(y1+y0))/(1-p_hat)))
  return(resdev)
}

# Find the best-fitting constraint weights for the censored NHG
# initial constraint weights for 7 constraints (based on the optimum for 6 constraints)
initial_w = c(11, 12, 0, 2, 0, 9, 1)
# initial constraint weights for 6 constraints (based on previous runs)
initial_w = c(10, 10, 0.3, 1.5, 0.1, 9)

# use the Nelder-Mead algorithm to search for constraint weights that minimize the deviance of the censored NHG grammar.
# Warning: this can take a very long time to run, because each evaluation of devcNHG is slow
# (cNHG simulates 10^6 weight vectors per tableau).
# It may be necessary to increase maxit, or to feed interim results back into the optimization routine, to find optimal values.
soln = optim(initial_w,devcNHG,tabs=tabs,y1=schwa_count,y0=162-schwa_count, control=list(maxit=200))
# extract the optimal weights
w = soln$par

# estimate fitted probabilities
p_hat = rep(NA,nrow(tabs)/2)
for(input in 1:8){
  tab = tabs[c((2*input)-1,2*input),]
  p_hat[input] = cNHG(tab,w)[2]
}

# estimate the residual deviance of the model from p_hat
# note that this will generally be higher than the deviance reported by optim(), because
# that value will typically represent a 'lucky draw' in the simulation process
y1 = schwa_count
y0 = 162-schwa_count
resdev = 2*sum(y1*log((y1/(y1+y0))/p_hat)+y0*log((y0/(y1+y0))/(1-p_hat)))
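# Optional (not part of the original script): approximate Monte Carlo standard
# errors of the simulated probabilities. Each p_hat is a proportion estimated
# from N = 10^6 draws, so its standard error is sqrt(p_hat*(1-p_hat)/N).
# This gives a sense of how much resdev can fluctuate between runs.
sqrt(p_hat*(1-p_hat)/10^6)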