#!/usr/bin/perl -w use strict; # use this for every damn script you ever write use warnings; # use this for every damn script you ever write use Fcntl qw(:flock); # lets you lock files to prevent others from using them while they're open # name of file that contains a word list (dictionary) # the file should contain nothing but words, each in a separate line # # each sound must be represented by one letter # this is crucially used in building the alphabet and in # generating the neighbors # my $dictionaryFile = "citation.txt" ; # name of file that contains a list of words to calculate the density for. # each word in a separate line my $inputFile = "stimuli.txt" ; # name of output file my $outputFile = "stimuli_density.txt"; # STEP 1: # create lookup table (load the dictionary to the working memory) # and also collect the letters of the alphabet open (INPUT, "<" . $dictionaryFile) or die "Can't read dictionary file $!"; flock(INPUT, LOCK_SH) or die "$!"; # lock file for shared use my %dict; # holds the attested words of the language my %alphabet; # table of phonemes while () { chomp; $dict{$_} = 1; # record the existence of a word # collect any new phonemes in current word for (my $i=0; $i < length($_); $i++) { unless(exists ($alphabet{substr($_,$i,1)})) { $alphabet{substr($_,$i,1)} = 1; } } } close(INPUT) or die "$!"; # STEP 2: # get stimuli and calculate neighborhood density open (INPUT, "<" . $inputFile) or die "Can't read input file $!"; open (OUTPUT, ">" . $outputFile) or die "Can't write to output file $!"; flock(INPUT, LOCK_EX) or die "$!"; # lock file for exclusive use flock(OUTPUT,LOCK_EX) or die "$!"; # lock file for exclusive use while () { # at this point, each word of the input file at a time # will be in the special variable $_ chomp; # remove the "end of line" character from the current word # print an empty line in the output for any empty line in the output if (/^$/ or /^#/) { print OUTPUT "$_\n"; next; } my $formCounter = 0; # a count of potential neighbors my @foundCounter; # list of attested neighbors # replace each segment with all others for (my $pos = 0 ; $pos < length($_) ; $pos++){ my $currSeg = substr($_,$pos,1); # saves the current segment in postion "$pos" for later # go over all the alphabet for my $phon (sort keys %alphabet) { if ($phon ne $currSeg) { substr($_,$pos,1,$phon); # insert current phoneme in place of original phoneme $formCounter++; push(@foundCounter,$_) if exists($dict{$_}); } } substr($_,$pos,1,$currSeg); # puts the original segment in "$pos" back where it belongs } # delete each segment for (my $pos = 0 ; $pos < length($_) ; $pos++){ my $currSeg = substr($_,$pos,1); # saves the original segment in position "$pos" for later substr($_,$pos,1,""); # delete seg $formCounter++; push(@foundCounter,$_) if exists($dict{$_}); substr($_,$pos,0,$currSeg); # puts the original segment in "$pos" back where it belongs } # add every possible segment in every position for (my $pos = 0 ; $pos <= length($_) ; $pos++){ for my $phon (sort keys %alphabet) { substr($_,$pos,0,$phon); # insert a phoneme at position "$pos" $formCounter++; push(@foundCounter,$_) if exists($dict{$_}); substr($_,$pos,1,""); # remove the inserted phoneme } } # print result (whether or not the input word was found in the dictionary) unless (scalar(@foundCounter)==0 or exists($dict{$_}) or exists($dict{$_."mak"}) or exists($dict{$_."mek"})) { print OUTPUT "$_\t"; # print the frequency (four decimal places) and the actual neighbors # \t = tab (useful for opening the file in excel as a "tab delimited file") print OUTPUT " " if (scalar(@foundCounter)<10); # print OUTPUT scalar(@foundCounter) ," / $formCounter = \t", int((scalar(@foundCounter) / $formCounter)*10000)/10000 , "\t@foundCounter\n"; print OUTPUT scalar(@foundCounter) ," / $formCounter \t@foundCounter\n"; } } close(INPUT) or die "$!"; close(OUTPUT) or die "$!"; print "Output file created.";