#!/usr/bin/perl
#
# A perl script that "babbles" to create new words
# It requires an input file of legal onsets, nuclei, and codas (see Sample.inv for an example)
# Written by Adam Albright (albright@ucsc.edu); last modified 2/16/03
#
# Flow: prompt for the inventory file, the number of distinct words wanted,
# the maximum number of syllables per word and the output file; read the
# inventory; generate random words until enough distinct ones exist; write
# one "word<TAB>times-generated" line per distinct word.

use strict;
use warnings;

# Retained from the original script.  Random picks below use int(rand(...)),
# so round() is no longer called by this code.
use Math::Round;

# A further syllable is appended only when rand() exceeds this value, i.e.
# with roughly 1/3 probability, so that monosyllables stay the most likely.
use constant CONTINUE_THRESHOLD => 0.66;

# Give up after this many duplicate candidates in a row: the inventory is
# then (almost certainly) too small to yield the requested number of words.
use constant MAX_CONSECUTIVE_DUPLICATES => 1_000_000;

# Make sure prompts without a trailing newline show up before we block on input.
$| = 1;

# Print $message, read one line from STDIN and return it without its line
# ending.  Dies if STDIN is exhausted.
sub prompt {
    my ($message) = @_;
    print $message;
    my $answer = <STDIN>;
    die "Warning: unexpected end of input\n" unless defined $answer;
    $answer =~ s/\r?\n\z//;
    return $answer;
}

# Prompt for a non-negative integer; surrounding whitespace is ignored.
# $what names the quantity in the error message.  Dies on anything else.
sub prompt_integer {
    my ($message, $what) = @_;
    my $answer = prompt($message);
    $answer =~ s/\A\s+|\s+\z//g;
    $answer =~ /\A\d+\z/
        or die "Warning: $what must be a non-negative integer\n";
    return $answer;
}

# Read the next line from $fh with its LF or CRLF line ending removed.
# Returns undef at end of file.
sub next_line {
    my ($fh) = @_;
    my $line = <$fh>;
    return unless defined $line;
    $line =~ s/\r?\n\z//;
    return $line;
}

# Read an inventory from the filehandle $fh, echoing onsets and nuclei to
# STDOUT as they are read.
#
# the structure of an inventory file is onsets, nuclei, and then codas.
# they should be separated by labels ("Onsets", "Nuclei", "Codas") on individual lines,
# and each possible phoneme should be on its own line.
#
# Inside the Onsets section a blank line is stored as an empty onset; a blank
# line (or end of file) ends the Nuclei and Codas sections.
#
# Returns a hash reference with keys onsets, nuclei and codas (array
# references).  Dies if the file does not start with "Onsets", if the
# "Nuclei" label is never found, or if no nucleus is listed.
sub read_inventory {
    my ($fh) = @_;
    my (@onsets, @nuclei, @codas);

    my $line = next_line($fh);
    defined $line and $line eq 'Onsets'
        or die "Warning: inventory file does not appear to be valid (Must begin with onsets)\n";

    print "Onsets:";
    while (1) {
        $line = next_line($fh);
        # Without this check a missing label would make the loop run forever.
        defined $line
            or die "Warning: inventory file does not appear to be valid (no Nuclei section)\n";
        last if $line eq 'Nuclei';
        push @onsets, $line;
        print " $line";
    }

    print "\nNuclei: ";
    while (defined($line = next_line($fh)) and $line ne 'Codas' and $line ne '') {
        push @nuclei, $line;
        print " $line";
    }
    @nuclei
        or die "\nWarning: inventory file does not appear to be valid (no nuclei listed)\n";

    # Tolerate blank lines between the nuclei and the Codas label, so the
    # label itself is never mistaken for a coda.
    $line = next_line($fh) while defined $line and $line eq '';

    print "\nCodas: ";
    if (defined $line and $line eq 'Codas') {
        while (defined($line = next_line($fh)) and $line ne '') {
            push @codas, $line;
        }
    }

    print "\nDone reading in inventory\n";
    return { onsets => \@onsets, nuclei => \@nuclei, codas => \@codas };
}

# Return a uniformly chosen element of the array referenced by $items, or
# the empty string when the array is empty (e.g. an inventory without codas).
sub random_element {
    my ($items) = @_;
    return '' unless @$items;
    return $items->[ int(rand(scalar @$items)) ];
}

# Build one syllable: a random onset, then a random nucleus, then a random coda.
sub random_syllable {
    my ($inventory) = @_;
    return join '', map { random_element($inventory->{$_}) } qw(onsets nuclei codas);
}

# Build one candidate word of at least one and at most $max_syllables
# syllables (a maximum below 1 still yields a single syllable).
sub random_word {
    my ($inventory, $max_syllables) = @_;
    my $word      = random_syllable($inventory);
    my $syllables = 1;
    # make monosyllables most likely; go on to another syllable with only 1/3 probability
    while ($syllables < $max_syllables and rand() > CONTINUE_THRESHOLD) {
        $word .= random_syllable($inventory);
        $syllables++;
    }
    return $word;
}

# ---- main program ----

my $inventory_file = prompt("Enter file of onsets, nuclei, and codas: ");
open(my $inv_fh, '<', $inventory_file)
    or die "Warning: can't open inventory file: $!\n";

my $target_words  = prompt_integer("Enter number of words desired: ", "number of words");
my $max_syllables = prompt_integer("Enter maximum number of syllables: ", "maximum number of syllables");

my $outputfile = prompt("Save as file: ");
open(my $out_fh, '>', $outputfile)
    or die "Warning: Can't open output file: $!\n";

# first read in the inventory file
my $inventory = read_inventory($inv_fh);
close $inv_fh;

# now generate random words until enough distinct ones have been seen;
# %known_words maps each word to the number of times it was generated
my %known_words;
my $current_words = 0;
my $duplicate_run = 0;
while ($current_words < $target_words) {
    my $candidate_word = random_word($inventory, $max_syllables);
    if (!exists $known_words{$candidate_word}) {
        # a new word
        $known_words{$candidate_word} = 1;
        $current_words++;
        $duplicate_run = 0;
    }
    else {
        $known_words{$candidate_word}++;
        if (++$duplicate_run >= MAX_CONSECUTIVE_DUPLICATES) {
            warn "Warning: only $current_words distinct words could be generated "
               . "(requested $target_words); giving up\n";
            last;
        }
    }
}

# now have generated the target number of words
# (print, not printf: a word must never be interpreted as a format string)
foreach my $word (sort keys %known_words) {
    print {$out_fh} "$word\t$known_words{$word}\n";
}
close $out_fh
    or die "Warning: Can't write output file: $!\n";

print "Done generating novel words\n";