#! /usr/bin/env perl
#
# cluster-pop.pl
#
# Populate the cluster table from Siva's clusters.
# Siva's clusters seem to contain just 13506 word forms ...
#
# Input format, as recognized by the parser below:
#   * a line beginning with "cluster" starts a new cluster; the whole
#     line is used as the cluster name;
#   * a line beginning with a single space names a member word of the
#     most recently seen cluster; everything after that space is the word;
#   * any other line is ignored.
#
# Each member word is inserted into the ClusterMembers table as a
# (cluster_name, inflected_word) row.
#
# Usage: cat clusters_weka-trimmed.txt | ./cluster-pop.pl
#

use strict;
use warnings;

use DBI;

# Unbuffered STDOUT, so progress output appears as rows are inserted.
$| = 1;

my $dbh = DBI->connect('DBI:SQLite:dbname=clusters.db', 'linas', 'asdf')
    or die "Couldn't connect to database: " . DBI->errstr;

# Prepared once, executed once per member word; the placeholders keep
# the input text out of the SQL string.
my $insert = $dbh->prepare(
    'INSERT INTO ClusterMembers (cluster_name, inflected_word) VALUES (?,?)');

my $cluster_name = "";
my $inflected_word = "";

my $clust_cnt = 0;
my $word_cnt = 0;

while (my $line = <>)
{
    # chomp, not chop: strip only a trailing newline.  chop would eat the
    # last character of a final line that lacks a terminating newline,
    # silently truncating the word that gets stored.
    chomp $line;

    # Cluster header: remember it as the current cluster.
    if ($line =~ /^cluster/)
    {
        $cluster_name = $line;
        print "$cluster_name\n";
        $clust_cnt ++;
        next;
    }

    # Member line: a leading space followed by the inflected word.
    if ($line =~ /^ (.*)$/)
    {
        $inflected_word = $1;
        print "\t$inflected_word\n";

        $insert->execute($cluster_name, $inflected_word)
            or die "Couldn't execute statement: " . $insert->errstr .
                   "\nword was " . $inflected_word;
        $word_cnt ++;
    }
}

print "Added $word_cnt words to $clust_cnt clusters\n";