#!/usr/bin/perl
# Copyright (c) 1998, 1999 Bruce Martin
#
# Report how word sense numbers change between two versions of the
# WordNet index for one part of speech.
#
# Usage:
#   1. Set $old_path and $new_path appropriately.
#   2. Run sense-changes < map.out > sense-changes.out
#
# Input (STDIN or files named on the command line): one mapping per line,
# "old_synset_id new_synset_id", separated by whitespace.
#
# Output: one line per old sense whose number differs in the new index:
#   word<old_sense>[ TAB word<new_sense> ]
# The second column is omitted when no new sense was found.  A "~" is
# inserted between the word and the sense number when the word itself
# ends in a digit, so the two cannot run together.

require 5.004;          # For hard references and "foreach my".
use strict;
use Search::Dict;       # For binary search.

# Define the paths to the wordnet files.
my $pos       = "verb";
my $old_path  = "/home/bruce/proj/wn/wordnet/dict";
my $old_index = "$old_path/index.$pos";
my $new_path  = "/home/bruce/proj/wn/wordnet-1.6/dict";
my $new_index = "$new_path/index.$pos";

# Open the index files.
open( OLD_INDEX, $old_index ) or die "Can't open $old_index: $!";
open( NEW_INDEX, $new_index ) or die "Can't open $new_index: $!";

# Read the mapping into a hash: old synset id => new synset id.
my %map;
while ( defined( my $line = <> ) ) {
    # split ' ' ignores leading whitespace, so an indented line does not
    # produce an empty first field.
    my ( $old_synset_id, $new_synset_id ) = split ' ', $line;
    next unless defined $old_synset_id;
    $map{$old_synset_id} = $new_synset_id;
}

# Loop through the old words sequentially.
while ( defined( my $line = <OLD_INDEX> ) ) {
    # The word is the first whitespace-delimited token; lines that start
    # with whitespace are skipped.
    next unless $line =~ /^(\S+)/;
    my $word = $1;

    # Separator that keeps a trailing digit in the word apart from the
    # sense number.
    my $sep = $word =~ /\d$/ ? "~" : "";

    # Every space-prefixed 8-digit number on the line is treated as a
    # synset id; its position in that list is the old sense number.
    my $old_sense = 0;
    foreach my $old_synset_id ( $line =~ / (\d{8})/g ) {
        $old_sense++;
        my $new_sense = new_word_sense( $word, $map{$old_synset_id} );

        # Nothing to report when the sense number did not change.
        next if defined $new_sense && $old_sense == $new_sense;

        print $word, $sep, $old_sense;
        print "\t", $word, $sep, $new_sense if $new_sense;
        print "\n";
    }
}

close(OLD_INDEX);
close(NEW_INDEX);

###############################################################################

##### Find the new word sense for the given synset id.
#
# Arguments: $word          - the word to look up in the new index
#            $new_synset_id - synset id to locate (may be undef when the
#                             old synset has no mapping)
# Returns:   the 1-based position of $new_synset_id among the word's
#            synset ids in the new index, or undef if it is not found.
sub new_word_sense {
    my ( $word, $new_synset_id ) = @_;

    # An unmapped synset can never match; skip the index lookup.
    return undef unless defined $new_synset_id;

    my $position = 0;
    foreach my $id ( synset_ids( \*NEW_INDEX, $word ) ) {
        $position++;
        return $position if $id == $new_synset_id;
    }
    return undef;
}

##### List all the synset ids for a word.
#
# Arguments: $index - the index filehandle, either as a glob reference
#                     (\*NEW_INDEX) or, for backward compatibility, as
#                     the handle's name ("NEW_INDEX")
#            $word  - the word to look up
# Returns:   the list of 8-digit synset ids on the word's index line, in
#            file order; an empty list when the word is not present.
sub synset_ids {
    my ( $index, $word ) = @_;

    # Resolve a handle name to its glob; a reference is used as-is.
    my $fh = ref $index ? $index : do { no strict 'refs'; \*{$index} };

    # Binary-search the sorted index for the first line >= $word
    # (non-dictionary order, case-folded), then read that line.
    look *{$fh}, $word, 0, 1;
    my $line = <$fh>;

    # A lexical is used instead of $_ so the caller's current line is not
    # clobbered, and \Q...\E keeps characters such as "." in the word from
    # acting as regex metacharacters.
    return () unless defined $line && $line =~ /^\Q$word\E /i;
    return $line =~ / (\d{8})/g;
}