#!/usr/bin/perl
# (c) 2001 Vlado Keselj
#
# Print leaves of the trees given a forest.
# (Standard tree formats, e.g. Penn TreeBank.)
#
# Example: on input:
#   (S (N I) (V code) (. .))
#   (S (N You)
#      (V code)
#      (. .))
#
# produces:
#   I code .
#   You code .

use strict;
use warnings;

# Scanner state shared by scan() and doTree().
my $CurrentToken = 'START';    # most recent token; '' means end of input
my $CurrentLine  = '';         # not-yet-tokenized rest of the current line

printLeaves();

# Read a forest from the files named in @ARGV (or STDIN when there are none)
# and print the leaves of every top-level tree, each followed by a space,
# one tree per output line.
sub printLeaves {
    # Reset the scanner so the routine can be run more than once.
    $CurrentToken = 'START';
    $CurrentLine  = '';

    scan();
    while ($CurrentToken ne '') {
        doTree();
        print "\n";
    }
    return;
}

# Advance to the next token and store it in $CurrentToken.
#
# A token is a single '(' or ')', or a maximal run of characters that are
# neither whitespace nor parentheses.  Tokens may be spread over any number
# of input lines.
#
# Returns the new token, or '' once the input is exhausted.  End of input is
# sticky: after it has been reached every further call returns ''.
sub scan {
    return '' if $CurrentToken eq '';

    # Refill the line buffer until it holds something besides whitespace.
    # The buffer is tested with length/defined instead of plain truth,
    # because the string "0" is false in Perl and a lone "0" token would
    # otherwise be mistaken for an empty buffer or for end of file.
    while (1) {
        $CurrentLine =~ s/^\s+//;
        last if length $CurrentLine;

        $CurrentLine = <>;
        if (!defined $CurrentLine) {
            $CurrentLine  = '';
            $CurrentToken = '';
            return '';
        }
    }

    # Strip the token (and the whitespace after it) off the front of the
    # buffer.  The buffer starts with a non-blank character here, so one of
    # the two alternatives always matches; the die is only a safety net.
    $CurrentLine =~ s/^([()]|[^\s()]+)\s*//
        or die "scan: cannot tokenize input";
    $CurrentToken = $1;

    return $CurrentToken;
}

# Print the leaves of the tree that starts at $CurrentToken and leave
# $CurrentToken on the first token after that tree.
#
# A tree is either a single token, which is printed followed by a space, or
# a parenthesized node "( [label] subtree... )".  The first token after '('
# is skipped as the node label unless it is itself a '('.
#
# Dies when the input ends inside an unclosed node.  No other validation is
# done: any token that is not '(' -- including a ')' found where a subtree
# is expected -- is printed like a leaf.
sub doTree {
    if ($CurrentToken ne '(') {
        print "$CurrentToken ";
        scan();
        return;
    }

    scan();
    die "doTree: unexpected end of input after '('" if $CurrentToken eq '';

    # Skip the node label; a '(' here means the node is unlabeled.
    scan() if $CurrentToken ne '(';

    # At least one subtree is expected before the closing parenthesis.
    do {
        doTree();
        die "doTree: unexpected end of input, missing ')'"
            if $CurrentToken eq '';
    } while ($CurrentToken ne ')');

    scan();    # consume the ')'
    return;
}