#!/usr/bin/perl -W

##############################################################################
# edict - Your personal command line dictionary
#
# Copyright (c) 2002-2003 Vishal Verma <vermavee@users.sf.net>. 
# All rights reserved.
# 
#    This program is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 2 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program; if not, write to the Free Software
#    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
# ChangeLog is maintained as a separate file.
#
##############################################################################

use Socket;
use Getopt::Std;
use strict;

# this is just a temporary fix for perl 5.8.0. 5.8.1 won't need this
use bytes;

# error codes returned by gethostbyname: BUG 769932
my @netdb_errors = (	'Success',
						'Host not found!',
						'Non-authoritatie - host not found',
						'Non-recoverable errors',
						'Valid name, no data record of requested type',
						'Internal error'	# actually -1. Yes, it's a hack! :)
					);
						


# sub prototypes
sub parse_cmdline();
sub http_post($$$);
sub lookup($$);
sub indexof($$);

# m-w prototypes
sub mw_parse($$$$$);
sub mw_content($$$$);

# global vars
my $version = '1.2';
our ($opt_p);

# Default dictionary to use. Command line/Config options may override this OR 
# change it here. The assigned values should be one of the keys of hash %dicts
# which is defined further down
my $cur_dict = 'mwdict';
if ($0 =~ /ethes$/) {
	$cur_dict = 'mwthes';
}

# default server, to send HTTP request to. Command line switches may override.
my $proxy_server;
# port, if specified by the user
my $http_port;

# Whether the dictionary should offer suggestions (if available), or not
my $suggest = 1;
my $color = 1;


# POST request related vars
my @request = ('POST',
	#'Proxy-Connection: Keep-Alive',
    'Accept: image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*',
    'REFERER',
    'Accept-Language: en-us',
    'Content-Type: application/x-www-form-urlencoded',
    'Accept-Encoding: gzip, deflate',
    'User-Agent: unknown',
    'HOST',
    'CONTENTLEN',
    'Connection: Close',
    'Cache-Control: no-cache',
    '',
	'CONTENT'
);

# constants; these are closely related to the request above
my $post_cmd = indexof(\@request, 'POST');
my $post_referer = indexof(\@request, 'REFERER');
my $post_host = indexof(\@request, 'HOST');
my $post_content_len = indexof(\@request, 'CONTENTLEN');
my $post_content = indexof(\@request, 'CONTENT');


# Define each dictionary specific data here
my %mwdict = (	
		'TITLE', 'Merriam Webster Online Dictionary',
		'HOST', 'www.m-w.com',
		'POST', 'POST http://www.m-w.com/cgi-bin/dictionary HTTP/1.1',
		'REFERER', 'http://www.m-w.com/cgi-bin/dictionary',
		'CONTENT', \&mw_content,
		'PARSE', \&mw_parse,
	);

my %mwthes = (
		'TITLE', 'Merriam Webster Online Thesaurus',
		'HOST', 'www.m-w.com',
		'POST', 'POST http://www.m-w.com/cgi-bin/thesaurus HTTP/1.1',
		'REFERER','http://www.m-w.com/cgi-bin/thesaurus',
		'CONTENT', \&mw_content,
		'PARSE', \&mw_parse,
	);

my %dictionarydotcom = (
		'TITLE', 'www.dictionary.com dictionary',
		'HOST', 'dictionary.reference.com',
		'POST', 'POST http://dictionary.reference.com/search HTTP/1.1',
		'REFERER','http://dictionary.reference.com',
		'CONTENT', \&dictionarydotcom_content,
		'PARSE', \&dictionarydotcom_parse,
	);

# List of DICTIONARIES; Add your own below
my %dicts = (
	'mwdict', \%mwdict,
	'mwthes', \%mwthes,
	'dictionary.com', \%dictionarydotcom,
	);








##############################################################################
#                           MAIN program                                     #
##############################################################################

$0 =~ s/^.*\///o;
$0 =~ s/^.*\\//o;	# Win32: Bug [ 756309 ] Don't show full path in usage

print "edict - Your personal command line dictionary. Version $version.\n";
# parse the command line options
parse_cmdline;

$#ARGV >= 0 || die "usage:\n    $0 [-p proxy:port] <word1> [ <word2> ] ...\n";

lookup(shift(@ARGV), $suggest);
foreach (@ARGV) {
	print "----------------------------------------------------------\n";
	lookup($_, $suggest);
}




##############################################################################
# indexof
#
# finds the index of given element in a given array. The elements are assumed
# to be strings.
##############################################################################

sub indexof($$)
{
	my $list = shift;
	my $word = shift;

	my $not_found = 1;
	return grep {$not_found &= $word ne $_} @{$list};
}




##############################################################################
# parse_cmdline
#
# parses command line options and sets up various variables accordingly
##############################################################################

sub parse_cmdline()
{
	getopt('p:');
	if (defined $opt_p) {
		# if user specified just a port
		if ($opt_p =~ /^\d+$/) {
			$http_port = $opt_p;
		} elsif ($opt_p =~ /\w+:\d+/) { # user specified host & port
			# split host:port into host & port
			($proxy_server, $http_port) = split /:/, $opt_p;
		} else {
			$proxy_server = $opt_p;
		}
	}
}


##############################################################################
# lookup
#
# looks up the given word for meaning
##############################################################################

sub lookup($$)
{
	my $word = shift;
	my $suggest = shift;
	my (@suggestions, $meta, $choice);

	while (1) {

		$word = $suggestions[$choice] if (defined $choice);
		print "Looking up \"$word\" in $dicts{$cur_dict}->{TITLE}...\n\n";

		# get the content
		my $content = &{$dicts{$cur_dict}{'CONTENT'}}
								($cur_dict, $word, \@suggestions, $meta);

		# HTTP POST operation for current dictionary host
		my $response = http_post($cur_dict, $http_port, $content);

		# parse the response
		if (&{$dicts{$cur_dict}{'PARSE'}}($cur_dict, $response, $word, \@suggestions, \$meta)) {
			print ${$response};
		} else {
			# [ 697431 ] edict shows wrong error message for valid words 
			# in thesaurus
			if ($cur_dict eq 'mwthes') {
				print "No thesaurus entries found for $word.\n";
			} else {
				print "Don't understand Swahili words like $word ;)!!\n";
			}
		}

		last unless ($suggest && $#suggestions >= 0);

		# show suggestions
		print " ------------------------------------------------------------------------------\n";
		print "                    Suggestions/Alternative meanings\n";
		print " ------------------------------------------------------------------------------\n";
		# print items two at a time
		my $i;
		for ($i = 1; $i <= $#suggestions; $i += 2) {
			printf "%2d) %-35s %2d) %-35s\n", $i, $suggestions[$i-1], 
										$i+1, $suggestions[$i];
		}
		# print the last item if there were odd no. of entries
		printf "%2d) %-35s\n", $i, $suggestions[$i-1] if (($#suggestions + 1) % 2);
		print " ------------------------------------------------------------------------------\n";
		print "Your choice: ";

		# accept user input
		$choice = <STDIN>;
		chomp $choice;
		(defined $choice && $choice =~ /^\d+$/o) || last;
		--$choice;
		($choice >= 0 && $choice <= $#suggestions) || last; 

		# lookup user's choice in next iteration
	}
}




##############################################################################
# http_post
#
# performs HTTP POST operation with given content
##############################################################################

sub http_post($$$)
{
	my ($dict, $port, $content) = @_;
	my $dicthost = $dicts{$dict}{'HOST'};
	my $http_server = defined($proxy_server) ? $proxy_server : $dicthost;

	defined $http_server || die "No valid server to send request to!\n";
	defined $port || ($port = getservbyname('http', 'tcp'));
	#print "Port is $port, dict is $dict, dicthost is ", $dicthost, "\n";

	socket(DICT, PF_INET, SOCK_STREAM, getprotobyname('tcp')) || die "socket: $!\n";

	# convert hostname to IP : 769932
	my $sin;
	if ($http_server =~ /\d+\.\d+\.\d+\.\d+/) {
		$sin = sockaddr_in($port, inet_aton($http_server));
	} else {
		my $addr0 = (gethostbyname($http_server))[4];
		die "ERROR $http_server: ".$netdb_errors[$?] if $?;
		$sin = sockaddr_in($port, $addr0);
	}
		
	#print "connecting...\n";
	connect(DICT, $sin) || die "connect: $!\n";


	# setup the post request
	$request[$post_cmd] = $dicts{$dict}{'POST'};
	$request[$post_referer] = 'Referer: ' . $dicts{$dict}{'REFERER'};
	$request[$post_host] = "Host: $dicthost";
	$request[$post_content] = $content;
	$request[$post_content_len] = 'Content-length: '.length($request[$post_content]);


	#print "\n", @request, "\n";

	# send it
	foreach (@request) {
		send DICT, $_."\r\n", 0;
	}

	#print "sent\n";

	# store reply
	my $reply = '';
	while (<DICT>) {
		$reply .= $_;
		last if /<\/html>/;
	}

	#print "recd reply\n $reply";

	# close connection
	close(DICT);


	# return reply
	return \$reply;
}

	




# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# m-w functions
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #


##############################################################################
# mw_content
#
# generates the content for POST request
##############################################################################
sub BEGIN {
my $headword;			# make $headword a 'C'-like static variable
sub mw_content($$$$)
{
	my ($dict, $word, $suggestions, $meta) = @_;
	my $content;

	if (!defined $meta) {		# simple case
		$headword = $word;
		if ($dict eq 'mwdict') {
			$content = "book=Dictionary&va=$headword";
		} elsif ($dict eq 'mwthes') {
			$content = "book=Thesaurus&va=$headword";
		}
	} else { 	# complex case - consider meta information, which is list of values
		# convert non alpha chars in meta to their ascii codes in hex
		$meta =~ s/\[/%5B/og;
		$meta =~ s/\]/%5D/og;
		$meta =~ s/,/%2C/og;
		$meta =~ s/=/%3D/og;
		$meta =~ s/ /%20/og;
		$word =~ s/\[/%5B/og;
		$word =~ s/\]/%5D/og;
		$word =~ s/ /%20/og;

		if ($dict eq 'mwdict') {
			$content = "hdwd=$headword&listword=$headword&book=Dictionary&jump=$word&list=$meta";
		} elsif ($dict eq 'mwthes') {
			$content = "hdwd=$headword&listword=$headword&book=Thesaurus&jump=$word&list=$meta";
		}
	}

	#print $content;
	return $content;
}
}






##############################################################################
# mw_parse
#
# parses the response returned by m-w.
# Parse functions should return 1 on success and 0 on failure.
##############################################################################

sub mw_parse($$$$$)
{
	my $dict = shift;			# The first arg is the dict, so that you can
								# have a single parse function for multiple
								# dictionaries
	my $response = shift;
	my $word = shift;			# only non-reference argument
	my $suggestions = shift;
	my $meta = shift;

	# start with no suggestions
	@{$suggestions} = ();

	#print "response is : ", ${$response}, "\n";

	# -- 8< -- 8< -- edit/prune/splice operations below -- 8< -- 8< --
	# get rid of all newlines
	(${$response} =~ s/\r*\n//og)  || return 0;


	# if the word was misspelled, offer suggestions and indicate failure
	if (${$response} =~ s#.*Suggestions for <STRONG>$word</STRONG>##o) {
		if (${$response} =~ m#<PRE>(.*?)</PRE>#o) {
			my $suggestions_text = $1;
			#print "Suggestion text: $suggestions_text\n";
			while ($suggestions_text =~ s#<a .*?>(.*?)</a>##o) {
				push @{$suggestions}, $1;
			}
		}
		return 0;		# indicate failure
	}

	# word was OK, find the number of entries
	(${$response} =~ s/.*\W(\w+) entr(y|ies) found for.*?\.//o) || return 0;
	#print "no of entries: $1\n";
	my $nentries = $1;
	unless ($nentries eq 'One') {
		# m-w doesn't show more than 10 entries... so adjust the count if more than 10
		$nentries = $nentries > 10 ? 10 : $nentries;

		#print "no. of entries: $nentries\n";

		# parse the suggestions and store them in the suggestions list
		${$response} =~ s#(<select\s+style.*?/select>)##o;
		my $suggestions_text = $1;
		#print "Suggestion text: $suggestions_text\n";
		while ($suggestions_text =~ s/<option>([^<]+)//o) {
			#print "Option: $1, $suggestions_text\n";
			push @{$suggestions}, $1;
		}

		#print "Suggestions: ", join(', ', @{$suggestions}), "\n";

		${$response} =~ s/<input\s+type=hidden\s+name=list\s+value="(.*?)">//o;
		my $listval = $1;

		#print $listval;
		${$meta} = $listval;
	}
	
	#print ${$response}, "\n";
	# cut everything out except the "meat"
	(${$response} =~ s/.*<form name="entry".*?<\/form>//o) || return 0;
	(${$response} =~ s/<\/form>.*$//o) || return 0;

	# remove tabs
	${$response} =~ s/\t+/ /og;

	# remove font, href, img etc. tags
	${$response} =~ s/<font.*?>//og;
	${$response} =~ s/<\/font>//og;
	${$response} =~ s/<a .*?>(.*?)<\/a>/$1/og;
	${$response} =~ s/<img.*?>//og;
	${$response} =~ s/<\/?tt>//og;

	# separate multipart defns into multiple lines
	${$response} =~ s/<b>\s*([a-z])\s*<\/b>\s*<b>\s*:\s*<\/b>/$1:/og;
	${$response} =~ s/<b>\s*([a-z])\s*<\/b>/$1:/og;
	${$response} =~ s/<br>/\n/og;
	${$response} =~ s/<b>\s*:\s*<\/b>/\n\t:/og;

	# special case for thesaurus: Print Synonym and other headers with colon
	if ($dict eq 'mwthes') {
		${$response} =~ s/Synonyms /Synonyms: /og;
		${$response} =~ s/Related Word /Related Words: /og;
		${$response} =~ s/Contrasted Words /Contrasted Words: /og;
	}

	# change bold, italic tags
	if ($color && $^O =~ /linux/i) {
		${$response} =~ s/<b>/\e[1;34m/og;
		${$response} =~ s/<i>/\e[34m/og;
		${$response} =~ s/<\/[bi]>/\e[0m/og;
	} else {
    		${$response} =~ s/<([bi])>(.*?)<\/\1>/$2/og;
	}

	# change special html esc sequences to normal text
	${$response} =~ s/&amp;/&/og;
	${$response} =~ s/&lt;/</og;
	${$response} =~ s/&gt;/>/og;

	# umlauts / digraphs / diacritics
	${$response} =~ s/&uuml;//og;
	${$response} =~ s/&auml;//og;
	${$response} =~ s/&eacute;//og;
	${$response} =~ s/&iacute;//og;
	# This is the last change I'm making by hand! I'm starting another
	# project that converts these HTML umlaut codes to Unicode or something.
	# Just kiddin'. But, edict will have better (& generic) ability to
	# handle all umlauts in some future version.
	${$response} =~ s/&ccedil;//og;

	# fix Main Entry to show superscripted Entry # properly
	${$response} =~ s/<sup>(\d+)<\/sup>(\S+)/$2($1)/og;
	# fix pronunciation for superscript directives
	${$response} =~ s/<sup>(.)<\/sup>/($1)/og;
	# fix subscript directives: Bug 771934
	${$response} =~ s/<sub>(\d+)<\/sub>/($1)/og;

	# remove unwanted spaces
	${$response} =~ s/(\s){2,}/$1/og;
	${$response} =~ s/\s:/:/og;

	# separate meaning from date : BUG # 672762
	${$response} =~ s/Date:([^\n]*?):/Date:$1\n:/o;
	# synonyms etc. headings have already been changed above to have a colon
	# The line below utilizes that fact
	${$response} =~ s/Text:\s*(\w+):/Text:\n$1/o;

	# Bo, Wed Feb  4 02:20:04 CST 2004
	# fix the change of m-w site which now using table
	# remove all html tag now
	${$response} =~ s/<\/*[^>]*>//og;
	${$response} =~ s/&nbsp;/ /og;
	${$response} =~ s/Get the .*Top 10.*Symbols//o;

	# fix weird char in Windows : BUG 659236
	if ($^O =~ /mswin/i) {
		${$response} =~ s/\xB7/./og;
	}

	# wrap the words
	use Text::Wrap;
	$Text::Wrap::columns = 78;
	${$response} = wrap('', '', ${$response});
	return 1;
}






# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# www.dictionary.com functions
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

##############################################################################
# dictionarydotcom_content
#
# generates the content for POST request
##############################################################################
sub BEGIN {
my $headword;			# make $headword a 'C'-like static variable
sub dictionarydotcom_content($$$$)
{
	my ($dict, $word, $suggestions, $meta) = @_;
	my $content;

	if (!defined $meta) {		# simple case
		$headword = $word;
		if ($dict eq 'mwdict') {
			$content = "book=Dictionary&va=$headword";
		} elsif ($dict eq 'mwthes') {
			$content = "book=Thesaurus&va=$headword";
		}
	} else { 	# complex case - consider meta information, which is list of values
		# convert non alpha chars in meta to their ascii codes in hex
		$meta =~ s/\[/%5B/og;
		$meta =~ s/\]/%5D/og;
		$meta =~ s/,/%2C/og;
		$meta =~ s/=/%3D/og;
		if ($dict eq 'mwdict') {
			$content = "hdwd=$headword&listword=$headword&book=Dictionary&jump=$word&list=$meta";
		} elsif ($dict eq 'mwthes') {
			$content = "hdwd=$headword&listword=$headword&book=Thesaurus&jump=$word&list=$meta";
		}
	}

	#print $content;
	return $content;
}
}




##############################################################################
# dictionarydotcom_parse
#
# parses the response returned by www.dictionary.com
# Parse functions should return 1 on success and 0 on failure.
##############################################################################

sub dictionarydotcom_parse($$$$$)
{
	my $dict = shift;			# The first arg is the dict, so that you can
								# have a single parse function for multiple
								# dictionaries
	my $response = shift;
	my $word = shift;			# only non-reference argument
	my $suggestions = shift;
	my $meta = shift;

	# start with no suggestions
	@{$suggestions} = ();

	#print "response is : ", ${$response}, "\n";
}
