#!/bin/sh
# ^^^^^^^ Edit if your /bin/sh has no idea about POSIX. Known to work:
#         - GNU bash 2.05b and 3.00.x
#         - ash included in BusyBox 1.00
#         - /bin/sh of OpenBSD 3.6

#############################################################################
#
# lzmash - gzip/bzip2 like command line interface for LZMA encoder/decoder
#
# This script gives a gzip and bzip2 compatible command line interface
# for Igor Pavlov's LZMA encoder/decoder program included in the LZMA SDK.
# LZMA SDK is available from http://7-zip.org/sdk.html under the GNU LGPL.
#
# This script is part of LZMA utils package.
# http://tukaani.org/lzma/
#
# This script is used by LZMA-patched GNU tar 1.15.1 to support LZMA
# compressed tar archives.
#
#############################################################################
#
# Copyright (C) 2005 Lasse Collin <lasse.collin@tukaani.org>
#
# This script is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This script is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
#############################################################################

# Version string of this wrapper script. show_version() prints it after the
# version reported by the lzma binary.
VERSION="4.27.1"

# Default option -1 to -9 if no compression ratio is explicitly specified on
# the command line. -7 is equivalent to default settings of LZMA 4.17.
# Tip: the default is used when lzmash is called from tar(1).
# The value is put in front of the parsed argument list as "-N", so any
# preset given on the command line is processed later and overrides it.
DEFAULT_COMPRESSION=7 # Only number 1 .. 9, no leading dash

#############################################################################

show_help() {
	# Print the usage text to stdout and exit successfully. The variant
	# with long options is shown only when HAVE_GNU_GETOPT is 1; any
	# other value selects the short-options-only text.
	case $HAVE_GNU_GETOPT in
	1)
		cat << EOF

Usage: lzmash [flags and input files in any order]

  -c --stdout       output to standard output
  -d --decompress   force decompression
  -z --compress     force compression
  -k --keep         keep (don't delete) input files
  -f --force        force overwrite of output file and compress links
  -t --test         test compressed file integrity
  -r --recursive    operate recursively on directories
  -S .suf  --suffix .suf   use suffix .suf on compressed files
  -q --quiet        suppress error messages
  -v --verbose      be verbose (show output of lzma binary)
  -P /path/lzma  --lzma-path /path/lzma   path to the lzma executable
  -T --check-path   only check that lzma binary is in the PATH and quit
  -h --help         print this message
  -L --license      display the license information of LZMA and lzmash
  -V --version      display LZMA and lzmash version numbers
  -1 .. -2          fast compression
  -3 .. -9          good to excellent compression. -$DEFAULT_COMPRESSION is the default.
  -e --extreme      maximize the compression ratio of -3 .. -9; very slow
     --fast         alias for -1
     --best         alias for -9 (usually *not* what you want)

  Memory usage depends a lot on the chosen compression mode -1 .. -9.
  See the man page lzmash(1) for details. --extreme doesn't affect the
  memory usage, only compression time.

EOF
		;;
	*)
		cat << EOF

Usage: lzmash [options] [input files ...]

  -c          output to standard output
  -d          force decompression
  -z          force compression
  -k          keep (don't delete) input files
  -f          force overwrite of output file and compress links
  -t          test compressed file integrity
  -r          operate recursively on directories
  -S .suf     use suffix .suf on compressed files
  -q          suppress error messages
  -v          be verbose (show output of lzma binary)
  -P /path/lzma   path to the lzma executable
  -T          only check that lzma binary is in the PATH and quit
  -h          print this message
  -L          display the license information of LZMA and lzmash
  -V          display LZMA and lzmash version numbers
  -1 .. -2    fast compression
  -3 .. -9    good to excellent compression. -$DEFAULT_COMPRESSION is the default.
  -e          maximize the compression ratio of -3 .. -9; very slow

  Memory usage depends a lot on the chosen compression mode -1 .. -9.
  See the man page lzmash(1) for details. -e doesn't affect the memory
  usage, only compression time.

EOF
		;;
	esac
	exit 0
}

show_license() {
	# Print the license summary to stdout and exit successfully.
	# The text is static, so the here-document delimiter is quoted to
	# make it clear that nothing in it is meant to be expanded.
	cat << 'EOF'
______________________________________________________________________

  LZMA SDK - Copyright by Igor Pavlov

  See http://7-zip.org/sdk.html or the documentation of LZMA SDK for
  its license. For the reference, the version 4.23 is free software
  licensed under the GNU LGPL.
______________________________________________________________________

  LZMA.sh (lzmash) - Copyright by Lasse Collin
  This script is part of the LZMA utils package.
  http://tukaani.org/lzma/

  This script is free software; you can redistribute it and/or
  modify it under the terms of the GNU Lesser General Public
  License as published by the Free Software Foundation; either
  version 2.1 of the License, or (at your option) any later version.

  This script is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  Lesser General Public License for more details.
______________________________________________________________________

EOF
	exit 0
}

show_version() {
	# Print the version of the lzma binary followed by the version of
	# this script, both to stderr, then exit successfully. The binary is
	# run without arguments and the "LZMA <version>" part is cut out of
	# whatever it prints on stdout or stderr.
	{
		"$LZMA_EXECUTABLE" 2>&1 | grep -v '^$' \
				| sed -n 's,^LZMA *\([^ ]*\) .*$,LZMA \1,p'
		echo "lzmash $VERSION"
	} 1>&2
	exit 0
}

# Print a message to stderr, prefixed with "$MYNAME:".
#   $1       = minimum value of $VERBOSE at which the message is shown
#   $2 ...   = the message; multiple words are joined with spaces
# Nothing is printed (and 0 is returned) when $1 is greater than $VERBOSE.
show_msg() {
	[ $1 -gt $VERBOSE ] || {
		shift 1
		echo "$MYNAME:" "$@" 1>&2
	}
}

compress() {
# $1 = name of the input (a file name or a descriptor path).
# The compressed data is always written to /dev/fd/6. The exit status is
# that of the lzma binary, or of 'cd' if the directory cannot be entered.
	if [ -f "$1" ]; then
		# Regular file: enter its directory in a subshell and hand
		# the bare file name to lzma, which then reads the file itself.
		(
			cd "$(dirname "$1")" || exit
			if [ $VERBOSE -ge 3 ]; then
				"$LZMA_EXECUTABLE" e $LZMA_ARGS \
						-- "$(basename "$1")" /dev/fd/6
			else
				# Hide the messages of the lzma binary:
				"$LZMA_EXECUTABLE" e $LZMA_ARGS \
						-- "$(basename "$1")" /dev/fd/6 \
						2> /dev/null
			fi
		)
	elif [ $VERBOSE -ge 3 ]; then
		# Anything else is streamed through lzma's stdin and stdout.
		"$LZMA_EXECUTABLE" e -si -so $LZMA_ARGS \
				< "$1" >>/dev/fd/6
	else
		# Streamed, with the messages of the lzma binary hidden:
		"$LZMA_EXECUTABLE" e -si -so $LZMA_ARGS \
				< "$1" >>/dev/fd/6 2>/dev/null
	fi
}

decompress() {
# $1 = name of the input (a file name or a descriptor path), which is fed
# to lzma's stdin. The decompressed data is always appended to /dev/fd/6.
# The exit status is that of the lzma binary.
	if [ $VERBOSE -ge 3 ]; then
		# Very verbose: let the messages of the lzma binary through.
		"$LZMA_EXECUTABLE" d -si -so < "$1" >> /dev/fd/6
		return
	fi
	# Otherwise hide the messages of the lzma binary:
	"$LZMA_EXECUTABLE" d -si -so < "$1" >> /dev/fd/6 2> /dev/null
}

# Print the size in bytes of the file named by $1 (symlinks are followed).
# Nothing is printed if 'ls' fails, e.g. because the file does not exist;
# callers have to cope with an empty result.
filesize() {
	# "--" keeps a file name that starts with a dash from being parsed
	# as options. -n prints numeric owner and group IDs, so names that
	# contain spaces cannot shift the size out of the fifth column.
	# 'head' shouldn't be needed but is just in case something goes wrong.
	ls -dlLn -- "$1" 2> /dev/null | tr -s ' ' | cut -f 5 -d ' ' | head -n 1
}

# Print the recognized compressed-file suffix of the file name $1.
# The candidates are tried in this order: $FILENAME_SUFFIX, ".lzma", ".tlz".
# Returns 0 after printing the first suffix that matches, or 1 (printing
# nothing) when the name ends in none of them.
give_suffix() {
	# $FILENAME_SUFFIX is quoted inside the expansion so that it is
	# matched literally. Unquoted, characters like '*' or '?' in a
	# user-supplied suffix would act as pattern characters and make
	# unrelated file names look as if they had the suffix.
	for J in "${1%"$FILENAME_SUFFIX"}" "${1%.lzma}" "${1%.tlz}"; do
		if [ "$J" != "$1" ]; then
			# printf, because echo would swallow a suffix
			# that looks like one of its options (e.g. "-n").
			printf '%s\n' "${1#"$J"}"
			return 0
		fi
	done
	return 1
}

# Defaults that are in effect before the command line is parsed.

# Exit status of the whole script:
EXITSTATUS=0

# Where the data comes from and where it goes:
USE_STDIN=0
USE_STDOUT=0

# Mode of operation:
DECOMPRESS=0
TEST_ONLY=0
RECURSIVE=0
CHECK_LZMA_PATH_ONLY=0

# Modifiers:
MAXIMUM_COMPRESSION=0
FORCE=0
KEEP_FILE=0
VERBOSE=1
FILENAME_SUFFIX=".lzma"

# The lzma binary; looked up in the PATH unless a path is given:
LZMA_EXECUTABLE=lzma

# The name this script was called with; used as a prefix in messages and
# to select the default mode below:
MYNAME=$(basename "$0")

# Refuse to overwrite existing files with '>' redirection. This was
# originally meant to stop symlink attacks, but it does not help when the
# symlink points to a device node. :-( One more reason to rewrite lzmash
# in C. ;-)
set -C

# New files are first created with a secure umask:
umask 0077

# The calling name can switch the default from compression to
# decompression, and a name ending in "cat" also selects standard output:
case "$MYNAME" in
	*un*zma*)
		DECOMPRESS=1
		;;
	*cat)
		DECOMPRESS=1
		USE_STDOUT=1
		;;
esac

# Parse command line arguments. Only GNU getopt supports long options;
# 'getopt -T' exits with status 4 when it is the GNU (enhanced) version.
#
# $LZMASH is not set anywhere in this script, so it presumably carries
# default options from the environment. It is deliberately left unquoted
# so that it is split into separate words, and it is placed before "$@" so
# that options given on the command line are processed later.
#
# Every option handled in the 'case' below must also be listed in both
# getopt option strings (and in the -l lists for its long form, without
# leading dashes), or getopt rejects it before the 'case' is reached.
getopt -T > /dev/null 2> /dev/null
if [ $? = 4 ]; then # We have GNU getopt
	HAVE_GNU_GETOPT=1
	ARGS=$(getopt -n "$MYNAME" -o 123456789cdefhkLP:qrS:tTvVz -l fast,best \
			-l stdout,to-stdout,decompress,uncompress,extreme \
			-l force,help,keep,license,lzma-path:,quiet,recursive \
			-l suffix:,test,check-path,verbose,version,compress \
			-- $LZMASH "$@")
else # Fallback to a more portable behavior without support for long options:
	HAVE_GNU_GETOPT=0
	ARGS=$(getopt 123456789cdefhkLP:qrS:tTvVz $LZMASH "$@")
fi
# $? is still the exit status of the getopt call made inside the 'if':
[ $? != 0 ] && exit 1 # Error: invalid command line parameters
# The default preset goes first so that any preset in $ARGS overrides it.
# 'eval' is needed because GNU getopt returns its result shell-quoted.
eval set -- "-$DEFAULT_COMPRESSION $ARGS"
while : ; do
case "$1" in
	# The fast options:
	-1|--fast)   COMP_MODE=0; DICT_SIZE=16; FAST_BYTES=64;  MF_ID=hc3 ;;
	-2)          COMP_MODE=0; DICT_SIZE=20; FAST_BYTES=64;  MF_ID=hc4 ;;
	# Somewhat slower settings:
	-3)          COMP_MODE=1; DICT_SIZE=19; FAST_BYTES=64;  MF_ID=bt4 ;;
	-4)          COMP_MODE=1; DICT_SIZE=20; FAST_BYTES=64;  MF_ID=bt4 ;;
	-5)          COMP_MODE=1; DICT_SIZE=21; FAST_BYTES=128; MF_ID=bt4 ;;
	-6)          COMP_MODE=1; DICT_SIZE=22; FAST_BYTES=128; MF_ID=bt4 ;;
	-7)          COMP_MODE=1; DICT_SIZE=23; FAST_BYTES=128; MF_ID=bt4 ;;
	# Got lots of RAM and CPU time? ;-)
	-8)          COMP_MODE=1; DICT_SIZE=24; FAST_BYTES=255; MF_ID=bt4 ;;
	-9|--best)   COMP_MODE=1; DICT_SIZE=25; FAST_BYTES=255; MF_ID=bt4 ;;
	# The rest of the command line parameters:
	-c|--stdout|--to-stdout)      USE_STDOUT=1 ;;
	-d|--decompress|--uncompress) DECOMPRESS=1 ;;
	-e|--extreme)                 MAXIMUM_COMPRESSION=1 ;;
	-f|--force)                   FORCE=1 ;;
	-h|--help)                    show_help ;;
	-k|--keep)                    KEEP_FILE=1 ;;
	-L|--license)                 show_license ;;
	-P|--lzma-path)               LZMA_EXECUTABLE=$2; shift 1 ;;
	-q|--quiet)                   VERBOSE=$((VERBOSE - 1)) ;;
	-r|--recursive)               RECURSIVE=1 ;;
	-S|--suffix)                  FILENAME_SUFFIX=$2; shift 1 ;;
	-t|--test)                    TEST_ONLY=1 ;;
	-T|--check-path)              CHECK_LZMA_PATH_ONLY=1 ;;
	-v|--verbose)                 VERBOSE=$((VERBOSE + 1)) ;;
	-V|--version)                 show_version ;;
	-z|--compress)                DECOMPRESS=0 ;;
	--)                           shift 1; break ;; # End of options.
	*) show_msg 0 "Fatal error parsing command line options. You probably"\
					"found a bug in the wrapper script."
					exit 5 ;;
esac
shift 1
done

# The command line options have been parsed. What follows prepares the
# main loop. The steps build on each other, so their order is important.

# Make sure that the lzma binary is available:
if ! type "$LZMA_EXECUTABLE" > /dev/null 2> /dev/null; then
	show_msg 0 "Cannot find 'lzma' binary. Use -P to specify the path."
	exit 4
fi
if [ $CHECK_LZMA_PATH_ONLY = 1 ]; then
	# Only the check was asked for, and it has just succeeded.
	show_msg 1 "'lzma' found in the PATH."
	exit 0
fi

# Without file names, work as a filter from standard input to standard
# output:
if [ $# = 0 ]; then
	USE_STDIN=1
	USE_STDOUT=1
fi

# Check if we can use file descriptors to set file owner, perms and time.
# This probably works only on GNU/*/Linux, sorry. lzmash should be rewritten
# in C/C++ to avoid these kind of tricks.
IS_GNU_LINUX=0  # A bit misleading variable name
if [ $USE_STDOUT = 0 -a -L /dev/fd/0 ] \
		&& chown --help > /dev/null 2> /dev/null \
		&& chmod --help > /dev/null 2> /dev/null; then
	IS_GNU_LINUX=1
fi

# Test mode is decompression with the output thrown away and the input
# files left alone:
if [ $TEST_ONLY = 1 ]; then
	DECOMPRESS=1
	KEEP_FILE=1
	USE_STDOUT=0
	exec 6> /dev/null
fi

if [ $DECOMPRESS = 0 -a $USE_STDOUT = 1 ]; then
	# Compressed data is written to a terminal only with --force.
	# The parentheses around 'tty' are for portability.
	if [ $FORCE = 0 ] && ( tty -s < /dev/stdout ) 2> /dev/null; then
		show_msg 0 "Compressed data not written to a terminal." \
				"Use -f to force compression or -h for help."
		exit 1
	fi
	# Only one file can be compressed to the standard output:
	if [ $# -gt 1 ]; then
		show_msg 0 "Cannot compress more than one file" \
				"to the standard output."
		exit 1
	fi
elif [ $DECOMPRESS = 1 -a $USE_STDIN = 1 -a $FORCE = 0 ]; then
	# Likewise, compressed data is read from a terminal only with
	# --force:
	if ( tty -s ) 2> /dev/null; then
		show_msg 0 "Compressed data not read from a terminal." \
				"Use -f to force decompression or -h for help."
		exit 1
	fi
fi

# --extreme upgrades compression mode 1 to mode 2; it has no effect on
# the presets that use mode 0:
if [ $MAXIMUM_COMPRESSION = 1 -a $COMP_MODE = 1 ]; then
	COMP_MODE=2
fi

# The options that are passed to lzma when compressing:
LZMA_ARGS="-a$COMP_MODE -d$DICT_SIZE -fb$FAST_BYTES -mf$MF_ID"

# When reading standard input, the main loop runs exactly once with
# /dev/stdin as its only input:
if [ $USE_STDIN = 1 ]; then
	set -- /dev/stdin
fi

# In this script /dev/fd/5 refers to input and /dev/fd/6 to output.
if [ $USE_STDOUT = 1 ]; then
	exec 6>&1
fi

# The main loop. Every pass takes the first remaining name from the
# positional parameters. In recursive mode a directory is replaced by the
# non-directories that 'find' reports below it, which are put at the front
# of the list. Problems with one input set EXITSTATUS and skip to the next
# input instead of aborting the whole run.
while [ $# != 0 ]; do

SOURCE=$1
SOURCE_FD=$1
shift 1

if [ $USE_STDIN = 0 ]; then

	# Check if file is readable. ash and at least bash versions up
	# to and including 3.00.15 assume that root can read everything
	# which is not always correct. I'm still not going to add any
	# workaround here, maybe they fix the bug some day (yes, I have
	# reported it).
	if [ ! -r "$SOURCE" ]; then
		show_msg 0 "cannot read input file $SOURCE"
		EXITSTATUS=1 # Error
		continue
	fi

	# Symlinks and other non-files are accepted only with --force. In
	# addition to files, also directories are accepted when --recursive.
	if [ $FORCE = 0 -a \( -L "$SOURCE" -o \( ! -f "$SOURCE" -a \
			! \( $RECURSIVE = 1 -a -d "$SOURCE" \) \) \) ]; then
		if [ $RECURSIVE = 1 ]; then
			show_msg 1 "$SOURCE is not a regular file or" \
					"directory -- ignored"
		else
			show_msg 1 "$SOURCE is not a regular file -- ignored"
		fi
		[ $EXITSTATUS = 0 ] && EXITSTATUS=2 # Warning
		continue
	fi

	# Recursive operation:
	if [ $RECURSIVE = 1 -a -d "$SOURCE" ]; then
		# Do not operate recursively on root directory.
		# See "info coreutils treating".
		if [ "$(cd "$SOURCE" && pwd)" = "/" ]; then
			show_msg 0 "cowardly refusing to operate on /"
			exit 1
		fi
		# This breaks if filenames contain newlines. Note that
		# IFS=$'\n' doesn't work with ash shell but this works:
		IFS='
'
		# Security issue: There's a race condition between 'find'
		# and actual compression process with 'lzma'. Do not use
		# 'lzmash -r' on group or world writable directories to
		# avoid this issue.
		if [ $FORCE = 1 ]; then
			# Security note: following symlinks can cause an
			# infinite loop depending on the implementation
			# of 'find'.
			set -- $(find "$SOURCE" \! -type d -follow \
					2> /dev/null) "$@"
		else
			set -- $(find "$SOURCE" \! -type d \
					2> /dev/null) "$@"
		fi
		unset IFS
		continue
	fi

	if [ $DECOMPRESS = 0 ]; then
		# Compressing: if the source file already has $FILENAME_SUFFIX
		# as an extension we skip it:
		if give_suffix "$SOURCE" > /dev/null; then
			show_msg 1 "$SOURCE already has" \
					"'$(give_suffix "$SOURCE")'" \
					"suffix -- unchanged"
			[ $EXITSTATUS = 0 ] && EXITSTATUS=2 # Warning
			continue
		fi
	else
		# Decompressing: if source file doesn't have a correct
		# extension refuse to decompress unless output is stdout:
		if [ $USE_STDOUT = 0 ] && ! give_suffix "$SOURCE" \
				> /dev/null; then
			show_msg 1 "$SOURCE has unknown suffix -- skipped"
			[ $EXITSTATUS = 0 ] && EXITSTATUS=2 # Warning
			continue
		fi
	fi

	# On GNU/*/Linux we can use file descriptor for the source file:
	if [ $IS_GNU_LINUX = 1 ]; then
		exec 5< "$SOURCE"
		if [ $? != 0 ]; then
			show_msg 0 "error opening the source file"
			EXITSTATUS=1 # Error
			continue
		fi
		SOURCE_FD=/dev/fd/5
	fi

	if [ $USE_STDOUT = 0 -a $TEST_ONLY = 0 ]; then
		# Determine the target filename:
		if [ $DECOMPRESS = 0 ]; then
			TARGET="${SOURCE}${FILENAME_SUFFIX}"
		elif [ "${SOURCE%"$FILENAME_SUFFIX"}" != "$SOURCE" ]; then
			TARGET=${SOURCE%"$FILENAME_SUFFIX"}
		elif [ "${SOURCE%.lzma}" != "$SOURCE" ]; then
			TARGET=${SOURCE%.lzma}
		elif [ "${SOURCE%.tlz}" != "$SOURCE" ]; then
			TARGET=${SOURCE%.tlz}.tar
		fi
		# Verify that the target filename != source filename:
		if [ "$SOURCE" = "$TARGET" ]; then
			show_msg 1 "$SOURCE: unknown suffix -- ignored"
			[ $EXITSTATUS = 0 ] && EXITSTATUS=2 # Warning
			continue
		fi
		# Check that target file does not exist. If someone tries to
		# exploit the race condition between '-e' and creating the
		# file using '>' we have already defined 'set -C' so it should
		# not be a security problem.
		if [ -e "$TARGET" ]; then
			if [ $FORCE = 1 ]; then
				# Should we use "rm -rf" instead of "rm -r"
				# to remove the conflicting directory?
				# bzip2 removes dirs, gzip does not.
				# The >/dev/nulls are to hide possible errors:
				rm -f -- "$TARGET" > /dev/null 2> /dev/null
				if [ -e "$TARGET" ]; then
					# Name the file that could not be
					# removed, i.e. the target.
					show_msg 0 "cannot replace $TARGET"
					EXITSTATUS=1 # Error
					continue
				fi
			else
				show_msg 0 "output file $TARGET already exists"
				EXITSTATUS=1 # Error
				continue
			fi
		fi
		# Open a file descriptor for the target file. When using
		# the standard output or in the test mode, the output
		# descriptor has been opened before the main loop.
		exec 6> "$TARGET"
		if [ $? != 0 ]; then
			show_msg 0 "error creating the target file"
			EXITSTATUS=1 # Error
			continue
		fi
	fi
fi


# In verbose mode the file name is printed first, without a newline; the
# result of the operation is appended to the same line later.
if [ $VERBOSE -ge 2 ]; then
	# printf is used instead of 'echo -en', which is not portable: a
	# plain POSIX /bin/sh may print "-en" literally, and 'echo -e' would
	# also interpret backslashes that are part of the file name.
	printf '%s:\t' "$SOURCE" 1>&2
fi

if [ $DECOMPRESS = 0 ]; then
	compress "$SOURCE_FD"
else
	decompress "$SOURCE_FD"
fi

# The exit status of the lzma binary passes here:
ERRCODE=$?
if [ $ERRCODE != 0 ]; then
	if [ $TEST_ONLY = 1 ]; then
		if [ $VERBOSE -ge 2 ]; then
			echo "FAILED" 1>&2
		else
			show_msg 0 "$SOURCE: corrupted file"
		fi
	else
		show_msg 0 "lzma returned exit status $ERRCODE." \
				"If problem repeats try -vv."
		# Remove the broken target file:
		[ $USE_STDOUT = 0 ] && rm -f -- "$TARGET" 2> /dev/null
	fi
	EXITSTATUS=1 # Error
	continue
fi

# If we got here the compression/decompression should have been successful.
# Show some info if we are in verbose mode:
if [ $VERBOSE -ge 2 ]; then
	if [ $TEST_ONLY = 1 ]; then
		echo "OK" 1>&2
	else
		OLD_SIZE=$(filesize "$SOURCE_FD")
		NEW_SIZE=$(filesize "$TARGET")
		if [ "$OLD_SIZE" = "" -o "$NEW_SIZE" = "" ]; then
			# At least one size is unknown (e.g. when streaming):
			# no ratio can be shown.
			:
		elif [ $OLD_SIZE -le 0 -o $NEW_SIZE -le 0 ]; then
			# An empty file: avoid dividing by zero and report
			# 0% in the same format as the branches below.
			printf "%3d%% -- " 0 1>&2
		elif [ $DECOMPRESS = 0 ]; then
			printf "%3d%% -- " "$((100 - 100 * NEW_SIZE / OLD_SIZE))" 1>&2
		else
			printf "%3d%% -- " "$((100 - 100 * OLD_SIZE / NEW_SIZE))" 1>&2
		fi
		if [ $USE_STDOUT = 0 ]; then
			echo "replaced with $TARGET" 1>&2
		else
			echo "Done." 1>&2
		fi
	fi
fi

# Copy the owner, permissions and time stamp of the original file. Also
# remove the original file unless --keep is used:
if [ $USE_STDOUT = 0 -a $TEST_ONLY = 0 ]; then
	if [ $IS_GNU_LINUX = 1 ]; then
		chown --reference /dev/fd/5 /dev/fd/6 >/dev/null 2>/dev/null
		chmod --reference /dev/fd/5 /dev/fd/6 >/dev/null 2>/dev/null
		touch -cr /dev/fd/5 /dev/fd/6 >/dev/null 2>/dev/null
	else
		# No GNU :-(  Portable, dirty and maybe less reliable trick.
		# FILE_INFO holds the mode string, numeric UID and numeric GID
		# cut out of the 'ls' listing of the source file.
		FILE_INFO=$(ls -dLln -- "$SOURCE" \
				| tr -s ' ' | cut -f 1,3,4 -d ' ')
		chown "$(echo "$FILE_INFO" | cut -f 2,3 -d ' ' \
				| tr ' ' ':')" "$TARGET"
		# Turn e.g. "-rw-r--r--" into "u=rw,g=r,o=r" for chmod:
		chmod "$(echo "$FILE_INFO" | sed \
			's/^.\(...\)\(...\)\(...\).*$/u=\1,g=\2,o=\3/;s/-//g')"\
				"$TARGET"
		touch -cr "$SOURCE" -- "$TARGET"
	fi
	[ $KEEP_FILE = 0 ] && rm -f -- "$SOURCE"
fi

done

# Close the file descriptors:
exec 5<&- 6>&-

# Exit status:
# 0 = All OK
# 1 = Error occurred
# 2 = Warning occurred
# 4 = lzma not found in the path
# 5 = Internal error (bug) in lzmash script

exit $EXITSTATUS
