<?php
/*
 * Copyright 2005 - 2009  Zarafa B.V.
 * 
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License, version 3, 
 * as published by the Free Software Foundation with the following additional 
 * term according to sec. 7:
 *  
 * According to sec. 7 of the GNU Affero General Public License, version
 * 3, the terms of the AGPL are supplemented with the following terms:
 * 
 * "Zarafa" is a registered trademark of Zarafa B.V. The licensing of
 * the Program under the AGPL does not imply a trademark license.
 * Therefore any rights, title and interest in our trademarks remain
 * entirely with us.
 * 
 * However, if you propagate an unmodified version of the Program you are
 * allowed to use the term "Zarafa" to indicate that you distribute the
 * Program. Furthermore you may use our trademarks where it is necessary
 * to indicate the intended purpose of a product or service provided you
 * use it in accordance with honest practices in industrial or commercial
 * matters.  If you want to propagate modified versions of the Program
 * under the name "Zarafa" or "Zarafa Server", you may only do so if you
 * have a written permission by Zarafa B.V. (to acquire a permission
 * please contact Zarafa at trademark@zarafa.com).
 * 
 * The interactive user interface of the software displays an attribution
 * notice containing the term "Zarafa" and/or the logo of Zarafa.
 * Interactive user interfaces of unmodified and modified versions must
 * display Appropriate Legal Notices according to sec. 5 of the GNU
 * Affero General Public License, version 3, when you propagate
 * unmodified or modified versions of the Program. In accordance with
 * sec. 7 b) of the GNU Affero General Public License, version 3, these
 * Appropriate Legal Notices must retain the logo of Zarafa or display
 * the words "Initial Development by Zarafa" if the display of the logo
 * is not reasonably feasible for technical reasons. The use of the logo
 * of Zarafa in Legal Notices is allowed for unmodified and modified
 * versions of the software.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *  
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 * 
 */

?>
<?php
	/**
	 * Filters text messages for various uses
	 * 		 	
	 * @package core
	 */
	
	class filter
	{
		/**
		 * Create script-safe HTML from raw HTML
		 *
		 * Processing order:
		 *  1. escape "<" characters that cannot start a tag,
		 *  2. strip script elements and neutralise event/script attributes (see filterScripts()),
		 *  3. rewrite inline images, 'cid:' references and 'mailto:' links through callbacks,
		 *  4. force links to open in a new window via '<base target="_blank">',
		 *  5. add a default monospace font.
		 *
		 * The callbacks inline_img_attachments(), inline_attachments() and mailto_newmail() are not
		 * defined in this class; they must be available as global functions when this method is called
		 * (inline_attachments() lives in util.php).
		 *
		 * @see inline_attachments()
		 *
		 * @param string $html The HTML text
		 * @param string $storeid MAPI binary entryid, is used for the inline attachments and new mail
		 * @param string $entryid MAPI message entryid, is used for the inline attachments and new mail
		 * @return string safe html
		 */
		function safeHTML($html, $storeid = false, $entryid = false)
		{
			// Escape every "<" that is followed by a character which cannot start a tag
			// (anything other than a letter, "/", "!", "?" or "%")
			$html = preg_replace("/<(?=[^a-zA-Z\/\!\?\%])/", "&lt;", $html);
			// Opera6 bug workaround: "\xC0\xBC" is an overlong UTF-8 encoding of "<"
			$html = str_replace("\xC0\xBC", "&lt;", $html);

			// Filter '<script>' elements, 'on...' event attributes and script links
			$html = $this->filterScripts($html);

			// The callbacks used below are plain functions, so the ids are handed over through
			// $GLOBALS. This is done unconditionally (also when the ids are false) so that values
			// from a previous call can never leak into this one.
			$GLOBALS["preg_replace"] = array();
			$GLOBALS["preg_replace"]["storeid"] = $storeid;
			$GLOBALS["preg_replace"]["entryid"] = $entryid;

			// Inline image attachments can also be specified without 'cid:' so fetch all img tag and replace with link to attachment
			$html = preg_replace_callback('/(<\s*img\b[^>].*?src\s*=\s*[\"\'])(\w+\.\w+)([\"\']\s*[\w+\W*?]*?\s*\/*\s*>)/msi', "inline_img_attachments", $html);

			// Replace all 'cid:...' with correct link to attachment.
			$html = preg_replace_callback("/=[\"']?(cid:)([^\"'>]+)[\"']?/msi", "inline_attachments", $html);

			// Replace all 'mailto:..' with link to compose new mail
			$html = preg_replace_callback('/<(a[^>]*)(href)=(["\'])?mailto:([^"\'>\?]+)\??([^"\'>]*)(["\']?)([^>]*)>/msi','mailto_newmail',$html);

			// remove 'base target' if exists
			$html = preg_replace("/<base[^>]*target=[^>]*\/?>/msi",'',$html);

			// Add 'base target' after the head-tag
			$base = '<base target="_blank">';
			$html = preg_replace("/<(head[^>]*)>/msi",('<$1>'.$base),$html);

			// if no head-tag was found (no base target is then added), add the 'base target' above the file
			if(strpos($html, $base)===false){
				$html = $base . $html;
			}

			// add default font
			$font = '<style type="text/css">body { font-family: monospace; }</style>';
			$html = preg_replace("/<(head[^>]*)>/msi",('<$1>'.$font),$html);

			// if no head-tag was found (no default font is then added), add the 'font default' above the file
			if(strpos($html, $font)===false){
				$html = $font . $html;
			}

			return $html;
		}

		/**
		 * Filter scripts from HTML
		 *
		 * Removes complete script elements and then passes every remaining tag through
		 * _filter_tag() to neutralise event attributes and script links.
		 *
		 * @access private
		 * @param string $str string which should be filtered
		 * @return string string without any script tags
		 */
		function filterScripts($str)
		{
			// Remove script elements. This is repeated until nothing is removed any more, because
			// removing an inner element can assemble a new one from the surrounding text
			// (e.g. '<scr<script></script>ipt>' becomes '<script>' after a single pass).
			do {
				$removed = 0;
				$str = preg_replace("'<SCRIPT[^>]*?>.*?</SCRIPT[^>]*?>'si", "", $str, -1, $removed);
			} while ($str !== null && $removed > 0);

			// filter attributes of every remaining opening or closing tag
			$str = preg_replace_callback("|(<\/?\w+[^>]*>)|", array($this, "_filter_tag"), $str);

			return $str;
		}

		/**
		 * Convert HTML to text
		 *
		 * Drops head/script/style/title elements including their content, removes the line
		 * breaks of the source, turns BR tags into one line break and P tags into two,
		 * strips all remaining tags and '<!...>' declarations and finally decodes the HTML entities.
		 *
		 * @param string $str the html which should be converted to text
		 * @return string plain text version of the given $str
		 */
		function html2text($str)
		{
			// The patterns are applied in this order, each one on the result of the previous one.
			$patterns = array(
				"'<(HEAD|SCRIPT|STYLE|TITLE)[^>]*?>.*?</(HEAD|SCRIPT|STYLE|TITLE)[^>]*?>'si",
				"'(\r|\n)'",
				"'<BR[^>]*?>'i",
				"'<P[^>]*?>'i",
				// No 'e' modifier here: it was removed in PHP 7 (preg_replace() would fail and
				// return NULL) and the replacement is a plain empty string anyway.
				"'<\/?\w+[^>]*>'",
				"'<![^>]*>'s"
				);
			$replacements = array(
				"",
				"",
				"\r\n",
				"\r\n\r\n",
				"",
				""
				);

			return $this->unhtmlentities(preg_replace($patterns, $replacements, $str));
		}

		/**
		 * Remove HTML entities and convert them to single characters where possible
		 *
		 * Only the named entities known to get_html_translation_table(HTML_ENTITIES) are
		 * converted; everything else is returned unchanged.
		 *
		 * @access private
		 * @param string $string string which should be converted
		 * @return string converted string
		 */
		function unhtmlentities ($string)
		{
			// The table maps character => entity, so flip it to get entity => character
			$trans_tbl = get_html_translation_table(HTML_ENTITIES);
			$trans_tbl = array_flip($trans_tbl);
			return strtr($string, $trans_tbl);
		}

		/**
		 * Neutralise script execution inside a single HTML tag
		 *
		 * Percent-escapes are decoded first so that escaped names cannot hide from the filter.
		 * After that every word starting with 'on' (event attributes like 'onclick') gets the
		 * suffix '_filtered', 'href' attributes pointing to a '...script:' url are renamed to
		 * 'href_filtered' and line breaks are replaced by spaces.
		 *
		 * @access private
		 * @param mixed $str the tag to filter, or the match array given by preg_replace_callback
		 * @return string the tag with event attributes and script links renamed
		 */
		function _filter_tag($str)
		{
			// fix $str when called by preg_replace_callback
			if (is_array($str)) $str = $str[0];

			// decode percent-escapes ('%6F' => 'o'); only real hex pairs are touched
			$str = preg_replace_callback("|%([0-9A-F]{2})|i", array($this, "_decode_percent_escape"), $str);

			$matches = array(
				// Matches a word beginning with 'on', unless that word is followed by '.': 'online.nl'
				// will not be matched, whereas 'onmouse' will be. The lookahead also rejects word
				// characters, otherwise backtracking would still match a prefix such as 'onlin'.
				"'(\bON\w+(?![\w.]))'i", // events
				"'(HREF)( *= *[\"\']?\w+SCRIPT *:[^\"\' >]+)'i", // links
				"'\n'",
				"'\r'"
				);
			$replaces = array(
				"\\1_filtered",
				"\\1_filtered\\2",
				" ",
				" "
				);
			return preg_replace($matches, $replaces, $str);
		}

		/**
		 * Callback for _filter_tag(): convert one percent-escape to the byte it stands for
		 *
		 * @access private
		 * @param array $match preg match, $match[1] holds the two hexadecimal digits
		 * @return string the decoded byte
		 */
		function _decode_percent_escape($match)
		{
			return chr(hexdec($match[1]));
		}
	}
?>
