<?php
/**
 * The MIME_Viewer_html class renders out HTML text with an effort to
 * remove potentially malicious code.
 *
 * $Horde: framework/MIME/MIME/Viewer/html.php,v 1.14.4.20 2006/06/07 13:34:16 jan Exp $
 *
 * Copyright 1999-2006 Anil Madhavapeddy <anil@recoil.org>
 * Copyright 1999-2006 Jon Parise <jon@horde.org>
 * Copyright 2002-2006 Michael Slusarz <slusarz@horde.org>
 *
 * See the enclosed file COPYING for license information (GPL). If you
 * did not receive this file, see http://www.fsf.org/copyleft/gpl.html.
 *
 * @author  Anil Madhavapeddy <anil@recoil.org>
 * @author  Jon Parise <jon@horde.org>
 * @author  Michael Slusarz <slusarz@horde.org>
 * @since   Horde 3.0
 * @package Horde_MIME_Viewer
 */
class MIME_Viewer_html extends MIME_Viewer {

    /**
     * Render out the currently set contents.
     *
     * The contents of the MIME part are passed through _cleanHTML() before
     * being returned.
     *
     * @param array $params  Any parameters the viewer may need (not used by
     *                       this implementation).
     *
     * @return string  The rendered text.
     */
    function render($params = null)
    {
        return $this->_cleanHTML($this->mime_part->getContents());
    }

    /**
     * Filters active content, dereferences external links, detects phishing,
     * etc.
     *
     * Processing order:
     *   1. A <base href> tag, if present, is extracted and relative src/href
     *      values are rewritten against it.
     *   2. The data is run through the 'xss' text filter.
     *   3. Links are checked for two phishing heuristics (IP address links,
     *      and link text that looks like a different host than the link
     *      target).
     *   4. Every href that is not a pure fragment ("#...") is routed through
     *      Horde::externalUrl().
     *   5. If a phishing heuristic matched, a warning is inserted right
     *      after the opening <body> tag, or prepended if there is none.
     *
     * Note that this method calls itself once on the extracted base URL.
     *
     * @access private
     *
     * @param string $data  The HTML data.
     *
     * @return string  The cleaned HTML data.
     */
    function _cleanHTML($data)
    {
        global $browser;

        $phish_warn = false;

        require_once 'Horde/MIME/Contents.php';
        $attachment = MIME_Contents::viewAsAttachment();

        /* Deal with <base> tags in the HTML, since they will screw up our own
         * relative paths. Both quoting styles (and no quoting) are
         * accepted. */
        if (preg_match('/<base\s+href\s*=\s*["\']?([^"\'>\s]*)["\']?\s*\/?>/i', $data, $matches)) {
            $base = $matches[1];
            if (substr($base, -1, 1) != '/') {
                $base .= '/';
            }

            /* Recursively call _cleanHTML() to prevent clever fiends from
             * sneaking nasty things into the page via $base. */
            $base = $this->_cleanHTML($base);
        }

        /* Attempt to fix paths that were relying on a <base> tag. */
        if (!empty($base)) {
            /* $base comes from the message, so it must not be able to inject
             * backreferences into the replacement strings: escape "\" and
             * "$", and use the ${n} form so that a base starting with a
             * digit cannot merge with the group number. */
            $safe_base = strtr($base, array('\\' => '\\\\', '$' => '\\$'));

            $pattern = array('|src=(["\'])([^:"\']+)\1|i',
                             '|src=([^: >"\']+)|i',
                             '|href= *(["\'])([^:"\']+)\1|i',
                             '|href=([^: >"\']+)|i');
            $replace = array('src=${1}' . $safe_base . '${2}${1}',
                             'src=' . $safe_base . '${1}',
                             'href=${1}' . $safe_base . '${2}${1}',
                             'href=' . $safe_base . '${1}');
            $data = preg_replace($pattern, $replace, $data);
        }

        /* Styles are stripped when the part is not viewed as an attachment,
         * or when the browser is Mozilla 4.x or MSIE. */
        require_once 'Horde/Text/Filter.php';
        $strip_styles = !$attachment ||
            ($browser->isBrowser('mozilla') &&
             $browser->getMajor() == 4) ||
            $browser->isBrowser('msie');
        $data = Text_Filter::filter($data, 'xss',
                                    array('body_only' => !$attachment,
                                          'strip_styles' => $strip_styles));

        /* Check for phishing exploits. */
        if (preg_match('/href\s*=\s*["\']?\s*(http|https|ftp):\/\/\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}/i', $data)) {
            /* Check 1: Check for IP address links. */
            $phish_warn = true;
        } elseif (preg_match_all('/href\s*=\s*["\']?\s*(?:http|https|ftp):\/\/([^\s"\'>]+)["\']?[^>]*>\s*(?:(?:http|https|ftp):\/\/)?(.*?)<\/a/is', $data, $m)) {
            /* $m[1] = Link; $m[2] = Target
             * Check 2: Check for links that point to a different host than
             * the target url; if target looks like a domain name, check it
             * against the link. */
            $links = count($m[0]);
            for ($i = 0; $i < $links; $i++) {
                $link = strtolower(urldecode($m[1][$i]));
                $target = strtolower(preg_replace('/^(http|https|ftp):\/\//', '', strip_tags($m[2][$i])));
                if (preg_match('/^[-._\da-z]+\.[a-z]{2,}/i', $target) &&
                    strpos($link, $target) !== 0 &&
                    strpos($target, $link) !== 0) {
                    /* Don't consider the link a phishing link if the domain
                     * is the same on both links (e.g. adtracking.example.com
                     * & www.example.com). */
                    preg_match('/\.?([^\.\/]+\.[^\.\/]+)\//', $link, $host1);
                    preg_match('/\.?([^\.\/]+\.[^\.\/]+)(\/.*)?$/', $target, $host2);
                    if (!(count($host1) && count($host2)) ||
                        strcasecmp($host1[1], $host2[1]) !== 0) {
                        $phish_warn = true;
                        /* One suspicious link is enough. */
                        break;
                    }
                }
            }
        }

        /* Try to derefer all external references. */
        $data = preg_replace_callback('/href\s*=\s*(["\'])?((?(1)[^\1]*?|[^\s>]+))(?(1)\1|)/i',
                                      array($this, '_dereferHref'),
                                      $data);

        /* Insert phishing warning. */
        if ($phish_warn) {
            $contents = new MIME_Contents(new MIME_Part());
            $phish_warning = sprintf(_("%s: This message may not be from whom it claims to be. Beware of following any links in it or of providing the sender with any personal information."), _("Warning"));
            if ($contents->viewAsAttachment()) {
                $phish_warning = '<span style="background-color:#ffd0af;color:black">' . String::convertCharset($phish_warning, NLS::getCharset(), $this->mime_part->getCharset()) . '</span><br />';
            }
            $phish_warning = $contents->formatStatusMsg($phish_warning, null, true, 'mimeStatusWarning');

            /* Splice the warning in directly after the first complete
             * <body ...> tag. [^>]* also spans line breaks, so a tag whose
             * attributes are wrapped over several lines is still found. If
             * no complete tag exists the warning is prepended, so it can
             * never be dropped silently. String splicing is used instead of
             * preg_replace() so that the warning text is never interpreted
             * as a replacement pattern. */
            if (preg_match('/<body\b[^>]*>/i', $data, $body_tag, PREG_OFFSET_CAPTURE)) {
                $insert_at = $body_tag[0][1] + strlen($body_tag[0][0]);
                $data = substr($data, 0, $insert_at) . $phish_warning . substr($data, $insert_at);
            } else {
                $data = $phish_warning . $data;
            }
        }

        return $data;
    }

    /**
     * preg_replace_callback() handler used by _cleanHTML() to rewrite a
     * single href attribute.
     *
     * Fragment-only links (starting with "#") are kept as they are; every
     * other URL is passed through Horde::externalUrl(). The attribute is
     * always re-emitted with double quotes.
     *
     * @access private
     *
     * @param array $m  The regex match: $m[2] holds the href value without
     *                  its surrounding quotes.
     *
     * @return string  The replacement href attribute.
     */
    function _dereferHref($m)
    {
        $url = $m[2];
        if (strlen($url) && $url[0] == '#') {
            return 'href="' . $url . '"';
        }

        return 'href="' . Horde::externalUrl($url) . '"';
    }

    /**
     * Return the content-type of the rendered text.
     *
     * When viewed as an attachment the type reported by the MIME part is
     * returned; otherwise text/html in the current NLS charset.
     *
     * @return string  The MIME Content-Type.
     */
    function getType()
    {
        require_once 'Horde/MIME/Contents.php';
        return MIME_Contents::viewAsAttachment() ? $this->mime_part->getType(true) : 'text/html; charset=' . NLS::getCharset();
    }

}
