[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Phpgroupware-cvs] phpgwapi/inc functions.inc.php, 1.121.2.13.2.23 class
From: |
skwashd |
Subject: |
[Phpgroupware-cvs] phpgwapi/inc functions.inc.php, 1.121.2.13.2.23 class.data_cleaner.inc.php, 1.1.2.1 |
Date: |
Wed, 24 Aug 2005 15:46:00 +0200 |
Update of phpgwapi/inc
Modified Files:
Branch: Version-0_9_16-branch
functions.inc.php lines: +27 -1
Added Files:
Branch: Version-0_9_16-branch
class.data_cleaner.inc.php lines: +254 -0
Log Message:
XSS fix
====================================================
Index: phpgwapi/inc/functions.inc.php
diff -u phpgwapi/inc/functions.inc.php:1.121.2.13.2.22
phpgwapi/inc/functions.inc.php:1.121.2.13.2.23
--- phpgwapi/inc/functions.inc.php:1.121.2.13.2.22 Sat Nov 6 15:34:26 2004
+++ phpgwapi/inc/functions.inc.php Wed Aug 24 13:46:44 2005
@@ -116,6 +116,32 @@
// Can't use this yet - errorlog hasn't been created.
// print_debug('domain',@$GLOBALS['phpgw_info']['user']['domain'],'api');
+		// Remove this and you will lose important parts of your anatomy - skwashd
+		$GLOBALS['RAW_REQUEST'] = $_REQUEST; // if you really need the raw value
+		$to_cleans = array('_GET', '_POST', '_COOKIE', '_REQUEST'); // superglobals sanitised in place
+		$data_cleaner = createObject('phpgwapi.data_cleaner', '');
+		foreach ( $to_cleans as $to_clean )
+		{
+			if ( isset($GLOBALS[$to_clean]) && is_array($GLOBALS[$to_clean]) && count($GLOBALS[$to_clean]) )
+			{
+				foreach ( $GLOBALS[$to_clean] as $key => $val )
+				{
+					if ( !is_array($val) )
+					{
+						$GLOBALS[$to_clean][$key] = $data_cleaner->clean($val);
+					}
+					else
+					{
+						foreach ( $val as $skey => $sval ) // only one level of nesting is walked here
+						{
+							$GLOBALS[$to_clean][$key][$skey] = $data_cleaner->clean($sval); // clean the element, not its parent array
+						}
+					}
+				}
+				$GLOBALS["HTTP{$to_clean}_VARS"] = $GLOBALS[$to_clean]; // mirror the cleaned values into HTTP_GET_VARS etc.
+			}
+		}
+
/****************************************************************************\
* These lines load up the API, fill up the $phpgw_info array, etc
*
\****************************************************************************/
====================================================
Index: class.data_cleaner.inc.php
<?php
/**
* HTML Sanitizer, attempts to make variables safe for users.
* $Id: class.data_cleaner.inc.php,v 1.1.2.1 2005/08/24 13:46:44 skwashd Exp $
*
* Taken from the horde project by Dave Hall for use in phpGroupWare
*
* Copyright 1999-2005 Anil Madhavapeddy <address@hidden>
* Copyright 1999-2005 Jon Parise <address@hidden>
* Copyright 2002-2005 Michael Slusarz <address@hidden>
* Portions Copyright 2005 Free Software Foundation Inc http://fsf.org
*
* See the enclosed file COPYING for license information (GPL). If you
* did not receive this file, see http://www.fsf.org/copyleft/gpl.html.
*
* @author Anil Madhavapeddy <address@hidden>
* @author Jon Parise <address@hidden>
* @author Michael Slusarz <address@hidden>
* @author Dave Hall skwashd at phpgroupware.org
* @since phpGroupWare 0.9.16.007
* @package API
*/
class data_cleaner
{
    /**
    * @var string $data the data cleaned by clean() when it is called without an argument
    */
    var $data;

    /**
    * @constructor
    *
    * @param string $data the data to be cleaned
    */
    function __construct($data = '')
    {
        // Store under the same property name that clean() reads back.
        $this->data = $data;
    }

    /**
    * Old-style (class-named) constructor, kept so the class still
    * initialises on PHP versions that do not know __construct().
    *
    * @param string $data the data to be cleaned
    */
    function data_cleaner($data = '')
    {
        $this->__construct($data);
    }

    /**
    * Render out the currently set contents.
    *
    * When no argument is given the value passed to the constructor is
    * cleaned; because _clean_data() takes its argument by reference, the
    * stored value is overwritten with the cleaned result.
    *
    * @param mixed $data the raw data: a string, or an array whose values
    *                    (at any depth) are cleaned individually. Array
    *                    keys are left untouched.
    *
    * @return mixed The cleaned data, in the same shape as the input.
    */
    function clean($data = null)
    {
        if ( is_null($data) )
        {
            return $this->_clean_data($this->data);
        }
        return $this->_clean_value($data);
    }

    /**
    * Clean a single value, descending into arrays.
    *
    * @access private
    *
    * @param mixed $value a string or a (possibly nested) array of strings
    *
    * @return mixed The cleaned value.
    */
    function _clean_value($value)
    {
        if ( is_array($value) )
        {
            foreach ( $value as $key => $item )
            {
                $value[$key] = $this->_clean_value($item);
            }
            return $value;
        }
        return $this->_clean_data($value);
    }

    /**
    * preg_replace_callback() handler for href attributes.
    *
    * Fragment-only links (starting with '#') are kept as they are; every
    * other target is passed through $GLOBALS['phpgw']->safe_redirect().
    *
    * @access private
    *
    * @param array $m regex match; $m[2] holds the href target
    *
    * @return string The rewritten href attribute.
    */
    function _derefer_href($m)
    {
        if ( strlen($m[2]) && substr($m[2], 0, 1) == '#' )
        {
            return 'href="' . $m[2] . '"';
        }
        return 'href="' . $GLOBALS['phpgw']->safe_redirect($m[2]) . '"';
    }

    /**
    * These regular expressions attempt to make HTML safe for
    * viewing. THEY ARE NOT PERFECT.
    *
    * @access private
    *
    * @param string $data The HTML data. Taken by reference and modified
    *                     in place.
    *
    * @return string The cleaned HTML data.
    */
    function _clean_data(&$data)
    {
        $base = '';

        /* Deal with <base> tags in the HTML, since they will screw up
         * our own relative paths. */
        if (($i = stristr($data, '<base ')) && ($i = stristr($i, 'http')) &&
            ($j = strchr($i, '>')))
        {
            $base = substr($i, 0, strlen($i) - strlen($j));
            $base = preg_replace('|(http.*://[^/]*/?).*|i', '\1', $base);
            if ($base[strlen($base) - 1] != '/')
            {
                $base .= '/';
            }
            /* Recursively call this->_clean_data() to prevent clever fiends
             * from sneaking nasty things into the page via $base. */
            $base = $this->_clean_data($base);
        }

        /* Removes HTML comments (including some scripts & styles). */
        $data = preg_replace('/<!--.*?-->/s', '', $data);

        /* Change space entities to space characters. */
        $data = preg_replace('/&#(x0*20|0*32);?/i', ' ', $data);

        /* Nuke non-printable characters (a play in three acts). */

        /* Rule 1). If we have a semicolon, it is deterministically
         * detectable and fixable, without introducing collateral
         * damage. */
        $data = preg_replace('/&#x?0*([9A-D]|1[0-3]);/i', ' ', $data);

        /* Rule 2). Hex numbers (usually having an x prefix) are also
         * deterministic, even if we don't have the semi. Note that
         * some browsers will treat &#a or &#0a as a hex number even
         * without the x prefix; hence /x?/ which will cover those
         * cases in this rule. */
        $data = preg_replace('/&#x?0*[9A-D]([^0-9A-F]|$)/i', ' \\1', $data);

        /* Rule 3). Decimal numbers without trailing semicolons. The
         * problem is that some browsers will interpret &#10a as
         * "\na", some as "&#x10a" so we have to clean the &#10 to be
         * safe for the "\na" case at the expense of mangling a valid
         * entity in other cases. (Solution for valid HTML authors:
         * always use the semicolon.) */
        $data = preg_replace('/&#0*(9|1[0-3])([^0-9]|$)/i', ' \\2', $data);

        /* Remove overly long numeric entities. */
        $data = preg_replace('/&#x?0*[0-9A-F]{6,};?/i', ' ', $data);

        /* Remove everything outside of and including the <body> tag.
         * This used to be guarded by !$attachment, but no such variable
         * is ever defined in this method, so the guard was always true;
         * the strip therefore runs unconditionally. */
        $data = preg_replace('/.*<body[^>]*>/si', '', $data);
        $data = preg_replace('/<\/body>.*/si', '', $data);

        /* Get all attribute="javascript:foo()" tags. This is
         * essentially the regex /(=|url\()("?)[^>]*script:/ but
         * expanded to catch camouflage with spaces and entities.
         * Each letter group lists the literal character followed by the
         * decimal and hex numeric entities of its upper and lower case
         * forms. Group 1 is the "=" / "url(" prefix and group 8 the
         * optional opening quote; both are kept by the replacement. */
        $preg = '/((&#0*61;?|&#x0*3D;?|=)|' .
                '((u|&#0*85;?|&#x0*55;?|&#0*117;?|&#x0*75;?)\s*' .
                '(r|&#0*82;?|&#x0*52;?|&#0*114;?|&#x0*72;?)\s*' .
                '(l|&#0*76;?|&#x0*4c;?|&#0*108;?|&#x0*6c;?)\s*' .
                '(\()))\s*' .
                '(&#0*34;?|&#x0*22;?|"|&#0*39;?|&#x0*27;?|\')?' .
                '[^>]*\s*' .
                '(s|&#0*83;?|&#x0*53;?|&#0*115;?|&#x0*73;?)\s*' .
                '(c|&#0*67;?|&#x0*43;?|&#0*99;?|&#x0*63;?)\s*' .
                '(r|&#0*82;?|&#x0*52;?|&#0*114;?|&#x0*72;?)\s*' .
                '(i|&#0*73;?|&#x0*49;?|&#0*105;?|&#x0*69;?)\s*' .
                '(p|&#0*80;?|&#x0*50;?|&#0*112;?|&#x0*70;?)\s*' .
                '(t|&#0*84;?|&#x0*54;?|&#0*116;?|&#x0*74;?)\s*' .
                '(:|&#0*58;?|&#x0*3a;?)/i';
        $data = preg_replace($preg, '\1\8VarCleaned', $data);

        /* Get all on<foo>="bar()". NEVER allow these. */
        $data = preg_replace('/([\s"\']+' .
                             '(o|&#0*79;?|&#x0*4f;?|&#0*111;?|&#x0*6f;?)' .
                             '(n|&#0*78;?|&#x0*4e;?|&#0*110;?|&#x0*6e;?)' .
                             '\w+)\s*=/i', '\1VarCleaned=', $data);

        /* Remove all scripts since they might introduce garbage if
         * they are not quoted properly. */
        $data = preg_replace('|<script[^>]*>.*?</script>|is',
                             '<VarCleaned_script />', $data);

        /* Get all tags that might cause trouble - <object>, <embed>,
         * <base>, etc. Meta refreshes and iframes, too. */
        $malicious = array(
            /* <script */
            '/<([^>a-z]*)' .
            '(s|&#0*83;?|&#x0*53;?|&#0*115;?|&#x0*73;?)\s*' .
            '(c|&#0*67;?|&#x0*43;?|&#0*99;?|&#x0*63;?)\s*' .
            '(r|&#0*82;?|&#x0*52;?|&#0*114;?|&#x0*72;?)\s*' .
            '(i|&#0*73;?|&#x0*49;?|&#0*105;?|&#x0*69;?)\s*' .
            '(p|&#0*80;?|&#x0*50;?|&#0*112;?|&#x0*70;?)\s*' .
            '(t|&#0*84;?|&#x0*54;?|&#0*116;?|&#x0*74;?)\s*/i',

            /* <embed */
            '/<([^>a-z]*)' .
            '(e|&#0*69;?|&#x0*45;?|&#0*101;?|&#x0*65;?)\s*' .
            '(m|&#0*77;?|&#x0*4d;?|&#0*109;?|&#x0*6d;?)\s*' .
            '(b|&#0*66;?|&#x0*42;?|&#0*98;?|&#x0*62;?)\s*' .
            '(e|&#0*69;?|&#x0*45;?|&#0*101;?|&#x0*65;?)\s*' .
            '(d|&#0*68;?|&#x0*44;?|&#0*100;?|&#x0*64;?)\s*/i',

            /* <xml */
            '/<([^>a-z]*)' .
            '(x|&#0*88;?|&#x0*58;?|&#0*120;?|&#x0*78;?)\s*' .
            '(m|&#0*77;?|&#x0*4d;?|&#0*109;?|&#x0*6d;?)\s*' .
            '(l|&#0*76;?|&#x0*4c;?|&#0*108;?|&#x0*6c;?)\s*/i',

            /* <base
             * NOTE(review): the trailing [^line] is a character class (one
             * character that is none of l, i, n, e), not a lookahead for
             * the word "line" - confirm this is what was intended. */
            '/<([^>a-z]*)' .
            '(b|&#0*66;?|&#x0*42;?|&#0*98;?|&#x0*62;?)\s*' .
            '(a|&#0*65;?|&#x0*41;?|&#0*97;?|&#x0*61;?)\s*' .
            '(s|&#0*83;?|&#x0*53;?|&#0*115;?|&#x0*73;?)\s*' .
            '(e|&#0*69;?|&#x0*45;?|&#0*101;?|&#x0*65;?)\s*' .
            '[^line]/i',

            /* <meta */
            '/<([^>a-z]*)' .
            '(m|&#0*77;?|&#x0*4d;?|&#0*109;?|&#x0*6d;?)\s*' .
            '(e|&#0*69;?|&#x0*45;?|&#0*101;?|&#x0*65;?)\s*' .
            '(t|&#0*84;?|&#x0*54;?|&#0*116;?|&#x0*74;?)\s*' .
            '(a|&#0*65;?|&#x0*41;?|&#0*97;?|&#x0*61;?)\s*/i',

            /* <java */
            '/<([^>a-z]*)' .
            '(j|&#0*74;?|&#x0*4a;?|&#0*106;?|&#x0*6a;?)\s*' .
            '(a|&#0*65;?|&#x0*41;?|&#0*97;?|&#x0*61;?)\s*' .
            '(v|&#0*86;?|&#x0*56;?|&#0*118;?|&#x0*76;?)\s*' .
            '(a|&#0*65;?|&#x0*41;?|&#0*97;?|&#x0*61;?)\s*/i',

            /* <object */
            '/<([^>a-z]*)' .
            '(o|&#0*79;?|&#x0*4f;?|&#0*111;?|&#x0*6f;?)\s*' .
            '(b|&#0*66;?|&#x0*42;?|&#0*98;?|&#x0*62;?)\s*' .
            '(j|&#0*74;?|&#x0*4a;?|&#0*106;?|&#x0*6a;?)\s*' .
            '(e|&#0*69;?|&#x0*45;?|&#0*101;?|&#x0*65;?)\s*' .
            '(c|&#0*67;?|&#x0*43;?|&#0*99;?|&#x0*63;?)\s*' .
            '(t|&#0*84;?|&#x0*54;?|&#0*116;?|&#x0*74;?)\s*/i',

            /* <iframe */
            '/<([^>a-z]*)' .
            '(i|&#0*73;?|&#x0*49;?|&#0*105;?|&#x0*69;?)\s*' .
            '(f|&#0*70;?|&#x0*46;?|&#0*102;?|&#x0*66;?)\s*' .
            '(r|&#0*82;?|&#x0*52;?|&#0*114;?|&#x0*72;?)\s*' .
            '(a|&#0*65;?|&#x0*41;?|&#0*97;?|&#x0*61;?)\s*' .
            '(m|&#0*77;?|&#x0*4d;?|&#0*109;?|&#x0*6d;?)\s*' .
            '(e|&#0*69;?|&#x0*45;?|&#0*101;?|&#x0*65;?)\s*/i');
        $data = preg_replace($malicious, '<VarCleaned_tag', $data);

        /* Comment out style/link tags. */
        $pattern = array('/\s+style\s*=/i',
                         '|<style[^>]*>(?:\s*<\!--)*|i',
                         '|(?:-->\s*)*</style>|i',
                         '|(<link[^>]*>)|i');
        $replace = array(' VarCleaned=',
                         '<!--',
                         '-->',
                         '<!-- $1 -->');
        $data = preg_replace($pattern, $replace, $data);

        /* A few other matches.
         * NOTE(review): the first pattern has only two capture groups but
         * its replacement refers to \3, which therefore expands to an
         * empty string - confirm whether \2 was meant. */
        $pattern = array('|<([^>]*)&{.*}([^>]*)>|',
                         '|<([^>]*)mocha:([^>]*)>|i',
                         '|<([^>]*)binding:([^>]*)>|i');
        $replace = array('<&{;}\3>',
                         '<\1VarCleaned:\2>',
                         '<\1VarCleaned:\2>');
        $data = preg_replace($pattern, $replace, $data);

        /* Attempt to fix paths that were relying on a <base> tag. */
        if (!empty($base)) {
            $pattern = array('|src=(["\'])/|i',
                             '|src=[^\'"]/|i',
                             '|href= *(["\'])/|i',
                             '|href= *[^\'"]/|i');
            $replace = array('src=\1' . $base,
                             'src=' . $base,
                             'href=\1' . $base,
                             'href=' . $base);
            $data = preg_replace($pattern, $replace, $data);
        }

        /* Try to derefer all external references. A method callback is
         * used instead of create_function() so no code is built from a
         * string at run time. */
        $data = preg_replace_callback(
            '/href\s*=\s*(["\'])?((?(1)[^\1]*?|[^\s]+))(?(1)\1|)/i',
            array($this, '_derefer_href'),
            $data);

        return $data;
    }
}
?>
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [Phpgroupware-cvs] phpgwapi/inc functions.inc.php, 1.121.2.13.2.23 class.data_cleaner.inc.php, 1.1.2.1,
skwashd <=