Template engine for Codeigniter 3 with built-in HTML sanitizer

Updated Dani 2 Tallied Votes 264 Views Share

A little while ago, I wrote a tutorial about how important it is to sanitize PHP user input strings. Not only is it important to sanitize user input being fed into a database query, but it's also important to sanitize user input being displayed to the end-user to generate valid HTML: for example, converting & to &amp;. Otherwise, you can end up with JavaScript injection attacks and that sort of thing. This happens when a malicious user fills out a form field with JavaScript that isn't properly sanitized, and it is then displayed on a webpage where a different, victim user's web browser sees it and executes the malicious JavaScript.

To combat this, I rolled my own template library for DaniWeb, which is built on top of the Codeigniter 3.1.x PHP framework. By default, Codeigniter, being an MVC framework, requires you to pass an array of variables into a view template that you want the template to have access to. My template library handles this by creating a class where you "assign" variables to a template. For each of those variables, it sanitizes them to make sure they aren't vulnerable to any injection bugs, and also runs them through a bad words censor. This makes it really easy to make sure that there are absolutely no injection bugs anywhere on DaniWeb, as templates literally only have access to variables that have been passed through the filter.

Here's an example of how I would access my template library from a controller:

$this->dw_template->assign('name', $name);        
$this->load->view('hello', $this->dw_template->get());

Then, from within the hello.php template, I would be able to do something like:

<p>Hello, <?= $name ?>!</p>

This ensures that there's never any logic within the templates, and every variable they have access to is guaranteed to be sanitized.

Alternatively, if there's a specific variable I want to pass into the template but I don't want to sanitize it for any particular reason, I can pass in 'true' as the third parameter, like so:

$this->dw_template->assign('unsanitized_variable', $unsanitized, true);

I tend to use this in my own code when the variable is something like a number, where I know beyond a reasonable doubt that there is nothing to sanitize, so I want to save it from being passed into the sanitization functions for efficiency's sake.

The code for the deep_clone() function that I used is here.

<?php
// Abort unless the request came through CodeIgniter's front controller, which defines BASEPATH
defined('BASEPATH') OR exit('No direct script access allowed');

// This class automatically sanitizes variables as they pass from the controller to the view.
//	Every value handed to assign() is run through the bad-words censor and then HTML-escaped
//	(unless the caller opts out), so the developer does not need to escape each variable manually.
class Dw_template
{
	
	// CodeIgniter super-object returned by get_instance()
	private $CI;
	
	// Template variables collected so far, keyed by the name the view will see
	private $output;
	
	// ****************
	// HELPER FUNCTIONS
	// ****************
	
	// Recursively encode HTML special characters so a value is safe to print in an HTML context
	//	- arrays:  an escaped copy is returned (the input is passed through deep_clone() first)
	//	- objects: public properties are escaped IN PLACE and the same instance is returned;
	//	           nested arrays/objects are passed through deep_clone() before being escaped
	//	- booleans and numeric values (including numeric strings) are returned untouched
	//	- anything else is cast to a string and escaped (so NULL becomes an empty string)
	public function html_escape($string)
	{
		if (is_array($string))
		{
			// array($this, ...) instead of 'self::html_escape': string callables that point
			//	at a non-static method are deprecated as of PHP 8.2
			return array_map(array($this, 'html_escape'), deep_clone($string));
		}
		
		if (is_object($string))
		{
			// Iterate over a snapshot of the properties rather than foreach-by-reference,
			//	which leaves a dangling reference and cannot be used on iterator objects
			foreach (get_object_vars($string) AS $key => $value)
			{
				if (is_string($value))
				{
					$string->{$key} = $this->html_escape($value);
				}
				else if (is_array($value) OR is_object($value))
				{
					// Recursion, on a copy so nested objects shared with the caller stay untouched
					$string->{$key} = $this->html_escape(deep_clone($value));
				}
				// Numbers, booleans, NULL, etc are deliberately left as they are
			}
			
			return $string;
		}
		
		if (is_bool($string) OR is_numeric($string))
		{
			return $string;
		}
		
		// ENT_QUOTES also encodes single quotes (not the default before PHP 8.1), which matters
		//	inside single-quoted attributes. ENT_SUBSTITUTE replaces invalid byte sequences
		//	instead of turning the whole string into an empty one.
		// The explicit cast avoids the PHP 8.1+ deprecation notice for passing NULL.
		return htmlspecialchars((string) $string, ENT_QUOTES | ENT_SUBSTITUTE);
	}
	
	// Recursive wrapper for $this->badword_censor()
	//	Strings are censored; arrays and objects are walked recursively; anything else
	//	is returned unchanged. A top-level object is modified in place (assign() hands in a copy).
	private function censor($word)
	{
		// Requested once per call instead of once per recursion level.
		// NOTE(review): nothing in this class calls a text helper function; the load is kept
		//	in case views rely on the helper being available - confirm before removing.
		$this->CI->load->helper('text_helper');
		
		return $this->censor_recursive($word);
	}
	
	// Does the actual walking for censor()
	//	Nested objects are cloned before they are modified, so objects the caller still
	//	holds a handle to are never changed behind its back
	private function censor_recursive($value)
	{
		if (is_string($value))
		{
			return $this->badword_censor($value);
		}
		
		if (is_array($value))
		{
			foreach ($value AS $key => $item)
			{
				if (is_object($item))
				{
					$item = clone $item;
				}
				
				$value[$key] = $this->censor_recursive($item);
			}
			
			return $value;
		}
		
		if (is_object($value))
		{
			foreach (get_object_vars($value) AS $key => $item)
			{
				if (is_object($item))
				{
					$item = clone $item;
				}
				
				$value->{$key} = $this->censor_recursive($item);
			}
			
			return $value;
		}
		
		// Don't do anything to it if it's a number, boolean, NULL, etc
		return $value;
	}
	
	// Replace every occurrence of a bad word in $string with a run of '#' of the same length
	//	Returns the censored string
	public function badword_censor($string)
	{
		// Match always, even as part of a word
		$badstems = array('#######', '########', '#######', '########', '####', '########', '####', '######');

		// Words that we only want to match on whole-word only
		//	(i.e. we want to match ### but not assume or assist, and we want to match #### but not parse)
		$badwords = array('####', '###', '####');
	
		foreach ($badwords AS $badword)
		{
			// preg_quote() keeps a word that contains regex metacharacters from breaking the pattern
			$pattern = '/\b' . preg_quote($badword, '/') . '\b/i';
			$censored = preg_replace($pattern, str_repeat('#', strlen($badword)), $string);
			
			// preg_replace() returns NULL on failure; keep the text rather than wiping it
			if ($censored !== NULL)
			{
				$string = $censored;
			}
		}
	
		foreach ($badstems AS $badstem)
		{
			$string = str_ireplace($badstem, str_repeat('#', strlen($badstem)), $string);
		}
	
		return $string;
	}
	
	// **************
	// PUBLIC METHODS
	// **************
		
	// Used to assign a variable we want to use in a template
	//	$name       name the variable will have inside the view
	//	$dummy      the value; it is copied before being sanitized, never modified itself
	//	$sanitized  pass true when the value is already safe and must be stored as-is
	public function assign($name, $dummy, $sanitized = false)
	{
		// Already-safe values, numbers and booleans have nothing to censor or escape
		if ($sanitized OR is_numeric($dummy) OR is_bool($dummy))
		{
			$this->output[$name] = $dummy;
			return;
		}
		
		// Objects are handles, so without a copy the caller's object would be
		//	modified by the sanitizers below
		if (is_object($dummy))
		{
			$variable = clone $dummy;
		}
		else if (is_array($dummy))
		{
			$variable = deep_clone($dummy);
		}
		else
		{
			$variable = $dummy;
		}
		
		// Pass through the badwords censor first, then HTML escape the result
		$variable = $this->censor($variable);
		$variable = $this->html_escape($variable);
		
		// Make the assignment
		$this->output[$name] = $variable;
	}
	
	// Remove a previously assigned variable so it is no longer passed to the view
	public function deallocate($name)
	{
		unset($this->output[$name]);
	}
	
	// Used to retrieve all template variables to pass them to the view
	public function get()
	{		
		return $this->output;
	}
	
	// Constructor function
	//	Grabs the CodeIgniter instance and starts with an empty set of template variables
	public function __construct()
	{
		
		$this->CI =& get_instance();
		$this->output = array();	
	}
}