mirror of
				https://github.com/avecms/AVE.cms.git
				synced 2025-10-30 21:26:40 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			1077 lines
		
	
	
		
			37 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
			
		
		
	
	
			1077 lines
		
	
	
		
			37 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
| <?php
 | |
| /**
 | |
|  * SQL Formatter is a collection of utilities for debugging SQL queries.
 | |
|  * It includes methods for formatting, syntax highlighting, removing comments, etc.
 | |
|  */
 | |
| class SqlFormatter
 | |
| {
 | |
| 	// Constants for token types
 | |
| 	const TOKEN_TYPE_WHITESPACE = 0;
 | |
| 	const TOKEN_TYPE_WORD = 1;
 | |
| 	const TOKEN_TYPE_QUOTE = 2;
 | |
| 	const TOKEN_TYPE_BACKTICK_QUOTE = 3;
 | |
| 	const TOKEN_TYPE_RESERVED = 4;
 | |
| 	const TOKEN_TYPE_RESERVED_TOPLEVEL = 5;
 | |
| 	const TOKEN_TYPE_RESERVED_NEWLINE = 6;
 | |
| 	const TOKEN_TYPE_BOUNDARY = 7;
 | |
| 	const TOKEN_TYPE_COMMENT = 8;
 | |
| 	const TOKEN_TYPE_BLOCK_COMMENT = 9;
 | |
| 	const TOKEN_TYPE_NUMBER = 10;
 | |
| 	const TOKEN_TYPE_ERROR = 11;
 | |
| 	const TOKEN_TYPE_VARIABLE = 12;
 | |
| 
 | |
| 	// Constants for different components of a token
 | |
| 	const TOKEN_TYPE = 0;
 | |
| 	const TOKEN_VALUE = 1;
 | |
| 
 | |
| 	// Reserved words (for syntax highlighting)
 | |
| 	protected static $reserved = array(
 | |
| 		'ACCESSIBLE', 'ACTION', 'AGAINST', 'AGGREGATE', 'ALGORITHM', 'ALL', 'ALTER', 'ANALYSE', 'ANALYZE', 'AS', 'ASC',
 | |
| 		'AUTOCOMMIT', 'AUTO_INCREMENT', 'BACKUP', 'BEGIN', 'BETWEEN', 'BINLOG', 'BOTH', 'CASCADE', 'CASE', 'CHANGE', 'CHANGED', 'CHARACTER SET',
 | |
| 		'CHARSET', 'CHECK', 'CHECKSUM', 'COLLATE', 'COLLATION', 'COLUMN', 'COLUMNS', 'COMMENT', 'COMMIT', 'COMMITTED', 'COMPRESSED', 'CONCURRENT',
 | |
| 		'CONSTRAINT', 'CONTAINS', 'CONVERT', 'CREATE', 'CROSS', 'CURRENT_TIMESTAMP', 'DATABASE', 'DATABASES', 'DAY', 'DAY_HOUR', 'DAY_MINUTE',
 | |
| 		'DAY_SECOND', 'DEFAULT', 'DEFINER', 'DELAYED', 'DELETE', 'DESC', 'DESCRIBE', 'DETERMINISTIC', 'DISTINCT', 'DISTINCTROW', 'DIV',
 | |
| 		'DO', 'DUMPFILE', 'DUPLICATE', 'DYNAMIC', 'ELSE', 'ENCLOSED', 'END', 'ENGINE', 'ENGINE_TYPE', 'ENGINES', 'ESCAPE', 'ESCAPED', 'EVENTS', 'EXEC',
 | |
| 		'EXECUTE', 'EXISTS', 'EXPLAIN', 'EXTENDED', 'FAST', 'FIELDS', 'FILE', 'FIRST', 'FIXED', 'FLUSH', 'FOR', 'FORCE', 'FOREIGN', 'FULL', 'FULLTEXT',
 | |
| 		'FUNCTION', 'GLOBAL', 'GRANT', 'GRANTS', 'GROUP_CONCAT', 'HEAP', 'HIGH_PRIORITY', 'HOSTS', 'HOUR', 'HOUR_MINUTE',
 | |
| 		'HOUR_SECOND', 'IDENTIFIED', 'IF', 'IFNULL', 'IGNORE', 'IN', 'INDEX', 'INDEXES', 'INFILE', 'INSERT', 'INSERT_ID', 'INSERT_METHOD', 'INTERVAL',
 | |
| 		'INTO', 'INVOKER', 'IS', 'ISOLATION', 'KEY', 'KEYS', 'KILL', 'LAST_INSERT_ID', 'LEADING', 'LEVEL', 'LIKE', 'LINEAR',
 | |
| 		'LINES', 'LOAD', 'LOCAL', 'LOCK', 'LOCKS', 'LOGS', 'LOW_PRIORITY', 'MARIA', 'MASTER', 'MASTER_CONNECT_RETRY', 'MASTER_HOST', 'MASTER_LOG_FILE',
 | |
| 		'MATCH','MAX_CONNECTIONS_PER_HOUR', 'MAX_QUERIES_PER_HOUR', 'MAX_ROWS', 'MAX_UPDATES_PER_HOUR', 'MAX_USER_CONNECTIONS',
 | |
| 		'MEDIUM', 'MERGE', 'MINUTE', 'MINUTE_SECOND', 'MIN_ROWS', 'MODE', 'MODIFY',
 | |
| 		'MONTH', 'MRG_MYISAM', 'MYISAM', 'NAMES', 'NATURAL', 'NOT', 'NOW()','NULL', 'OFFSET', 'ON', 'OPEN', 'OPTIMIZE', 'OPTION', 'OPTIONALLY',
 | |
| 		'ON UPDATE', 'ON DELETE', 'OUTFILE', 'PACK_KEYS', 'PAGE', 'PARTIAL', 'PARTITION', 'PARTITIONS', 'PASSWORD', 'PRIMARY', 'PRIVILEGES', 'PROCEDURE',
 | |
| 		'PROCESS', 'PROCESSLIST', 'PURGE', 'QUICK', 'RANGE', 'RAID0', 'RAID_CHUNKS', 'RAID_CHUNKSIZE','RAID_TYPE', 'READ', 'READ_ONLY',
 | |
| 		'READ_WRITE', 'REFERENCES', 'REGEXP', 'RELOAD', 'RENAME', 'REPAIR', 'REPEATABLE', 'REPLACE', 'REPLICATION', 'RESET', 'RESTORE', 'RESTRICT',
 | |
| 		'RETURN', 'RETURNS', 'REVOKE', 'RLIKE', 'ROLLBACK', 'ROW', 'ROWS', 'ROW_FORMAT', 'SECOND', 'SECURITY', 'SEPARATOR',
 | |
| 		'SERIALIZABLE', 'SESSION', 'SHARE', 'SHOW', 'SHUTDOWN', 'SLAVE', 'SONAME', 'SOUNDS', 'SQL',	 'SQL_AUTO_IS_NULL', 'SQL_BIG_RESULT',
 | |
| 		'SQL_BIG_SELECTS', 'SQL_BIG_TABLES', 'SQL_BUFFER_RESULT', 'SQL_CALC_FOUND_ROWS', 'SQL_LOG_BIN', 'SQL_LOG_OFF', 'SQL_LOG_UPDATE',
 | |
| 		'SQL_LOW_PRIORITY_UPDATES', 'SQL_MAX_JOIN_SIZE', 'SQL_QUOTE_SHOW_CREATE', 'SQL_SAFE_UPDATES', 'SQL_SELECT_LIMIT', 'SQL_SLAVE_SKIP_COUNTER',
 | |
| 		'SQL_SMALL_RESULT', 'SQL_WARNINGS', 'SQL_CACHE', 'SQL_NO_CACHE', 'START', 'STARTING', 'STATUS', 'STOP', 'STORAGE',
 | |
| 		'STRAIGHT_JOIN', 'STRING', 'STRIPED', 'SUPER', 'TABLE', 'TABLES', 'TEMPORARY', 'TERMINATED', 'THEN', 'TO', 'TRAILING', 'TRANSACTIONAL', 'TRUE',
 | |
| 		'TRUNCATE', 'TYPE', 'TYPES', 'UNCOMMITTED', 'UNIQUE', 'UNLOCK', 'UNSIGNED', 'USAGE', 'USE', 'USING', 'VARIABLES',
 | |
| 		'VIEW', 'WHEN', 'WITH', 'WORK', 'WRITE', 'YEAR_MONTH'
 | |
| 	);
 | |
| 
 | |
| 	// For SQL formatting
 | |
| 	// These keywords will all be on their own line
 | |
| 	protected static $reserved_toplevel = array(
 | |
| 		'SELECT', 'FROM', 'WHERE', 'SET', 'ORDER BY', 'GROUP BY', 'LIMIT', 'DROP',
 | |
| 		'VALUES', 'UPDATE', 'HAVING', 'ADD', 'AFTER', 'ALTER TABLE', 'DELETE FROM', 'UNION ALL', 'UNION', 'EXCEPT', 'INTERSECT'
 | |
| 	);
 | |
| 
 | |
| 	protected static $reserved_newline = array(
 | |
| 		'LEFT OUTER JOIN', 'RIGHT OUTER JOIN', 'LEFT JOIN', 'RIGHT JOIN', 'OUTER JOIN', 'INNER JOIN', 'JOIN', 'XOR', 'OR', 'AND'
 | |
| 	);
 | |
| 
 | |
| 	protected static $functions = array (
 | |
| 		'ABS', 'ACOS', 'ADDDATE', 'ADDTIME', 'AES_DECRYPT', 'AES_ENCRYPT', 'AREA', 'ASBINARY', 'ASCII', 'ASIN', 'ASTEXT', 'ATAN', 'ATAN2',
 | |
| 		'AVG', 'BDMPOLYFROMTEXT',  'BDMPOLYFROMWKB', 'BDPOLYFROMTEXT', 'BDPOLYFROMWKB', 'BENCHMARK', 'BIN', 'BIT_AND', 'BIT_COUNT', 'BIT_LENGTH',
 | |
| 		'BIT_OR', 'BIT_XOR', 'BOUNDARY',  'BUFFER',	 'CAST', 'CEIL', 'CEILING', 'CENTROID',	 'CHAR', 'CHARACTER_LENGTH', 'CHARSET', 'CHAR_LENGTH',
 | |
| 		'COALESCE', 'COERCIBILITY', 'COLLATION',  'COMPRESS', 'CONCAT', 'CONCAT_WS', 'CONNECTION_ID', 'CONTAINS', 'CONV', 'CONVERT', 'CONVERT_TZ',
 | |
| 		'CONVEXHULL',  'COS', 'COT', 'COUNT', 'CRC32', 'CROSSES', 'CURDATE', 'CURRENT_DATE', 'CURRENT_TIME', 'CURRENT_TIMESTAMP', 'CURRENT_USER',
 | |
| 		'CURTIME', 'DATABASE', 'DATE', 'DATEDIFF', 'DATE_ADD', 'DATE_DIFF', 'DATE_FORMAT', 'DATE_SUB', 'DAY', 'DAYNAME', 'DAYOFMONTH', 'DAYOFWEEK',
 | |
| 		'DAYOFYEAR', 'DECODE', 'DEFAULT', 'DEGREES', 'DES_DECRYPT', 'DES_ENCRYPT', 'DIFFERENCE', 'DIMENSION', 'DISJOINT', 'DISTANCE', 'ELT', 'ENCODE',
 | |
| 		'ENCRYPT', 'ENDPOINT', 'ENVELOPE', 'EQUALS', 'EXP', 'EXPORT_SET', 'EXTERIORRING', 'EXTRACT', 'EXTRACTVALUE', 'FIELD', 'FIND_IN_SET', 'FLOOR',
 | |
| 		'FORMAT', 'FOUND_ROWS', 'FROM_DAYS', 'FROM_UNIXTIME', 'GEOMCOLLFROMTEXT', 'GEOMCOLLFROMWKB', 'GEOMETRYCOLLECTION', 'GEOMETRYCOLLECTIONFROMTEXT',
 | |
| 		'GEOMETRYCOLLECTIONFROMWKB', 'GEOMETRYFROMTEXT', 'GEOMETRYFROMWKB', 'GEOMETRYN', 'GEOMETRYTYPE', 'GEOMFROMTEXT', 'GEOMFROMWKB', 'GET_FORMAT',
 | |
| 		'GET_LOCK', 'GLENGTH', 'GREATEST', 'GROUP_CONCAT', 'GROUP_UNIQUE_USERS', 'HEX', 'HOUR', 'IF', 'IFNULL', 'INET_ATON', 'INET_NTOA', 'INSERT', 'INSTR',
 | |
| 		'INTERIORRINGN', 'INTERSECTION', 'INTERSECTS',	'INTERVAL', 'ISCLOSED', 'ISEMPTY', 'ISNULL', 'ISRING', 'ISSIMPLE', 'IS_FREE_LOCK', 'IS_USED_LOCK',
 | |
| 		'LAST_DAY', 'LAST_INSERT_ID', 'LCASE', 'LEAST', 'LEFT', 'LENGTH', 'LINEFROMTEXT', 'LINEFROMWKB', 'LINESTRING', 'LINESTRINGFROMTEXT', 'LINESTRINGFROMWKB',
 | |
| 		'LN', 'LOAD_FILE', 'LOCALTIME', 'LOCALTIMESTAMP', 'LOCATE', 'LOG', 'LOG10', 'LOG2', 'LOWER', 'LPAD', 'LTRIM', 'MAKEDATE', 'MAKETIME', 'MAKE_SET',
 | |
| 		'MASTER_POS_WAIT', 'MAX', 'MBRCONTAINS', 'MBRDISJOINT', 'MBREQUAL', 'MBRINTERSECTS', 'MBROVERLAPS', 'MBRTOUCHES', 'MBRWITHIN', 'MD5', 'MICROSECOND',
 | |
| 		'MID', 'MIN', 'MINUTE', 'MLINEFROMTEXT', 'MLINEFROMWKB', 'MOD', 'MONTH', 'MONTHNAME', 'MPOINTFROMTEXT', 'MPOINTFROMWKB', 'MPOLYFROMTEXT', 'MPOLYFROMWKB',
 | |
| 		'MULTILINESTRING', 'MULTILINESTRINGFROMTEXT', 'MULTILINESTRINGFROMWKB', 'MULTIPOINT',  'MULTIPOINTFROMTEXT', 'MULTIPOINTFROMWKB', 'MULTIPOLYGON',
 | |
| 		'MULTIPOLYGONFROMTEXT', 'MULTIPOLYGONFROMWKB', 'NAME_CONST', 'NULLIF', 'NUMGEOMETRIES', 'NUMINTERIORRINGS',	 'NUMPOINTS', 'OCT', 'OCTET_LENGTH',
 | |
| 		'OLD_PASSWORD', 'ORD', 'OVERLAPS', 'PASSWORD', 'PERIOD_ADD', 'PERIOD_DIFF', 'PI', 'POINT', 'POINTFROMTEXT', 'POINTFROMWKB', 'POINTN', 'POINTONSURFACE',
 | |
| 		'POLYFROMTEXT', 'POLYFROMWKB', 'POLYGON', 'POLYGONFROMTEXT', 'POLYGONFROMWKB', 'POSITION', 'POW', 'POWER', 'QUARTER', 'QUOTE', 'RADIANS', 'RAND',
 | |
| 		'RELATED', 'RELEASE_LOCK', 'REPEAT', 'REPLACE', 'REVERSE', 'RIGHT', 'ROUND', 'ROW_COUNT', 'RPAD', 'RTRIM', 'SCHEMA', 'SECOND', 'SEC_TO_TIME',
 | |
| 		'SESSION_USER', 'SHA', 'SHA1', 'SIGN', 'SIN', 'SLEEP', 'SOUNDEX', 'SPACE', 'SQRT', 'SRID', 'STARTPOINT', 'STD', 'STDDEV', 'STDDEV_POP', 'STDDEV_SAMP',
 | |
| 		'STRCMP', 'STR_TO_DATE', 'SUBDATE', 'SUBSTR', 'SUBSTRING', 'SUBSTRING_INDEX', 'SUBTIME', 'SUM', 'SYMDIFFERENCE', 'SYSDATE', 'SYSTEM_USER', 'TAN',
 | |
| 		'TIME', 'TIMEDIFF', 'TIMESTAMP', 'TIMESTAMPADD', 'TIMESTAMPDIFF', 'TIME_FORMAT', 'TIME_TO_SEC', 'TOUCHES', 'TO_DAYS', 'TRIM', 'TRUNCATE', 'UCASE',
 | |
| 		'UNCOMPRESS', 'UNCOMPRESSED_LENGTH', 'UNHEX', 'UNIQUE_USERS', 'UNIX_TIMESTAMP', 'UPDATEXML', 'UPPER', 'USER', 'UTC_DATE', 'UTC_TIME', 'UTC_TIMESTAMP',
 | |
| 		'UUID', 'VARIANCE', 'VAR_POP', 'VAR_SAMP', 'VERSION', 'WEEK', 'WEEKDAY', 'WEEKOFYEAR', 'WITHIN', 'X', 'Y', 'YEAR', 'YEARWEEK'
 | |
| 	);
 | |
| 
 | |
| 	// Punctuation that can be used as a boundary between other tokens
 | |
| 	protected static $boundaries = array(',', ';',':', ')', '(', '.', '=', '<', '>', '+', '-', '*', '/', '!', '^', '%', '|', '&', '#');
 | |
| 
 | |
| 	// For HTML syntax highlighting
 | |
| 	// Styles applied to different token types
 | |
| 	public static $quote_attributes = 'style="color: blue;"';
 | |
| 	public static $backtick_quote_attributes = 'style="color: purple;"';
 | |
| 	public static $reserved_attributes = 'style="font-weight:bold;"';
 | |
| 	public static $boundary_attributes = '';
 | |
| 	public static $number_attributes = 'style="color: green;"';
 | |
| 	public static $word_attributes = 'style="color: #333;"';
 | |
| 	public static $error_attributes = 'style="background-color: red;"';
 | |
| 	public static $comment_attributes = 'style="color: #aaa;"';
 | |
| 	public static $variable_attributes = 'style="color: orange;"';
 | |
| 	public static $pre_attributes = 'style="color: black; background-color: white;"';
 | |
| 
 | |
| 	// Boolean - whether or not the current environment is the CLI
 | |
| 	// This affects the type of syntax highlighting
 | |
| 	// If not defined, it will be determined automatically
 | |
| 	public static $cli;
 | |
| 
 | |
| 	// For CLI syntax highlighting
 | |
| 	public static $cli_quote = "\x1b[34;1m";
 | |
| 	public static $cli_backtick_quote = "\x1b[35;1m";
 | |
| 	public static $cli_reserved = "\x1b[37m";
 | |
| 	public static $cli_boundary = "";
 | |
| 	public static $cli_number = "\x1b[32;1m";
 | |
| 	public static $cli_word = "";
 | |
| 	public static $cli_error = "\x1b[31;1;7m";
 | |
| 	public static $cli_comment = "\x1b[30;1m";
 | |
| 	public static $cli_functions = "\x1b[37m";
 | |
| 	public static $cli_variable = "\x1b[36;1m";
 | |
| 
 | |
| 	// The tab character to use when formatting SQL
 | |
| 	public static $tab = '	';
 | |
| 
 | |
| 	// This flag tells us if queries need to be enclosed in <pre> tags
 | |
| 	public static $use_pre = true;
 | |
| 
 | |
| 	// This flag tells us if SqlFormatter has been initialized
 | |
| 	protected static $init;
 | |
| 
 | |
| 	// Regular expressions for tokenizing
 | |
| 	protected static $regex_boundaries;
 | |
| 	protected static $regex_reserved;
 | |
| 	protected static $regex_reserved_newline;
 | |
| 	protected static $regex_reserved_toplevel;
 | |
| 	protected static $regex_function;
 | |
| 
 | |
| 	// Cache variables
 | |
| 	// Only tokens shorter than this size will be cached.  Somewhere between 10 and 20 seems to work well for most cases.
 | |
| 	public static $max_cachekey_size = 15;
 | |
| 	protected static $token_cache = array();
 | |
| 	protected static $cache_hits = 0;
 | |
| 	protected static $cache_misses = 0;
 | |
| 
 | |
| 	/**
 | |
| 	 * Get stats about the token cache
 | |
| 	 * @return Array An array containing the keys 'hits', 'misses', 'entries', and 'size' in bytes
 | |
| 	 */
 | |
| 	public static function getCacheStats()
 | |
| 	{
 | |
| 		return array(
 | |
| 			'hits'=>self::$cache_hits,
 | |
| 			'misses'=>self::$cache_misses,
 | |
| 			'entries'=>count(self::$token_cache),
 | |
| 			'size'=>strlen(serialize(self::$token_cache))
 | |
| 		);
 | |
| 	}
 | |
| 
 | |
| 	/**
 | |
| 	 * Stuff that only needs to be done once.  Builds regular expressions and sorts the reserved words.
 | |
| 	 */
 | |
| 	protected static function init()
 | |
| 	{
 | |
| 		if (self::$init) return;
 | |
| 
 | |
| 		// Sort reserved word list from longest word to shortest, 3x faster than usort
 | |
| 		$reservedMap = array_combine(self::$reserved, array_map('strlen', self::$reserved));
 | |
| 		arsort($reservedMap);
 | |
| 		self::$reserved = array_keys($reservedMap);
 | |
| 
 | |
| 		// Set up regular expressions
 | |
| 		self::$regex_boundaries = '('.implode('|',array_map(array(__CLASS__, 'quote_regex'),self::$boundaries)).')';
 | |
| 		self::$regex_reserved = '('.implode('|',array_map(array(__CLASS__, 'quote_regex'),self::$reserved)).')';
 | |
| 		self::$regex_reserved_toplevel = str_replace(' ','\\s+','('.implode('|',array_map(array(__CLASS__, 'quote_regex'),self::$reserved_toplevel)).')');
 | |
| 		self::$regex_reserved_newline = str_replace(' ','\\s+','('.implode('|',array_map(array(__CLASS__, 'quote_regex'),self::$reserved_newline)).')');
 | |
| 
 | |
| 		self::$regex_function = '('.implode('|',array_map(array(__CLASS__, 'quote_regex'),self::$functions)).')';
 | |
| 
 | |
| 		self::$init = true;
 | |
| 	}
 | |
| 
 | |
| 	/**
 | |
| 	 * Return the next token and token type in a SQL string.
 | |
| 	 * Quoted strings, comments, reserved words, whitespace, and punctuation are all their own tokens.
 | |
| 	 *
 | |
| 	 * @param String $string   The SQL string
 | |
| 	 * @param array	 $previous The result of the previous getNextToken() call
 | |
| 	 *
 | |
| 	 * @return Array An associative array containing the type and value of the token.
 | |
| 	 */
 | |
| 	protected static function getNextToken($string, $previous = null)
 | |
| 	{
 | |
| 		// Whitespace
 | |
| 		if (preg_match('/^\s+/',$string,$matches)) {
 | |
| 			return array(
 | |
| 				self::TOKEN_VALUE => $matches[0],
 | |
| 				self::TOKEN_TYPE=>self::TOKEN_TYPE_WHITESPACE
 | |
| 			);
 | |
| 		}
 | |
| 
 | |
| 		// Comment
 | |
| 		if ($string[0] === '#' || (isset($string[1])&&($string[0]==='-'&&$string[1]==='-') || ($string[0]==='/'&&$string[1]==='*'))) {
 | |
| 			// Comment until end of line
 | |
| 			if ($string[0] === '-' || $string[0] === '#') {
 | |
| 				$last = strpos($string, "\n");
 | |
| 				$type = self::TOKEN_TYPE_COMMENT;
 | |
| 			} else { // Comment until closing comment tag
 | |
| 				$last = strpos($string, "*/", 2) + 2;
 | |
| 				$type = self::TOKEN_TYPE_BLOCK_COMMENT;
 | |
| 			}
 | |
| 
 | |
| 			if ($last === false) {
 | |
| 				$last = strlen($string);
 | |
| 			}
 | |
| 
 | |
| 			return array(
 | |
| 				self::TOKEN_VALUE => substr($string, 0, $last),
 | |
| 				self::TOKEN_TYPE  => $type
 | |
| 			);
 | |
| 		}
 | |
| 
 | |
| 		// Quoted String
 | |
| 		if ($string[0]==='"' || $string[0]==='\'' || $string[0]==='`' || $string[0]==='[') {
 | |
| 			$return = array(
 | |
| 				self::TOKEN_TYPE => (($string[0]==='`' || $string[0]==='[')? self::TOKEN_TYPE_BACKTICK_QUOTE : self::TOKEN_TYPE_QUOTE),
 | |
| 				self::TOKEN_VALUE => self::getQuotedString($string)
 | |
| 			);
 | |
| 
 | |
| 			return $return;
 | |
| 		}
 | |
| 
 | |
| 		// User-defined Variable
 | |
| 		if (($string[0] === '@' || $string[0] === ':') && isset($string[1])) {
 | |
| 			$ret = array(
 | |
| 				self::TOKEN_VALUE => null,
 | |
| 				self::TOKEN_TYPE => self::TOKEN_TYPE_VARIABLE
 | |
| 			);
 | |
| 
 | |
| 			// If the variable name is quoted
 | |
| 			if ($string[1]==='"' || $string[1]==='\'' || $string[1]==='`') {
 | |
| 				$ret[self::TOKEN_VALUE] = $string[0].self::getQuotedString(substr($string,1));
 | |
| 			}
 | |
| 			// Non-quoted variable name
 | |
| 			else {
 | |
| 				preg_match('/^('.$string[0].'[a-zA-Z0-9\._\$]+)/',$string,$matches);
 | |
| 				if ($matches) {
 | |
| 					$ret[self::TOKEN_VALUE] = $matches[1];
 | |
| 				}
 | |
| 			}
 | |
| 
 | |
| 			if($ret[self::TOKEN_VALUE] !== null) return $ret;
 | |
| 		}
 | |
| 
 | |
| 		// Number (decimal, binary, or hex)
 | |
| 		if (preg_match('/^([0-9]+(\.[0-9]+)?|0x[0-9a-fA-F]+|0b[01]+)($|\s|"\'`|'.self::$regex_boundaries.')/',$string,$matches)) {
 | |
| 			return array(
 | |
| 				self::TOKEN_VALUE => $matches[1],
 | |
| 				self::TOKEN_TYPE=>self::TOKEN_TYPE_NUMBER
 | |
| 			);
 | |
| 		}
 | |
| 
 | |
| 		// Boundary Character (punctuation and symbols)
 | |
| 		if (preg_match('/^('.self::$regex_boundaries.')/',$string,$matches)) {
 | |
| 			return array(
 | |
| 				self::TOKEN_VALUE => $matches[1],
 | |
| 				self::TOKEN_TYPE  => self::TOKEN_TYPE_BOUNDARY
 | |
| 			);
 | |
| 		}
 | |
| 
 | |
| 		// A reserved word cannot be preceded by a '.'
 | |
| 		// this makes it so in "mytable.from", "from" is not considered a reserved word
 | |
| 		if (!$previous || !isset($previous[self::TOKEN_VALUE]) || $previous[self::TOKEN_VALUE] !== '.') {
 | |
| 			$upper = strtoupper($string);
 | |
| 			// Top Level Reserved Word
 | |
| 			if (preg_match('/^('.self::$regex_reserved_toplevel.')($|\s|'.self::$regex_boundaries.')/', $upper,$matches)) {
 | |
| 				return array(
 | |
| 					self::TOKEN_TYPE=>self::TOKEN_TYPE_RESERVED_TOPLEVEL,
 | |
| 					self::TOKEN_VALUE=>substr($string,0,strlen($matches[1]))
 | |
| 				);
 | |
| 			}
 | |
| 			// Newline Reserved Word
 | |
| 			if (preg_match('/^('.self::$regex_reserved_newline.')($|\s|'.self::$regex_boundaries.')/', $upper,$matches)) {
 | |
| 				return array(
 | |
| 					self::TOKEN_TYPE=>self::TOKEN_TYPE_RESERVED_NEWLINE,
 | |
| 					self::TOKEN_VALUE=>substr($string,0,strlen($matches[1]))
 | |
| 				);
 | |
| 			}
 | |
| 			// Other Reserved Word
 | |
| 			if (preg_match('/^('.self::$regex_reserved.')($|\s|'.self::$regex_boundaries.')/', $upper,$matches)) {
 | |
| 				return array(
 | |
| 					self::TOKEN_TYPE=>self::TOKEN_TYPE_RESERVED,
 | |
| 					self::TOKEN_VALUE=>substr($string,0,strlen($matches[1]))
 | |
| 				);
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 		// A function must be suceeded by '('
 | |
| 		// this makes it so "count(" is considered a function, but "count" alone is not
 | |
| 		$upper = strtoupper($string);
 | |
| 		// function
 | |
| 		if (preg_match('/^('.self::$regex_function.'[(]|\s|[)])/', $upper,$matches)) {
 | |
| 			return array(
 | |
| 				self::TOKEN_TYPE=>self::TOKEN_TYPE_RESERVED,
 | |
| 				self::TOKEN_VALUE=>substr($string,0,strlen($matches[1])-1)
 | |
| 			);
 | |
| 		}
 | |
| 
 | |
| 		// Non reserved word
 | |
| 		preg_match('/^(.*?)($|\s|["\'`]|'.self::$regex_boundaries.')/',$string,$matches);
 | |
| 
 | |
| 		return array(
 | |
| 			self::TOKEN_VALUE => $matches[1],
 | |
| 			self::TOKEN_TYPE  => self::TOKEN_TYPE_WORD
 | |
| 		);
 | |
| 	}
 | |
| 
 | |
| 	protected static function getQuotedString($string)
 | |
| 	{
 | |
| 		$ret = null;
 | |
| 
 | |
| 		// This checks for the following patterns:
 | |
| 		// 1. backtick quoted string using `` to escape
 | |
| 		// 2. square bracket quoted string (SQL Server) using ]] to escape
 | |
| 		// 3. double quoted string using "" or \" to escape
 | |
| 		// 4. single quoted string using '' or \' to escape
 | |
| 		if ( preg_match('/^(((`[^`]*($|`))+)|((\[[^\]]*($|\]))(\][^\]]*($|\]))*)|(("[^"\\\\]*(?:\\\\.[^"\\\\]*)*("|$))+)|((\'[^\'\\\\]*(?:\\\\.[^\'\\\\]*)*(\'|$))+))/s', $string, $matches)) {
 | |
| 			$ret = $matches[1];
 | |
| 		}
 | |
| 
 | |
| 		return $ret;
 | |
| 	}
 | |
| 
 | |
| 	/**
 | |
| 	 * Takes a SQL string and breaks it into tokens.
 | |
| 	 * Each token is an associative array with type and value.
 | |
| 	 *
 | |
| 	 * @param String $string The SQL string
 | |
| 	 *
 | |
| 	 * @return Array An array of tokens.
 | |
| 	 */
 | |
| 	protected static function tokenize($string)
 | |
| 	{
 | |
| 		self::init();
 | |
| 
 | |
| 		$tokens = array();
 | |
| 
 | |
| 		// Used for debugging if there is an error while tokenizing the string
 | |
| 		$original_length = strlen($string);
 | |
| 
 | |
| 		// Used to make sure the string keeps shrinking on each iteration
 | |
| 		$old_string_len = strlen($string) + 1;
 | |
| 
 | |
| 		$token = null;
 | |
| 
 | |
| 		$current_length = strlen($string);
 | |
| 
 | |
| 		// Keep processing the string until it is empty
 | |
| 		while ($current_length) {
 | |
| 			// If the string stopped shrinking, there was a problem
 | |
| 			if ($old_string_len <= $current_length) {
 | |
| 				$tokens[] = array(
 | |
| 					self::TOKEN_VALUE=>$string,
 | |
| 					self::TOKEN_TYPE=>self::TOKEN_TYPE_ERROR
 | |
| 				);
 | |
| 
 | |
| 				return $tokens;
 | |
| 			}
 | |
| 			$old_string_len =  $current_length;
 | |
| 
 | |
| 			// Determine if we can use caching
 | |
| 			if ($current_length >= self::$max_cachekey_size) {
 | |
| 				$cacheKey = substr($string,0,self::$max_cachekey_size);
 | |
| 			} else {
 | |
| 				$cacheKey = false;
 | |
| 			}
 | |
| 
 | |
| 			// See if the token is already cached
 | |
| 			if ($cacheKey && isset(self::$token_cache[$cacheKey])) {
 | |
| 				// Retrieve from cache
 | |
| 				$token = self::$token_cache[$cacheKey];
 | |
| 				$token_length = strlen($token[self::TOKEN_VALUE]);
 | |
| 				self::$cache_hits++;
 | |
| 			} else {
 | |
| 				// Get the next token and the token type
 | |
| 				$token = self::getNextToken($string, $token);
 | |
| 				$token_length = strlen($token[self::TOKEN_VALUE]);
 | |
| 				self::$cache_misses++;
 | |
| 
 | |
| 				// If the token is shorter than the max length, store it in cache
 | |
| 				if ($cacheKey && $token_length < self::$max_cachekey_size) {
 | |
| 					self::$token_cache[$cacheKey] = $token;
 | |
| 				}
 | |
| 			}
 | |
| 
 | |
| 			$tokens[] = $token;
 | |
| 
 | |
| 			// Advance the string
 | |
| 			$string = substr($string, $token_length);
 | |
| 
 | |
| 			$current_length -= $token_length;
 | |
| 		}
 | |
| 
 | |
| 		return $tokens;
 | |
| 	}
 | |
| 
 | |
| 	/**
 | |
| 	 * Format the whitespace in a SQL string to make it easier to read.
 | |
| 	 *
 | |
| 	 * @param String  $string	 The SQL string
 | |
| 	 * @param boolean $highlight If true, syntax highlighting will also be performed
 | |
| 	 *
 | |
| 	 * @return String The SQL string with HTML styles and formatting wrapped in a <pre> tag
 | |
| 	 */
 | |
| 	public static function format($string, $highlight=true)
 | |
| 	{
 | |
| 		// This variable will be populated with formatted html
 | |
| 		$return = '';
 | |
| 
 | |
| 		// Use an actual tab while formatting and then switch out with self::$tab at the end
 | |
| 		$tab = "\t";
 | |
| 
 | |
| 		$indent_level = 0;
 | |
| 		$newline = false;
 | |
| 		$inline_parentheses = false;
 | |
| 		$increase_special_indent = false;
 | |
| 		$increase_block_indent = false;
 | |
| 		$indent_types = array();
 | |
| 		$added_newline = false;
 | |
| 		$inline_count = 0;
 | |
| 		$inline_indented = false;
 | |
| 		$clause_limit = false;
 | |
| 
 | |
| 		// Tokenize String
 | |
| 		$original_tokens = self::tokenize($string);
 | |
| 
 | |
| 		// Remove existing whitespace
 | |
| 		$tokens = array();
 | |
| 		foreach ($original_tokens as $i=>$token) {
 | |
| 			if ($token[self::TOKEN_TYPE] !== self::TOKEN_TYPE_WHITESPACE) {
 | |
| 				$token['i'] = $i;
 | |
| 				$tokens[] = $token;
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 		// Format token by token
 | |
| 		foreach ($tokens as $i=>$token) {
 | |
| 			// Get highlighted token if doing syntax highlighting
 | |
| 			if ($highlight) {
 | |
| 				$highlighted = self::highlightToken($token);
 | |
| 			} else { // If returning raw text
 | |
| 				$highlighted = $token[self::TOKEN_VALUE];
 | |
| 			}
 | |
| 
 | |
| 			// If we are increasing the special indent level now
 | |
| 			if ($increase_special_indent) {
 | |
| 				$indent_level++;
 | |
| 				$increase_special_indent = false;
 | |
| 				array_unshift($indent_types,'special');
 | |
| 			}
 | |
| 			// If we are increasing the block indent level now
 | |
| 			if ($increase_block_indent) {
 | |
| 				$indent_level++;
 | |
| 				$increase_block_indent = false;
 | |
| 				array_unshift($indent_types,'block');
 | |
| 			}
 | |
| 
 | |
| 			// If we need a new line before the token
 | |
| 			if ($newline) {
 | |
| 				$return .= "\n" . str_repeat($tab, $indent_level);
 | |
| 				$newline = false;
 | |
| 				$added_newline = true;
 | |
| 			} else {
 | |
| 				$added_newline = false;
 | |
| 			}
 | |
| 
 | |
| 			// Display comments directly where they appear in the source
 | |
| 			if ($token[self::TOKEN_TYPE] === self::TOKEN_TYPE_COMMENT || $token[self::TOKEN_TYPE] === self::TOKEN_TYPE_BLOCK_COMMENT) {
 | |
| 				if ($token[self::TOKEN_TYPE] === self::TOKEN_TYPE_BLOCK_COMMENT) {
 | |
| 					$indent = str_repeat($tab,$indent_level);
 | |
| 					$return .= "\n" . $indent;
 | |
| 					$highlighted = str_replace("\n","\n".$indent,$highlighted);
 | |
| 				}
 | |
| 
 | |
| 				$return .= $highlighted;
 | |
| 				$newline = true;
 | |
| 				continue;
 | |
| 			}
 | |
| 
 | |
| 			if ($inline_parentheses) {
 | |
| 				// End of inline parentheses
 | |
| 				if ($token[self::TOKEN_VALUE] === ')') {
 | |
| 					$return = rtrim($return,' ');
 | |
| 
 | |
| 					if ($inline_indented) {
 | |
| 						array_shift($indent_types);
 | |
| 						$indent_level --;
 | |
| 						$return .= "\n" . str_repeat($tab, $indent_level);
 | |
| 					}
 | |
| 
 | |
| 					$inline_parentheses = false;
 | |
| 
 | |
| 					$return .= $highlighted . ' ';
 | |
| 					continue;
 | |
| 				}
 | |
| 
 | |
| 				if ($token[self::TOKEN_VALUE] === ',') {
 | |
| 					if ($inline_count >= 30) {
 | |
| 						$inline_count = 0;
 | |
| 						$newline = true;
 | |
| 					}
 | |
| 				}
 | |
| 
 | |
| 				$inline_count += strlen($token[self::TOKEN_VALUE]);
 | |
| 			}
 | |
| 
 | |
| 			// Opening parentheses increase the block indent level and start a new line
 | |
| 			if ($token[self::TOKEN_VALUE] === '(') {
 | |
| 				// First check if this should be an inline parentheses block
 | |
| 				// Examples are "NOW()", "COUNT(*)", "int(10)", key(`somecolumn`), DECIMAL(7,2)
 | |
| 				// Allow up to 3 non-whitespace tokens inside inline parentheses
 | |
| 				$length = 0;
 | |
| 				for ($j=1;$j<=250;$j++) {
 | |
| 					// Reached end of string
 | |
| 					if (!isset($tokens[$i+$j])) break;
 | |
| 
 | |
| 					$next = $tokens[$i+$j];
 | |
| 
 | |
| 					// Reached closing parentheses, able to inline it
 | |
| 					if ($next[self::TOKEN_VALUE] === ')') {
 | |
| 						$inline_parentheses = true;
 | |
| 						$inline_count = 0;
 | |
| 						$inline_indented = false;
 | |
| 						break;
 | |
| 					}
 | |
| 
 | |
| 					// Reached an invalid token for inline parentheses
 | |
| 					if ($next[self::TOKEN_VALUE]===';' || $next[self::TOKEN_VALUE]==='(') {
 | |
| 						break;
 | |
| 					}
 | |
| 
 | |
| 					// Reached an invalid token type for inline parentheses
 | |
| 					if ($next[self::TOKEN_TYPE]===self::TOKEN_TYPE_RESERVED_TOPLEVEL || $next[self::TOKEN_TYPE]===self::TOKEN_TYPE_RESERVED_NEWLINE || $next[self::TOKEN_TYPE]===self::TOKEN_TYPE_COMMENT || $next[self::TOKEN_TYPE]===self::TOKEN_TYPE_BLOCK_COMMENT) {
 | |
| 						break;
 | |
| 					}
 | |
| 
 | |
| 					$length += strlen($next[self::TOKEN_VALUE]);
 | |
| 				}
 | |
| 
 | |
| 				if ($inline_parentheses && $length > 30) {
 | |
| 					$increase_block_indent = true;
 | |
| 					$inline_indented = true;
 | |
| 					$newline = true;
 | |
| 				}
 | |
| 
 | |
| 				// Take out the preceding space unless there was whitespace there in the original query
 | |
| 				if (isset($original_tokens[$token['i']-1]) && $original_tokens[$token['i']-1][self::TOKEN_TYPE] !== self::TOKEN_TYPE_WHITESPACE) {
 | |
| 					$return = rtrim($return,' ');
 | |
| 				}
 | |
| 
 | |
| 				if (!$inline_parentheses) {
 | |
| 					$increase_block_indent = true;
 | |
| 					// Add a newline after the parentheses
 | |
| 					$newline = true;
 | |
| 				}
 | |
| 
 | |
| 			}
 | |
| 
 | |
| 			// Closing parentheses decrease the block indent level
 | |
| 			elseif ($token[self::TOKEN_VALUE] === ')') {
 | |
| 				// Remove whitespace before the closing parentheses
 | |
| 				$return = rtrim($return,' ');
 | |
| 
 | |
| 				$indent_level--;
 | |
| 
 | |
| 				// Reset indent level
 | |
| 				while ($j=array_shift($indent_types)) {
 | |
| 					if ($j==='special') {
 | |
| 						$indent_level--;
 | |
| 					} else {
 | |
| 						break;
 | |
| 					}
 | |
| 				}
 | |
| 
 | |
| 				if ($indent_level < 0) {
 | |
| 					// This is an error
 | |
| 					$indent_level = 0;
 | |
| 
 | |
| 					if ($highlight) {
 | |
| 						$return .= "\n".self::highlightError($token[self::TOKEN_VALUE]);
 | |
| 						continue;
 | |
| 					}
 | |
| 				}
 | |
| 
 | |
| 				// Add a newline before the closing parentheses (if not already added)
 | |
| 				if (!$added_newline) {
 | |
| 					$return .= "\n" . str_repeat($tab, $indent_level);
 | |
| 				}
 | |
| 			}
 | |
| 
 | |
| 			// Top level reserved words start a new line and increase the special indent level
 | |
| 			elseif ($token[self::TOKEN_TYPE] === self::TOKEN_TYPE_RESERVED_TOPLEVEL) {
 | |
| 				$increase_special_indent = true;
 | |
| 
 | |
| 				// If the last indent type was 'special', decrease the special indent for this round
 | |
| 				reset($indent_types);
 | |
| 				if (current($indent_types)==='special') {
 | |
| 					$indent_level--;
 | |
| 					array_shift($indent_types);
 | |
| 				}
 | |
| 
 | |
| 				// Add a newline after the top level reserved word
 | |
| 				$newline = true;
 | |
| 				// Add a newline before the top level reserved word (if not already added)
 | |
| 				if (!$added_newline) {
 | |
| 					$return .= "\n" . str_repeat($tab, $indent_level);
 | |
| 				}
 | |
| 				// If we already added a newline, redo the indentation since it may be different now
 | |
| 				else {
 | |
| 					$return = rtrim($return,$tab).str_repeat($tab, $indent_level);
 | |
| 				}
 | |
| 
 | |
| 				// If the token may have extra whitespace
 | |
| 				if (strpos($token[self::TOKEN_VALUE],' ')!==false || strpos($token[self::TOKEN_VALUE],"\n")!==false || strpos($token[self::TOKEN_VALUE],"\t")!==false) {
 | |
| 					$highlighted = preg_replace('/\s+/',' ',$highlighted);
 | |
| 				}
 | |
| 				//if SQL 'LIMIT' clause, start variable to reset newline
 | |
| 				if ($token[self::TOKEN_VALUE] === 'LIMIT' && !$inline_parentheses) {
 | |
| 					$clause_limit = true;
 | |
| 				}
 | |
| 			}
 | |
| 
 | |
| 			// Checks if we are out of the limit clause
 | |
| 			elseif ($clause_limit && $token[self::TOKEN_VALUE] !== "," && $token[self::TOKEN_TYPE] !== self::TOKEN_TYPE_NUMBER && $token[self::TOKEN_TYPE] !== self::TOKEN_TYPE_WHITESPACE) {
 | |
| 				$clause_limit = false;
 | |
| 			}
 | |
| 
 | |
| 			// Commas start a new line (unless within inline parentheses or SQL 'LIMIT' clause)
 | |
| 			elseif ($token[self::TOKEN_VALUE] === ',' && !$inline_parentheses) {
 | |
| 				//If the previous TOKEN_VALUE is 'LIMIT', resets new line
 | |
| 				if ($clause_limit === true) {
 | |
| 					$newline = false;
 | |
| 					$clause_limit = false;
 | |
| 				}
 | |
| 				// All other cases of commas
 | |
| 				else {
 | |
| 					$newline = true;
 | |
| 				}
 | |
| 			}
 | |
| 
 | |
| 			// Newline reserved words start a new line
 | |
| 			elseif ($token[self::TOKEN_TYPE] === self::TOKEN_TYPE_RESERVED_NEWLINE) {
 | |
| 				// Add a newline before the reserved word (if not already added)
 | |
| 				if (!$added_newline) {
 | |
| 					$return .= "\n" . str_repeat($tab, $indent_level);
 | |
| 				}
 | |
| 
 | |
| 				// If the token may have extra whitespace
 | |
| 				if (strpos($token[self::TOKEN_VALUE],' ')!==false || strpos($token[self::TOKEN_VALUE],"\n")!==false || strpos($token[self::TOKEN_VALUE],"\t")!==false) {
 | |
| 					$highlighted = preg_replace('/\s+/',' ',$highlighted);
 | |
| 				}
 | |
| 			}
 | |
| 
 | |
| 			// Multiple boundary characters in a row should not have spaces between them (not including parentheses)
 | |
| 			elseif ($token[self::TOKEN_TYPE] === self::TOKEN_TYPE_BOUNDARY) {
 | |
| 				if (isset($tokens[$i-1]) && $tokens[$i-1][self::TOKEN_TYPE] === self::TOKEN_TYPE_BOUNDARY) {
 | |
| 					if (isset($original_tokens[$token['i']-1]) && $original_tokens[$token['i']-1][self::TOKEN_TYPE] !== self::TOKEN_TYPE_WHITESPACE) {
 | |
| 						$return = rtrim($return,' ');
 | |
| 					}
 | |
| 				}
 | |
| 			}
 | |
| 
 | |
| 			// If the token shouldn't have a space before it
 | |
| 			if ($token[self::TOKEN_VALUE] === '.' || $token[self::TOKEN_VALUE] === ',' || $token[self::TOKEN_VALUE] === ';') {
 | |
| 				$return = rtrim($return, ' ');
 | |
| 			}
 | |
| 
 | |
| 			$return .= $highlighted.' ';
 | |
| 
 | |
| 			// If the token shouldn't have a space after it
 | |
| 			if ($token[self::TOKEN_VALUE] === '(' || $token[self::TOKEN_VALUE] === '.') {
 | |
| 				$return = rtrim($return,' ');
 | |
| 			}
 | |
| 
 | |
| 			// If this is the "-" of a negative number, it shouldn't have a space after it
 | |
| 			if($token[self::TOKEN_VALUE] === '-' && isset($tokens[$i+1]) && $tokens[$i+1][self::TOKEN_TYPE] === self::TOKEN_TYPE_NUMBER && isset($tokens[$i-1])) {
 | |
| 				$prev = $tokens[$i-1][self::TOKEN_TYPE];
 | |
| 				if($prev !== self::TOKEN_TYPE_QUOTE && $prev !== self::TOKEN_TYPE_BACKTICK_QUOTE && $prev !== self::TOKEN_TYPE_WORD && $prev !== self::TOKEN_TYPE_NUMBER) {
 | |
| 					$return = rtrim($return,' ');
 | |
| 				}
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 		// If there are unmatched parentheses
 | |
| 		if ($highlight && array_search('block',$indent_types) !== false) {
 | |
| 			$return .= "\n".self::highlightError("WARNING: unclosed parentheses or section");
 | |
| 		}
 | |
| 
 | |
| 		// Replace tab characters with the configuration tab character
 | |
| 		$return = trim(str_replace("\t",self::$tab,$return));
 | |
| 
 | |
| 		if ($highlight) {
 | |
| 			$return = self::output($return);
 | |
| 		}
 | |
| 
 | |
| 		return $return;
 | |
| 	}
 | |
| 
 | |
| 	/**
 | |
| 	 * Add syntax highlighting to a SQL string
 | |
| 	 *
 | |
| 	 * @param String $string The SQL string
 | |
| 	 *
 | |
| 	 * @return String The SQL string with HTML styles applied
 | |
| 	 */
 | |
| 	public static function highlight($string)
 | |
| 	{
 | |
| 		$tokens = self::tokenize($string);
 | |
| 
 | |
| 		$return = '';
 | |
| 
 | |
| 		foreach ($tokens as $token) {
 | |
| 			$return .= self::highlightToken($token);
 | |
| 		}
 | |
| 
 | |
| 		return self::output($return);
 | |
| 	}
 | |
| 
 | |
| 	/**
 | |
| 	 * Split a SQL string into multiple queries.
 | |
| 	 * Uses ";" as a query delimiter.
 | |
| 	 *
 | |
| 	 * @param String $string The SQL string
 | |
| 	 *
 | |
| 	 * @return Array An array of individual query strings without trailing semicolons
 | |
| 	 */
 | |
| 	public static function splitQuery($string)
 | |
| 	{
 | |
| 		$queries = array();
 | |
| 		$current_query = '';
 | |
| 		$empty = true;
 | |
| 
 | |
| 		$tokens = self::tokenize($string);
 | |
| 
 | |
| 		foreach ($tokens as $token) {
 | |
| 			// If this is a query separator
 | |
| 			if ($token[self::TOKEN_VALUE] === ';') {
 | |
| 				if (!$empty) {
 | |
| 					$queries[] = $current_query.';';
 | |
| 				}
 | |
| 				$current_query = '';
 | |
| 				$empty = true;
 | |
| 				continue;
 | |
| 			}
 | |
| 
 | |
| 			// If this is a non-empty character
 | |
| 			if ($token[self::TOKEN_TYPE] !== self::TOKEN_TYPE_WHITESPACE && $token[self::TOKEN_TYPE] !== self::TOKEN_TYPE_COMMENT && $token[self::TOKEN_TYPE] !== self::TOKEN_TYPE_BLOCK_COMMENT) {
 | |
| 				$empty = false;
 | |
| 			}
 | |
| 
 | |
| 			$current_query .= $token[self::TOKEN_VALUE];
 | |
| 		}
 | |
| 
 | |
| 		if (!$empty) {
 | |
| 			$queries[] = trim($current_query);
 | |
| 		}
 | |
| 
 | |
| 		return $queries;
 | |
| 	}
 | |
| 
 | |
| 	/**
 | |
| 	 * Remove all comments from a SQL string
 | |
| 	 *
 | |
| 	 * @param String $string The SQL string
 | |
| 	 *
 | |
| 	 * @return String The SQL string without comments
 | |
| 	 */
 | |
| 	public static function removeComments($string)
 | |
| 	{
 | |
| 		$result = '';
 | |
| 
 | |
| 		$tokens = self::tokenize($string);
 | |
| 
 | |
| 		foreach ($tokens as $token) {
 | |
| 			// Skip comment tokens
 | |
| 			if ($token[self::TOKEN_TYPE] === self::TOKEN_TYPE_COMMENT || $token[self::TOKEN_TYPE] === self::TOKEN_TYPE_BLOCK_COMMENT) {
 | |
| 				continue;
 | |
| 			}
 | |
| 
 | |
| 			$result .= $token[self::TOKEN_VALUE];
 | |
| 		}
 | |
| 		$result = self::format( $result,false);
 | |
| 
 | |
| 		return $result;
 | |
| 	}
 | |
| 
 | |
| 	/**
 | |
| 	 * Compress a query by collapsing white space and removing comments
 | |
| 	 *
 | |
| 	 * @param String $string The SQL string
 | |
| 	 *
 | |
| 	 * @return String The SQL string without comments
 | |
| 	 */
 | |
| 	public static function compress($string)
 | |
| 	{
 | |
| 		$result = '';
 | |
| 
 | |
| 		$tokens = self::tokenize($string);
 | |
| 
 | |
| 		$whitespace = true;
 | |
| 		foreach ($tokens as $token) {
 | |
| 			// Skip comment tokens
 | |
| 			if ($token[self::TOKEN_TYPE] === self::TOKEN_TYPE_COMMENT || $token[self::TOKEN_TYPE] === self::TOKEN_TYPE_BLOCK_COMMENT) {
 | |
| 				continue;
 | |
| 			}
 | |
| 			// Remove extra whitespace in reserved words (e.g "OUTER	 JOIN" becomes "OUTER JOIN")
 | |
| 			elseif ($token[self::TOKEN_TYPE] === self::TOKEN_TYPE_RESERVED || $token[self::TOKEN_TYPE] === self::TOKEN_TYPE_RESERVED_NEWLINE || $token[self::TOKEN_TYPE] === self::TOKEN_TYPE_RESERVED_TOPLEVEL) {
 | |
| 				$token[self::TOKEN_VALUE] = preg_replace('/\s+/',' ',$token[self::TOKEN_VALUE]);
 | |
| 			}
 | |
| 
 | |
| 			if ($token[self::TOKEN_TYPE] === self::TOKEN_TYPE_WHITESPACE) {
 | |
| 				// If the last token was whitespace, don't add another one
 | |
| 				if ($whitespace) {
 | |
| 					continue;
 | |
| 				} else {
 | |
| 					$whitespace = true;
 | |
| 					// Convert all whitespace to a single space
 | |
| 					$token[self::TOKEN_VALUE] = ' ';
 | |
| 				}
 | |
| 			} else {
 | |
| 				$whitespace = false;
 | |
| 			}
 | |
| 
 | |
| 			$result .= $token[self::TOKEN_VALUE];
 | |
| 		}
 | |
| 
 | |
| 		return rtrim($result);
 | |
| 	}
 | |
| 
 | |
| 	/**
 | |
| 	 * Highlights a token depending on its type.
 | |
| 	 *
 | |
| 	 * @param Array $token An associative array containing type and value.
 | |
| 	 *
 | |
| 	 * @return String HTML code of the highlighted token.
 | |
| 	 */
 | |
| 	protected static function highlightToken($token)
 | |
| 	{
 | |
| 		$type = $token[self::TOKEN_TYPE];
 | |
| 
 | |
| 		if (self::is_cli()) {
 | |
| 			$token = $token[self::TOKEN_VALUE];
 | |
| 		} else {
 | |
| 			if (defined('ENT_IGNORE')) {
 | |
| 			  $token = htmlentities($token[self::TOKEN_VALUE],ENT_COMPAT | ENT_IGNORE ,'UTF-8');
 | |
| 			} else {
 | |
| 			  $token = htmlentities($token[self::TOKEN_VALUE],ENT_COMPAT,'UTF-8');
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 		if ($type===self::TOKEN_TYPE_BOUNDARY) {
 | |
| 			return self::highlightBoundary($token);
 | |
| 		} elseif ($type===self::TOKEN_TYPE_WORD) {
 | |
| 			return self::highlightWord($token);
 | |
| 		} elseif ($type===self::TOKEN_TYPE_BACKTICK_QUOTE) {
 | |
| 			return self::highlightBacktickQuote($token);
 | |
| 		} elseif ($type===self::TOKEN_TYPE_QUOTE) {
 | |
| 			return self::highlightQuote($token);
 | |
| 		} elseif ($type===self::TOKEN_TYPE_RESERVED) {
 | |
| 			return self::highlightReservedWord($token);
 | |
| 		} elseif ($type===self::TOKEN_TYPE_RESERVED_TOPLEVEL) {
 | |
| 			return self::highlightReservedWord($token);
 | |
| 		} elseif ($type===self::TOKEN_TYPE_RESERVED_NEWLINE) {
 | |
| 			return self::highlightReservedWord($token);
 | |
| 		} elseif ($type===self::TOKEN_TYPE_NUMBER) {
 | |
| 			return self::highlightNumber($token);
 | |
| 		} elseif ($type===self::TOKEN_TYPE_VARIABLE) {
 | |
| 			return self::highlightVariable($token);
 | |
| 		} elseif ($type===self::TOKEN_TYPE_COMMENT || $type===self::TOKEN_TYPE_BLOCK_COMMENT) {
 | |
| 			return self::highlightComment($token);
 | |
| 		}
 | |
| 
 | |
| 		return $token;
 | |
| 	}
 | |
| 
 | |
| 	/**
 | |
| 	 * Highlights a quoted string
 | |
| 	 *
 | |
| 	 * @param String $value The token's value
 | |
| 	 *
 | |
| 	 * @return String HTML code of the highlighted token.
 | |
| 	 */
 | |
| 	protected static function highlightQuote($value)
 | |
| 	{
 | |
| 		if (self::is_cli()) {
 | |
| 			return self::$cli_quote . $value . "\x1b[0m";
 | |
| 		} else {
 | |
| 			return '<span ' . self::$quote_attributes . '>' . $value . '</span>';
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	/**
 | |
| 	 * Highlights a backtick quoted string
 | |
| 	 *
 | |
| 	 * @param String $value The token's value
 | |
| 	 *
 | |
| 	 * @return String HTML code of the highlighted token.
 | |
| 	 */
 | |
| 	protected static function highlightBacktickQuote($value)
 | |
| 	{
 | |
| 		if (self::is_cli()) {
 | |
| 			return self::$cli_backtick_quote . $value . "\x1b[0m";
 | |
| 		} else {
 | |
| 			return '<span ' . self::$backtick_quote_attributes . '>' . $value . '</span>';
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	/**
 | |
| 	 * Highlights a reserved word
 | |
| 	 *
 | |
| 	 * @param String $value The token's value
 | |
| 	 *
 | |
| 	 * @return String HTML code of the highlighted token.
 | |
| 	 */
 | |
| 	protected static function highlightReservedWord($value)
 | |
| 	{
 | |
| 		if (self::is_cli()) {
 | |
| 			return self::$cli_reserved . $value . "\x1b[0m";
 | |
| 		} else {
 | |
| 			return '<span ' . self::$reserved_attributes . '>' . $value . '</span>';
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	/**
 | |
| 	 * Highlights a boundary token
 | |
| 	 *
 | |
| 	 * @param String $value The token's value
 | |
| 	 *
 | |
| 	 * @return String HTML code of the highlighted token.
 | |
| 	 */
 | |
| 	protected static function highlightBoundary($value)
 | |
| 	{
 | |
| 		if ($value==='(' || $value===')') return $value;
 | |
| 
 | |
| 		if (self::is_cli()) {
 | |
| 			return self::$cli_boundary . $value . "\x1b[0m";
 | |
| 		} else {
 | |
| 			return '<span ' . self::$boundary_attributes . '>' . $value . '</span>';
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	/**
 | |
| 	 * Highlights a number
 | |
| 	 *
 | |
| 	 * @param String $value The token's value
 | |
| 	 *
 | |
| 	 * @return String HTML code of the highlighted token.
 | |
| 	 */
 | |
| 	protected static function highlightNumber($value)
 | |
| 	{
 | |
| 		if (self::is_cli()) {
 | |
| 			return self::$cli_number . $value . "\x1b[0m";
 | |
| 		} else {
 | |
| 			return '<span ' . self::$number_attributes . '>' . $value . '</span>';
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	/**
 | |
| 	 * Highlights an error
 | |
| 	 *
 | |
| 	 * @param String $value The token's value
 | |
| 	 *
 | |
| 	 * @return String HTML code of the highlighted token.
 | |
| 	 */
 | |
| 	protected static function highlightError($value)
 | |
| 	{
 | |
| 		if (self::is_cli()) {
 | |
| 			return self::$cli_error . $value . "\x1b[0m";
 | |
| 		} else {
 | |
| 			return '<span ' . self::$error_attributes . '>' . $value . '</span>';
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	/**
 | |
| 	 * Highlights a comment
 | |
| 	 *
 | |
| 	 * @param String $value The token's value
 | |
| 	 *
 | |
| 	 * @return String HTML code of the highlighted token.
 | |
| 	 */
 | |
| 	protected static function highlightComment($value)
 | |
| 	{
 | |
| 		if (self::is_cli()) {
 | |
| 			return self::$cli_comment . $value . "\x1b[0m";
 | |
| 		} else {
 | |
| 			return '<span ' . self::$comment_attributes . '>' . $value . '</span>';
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	/**
 | |
| 	 * Highlights a word token
 | |
| 	 *
 | |
| 	 * @param String $value The token's value
 | |
| 	 *
 | |
| 	 * @return String HTML code of the highlighted token.
 | |
| 	 */
 | |
| 	protected static function highlightWord($value)
 | |
| 	{
 | |
| 		if (self::is_cli()) {
 | |
| 			return self::$cli_word . $value . "\x1b[0m";
 | |
| 		} else {
 | |
| 			return '<span ' . self::$word_attributes . '>' . $value . '</span>';
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	/**
 | |
| 	 * Highlights a variable token
 | |
| 	 *
 | |
| 	 * @param String $value The token's value
 | |
| 	 *
 | |
| 	 * @return String HTML code of the highlighted token.
 | |
| 	 */
 | |
| 	protected static function highlightVariable($value)
 | |
| 	{
 | |
| 		if (self::is_cli()) {
 | |
| 			return self::$cli_variable . $value . "\x1b[0m";
 | |
| 		} else {
 | |
| 			return '<span ' . self::$variable_attributes . '>' . $value . '</span>';
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	/**
 | |
| 	 * Helper function for building regular expressions for reserved words and boundary characters
 | |
| 	 *
 | |
| 	 * @param String $a The string to be quoted
 | |
| 	 *
 | |
| 	 * @return String The quoted string
 | |
| 	 */
 | |
| 	private static function quote_regex($a)
 | |
| 	{
 | |
| 		return preg_quote($a,'/');
 | |
| 	}
 | |
| 
 | |
| 	/**
 | |
| 	 * Helper function for building string output
 | |
| 	 *
 | |
| 	 * @param String $string The string to be quoted
 | |
| 	 *
 | |
| 	 * @return String The quoted string
 | |
| 	 */
 | |
| 	private static function output($string)
 | |
| 	{
 | |
| 		if (self::is_cli()) {
 | |
| 			return $string."\n";
 | |
| 		} else {
 | |
| 			$string=trim($string);
 | |
| 			if (!self::$use_pre) {
 | |
| 				return $string;
 | |
| 			}
 | |
| 
 | |
| 			return '<pre '.self::$pre_attributes.'>' . $string . '</pre>';
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	private static function is_cli()
 | |
| 	{
 | |
| 		if (isset(self::$cli)) return self::$cli;
 | |
| 		else return php_sapi_name() === 'cli';
 | |
| 	}
 | |
| 
 | |
| } |