<?php
/**
 * SQL Formatter is a collection of utilities for debugging SQL queries.
 * It includes methods for formatting, syntax highlighting, removing comments, etc.
 *
 * @package SqlFormatter
 * @author Jeremy Dorn <jeremy@jeremydorn.com>
 * @author Florin Patan <florinpatan@gmail.com>
 * @copyright 2013 Jeremy Dorn
 * @license http://opensource.org/licenses/MIT
 * @link http://github.com/jdorn/sql-formatter
 * @version 1.2.9
 */
  class SqlFormatter
  {
      // Token types produced by the tokenizer
      const TOKEN_TYPE_WHITESPACE = 0;
      const TOKEN_TYPE_WORD = 1;
      const TOKEN_TYPE_QUOTE = 2;
      const TOKEN_TYPE_BACKTICK_QUOTE = 3;
      const TOKEN_TYPE_RESERVED = 4;
      const TOKEN_TYPE_RESERVED_TOPLEVEL = 5;
      const TOKEN_TYPE_RESERVED_NEWLINE = 6;
      const TOKEN_TYPE_BOUNDARY = 7;
      const TOKEN_TYPE_COMMENT = 8;
      const TOKEN_TYPE_BLOCK_COMMENT = 9;
      const TOKEN_TYPE_NUMBER = 10;
      const TOKEN_TYPE_ERROR = 11;

      // Array keys of the two components of a token
      const TOKEN_TYPE = 0;
      const TOKEN_VALUE = 1;

      // Reserved words (for syntax highlighting)
      protected static $reserved = array(
          'ACCESSIBLE', 'ACTION', 'AGAINST', 'AGGREGATE', 'ALGORITHM', 'ALL', 'ALTER', 'ANALYSE', 'ANALYZE', 'AS', 'ASC',
          'AUTOCOMMIT', 'AUTO_INCREMENT', 'BACKUP', 'BEGIN', 'BETWEEN', 'BINLOG', 'BOTH', 'CASCADE', 'CASE', 'CHANGE', 'CHANGED',
          'CHARSET', 'CHECK', 'CHECKSUM', 'COLLATE', 'COLLATION', 'COLUMN', 'COLUMNS', 'COMMENT', 'COMMIT', 'COMMITTED', 'COMPRESSED', 'CONCURRENT',
          'CONSTRAINT', 'CONTAINS', 'CONVERT', 'COUNT', 'CREATE', 'CROSS', 'CURRENT_TIMESTAMP', 'DATABASE', 'DATABASES', 'DAY', 'DAY_HOUR', 'DAY_MINUTE',
          'DAY_SECOND', 'DEFAULT', 'DEFINER', 'DELAYED', 'DELETE', 'DESC', 'DESCRIBE', 'DETERMINISTIC', 'DISTINCT', 'DISTINCTROW', 'DIV',
          'DO', 'DUMPFILE', 'DUPLICATE', 'DYNAMIC', 'ELSE', 'ENCLOSED', 'END', 'ENGINE', 'ENGINE_TYPE', 'ENGINES', 'ESCAPE', 'ESCAPED', 'EVENTS', 'EXECUTE',
          'EXISTS', 'EXPLAIN', 'EXTENDED', 'FAST', 'FIELDS', 'FILE', 'FIRST', 'FIXED', 'FLUSH', 'FOR', 'FORCE', 'FOREIGN', 'FULL', 'FULLTEXT',
          'FUNCTION', 'GLOBAL', 'GRANT', 'GRANTS', 'GROUP_CONCAT', 'HEAP', 'HIGH_PRIORITY', 'HOSTS', 'HOUR', 'HOUR_MINUTE',
          'HOUR_SECOND', 'IDENTIFIED', 'IF', 'IFNULL', 'IGNORE', 'IN', 'INDEX', 'INDEXES', 'INFILE', 'INSERT', 'INSERT_ID', 'INSERT_METHOD', 'INTERVAL',
          'INTO', 'INVOKER', 'IS', 'ISOLATION', 'KEY', 'KEYS', 'KILL', 'LAST_INSERT_ID', 'LEADING', 'LEVEL', 'LIKE', 'LINEAR',
          'LINES', 'LOAD', 'LOCAL', 'LOCK', 'LOCKS', 'LOGS', 'LOW_PRIORITY', 'MARIA', 'MASTER', 'MASTER_CONNECT_RETRY', 'MASTER_HOST', 'MASTER_LOG_FILE',
          'MATCH', 'MEDIUM', 'MERGE', 'MINUTE', 'MINUTE_SECOND', 'MIN_ROWS', 'MODE', 'MODIFY',
          'MONTH', 'MRG_MYISAM', 'MYISAM', 'NAMES', 'NATURAL', 'NOT', 'NOW()', 'NULL', 'OFFSET', 'ON', 'OPEN', 'OPTIMIZE', 'OPTION', 'OPTIONALLY',
          'ON UPDATE', 'ON DELETE', 'OUTFILE', 'PACK_KEYS', 'PAGE', 'PARTIAL', 'PARTITION', 'PARTITIONS', 'PASSWORD', 'PRIMARY', 'PRIVILEGES', 'PROCEDURE',
          'PROCESS', 'PROCESSLIST', 'PURGE', 'QUICK', 'RANGE', 'READ', 'READ_ONLY',
          'READ_WRITE', 'REFERENCES', 'REGEXP', 'RELOAD', 'RENAME', 'REPAIR', 'REPEATABLE', 'REPLACE', 'REPLICATION', 'RESET', 'RESTORE', 'RESTRICT',
          'RETURN', 'RETURNS', 'REVOKE', 'RLIKE', 'ROLLBACK', 'ROW', 'ROWS', 'ROW_FORMAT', 'SECOND', 'SECURITY', 'SEPARATOR',
          'SERIALIZABLE', 'SESSION', 'SHARE', 'SHOW', 'SHUTDOWN', 'SLAVE', 'SONAME', 'SOUNDS', 'SQL',
          'SQL_CACHE', 'SQL_NO_CACHE', 'START', 'STARTING', 'STATUS', 'STOP', 'STORAGE',
          'STRAIGHT_JOIN', 'STRING', 'SUPER', 'TABLE', 'TABLES', 'TEMPORARY', 'TERMINATED', 'THEN', 'TO', 'TRAILING', 'TRANSACTIONAL',
          'TRUNCATE', 'TYPE', 'TYPES', 'UNCOMMITTED', 'UNIQUE', 'UNLOCK', 'UNSIGNED', 'USAGE', 'USE', 'USING', 'VARIABLES',
          'VIEW', 'WHEN', 'WITH', 'WORK', 'WRITE', 'YEAR_MONTH'
      );

      // For SQL formatting
      // These keywords will all be on their own line.
      // Order matters: the alternation is tried left to right, so 'UNION ALL' must precede 'UNION'.
      protected static $reserved_toplevel = array(
          'SELECT', 'FROM', 'WHERE', 'SET', 'ORDER BY', 'GROUP BY', 'LIMIT', 'DROP',
          'VALUES', 'UPDATE', 'HAVING', 'ADD', 'AFTER', 'ALTER TABLE', 'DELETE FROM', 'UNION ALL', 'UNION', 'EXCEPT', 'INTERSECT'
      );

      // These keywords start a new line but do not change the indentation
      protected static $reserved_newline = array(
          'LEFT JOIN', 'RIGHT JOIN', 'OUTER JOIN', 'INNER JOIN', 'JOIN', 'XOR', 'OR', 'AND'
      );

      // Punctuation that can be used as a boundary between other tokens
      protected static $boundaries = array(',', ';', ')', '(', '.', '=', '<', '>', '+', '-', '*', '/', '!', '^', '%', '|', '&', '#');

      // For HTML syntax highlighting
      // Attributes placed on the <span> wrapping each token type
      public static $quote_attributes = 'style="color: blue;"';
      public static $backtick_quote_attributes = 'style="color: purple;"';
      public static $reserved_attributes = 'style="font-weight:bold;"';
      public static $boundary_attributes = '';
      public static $number_attributes = 'style="color: green;"';
      public static $word_attributes = 'style="color: #333;"';
      public static $error_attributes = 'style="background-color: red;"';
      public static $comment_attributes = 'style="color: #aaa;"';
      public static $pre_attributes = 'style="color: black; background-color: white;"';

      // Boolean - whether or not the current environment is the CLI
      // This affects the type of syntax highlighting
      // If not defined, it will be determined automatically
      public static $cli;

      // For CLI syntax highlighting (ANSI escape sequences emitted before each token type)
      public static $cli_quote = "\x1b[34;1m";
      public static $cli_backtick_quote = "\x1b[35;1m";
      public static $cli_reserved = "\x1b[37m";
      public static $cli_boundary = "";
      public static $cli_number = "\x1b[32;1m";
      public static $cli_word = "";
      public static $cli_error = "\x1b[31;1;7m";
      public static $cli_comment = "\x1b[30;1m";

      // The tab character to use when formatting SQL
      public static $tab = ' ';

      // This flag tells us if queries need to be enclosed in <pre> tags
      public static $use_pre = true;

      // This flag tells us if SqlFormatter has been initialized
      protected static $init;

      // Regular expressions for tokenizing (built once by init())
      protected static $regex_boundaries;
      protected static $regex_reserved;
      protected static $regex_reserved_newline;
      protected static $regex_reserved_toplevel;

      // Cache variables
      // Only tokens shorter than this size will be cached. Somewhere between 10 and 20 seems to work well for most cases.
      public static $max_cachekey_size = 15;
      protected static $token_cache = array();
      protected static $cache_hits = 0;
      protected static $cache_misses = 0;

      /**
       * Get stats about the token cache
       *
       * @return Array An array containing the keys 'hits', 'misses', 'entries', and 'size' in bytes
       */
      public static function getCacheStats()
      {
          return array(
              'hits' => self::$cache_hits,
              'misses' => self::$cache_misses,
              'entries' => count(self::$token_cache),
              'size' => strlen(serialize(self::$token_cache))
          );
      }

      /**
       * Stuff that only needs to be done once. Builds regular expressions and sorts the reserved words.
       */
      protected static function init()
      {
          if (self::$init) {
              return;
          }

          // Longest first, so that in the alternation e.g. "DAY_SECOND" is tried before "DAY"
          usort(self::$reserved, array(__CLASS__, 'sortLength'));

          self::$regex_boundaries = self::buildAlternation(self::$boundaries);
          self::$regex_reserved = self::buildAlternation(self::$reserved);

          // Multi-word keywords may be separated by any amount of whitespace in the query
          self::$regex_reserved_toplevel = str_replace(' ', '\\s+', self::buildAlternation(self::$reserved_toplevel));
          self::$regex_reserved_newline = str_replace(' ', '\\s+', self::buildAlternation(self::$reserved_newline));

          self::$init = true;
      }

      /**
       * Build a parenthesised regex alternation from a list of literal strings.
       *
       * @param array $words The literal strings, in the order they should be tried
       *
       * @return String A regex fragment of the form "(a|b|c)" with every entry quoted
       */
      private static function buildAlternation(array $words)
      {
          return '(' . implode('|', array_map(array(__CLASS__, 'quote_regex'), $words)) . ')';
      }

      /**
       * Return the next token and token type in a SQL string.
       * Quoted strings, comments, reserved words, whitespace, and punctuation are all their own tokens.
       *
       * @param String $string   The SQL string (must not be empty)
       * @param array  $previous The result of the previous getNextToken() call
       *
       * @return Array An array holding the token type (key TOKEN_TYPE) and value (key TOKEN_VALUE).
       */
      protected static function getNextToken($string, $previous = null)
      {
          // Whitespace
          if (preg_match('/^\s+/', $string, $matches)) {
              return array(
                  self::TOKEN_VALUE => $matches[0],
                  self::TOKEN_TYPE => self::TOKEN_TYPE_WHITESPACE
              );
          }

          // Comment
          // The second character is read through isset() so that a lone trailing "-" or "/"
          // never touches an uninitialized string offset.
          $first = $string[0];
          $second = isset($string[1]) ? $string[1] : '';
          $is_line_comment = $first === '#' || ($first === '-' && $second === '-');
          $is_block_comment = $first === '/' && $second === '*';

          if ($is_line_comment || $is_block_comment) {
              if ($is_line_comment) {
                  // Comment until end of line (the newline itself is not part of the token)
                  $last = strpos($string, "\n");
                  $type = self::TOKEN_TYPE_COMMENT;
              } else {
                  // Comment until closing comment tag. The "+ 2" must only be applied to a real
                  // position: false + 2 would be 2 and hide the "not found" case.
                  $close = strpos($string, "*/", 2);
                  $last = ($close === false) ? false : $close + 2;
                  $type = self::TOKEN_TYPE_BLOCK_COMMENT;
              }

              // Unterminated comment: it runs to the end of the string
              if ($last === false) {
                  $last = strlen($string);
              }

              return array(
                  self::TOKEN_VALUE => substr($string, 0, $last),
                  self::TOKEN_TYPE => $type
              );
          }

          // Quoted String
          if ($first === '"' || $first === '\'' || $first === '`') {
              // Default to the rest of the string in case the pattern below does not match
              $return = array(
                  self::TOKEN_TYPE => ($first === '`' ? self::TOKEN_TYPE_BACKTICK_QUOTE : self::TOKEN_TYPE_QUOTE),
                  self::TOKEN_VALUE => $string
              );

              // This checks for the following patterns:
              // 1. backtick quoted string using `` to escape
              // 2. double quoted string using "" or \" to escape
              // 3. single quoted string using '' or \' to escape
              if (preg_match('/^(((`[^`]*($|`))+)|(("[^"\\\\]*(?:\\\\.[^"\\\\]*)*("|$))+)|((\'[^\'\\\\]*(?:\\\\.[^\'\\\\]*)*(\'|$))+))/s', $string, $matches)) {
                  $return[self::TOKEN_VALUE] = $matches[1];
              }

              return $return;
          }

          // Number
          // A number ends at the end of input, whitespace, a quote character or a boundary character.
          if (preg_match('/^([0-9]+(\.[0-9]+)?)($|\s|["\'`]|' . self::$regex_boundaries . ')/', $string, $matches)) {
              return array(
                  self::TOKEN_VALUE => $matches[1],
                  self::TOKEN_TYPE => self::TOKEN_TYPE_NUMBER
              );
          }

          // Boundary Character (punctuation and symbols)
          if (preg_match('/^(' . self::$regex_boundaries . ')/', $string, $matches)) {
              return array(
                  self::TOKEN_VALUE => $matches[1],
                  self::TOKEN_TYPE => self::TOKEN_TYPE_BOUNDARY
              );
          }

          // A reserved word cannot be preceded by a '.'
          // this makes it so in "mytable.from", "from" is not considered a reserved word
          if (!$previous || !isset($previous[self::TOKEN_VALUE]) || $previous[self::TOKEN_VALUE] !== '.') {
              // Keywords are matched case-insensitively, but the token keeps the original spelling
              $upper = strtoupper($string);
              // A keyword must be followed by end of input, whitespace or a boundary character
              $terminator = '($|\s|' . self::$regex_boundaries . ')';

              // Top Level Reserved Word
              if (preg_match('/^(' . self::$regex_reserved_toplevel . ')' . $terminator . '/', $upper, $matches)) {
                  return array(
                      self::TOKEN_TYPE => self::TOKEN_TYPE_RESERVED_TOPLEVEL,
                      self::TOKEN_VALUE => substr($string, 0, strlen($matches[1]))
                  );
              }

              // Newline Reserved Word
              if (preg_match('/^(' . self::$regex_reserved_newline . ')' . $terminator . '/', $upper, $matches)) {
                  return array(
                      self::TOKEN_TYPE => self::TOKEN_TYPE_RESERVED_NEWLINE,
                      self::TOKEN_VALUE => substr($string, 0, strlen($matches[1]))
                  );
              }

              // Other Reserved Word
              if (preg_match('/^(' . self::$regex_reserved . ')' . $terminator . '/', $upper, $matches)) {
                  return array(
                      self::TOKEN_TYPE => self::TOKEN_TYPE_RESERVED,
                      self::TOKEN_VALUE => substr($string, 0, strlen($matches[1]))
                  );
              }
          }

          // Non reserved word: everything up to the next whitespace, quote or boundary character
          preg_match('/^(.*?)($|\s|["\'`]|' . self::$regex_boundaries . ')/', $string, $matches);

          return array(
              self::TOKEN_VALUE => $matches[1],
              self::TOKEN_TYPE => self::TOKEN_TYPE_WORD
          );
      }

      /**
       * Takes a SQL string and breaks it into tokens.
       * Each token is an array with a type (key TOKEN_TYPE) and a value (key TOKEN_VALUE).
       *
       * If a step fails to consume any input, the remainder of the string is appended
       * as a single TOKEN_TYPE_ERROR token and tokenizing stops.
       *
       * @param String $string The SQL string
       *
       * @return Array An array of tokens.
       */
      protected static function tokenize($string)
      {
          self::init();

          $tokens = array();

          // Used to make sure the string keeps shrinking on each iteration
          $old_string_len = strlen($string) + 1;

          $token = null;

          $current_length = strlen($string);

          // Keep processing the string until it is empty
          while ($current_length) {
              // If the string stopped shrinking, there was a problem
              if ($old_string_len <= $current_length) {
                  $tokens[] = array(
                      self::TOKEN_VALUE => $string,
                      self::TOKEN_TYPE => self::TOKEN_TYPE_ERROR
                  );

                  return $tokens;
              }
              $old_string_len = $current_length;

              // The cache is keyed on the upcoming characters only. Directly after a '.' the same
              // characters tokenize differently (reserved words become plain words), so the cache
              // is neither read nor written in that position.
              $after_dot = $token !== null && $token[self::TOKEN_VALUE] === '.';

              // Determine if we can use caching
              if ($current_length >= self::$max_cachekey_size && !$after_dot) {
                  $cacheKey = substr($string, 0, self::$max_cachekey_size);
              } else {
                  $cacheKey = false;
              }

              if ($cacheKey && isset(self::$token_cache[$cacheKey])) {
                  // Retrieve from cache
                  $token = self::$token_cache[$cacheKey];
                  $token_length = strlen($token[self::TOKEN_VALUE]);
                  self::$cache_hits++;
              } else {
                  // Get the next token and the token type
                  $token = self::getNextToken($string, $token);
                  $token_length = strlen($token[self::TOKEN_VALUE]);
                  self::$cache_misses++;

                  // If the token is shorter than the max length, store it in cache
                  if ($cacheKey && $token_length < self::$max_cachekey_size) {
                      self::$token_cache[$cacheKey] = $token;
                  }
              }

              $tokens[] = $token;

              // Advance the string
              $string = substr($string, $token_length);
              $current_length -= $token_length;
          }

          return $tokens;
      }

      /**
       * Format the whitespace in a SQL string to make it easier to read.
       *
       * @param String  $string    The SQL string
       * @param boolean $highlight If true, syntax highlighting will also be performed
       *
       * @return String The re-indented SQL. With $highlight it is syntax highlighted and passed
       *                through output(); without it, plain text is returned.
       */
      public static function format($string, $highlight = true)
      {
          // This variable will be populated with formatted html
          $return = '';

          // Use an actual tab while formatting and then switch out with self::$tab at the end
          $tab = "\t";

          $indent_level = 0;
          $newline = false;
          $inline_parentheses = false;
          $increase_special_indent = false;
          $increase_block_indent = false;
          // Stack of open indents ('special' or 'block'), most recent first
          $indent_types = array();
          $added_newline = false;
          $inline_count = 0;
          $inline_indented = false;

          // Tokenize String
          $original_tokens = self::tokenize($string);

          // Remove existing whitespace, remembering each token's position in the original list
          $tokens = array();
          foreach ($original_tokens as $i => $token) {
              if ($token[self::TOKEN_TYPE] !== self::TOKEN_TYPE_WHITESPACE) {
                  $token['i'] = $i;
                  $tokens[] = $token;
              }
          }

          // Format token by token
          foreach ($tokens as $i => $token) {
              // Get highlighted token if doing syntax highlighting
              if ($highlight) {
                  $highlighted = self::highlightToken($token);
              } else { // If returning raw text
                  $highlighted = $token[self::TOKEN_VALUE];
              }

              // If we are increasing the special indent level now
              if ($increase_special_indent) {
                  $indent_level++;
                  $increase_special_indent = false;
                  array_unshift($indent_types, 'special');
              }

              // If we are increasing the block indent level now
              if ($increase_block_indent) {
                  $indent_level++;
                  $increase_block_indent = false;
                  array_unshift($indent_types, 'block');
              }

              // If we need a new line before the token
              if ($newline) {
                  $return .= "\n" . str_repeat($tab, $indent_level);
                  $newline = false;
                  $added_newline = true;
              } else {
                  $added_newline = false;
              }

              // Display comments directly where they appear in the source
              if ($token[self::TOKEN_TYPE] === self::TOKEN_TYPE_COMMENT || $token[self::TOKEN_TYPE] === self::TOKEN_TYPE_BLOCK_COMMENT) {
                  if ($token[self::TOKEN_TYPE] === self::TOKEN_TYPE_BLOCK_COMMENT) {
                      // Block comments start on their own line and every line of them is indented
                      $indent = str_repeat($tab, $indent_level);
                      $return .= "\n" . $indent;
                      $highlighted = str_replace("\n", "\n" . $indent, $highlighted);
                  }

                  $return .= $highlighted;
                  $newline = true;
                  continue;
              }

              if ($inline_parentheses) {
                  // End of inline parentheses
                  if ($token[self::TOKEN_VALUE] === ')') {
                      $return = rtrim($return, ' ');

                      if ($inline_indented) {
                          array_shift($indent_types);
                          $indent_level--;
                          $return .= "\n" . str_repeat($tab, $indent_level);
                      }

                      $inline_parentheses = false;

                      $return .= $highlighted . ' ';
                      continue;
                  }

                  // Inside inline parentheses a comma only wraps once enough text has accumulated
                  if ($token[self::TOKEN_VALUE] === ',') {
                      if ($inline_count >= 30) {
                          $inline_count = 0;
                          $newline = true;
                      }
                  }

                  $inline_count += strlen($token[self::TOKEN_VALUE]);
              }

              // Opening parentheses increase the block indent level and start a new line
              if ($token[self::TOKEN_VALUE] === '(') {
                  // First check if this should be an inline parentheses block
                  // Examples are "NOW()", "COUNT(*)", "int(10)", key(`somecolumn`), DECIMAL(7,2)
                  // Look ahead at most 250 non-whitespace tokens for the matching ')'
                  $length = 0;
                  for ($j = 1; $j <= 250; $j++) {
                      // Reached end of string
                      if (!isset($tokens[$i + $j])) {
                          break;
                      }

                      $next = $tokens[$i + $j];

                      // Reached closing parentheses, able to inline it
                      if ($next[self::TOKEN_VALUE] === ')') {
                          $inline_parentheses = true;
                          $inline_count = 0;
                          $inline_indented = false;
                          break;
                      }

                      // Reached an invalid token for inline parentheses
                      if ($next[self::TOKEN_VALUE] === ';' || $next[self::TOKEN_VALUE] === '(') {
                          break;
                      }

                      // Reached an invalid token type for inline parentheses
                      if ($next[self::TOKEN_TYPE] === self::TOKEN_TYPE_RESERVED_TOPLEVEL || $next[self::TOKEN_TYPE] === self::TOKEN_TYPE_RESERVED_NEWLINE || $next[self::TOKEN_TYPE] === self::TOKEN_TYPE_COMMENT || $next[self::TOKEN_TYPE] === self::TOKEN_TYPE_BLOCK_COMMENT) {
                          break;
                      }

                      $length += strlen($next[self::TOKEN_VALUE]);
                  }

                  // Long inline content is still kept as one group but moved onto an indented line
                  if ($inline_parentheses && $length > 30) {
                      $increase_block_indent = true;
                      $inline_indented = true;
                      $newline = true;
                  }

                  // Take out the preceding space unless there was whitespace there in the original query
                  if (isset($original_tokens[$token['i'] - 1]) && $original_tokens[$token['i'] - 1][self::TOKEN_TYPE] !== self::TOKEN_TYPE_WHITESPACE) {
                      $return = rtrim($return, ' ');
                  }

                  if (!$inline_parentheses) {
                      $increase_block_indent = true;
                      // Add a newline after the parentheses
                      $newline = true;
                  }
              }
              // Closing parentheses decrease the block indent level
              elseif ($token[self::TOKEN_VALUE] === ')') {
                  // Remove whitespace before the closing parentheses
                  $return = rtrim($return, ' ');

                  $indent_level--;

                  // Reset indent level: drop every 'special' indent opened inside this block,
                  // then the 'block' entry itself
                  while ($indent_type = array_shift($indent_types)) {
                      if ($indent_type === 'special') {
                          $indent_level--;
                      } else {
                          break;
                      }
                  }

                  if ($indent_level < 0) {
                      // This is an error: a ')' without a matching '('
                      $indent_level = 0;

                      if ($highlight) {
                          $return .= "\n" . self::highlightError($token[self::TOKEN_VALUE]);
                          continue;
                      }
                  }

                  // Add a newline before the closing parentheses (if not already added)
                  if (!$added_newline) {
                      $return .= "\n" . str_repeat($tab, $indent_level);
                  }
              }
              // Commas start a new line (unless within inline parentheses)
              elseif ($token[self::TOKEN_VALUE] === ',' && !$inline_parentheses) {
                  $newline = true;
              }
              // Top level reserved words start a new line and increase the special indent level
              elseif ($token[self::TOKEN_TYPE] === self::TOKEN_TYPE_RESERVED_TOPLEVEL) {
                  $increase_special_indent = true;

                  // If the last indent type was 'special', decrease the special indent for this round
                  reset($indent_types);
                  if (current($indent_types) === 'special') {
                      $indent_level--;
                      array_shift($indent_types);
                  }

                  // Add a newline after the top level reserved word
                  $newline = true;

                  // Add a newline before the top level reserved word (if not already added)
                  if (!$added_newline) {
                      $return .= "\n" . str_repeat($tab, $indent_level);
                  }
                  // If we already added a newline, redo the indentation since it may be different now
                  else {
                      $return = rtrim($return, $tab) . str_repeat($tab, $indent_level);
                  }

                  // If the token may have extra whitespace
                  if (strpos($token[self::TOKEN_VALUE], ' ') !== false || strpos($token[self::TOKEN_VALUE], "\n") !== false || strpos($token[self::TOKEN_VALUE], "\t") !== false) {
                      $highlighted = preg_replace('/\s+/', ' ', $highlighted);
                  }
              }
              // Newline reserved words start a new line
              elseif ($token[self::TOKEN_TYPE] === self::TOKEN_TYPE_RESERVED_NEWLINE) {
                  // Add a newline before the reserved word (if not already added)
                  if (!$added_newline) {
                      $return .= "\n" . str_repeat($tab, $indent_level);
                  }

                  // If the token may have extra whitespace
                  if (strpos($token[self::TOKEN_VALUE], ' ') !== false || strpos($token[self::TOKEN_VALUE], "\n") !== false || strpos($token[self::TOKEN_VALUE], "\t") !== false) {
                      $highlighted = preg_replace('/\s+/', ' ', $highlighted);
                  }
              }
              // Multiple boundary characters in a row should not have spaces between them (not including parentheses)
              elseif ($token[self::TOKEN_TYPE] === self::TOKEN_TYPE_BOUNDARY) {
                  // The very first token has no predecessor, hence the isset() guard
                  if (isset($tokens[$i - 1]) && $tokens[$i - 1][self::TOKEN_TYPE] === self::TOKEN_TYPE_BOUNDARY) {
                      if (isset($original_tokens[$token['i'] - 1]) && $original_tokens[$token['i'] - 1][self::TOKEN_TYPE] !== self::TOKEN_TYPE_WHITESPACE) {
                          $return = rtrim($return, ' ');
                      }
                  }
              }

              // If the token shouldn't have a space before it
              if ($token[self::TOKEN_VALUE] === '.' || $token[self::TOKEN_VALUE] === ',' || $token[self::TOKEN_VALUE] === ';') {
                  $return = rtrim($return, ' ');
              }

              $return .= $highlighted . ' ';

              // If the token shouldn't have a space after it
              if ($token[self::TOKEN_VALUE] === '(' || $token[self::TOKEN_VALUE] === '.') {
                  $return = rtrim($return, ' ');
              }
          }

          // If there are unmatched parentheses
          if ($highlight && array_search('block', $indent_types) !== false) {
              $return .= "\n" . self::highlightError("WARNING: unclosed parentheses or section");
          }

          // Replace tab characters with the configuration tab character
          $return = trim(str_replace("\t", self::$tab, $return));

          if ($highlight) {
              $return = self::output($return);
          }

          return $return;
      }

      /**
       * Add syntax highlighting to a SQL string
       *
       * @param String $string The SQL string
       *
       * @return String The SQL string with highlighting applied (HTML, or ANSI codes on the CLI)
       */
      public static function highlight($string)
      {
          $return = '';

          foreach (self::tokenize($string) as $token) {
              $return .= self::highlightToken($token);
          }

          return self::output($return);
      }

      /**
       * Split a SQL string into multiple queries.
       * Uses ";" as a query delimiter.
       *
       * Queries consisting only of whitespace and comments are dropped. A query that was
       * terminated by ";" is returned with that semicolon and with its surrounding whitespace
       * untouched; a final query without a terminator is returned trimmed.
       *
       * @param String $string The SQL string
       *
       * @return Array An array of individual query strings
       */
      public static function splitQuery($string)
      {
          $queries = array();
          $current_query = '';
          $empty = true;

          foreach (self::tokenize($string) as $token) {
              // If this is a query separator
              if ($token[self::TOKEN_VALUE] === ';') {
                  if (!$empty) {
                      $queries[] = $current_query . ';';
                  }
                  $current_query = '';
                  $empty = true;
                  continue;
              }

              // Anything other than whitespace or a comment makes the query non-empty
              if ($token[self::TOKEN_TYPE] !== self::TOKEN_TYPE_WHITESPACE && $token[self::TOKEN_TYPE] !== self::TOKEN_TYPE_COMMENT && $token[self::TOKEN_TYPE] !== self::TOKEN_TYPE_BLOCK_COMMENT) {
                  $empty = false;
              }

              $current_query .= $token[self::TOKEN_VALUE];
          }

          if (!$empty) {
              $queries[] = trim($current_query);
          }

          return $queries;
      }

      /**
       * Remove all comments from a SQL string
       *
       * @param String $string The SQL string
       *
       * @return String The SQL string without comments (all other text is kept as-is)
       */
      public static function removeComments($string)
      {
          $result = '';

          foreach (self::tokenize($string) as $token) {
              // Skip comment tokens
              if ($token[self::TOKEN_TYPE] === self::TOKEN_TYPE_COMMENT || $token[self::TOKEN_TYPE] === self::TOKEN_TYPE_BLOCK_COMMENT) {
                  continue;
              }

              $result .= $token[self::TOKEN_VALUE];
          }

          return $result;
      }

      /**
       * Highlights a token depending on its type.
       * Outside the CLI the token value is HTML-escaped first. Token types without a
       * dedicated style (whitespace, error) are returned without any wrapping.
       *
       * @param Array $token An array containing type and value.
       *
       * @return String HTML (or ANSI escaped) code of the highlighted token.
       */
      protected static function highlightToken($token)
      {
          $type = $token[self::TOKEN_TYPE];

          if (self::is_cli()) {
              $value = $token[self::TOKEN_VALUE];
          } else {
              $value = htmlentities($token[self::TOKEN_VALUE], ENT_COMPAT, 'UTF-8');
          }

          switch ($type) {
              case self::TOKEN_TYPE_BOUNDARY:
                  return self::highlightBoundary($value);
              case self::TOKEN_TYPE_WORD:
                  return self::highlightWord($value);
              case self::TOKEN_TYPE_BACKTICK_QUOTE:
                  return self::highlightBacktickQuote($value);
              case self::TOKEN_TYPE_QUOTE:
                  return self::highlightQuote($value);
              case self::TOKEN_TYPE_RESERVED:
              case self::TOKEN_TYPE_RESERVED_TOPLEVEL:
              case self::TOKEN_TYPE_RESERVED_NEWLINE:
                  return self::highlightReservedWord($value);
              case self::TOKEN_TYPE_NUMBER:
                  return self::highlightNumber($value);
              case self::TOKEN_TYPE_COMMENT:
              case self::TOKEN_TYPE_BLOCK_COMMENT:
                  return self::highlightComment($value);
              default:
                  return $value;
          }
      }

      /**
       * Wrap a token value in the markup for the current environment.
       *
       * @param String $value           The (already escaped) token value
       * @param String $cli_code        ANSI escape sequence emitted before the value on the CLI
       * @param String $html_attributes Attributes for the wrapping <span> outside the CLI
       *
       * @return String The value followed by an ANSI reset on the CLI, otherwise wrapped in a <span>
       */
      private static function wrap($value, $cli_code, $html_attributes)
      {
          if (self::is_cli()) {
              return $cli_code . $value . "\x1b[0m";
          }

          return '<span ' . $html_attributes . '>' . $value . '</span>';
      }

      /**
       * Highlights a quoted string
       *
       * @param String $value The token's value
       *
       * @return String HTML code of the highlighted token.
       */
      protected static function highlightQuote($value)
      {
          return self::wrap($value, self::$cli_quote, self::$quote_attributes);
      }

      /**
       * Highlights a backtick quoted string
       *
       * @param String $value The token's value
       *
       * @return String HTML code of the highlighted token.
       */
      protected static function highlightBacktickQuote($value)
      {
          return self::wrap($value, self::$cli_backtick_quote, self::$backtick_quote_attributes);
      }

      /**
       * Highlights a reserved word
       *
       * @param String $value The token's value
       *
       * @return String HTML code of the highlighted token.
       */
      protected static function highlightReservedWord($value)
      {
          return self::wrap($value, self::$cli_reserved, self::$reserved_attributes);
      }

      /**
       * Highlights a boundary token
       * Parentheses are returned as-is, without any markup.
       *
       * @param String $value The token's value
       *
       * @return String HTML code of the highlighted token.
       */
      protected static function highlightBoundary($value)
      {
          if ($value === '(' || $value === ')') {
              return $value;
          }

          return self::wrap($value, self::$cli_boundary, self::$boundary_attributes);
      }

      /**
       * Highlights a number
       *
       * @param String $value The token's value
       *
       * @return String HTML code of the highlighted token.
       */
      protected static function highlightNumber($value)
      {
          return self::wrap($value, self::$cli_number, self::$number_attributes);
      }

      /**
       * Highlights an error
       *
       * @param String $value The token's value
       *
       * @return String HTML code of the highlighted token.
       */
      protected static function highlightError($value)
      {
          return self::wrap($value, self::$cli_error, self::$error_attributes);
      }

      /**
       * Highlights a comment
       *
       * @param String $value The token's value
       *
       * @return String HTML code of the highlighted token.
       */
      protected static function highlightComment($value)
      {
          return self::wrap($value, self::$cli_comment, self::$comment_attributes);
      }

      /**
       * Highlights a word token
       *
       * @param String $value The token's value
       *
       * @return String HTML code of the highlighted token.
       */
      protected static function highlightWord($value)
      {
          return self::wrap($value, self::$cli_word, self::$word_attributes);
      }

      /**
       * Helper function for sorting the list of reserved words by length (longest first)
       *
       * @param String $a The first string
       * @param String $b The second string
       *
       * @return int The comparison of the string lengths
       */
      private static function sortLength($a, $b)
      {
          return strlen($b) - strlen($a);
      }

      /**
       * Helper function for building regular expressions for reserved words and boundary characters
       *
       * @param String $a The string to be quoted
       *
       * @return String The string quoted for use inside a "/"-delimited regular expression
       */
      private static function quote_regex($a)
      {
          return preg_quote($a, '/');
      }

      /**
       * Helper function for building string output
       *
       * On the CLI a trailing newline is appended. Otherwise the string is trimmed and,
       * unless self::$use_pre is disabled, wrapped in a <pre> tag.
       *
       * @param String $string The highlighted string
       *
       * @return String The string prepared for the current environment
       */
      private static function output($string)
      {
          if (self::is_cli()) {
              return $string . "\n";
          }

          $string = trim($string);
          if (!self::$use_pre) {
              return $string;
          }

          return '<pre ' . self::$pre_attributes . '>' . $string . '</pre>';
      }

      /**
       * Whether highlighting should target the CLI.
       *
       * @return boolean self::$cli when it has been set, otherwise whether the PHP SAPI is 'cli'
       */
      private static function is_cli()
      {
          if (isset(self::$cli)) {
              return self::$cli;
          }

          return php_sapi_name() === 'cli';
      }
  }