hadoopbootstrap.class.php 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127
  1. <?php
  2. /*
  3. * Copyright 2010-2011 Amazon.com, Inc. or its affiliates. All Rights Reserved.
  4. *
  5. * Licensed under the Apache License, Version 2.0 (the "License").
  6. * You may not use this file except in compliance with the License.
  7. * A copy of the License is located at
  8. *
  9. * http://aws.amazon.com/apache2.0
  10. *
  11. * or in the "license" file accompanying this file. This file is distributed
  12. * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
  13. * express or implied. See the License for the specific language governing
  14. * permissions and limitations under the License.
  15. */
  16. /*%******************************************************************************************%*/
  17. // CLASS
  18. /**
  19. * Contains a set of pre-built Amazon EMR Hadoop Bootstrap Actions.
  20. *
  21. * @version 2011.05.03
  22. * @license See the included NOTICE.md file for more information.
  23. * @copyright See the included NOTICE.md file for more information.
  24. * @link http://aws.amazon.com/php/ PHP Developer Center
  25. * @link http://hadoop.apache.org Apache Hadoop
  26. */
  class CFHadoopBootstrap extends CFHadoopBase
  {
      // Config file types. configure() prefixes the chosen letter with '-' and passes it
      // to the configure-hadoop script as the flag selecting which file to merge into.
      const CONFIG_SITE = 'S';
      const CONFIG_DEFAULT = 'D';
      const CONFIG_CORE = 'C';
      const CONFIG_HDFS = 'H';
      const CONFIG_MAPREDUCE = 'M';

      // Daemon types. daemon() embeds the chosen name in the option it builds,
      // e.g. '--namenode-heap-size=...'.
      const DAEMON_NAME_NODE = 'namenode';
      const DAEMON_DATA_NODE = 'datanode';
      const DAEMON_JOB_TRACKER = 'jobtracker';
      const DAEMON_TASK_TRACKER = 'tasktracker';
      const DAEMON_CLIENT = 'client';

      /**
       * Create a new run-if bootstrap action which lets you conditionally run bootstrap actions.
       *
       * The condition is sent as the first script argument, followed by the entries of <code>$args</code>.
       *
       * @param string $condition (Required) The condition to evaluate. If <code>true</code>, the bootstrap action executes.
       * @param array $args (Optional) An indexed array of arguments to pass to the script. A single scalar value is treated as a one-element array.
       * @return array A configuration set to be provided when running a job flow.
       */
      public static function run_if($condition, $args = null)
      {
          // Only null, false, '' and array() mean "no arguments". A plain truthiness test
          // would also throw away legitimate scalar arguments such as '0' or 0.
          if ($args === null || $args === false || $args === '')
          {
              $args = array();
          }
          elseif (!is_array($args))
          {
              $args = array($args);
          }

          // The condition has to reach the script; without it run-if only receives the
          // command and has nothing to evaluate. It goes first, ahead of the command arguments.
          $args = array_merge(array($condition), array_values($args));

          return self::script_runner('s3://us-east-1.elasticmapreduce/bootstrap-actions/run-if', $args);
      }

      /**
       * Specify options to merge with Hadoop's default configuration.
       *
       * A string <code>$config</code> produces one flag/value pair. An array produces one
       * flag/<code>key=value</code> pair per entry. Any other type produces no arguments.
       *
       * @param string $file (Required) The Hadoop configuration file to merge with. [Allowed values: <code>CFHadoopBootstrap::CONFIG_SITE</code>, <code>CFHadoopBootstrap::CONFIG_DEFAULT</code>, <code>CFHadoopBootstrap::CONFIG_CORE</code>, <code>CFHadoopBootstrap::CONFIG_HDFS</code>, <code>CFHadoopBootstrap::CONFIG_MAPREDUCE</code>]
       * @param string|array $config (Required) This can either be an XML file in S3 (as <code>s3://bucket/path</code>), or an associative array of key-value pairs. Boolean values are written as <code>true</code> or <code>false</code>.
       * @return array A configuration set to be provided when running a job flow.
       */
      public static function configure($file, $config)
      {
          $args = array();
          $file_arg = '-' . $file;

          if (is_string($config))
          {
              $args[] = $file_arg;
              $args[] = $config;
          }
          elseif (is_array($config))
          {
              foreach ($config as $key => $value)
              {
                  // String-casting a PHP boolean yields '1' or '', neither of which is a
                  // boolean literal in a Hadoop configuration; spell the value out instead.
                  if (is_bool($value))
                  {
                      $value = $value ? 'true' : 'false';
                  }

                  // The file flag is repeated in front of every key=value pair.
                  $args[] = $file_arg;
                  $args[] = $key . '=' . $value;
              }
          }

          return self::script_runner('s3://us-east-1.elasticmapreduce/bootstrap-actions/configure-hadoop', $args);
      }

      /**
       * Create a new bootstrap action which lets you configure Hadoop's daemons. The options are written to
       * the <code>hadoop-user-env.sh</code> file.
       *
       * Keys other than the ones listed below are ignored.
       *
       * @param string $daemon_type (Required) The Hadoop daemon to configure, e.g. one of the <code>CFHadoopBootstrap::DAEMON_*</code> constants.
       * @param array $opt (Optional) An associative array of parameters that can have the following keys: <ul>
       * 	<li><code>HeapSize</code> - <code>integer</code> - Optional - The requested heap size of the daemon, in megabytes.</li>
       * 	<li><code>CLIOptions</code> - <code>string</code> - Optional - Additional Java command line arguments to pass to the daemon.</li>
       * 	<li><code>Replace</code> - <code>boolean</code> - Optional - Whether or not the file should be replaced. A value of <code>true</code> will replace the existing configuration file. A value of <code>false</code> will append the options to the configuration file.</li></ul>
       * @return array A configuration set to be provided when running a job flow.
       */
      public static function daemon($daemon_type, $opt = null)
      {
          // Anything that is not an array (null included) means "no options". This also keeps
          // foreach from emitting a warning when a scalar is passed by mistake.
          if (!is_array($opt))
          {
              $opt = array();
          }

          $args = array();

          foreach ($opt as $key => $value)
          {
              switch ($key)
              {
                  case 'HeapSize':
                      $args[] = '--' . $daemon_type . '-heap-size=' . $value;
                      break;

                  case 'CLIOptions':
                      // The double quotes are part of the argument value itself.
                      $args[] = '--' . $daemon_type . '-opts="' . $value . '"';
                      break;

                  case 'Replace':
                      // Only boolean true or the exact string 'true' requests replacement.
                      if ($value === true || $value === 'true')
                      {
                          $args[] = '--replace';
                      }
                      break;
              }
          }

          return self::script_runner('s3://us-east-1.elasticmapreduce/bootstrap-actions/configure-daemons', $args);
      }
  }