emr.class.php 30 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489
  1. <?php
  2. /*
  3. * Copyright 2010-2011 Amazon.com, Inc. or its affiliates. All Rights Reserved.
  4. *
  5. * Licensed under the Apache License, Version 2.0 (the "License").
  6. * You may not use this file except in compliance with the License.
  7. * A copy of the License is located at
  8. *
  9. * http://aws.amazon.com/apache2.0
  10. *
  11. * or in the "license" file accompanying this file. This file is distributed
  12. * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
  13. * express or implied. See the License for the specific language governing
  14. * permissions and limitations under the License.
  15. */
  16. /**
  17. *
  18. * This is the <i>Amazon Elastic MapReduce API Reference</i>. This guide provides descriptions and samples of the Amazon Elastic MapReduce
  19. * APIs.
  20. *
  21. * Amazon Elastic MapReduce is a web service that makes it easy to process large amounts of data efficiently. Elastic MapReduce uses Hadoop
  22. * processing combined with several AWS products to do tasks such as web indexing, data mining, log file analysis, machine learning, scientific
  23. * simulation, and data warehousing.
  24. *
  25. * @version Tue Aug 23 12:49:06 PDT 2011
  26. * @license See the included NOTICE.md file for complete information.
  27. * @copyright See the included NOTICE.md file for complete information.
  28. * @link http://aws.amazon.com/elasticmapreduce/ Amazon Elastic MapReduce
  29. * @link http://aws.amazon.com/documentation/elasticmapreduce/ Amazon Elastic MapReduce documentation
  30. */
  31. class AmazonEMR extends CFRuntime
  32. {
  33. /*%******************************************************************************************%*/
  34. // CLASS CONSTANTS
  35. /**
  36. * Specify the default endpoint hostname (the US-East, Northern Virginia endpoint).
  37. */
  38. const DEFAULT_URL = 'us-east-1.elasticmapreduce.amazonaws.com';
  39. /**
  40. * Specify the endpoint hostname for the US-East (Northern Virginia) Region. Alias of <DEFAULT_URL>.
  41. */
  42. const REGION_US_E1 = self::DEFAULT_URL;
  43. /**
  44. * Specify the endpoint hostname for the US-West (Northern California) Region.
  45. */
  46. const REGION_US_W1 = 'us-west-1.elasticmapreduce.amazonaws.com';
  47. /**
  48. * Specify the endpoint hostname for the EU (Ireland) Region.
  49. */
  50. const REGION_EU_W1 = 'eu-west-1.elasticmapreduce.amazonaws.com';
  51. /**
  52. * Specify the endpoint hostname for the Asia Pacific (Singapore) Region.
  53. */
  54. const REGION_APAC_SE1 = 'ap-southeast-1.elasticmapreduce.amazonaws.com';
  55. /**
  56. * Specify the endpoint hostname for the Asia Pacific (Japan) Region.
  57. */
  58. const REGION_APAC_NE1 = 'ap-northeast-1.elasticmapreduce.amazonaws.com';
  59. /*%******************************************************************************************%*/
  60. // SETTERS
  61. /**
  62. * This allows you to explicitly set the region for the service to use.
  63. *
  64. * @param string $region (Required) The region to explicitly set. Available options are <REGION_US_E1>, <REGION_US_W1>, <REGION_EU_W1>, <REGION_APAC_SE1>, or <REGION_APAC_NE1>.
  65. * @return $this A reference to the current instance.
  66. */
  public function set_region($region)
  {
      // The REGION_* constants of this class are endpoint hostnames, so picking
      // a region is delegated to set_hostname() (defined outside this class).
      $this->set_hostname($region);

      // Return the instance itself so the call can be chained.
      return $this;
  }
  72. /*%******************************************************************************************%*/
  73. // CONSTRUCTOR
  74. /**
  75. * Constructs a new instance of <AmazonEMR>.
  76. *
  77. * @param string $key (Optional) Your Amazon API Key. If blank, it will look for the <code>AWS_KEY</code> constant.
  78. * @param string $secret_key (Optional) Your Amazon API Secret Key. If blank, it will look for the <code>AWS_SECRET_KEY</code> constant.
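  79. * @return mixed Whatever the parent <CFRuntime> constructor returns. An <EMR_Exception> is thrown (rather than false being returned) when no key or no secret key is available.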
  80. */
  public function __construct($key = null, $secret_key = null)
  {
      // Service defaults: the API version string and the default endpoint.
      $this->api_version = '2009-03-31';
      $this->hostname = self::DEFAULT_URL;

      // The access key may be passed in or provided through the AWS_KEY constant.
      $has_key = ($key || defined('AWS_KEY'));
      if (!$has_key)
      {
          // @codeCoverageIgnoreStart
          throw new EMR_Exception('No account key was passed into the constructor, nor was it set in the AWS_KEY constant.');
          // @codeCoverageIgnoreEnd
      }

      // Likewise for the secret key and the AWS_SECRET_KEY constant.
      $has_secret = ($secret_key || defined('AWS_SECRET_KEY'));
      if (!$has_secret)
      {
          // @codeCoverageIgnoreStart
          throw new EMR_Exception('No account secret was passed into the constructor, nor was it set in the AWS_SECRET_KEY constant.');
          // @codeCoverageIgnoreEnd
      }

      // Credential handling itself is left to the parent constructor.
      return parent::__construct($key, $secret_key);
  }
  99. /*%******************************************************************************************%*/
  100. // SERVICE METHODS
  101. /**
  102. *
  103. * AddInstanceGroups adds an instance group to a running cluster.
  104. *
  105. * @param array $instance_groups (Required) Instance Groups to add. <ul>
  106. * <li><code>x</code> - <code>array</code> - This represents a simple array index. <ul>
  107. * <li><code>Name</code> - <code>string</code> - Optional - Friendly name given to the instance group. </li>
  108. * <li><code>Market</code> - <code>string</code> - Optional - Market type of the Amazon EC2 instances used to create a cluster node. [Allowed values: <code>ON_DEMAND</code>, <code>SPOT</code>]</li>
  109. * <li><code>InstanceRole</code> - <code>string</code> - Required - The role of the instance group in the cluster. [Allowed values: <code>MASTER</code>, <code>CORE</code>, <code>TASK</code>]</li>
  110. * <li><code>BidPrice</code> - <code>string</code> - Optional - Bid price for each Amazon EC2 instance in the instance group when launching nodes as Spot Instances, expressed in USD. </li>
  111. * <li><code>InstanceType</code> - <code>string</code> - Required - The Amazon EC2 instance type for all instances in the instance group. </li>
  112. * <li><code>InstanceCount</code> - <code>integer</code> - Required - Target number of instances for the instance group. </li>
  113. * </ul></li>
  114. * </ul>
  115. * @param string $job_flow_id (Required) Job flow in which to add the instance groups.
  116. * @param array $opt (Optional) An associative array of parameters that can have the following keys: <ul>
  117. * <li><code>curlopts</code> - <code>array</code> - Optional - A set of values to pass directly into <code>curl_setopt()</code>, where the key is a pre-defined <code>CURLOPT_*</code> constant.</li>
  118. * <li><code>returnCurlHandle</code> - <code>boolean</code> - Optional - A private toggle specifying that the cURL handle be returned rather than actually completing the request. This toggle is useful for manually managed batch requests.</li></ul>
  119. * @return CFResponse A <CFResponse> object containing a parsed HTTP response.
  120. */
  public function add_instance_groups($instance_groups, $job_flow_id, $opt = null)
  {
      // Start from the caller's options, or an empty set when none were given.
      $opt = $opt ? $opt : array();

      // Required parameter: a non-array value is wrapped into a one-element list.
      if (!is_array($instance_groups))
      {
          $instance_groups = array($instance_groups);
      }

      // CFComplexType::map() (defined elsewhere) turns the nested list into
      // request parameters, using 'member' as the list marker.
      $group_params = CFComplexType::map(array(
          'InstanceGroups' => $instance_groups
      ), 'member');
      $opt = array_merge($opt, $group_params);

      // Required parameter: the job flow that receives the new groups.
      $opt['JobFlowId'] = $job_flow_id;

      return $this->authenticate('AddInstanceGroups', $opt, $this->hostname);
  }
  131. /**
  132. *
  133. * AddJobFlowSteps adds new steps to a running job flow. A maximum of 256 steps are allowed in each job flow.
  134. *
  135. * If your job flow is long-running (such as a Hive data warehouse) or complex, you may require more than 256 steps to process your data. You
  136. * can bypass the 256-step limitation in various ways, including using the SSH shell to connect to the master node and submitting queries
  137. * directly to the software running on the master node, such as Hive and Hadoop. For more information on how to do this, go to <a
  138. * href="http://docs.amazonwebservices.com/ElasticMapReduce/latest/DeveloperGuide/AddMoreThan256Steps.html">Add More than 256 Steps to a Job
  139. * Flow</a> in the <i>Amazon Elastic MapReduce Developer's Guide</i>.
  140. *
  141. * A step specifies the location of a JAR file stored either on the master node of the job flow or in Amazon S3. Each step is performed by the
  142. * main function of the main class of the JAR file. The main class can be specified either in the manifest of the JAR or by using the
  143. * MainFunction parameter of the step.
  144. *
  145. * Elastic MapReduce executes each step in the order listed. For a step to be considered complete, the main function must exit with a zero
  146. * exit code and all Hadoop jobs started while the step was running must have completed and run successfully.
  147. *
  148. * You can only add steps to a job flow that is in one of the following states: STARTING, BOOTSTRAPPING, RUNNING, or WAITING.
  149. *
  150. * @param string $job_flow_id (Required) A string that uniquely identifies the job flow. This identifier is returned by RunJobFlow and can also be obtained from DescribeJobFlows.
  151. * @param array $steps (Required) A list of StepConfig to be executed by the job flow. <ul>
  152. * <li><code>x</code> - <code>array</code> - This represents a simple array index. <ul>
  153. * <li><code>Name</code> - <code>string</code> - Required - The name of the job flow step. </li>
  154. * <li><code>ActionOnFailure</code> - <code>string</code> - Optional - Specifies the action to take if the job flow step fails. [Allowed values: <code>TERMINATE_JOB_FLOW</code>, <code>CANCEL_AND_WAIT</code>, <code>CONTINUE</code>]</li>
  155. * <li><code>HadoopJarStep</code> - <code>array</code> - Required - Specifies the JAR file used for the job flow step. Takes an associative array of parameters that can have the following keys: <ul>
  156. * <li><code>Properties</code> - <code>array</code> - Optional - A list of Java properties that are set when the step runs. You can use these properties to pass key value pairs to your main function. <ul>
  157. * <li><code>x</code> - <code>array</code> - This represents a simple array index. <ul>
  158. * <li><code>Key</code> - <code>string</code> - Optional - The unique identifier of a key value pair. </li>
  159. * <li><code>Value</code> - <code>string</code> - Optional - The value part of the identified key. </li>
  160. * </ul></li>
  161. * </ul></li>
  162. * <li><code>Jar</code> - <code>string</code> - Required - A path to a JAR file run during the step. </li>
  163. * <li><code>MainClass</code> - <code>string</code> - Optional - The name of the main class in the specified Java file. If not specified, the JAR file should specify a Main-Class in its manifest file. </li>
  164. * <li><code>Args</code> - <code>string|array</code> - Optional - A list of command line arguments passed to the JAR file's main function when executed. Pass a string for a single value, or an indexed array for multiple values. </li>
  165. * </ul></li>
  166. * </ul></li>
  167. * </ul>
  168. * @param array $opt (Optional) An associative array of parameters that can have the following keys: <ul>
  169. * <li><code>curlopts</code> - <code>array</code> - Optional - A set of values to pass directly into <code>curl_setopt()</code>, where the key is a pre-defined <code>CURLOPT_*</code> constant.</li>
  170. * <li><code>returnCurlHandle</code> - <code>boolean</code> - Optional - A private toggle specifying that the cURL handle be returned rather than actually completing the request. This toggle is useful for manually managed batch requests.</li></ul>
  171. * @return CFResponse A <CFResponse> object containing a parsed HTTP response.
  172. */
  public function add_job_flow_steps($job_flow_id, $steps, $opt = null)
  {
      // Start from the caller's options, or an empty set when none were given.
      $opt = $opt ? $opt : array();

      // Required parameter: the job flow the steps are appended to.
      $opt['JobFlowId'] = $job_flow_id;

      // Required parameter: a non-array value is wrapped into a one-element list.
      if (!is_array($steps))
      {
          $steps = array($steps);
      }

      // CFComplexType::map() (defined elsewhere) turns the nested list into
      // request parameters, using 'member' as the list marker.
      $step_params = CFComplexType::map(array(
          'Steps' => $steps
      ), 'member');
      $opt = array_merge($opt, $step_params);

      return $this->authenticate('AddJobFlowSteps', $opt, $this->hostname);
  }
  183. /**
  184. *
  185. * TerminateJobFlows shuts a list of job flows down. When a job flow is shut down, any step not yet completed is canceled and the EC2
  186. * instances on which the job flow is running are stopped. Any log files not already saved are uploaded to Amazon S3 if a LogUri was specified
  187. * when the job flow was created.
  188. *
  189. * @param string|array $job_flow_ids (Required) A list of job flows to be shutdown. Pass a string for a single value, or an indexed array for multiple values.
  190. * @param array $opt (Optional) An associative array of parameters that can have the following keys: <ul>
  191. * <li><code>curlopts</code> - <code>array</code> - Optional - A set of values to pass directly into <code>curl_setopt()</code>, where the key is a pre-defined <code>CURLOPT_*</code> constant.</li>
  192. * <li><code>returnCurlHandle</code> - <code>boolean</code> - Optional - A private toggle specifying that the cURL handle be returned rather than actually completing the request. This toggle is useful for manually managed batch requests.</li></ul>
  193. * @return CFResponse A <CFResponse> object containing a parsed HTTP response.
  194. */
  public function terminate_job_flows($job_flow_ids, $opt = null)
  {
      // Start from the caller's options, or an empty set when none were given.
      $opt = $opt ? $opt : array();

      // Required parameter: a single ID is wrapped into a one-element list.
      if (!is_array($job_flow_ids))
      {
          $job_flow_ids = array($job_flow_ids);
      }

      // CFComplexType::map() (defined elsewhere) turns the list into request
      // parameters, using 'member' as the list marker.
      $id_params = CFComplexType::map(array(
          'JobFlowIds' => $job_flow_ids
      ), 'member');
      $opt = array_merge($opt, $id_params);

      return $this->authenticate('TerminateJobFlows', $opt, $this->hostname);
  }
  204. /**
  205. *
  206. * DescribeJobFlows returns a list of job flows that match all of the supplied parameters. The parameters can include a list of job flow IDs,
  207. * job flow states, and restrictions on job flow creation date and time.
  208. *
  209. * Regardless of supplied parameters, only job flows created within the last two months are returned.
  210. *
  211. * If no parameters are supplied, then job flows matching either of the following criteria are returned:
  212. *
  213. * <ul> <li>Job flows created and completed in the last two weeks</li>
  214. *
  215. * <li> Job flows created within the last two months that are in one of the following states: <code>RUNNING</code>, <code>WAITING</code>,
  216. * <code>SHUTTING_DOWN</code>, <code>STARTING</code> </li>
  217. *
  218. * </ul>
  219. *
  220. * Amazon Elastic MapReduce can return a maximum of 512 job flow descriptions.
  221. *
  222. * @param array $opt (Optional) An associative array of parameters that can have the following keys: <ul>
  223. * <li><code>CreatedAfter</code> - <code>string</code> - Optional - Return only job flows created after this date and time. May be passed as a number of seconds since UNIX Epoch, or any string compatible with <php:strtotime()>.</li>
  224. * <li><code>CreatedBefore</code> - <code>string</code> - Optional - Return only job flows created before this date and time. May be passed as a number of seconds since UNIX Epoch, or any string compatible with <php:strtotime()>.</li>
  225. * <li><code>JobFlowIds</code> - <code>string|array</code> - Optional - Return only job flows whose job flow ID is contained in this list. Pass a string for a single value, or an indexed array for multiple values. </li>
  226. * <li><code>JobFlowStates</code> - <code>string|array</code> - Optional - Return only job flows whose state is contained in this list. Pass a string for a single value, or an indexed array for multiple values. </li>
  227. * <li><code>curlopts</code> - <code>array</code> - Optional - A set of values to pass directly into <code>curl_setopt()</code>, where the key is a pre-defined <code>CURLOPT_*</code> constant.</li>
  228. * <li><code>returnCurlHandle</code> - <code>boolean</code> - Optional - A private toggle specifying that the cURL handle be returned rather than actually completing the request. This toggle is useful for manually managed batch requests.</li></ul>
  229. * @return CFResponse A <CFResponse> object containing a parsed HTTP response.
  230. */
  public function describe_job_flows($opt = null)
  {
      // Start from the caller's options, or an empty set when none were given.
      $opt = $opt ? $opt : array();

      // Optional date filters: each one present is passed through the
      // convert_date_to_iso8601() utility before being sent.
      foreach (array('CreatedAfter', 'CreatedBefore') as $date_key)
      {
          if (isset($opt[$date_key]))
          {
              $opt[$date_key] = $this->util->convert_date_to_iso8601($opt[$date_key]);
          }
      }

      // Optional list filters: a single value is wrapped into a list, the list
      // is expanded by CFComplexType::map() (defined elsewhere) with 'member'
      // as the list marker, and the original unexpanded key is then dropped.
      foreach (array('JobFlowIds', 'JobFlowStates') as $list_key)
      {
          if (isset($opt[$list_key]))
          {
              $values = is_array($opt[$list_key]) ? $opt[$list_key] : array($opt[$list_key]);
              $opt = array_merge($opt, CFComplexType::map(array(
                  $list_key => $values
              ), 'member'));
              unset($opt[$list_key]);
          }
      }

      return $this->authenticate('DescribeJobFlows', $opt, $this->hostname);
  }
  262. /**
  263. *
  264. * SetTerminationProtection locks a job flow so the Amazon EC2 instances in the cluster cannot be terminated by user intervention, an API
  265. * call, or in the event of a job-flow error. The cluster still terminates upon successful completion of the job flow. Calling
  266. * SetTerminationProtection on a job flow is analogous to calling the Amazon EC2 DisableAPITermination API on all of the EC2 instances in a
  267. * cluster.
  268. *
  269. * SetTerminationProtection is used to prevent accidental termination of a job flow and to ensure that in the event of an error, the instances
  270. * will persist so you can recover any data stored in their ephemeral instance storage.
  271. *
  272. * To terminate a job flow that has been locked by setting SetTerminationProtection to <code>true</code>, you must first unlock the job flow
  273. * by a subsequent call to SetTerminationProtection in which you set the value to <code>false</code>.
  274. *
  275. * For more information, go to <a
  276. * href="http://docs.amazonwebservices.com/ElasticMapReduce/latest/DeveloperGuide/UsingEMR_TerminationProtection.html">Protecting a Job Flow
  277. * from Termination</a> in the <i>Amazon Elastic MapReduce Developer's Guide.</i>
  278. *
  279. * @param string|array $job_flow_ids (Required) A list of strings that uniquely identify the job flows to protect. This identifier is returned by RunJobFlow and can also be obtained from DescribeJobFlows. Pass a string for a single value, or an indexed array for multiple values.
  280. * @param boolean $termination_protected (Required) A Boolean that indicates whether to protect the job flow and prevent the Amazon EC2 instances in the cluster from shutting down due to API calls, user intervention, or job-flow error.
  281. * @param array $opt (Optional) An associative array of parameters that can have the following keys: <ul>
  282. * <li><code>curlopts</code> - <code>array</code> - Optional - A set of values to pass directly into <code>curl_setopt()</code>, where the key is a pre-defined <code>CURLOPT_*</code> constant.</li>
  283. * <li><code>returnCurlHandle</code> - <code>boolean</code> - Optional - A private toggle specifying that the cURL handle be returned rather than actually completing the request. This toggle is useful for manually managed batch requests.</li></ul>
  284. * @return CFResponse A <CFResponse> object containing a parsed HTTP response.
  285. */
  public function set_termination_protection($job_flow_ids, $termination_protected, $opt = null)
  {
      // Start from the caller's options, or an empty set when none were given.
      if (!$opt) $opt = array();

      // Required parameter: a single ID is wrapped into a one-element list, then
      // expanded by CFComplexType::map() (defined elsewhere) with 'member' as
      // the list marker.
      $opt = array_merge($opt, CFComplexType::map(array(
          'JobFlowIds' => (is_array($job_flow_ids) ? $job_flow_ids : array($job_flow_ids))
      ), 'member'));

      // Required parameter. PHP converts boolean false to an empty string, so a
      // raw boolean could be sent as an empty "TerminationProtected" value when
      // unprotecting a job flow. authenticate() is not visible from this class,
      // so do not rely on it to convert booleans: send the literal strings
      // 'true' / 'false'. Non-boolean values (for example a caller-supplied
      // 'false' string) are passed through unchanged.
      if (is_bool($termination_protected))
      {
          $termination_protected = $termination_protected ? 'true' : 'false';
      }
      $opt['TerminationProtected'] = $termination_protected;

      return $this->authenticate('SetTerminationProtection', $opt, $this->hostname);
  }
  296. /**
  297. *
  298. * RunJobFlow creates and starts running a new job flow. The job flow will run the steps specified. Once the job flow completes, the cluster
  299. * is stopped and the HDFS partition is lost. To prevent loss of data, configure the last step of the job flow to store results in Amazon S3.
  300. * If the JobFlowInstancesDetail <code>KeepJobFlowAliveWhenNoSteps</code> parameter is set to <code>TRUE</code>, the job flow will transition
  301. * to the WAITING state rather than shutting down once the steps have completed.
  302. *
  303. * For additional protection, you can set the JobFlowInstancesDetail <code>TerminationProtected</code> parameter to <code>TRUE</code> to lock
  304. * the job flow and prevent it from being terminated by API call, user intervention, or in the event of a job flow error.
  305. *
  306. * A maximum of 256 steps are allowed in each job flow.
  307. *
  308. * If your job flow is long-running (such as a Hive data warehouse) or complex, you may require more than 256 steps to process your data. You
  309. * can bypass the 256-step limitation in various ways, including using the SSH shell to connect to the master node and submitting queries
  310. * directly to the software running on the master node, such as Hive and Hadoop. For more information on how to do this, go to <a
  311. * href="http://docs.amazonwebservices.com/ElasticMapReduce/latest/DeveloperGuide/AddMoreThan256Steps.html">Add More than 256 Steps to a Job
  312. * Flow</a> in the <i>Amazon Elastic MapReduce Developer's Guide</i>.
  313. *
  314. * For long running job flows, we recommend that you periodically store your results.
  315. *
  316. * @param string $name (Required) The name of the job flow.
  317. * @param array $instances (Required) A specification of the number and type of Amazon EC2 instances on which to run the job flow. <ul>
  318. * <li><code>MasterInstanceType</code> - <code>string</code> - Optional - The EC2 instance type of the master node. </li>
  319. * <li><code>SlaveInstanceType</code> - <code>string</code> - Optional - The EC2 instance type of the slave nodes. </li>
  320. * <li><code>InstanceCount</code> - <code>integer</code> - Optional - The number of Amazon EC2 instances used to execute the job flow. </li>
  321. * <li><code>InstanceGroups</code> - <code>array</code> - Optional - Configuration for the job flow's instance groups. <ul>
  322. * <li><code>x</code> - <code>array</code> - This represents a simple array index. <ul>
  323. * <li><code>Name</code> - <code>string</code> - Optional - Friendly name given to the instance group. </li>
  324. * <li><code>Market</code> - <code>string</code> - Optional - Market type of the Amazon EC2 instances used to create a cluster node. [Allowed values: <code>ON_DEMAND</code>, <code>SPOT</code>]</li>
  325. * <li><code>InstanceRole</code> - <code>string</code> - Required - The role of the instance group in the cluster. [Allowed values: <code>MASTER</code>, <code>CORE</code>, <code>TASK</code>]</li>
  326. * <li><code>BidPrice</code> - <code>string</code> - Optional - Bid price for each Amazon EC2 instance in the instance group when launching nodes as Spot Instances, expressed in USD. </li>
  327. * <li><code>InstanceType</code> - <code>string</code> - Required - The Amazon EC2 instance type for all instances in the instance group. </li>
  328. * <li><code>InstanceCount</code> - <code>integer</code> - Required - Target number of instances for the instance group. </li>
  329. * </ul></li>
  330. * </ul></li>
  331. * <li><code>Ec2KeyName</code> - <code>string</code> - Optional - Specifies the name of the Amazon EC2 key pair that can be used to ssh to the master node as the user called "hadoop." </li>
  332. * <li><code>Placement</code> - <code>array</code> - Optional - Specifies the Availability Zone the job flow will run in. Takes an associative array of parameters that can have the following keys: <ul>
  333. * <li><code>AvailabilityZone</code> - <code>string</code> - Required - The Amazon EC2 Availability Zone for the job flow. </li>
  334. * </ul></li>
  335. * <li><code>KeepJobFlowAliveWhenNoSteps</code> - <code>boolean</code> - Optional - Specifies whether the job flow should terminate after completing all steps. </li>
  336. * <li><code>TerminationProtected</code> - <code>boolean</code> - Optional - Specifies whether to lock the job flow to prevent the Amazon EC2 instances from being terminated by API call, user intervention, or in the event of a job flow error. </li>
  337. * <li><code>HadoopVersion</code> - <code>string</code> - Optional - Specifies the Hadoop version for the job flow. Valid inputs are "0.18" or "0.20". </li>
  338. * </ul>
  339. * @param array $opt (Optional) An associative array of parameters that can have the following keys: <ul>
  340. * <li><code>LogUri</code> - <code>string</code> - Optional - Specifies the location in Amazon S3 to write the log files of the job flow. If a value is not provided, logs are not created. </li>
  341. * <li><code>AdditionalInfo</code> - <code>string</code> - Optional - A JSON string for selecting additional features. </li>
  342. * <li><code>Steps</code> - <code>array</code> - Optional - A list of steps to be executed by the job flow. <ul>
  343. * <li><code>x</code> - <code>array</code> - This represents a simple array index. <ul>
  344. * <li><code>Name</code> - <code>string</code> - Required - The name of the job flow step. </li>
  345. * <li><code>ActionOnFailure</code> - <code>string</code> - Optional - Specifies the action to take if the job flow step fails. [Allowed values: <code>TERMINATE_JOB_FLOW</code>, <code>CANCEL_AND_WAIT</code>, <code>CONTINUE</code>]</li>
  346. * <li><code>HadoopJarStep</code> - <code>array</code> - Required - Specifies the JAR file used for the job flow step. Takes an associative array of parameters that can have the following keys: <ul>
  347. * <li><code>Properties</code> - <code>array</code> - Optional - A list of Java properties that are set when the step runs. You can use these properties to pass key value pairs to your main function. <ul>
  348. * <li><code>x</code> - <code>array</code> - This represents a simple array index. <ul>
  349. * <li><code>Key</code> - <code>string</code> - Optional - The unique identifier of a key value pair. </li>
  350. * <li><code>Value</code> - <code>string</code> - Optional - The value part of the identified key. </li>
  351. * </ul></li>
  352. * </ul></li>
  353. * <li><code>Jar</code> - <code>string</code> - Required - A path to a JAR file run during the step. </li>
  354. * <li><code>MainClass</code> - <code>string</code> - Optional - The name of the main class in the specified Java file. If not specified, the JAR file should specify a Main-Class in its manifest file. </li>
  355. * <li><code>Args</code> - <code>string|array</code> - Optional - A list of command line arguments passed to the JAR file's main function when executed. Pass a string for a single value, or an indexed array for multiple values. </li>
  356. * </ul></li>
  357. * </ul></li>
  358. * </ul></li>
  359. * <li><code>BootstrapActions</code> - <code>array</code> - Optional - A list of bootstrap actions that will be run before Hadoop is started on the cluster nodes. <ul>
  360. * <li><code>x</code> - <code>array</code> - This represents a simple array index. <ul>
  361. * <li><code>Name</code> - <code>string</code> - Required - The name of the bootstrap action. </li>
  362. * <li><code>ScriptBootstrapAction</code> - <code>array</code> - Required - The script run by the bootstrap action. Takes an associative array of parameters that can have the following keys: <ul>
  363. * <li><code>Path</code> - <code>string</code> - Required - Location of the script to run during a bootstrap action. Can be either a location in Amazon S3 or on a local file system. </li>
  364. * <li><code>Args</code> - <code>string|array</code> - Optional - A list of command line arguments to pass to the bootstrap action script. Pass a string for a single value, or an indexed array for multiple values. </li>
  365. * </ul></li>
  366. * </ul></li>
  367. * </ul></li>
  368. * <li><code>curlopts</code> - <code>array</code> - Optional - A set of values to pass directly into <code>curl_setopt()</code>, where the key is a pre-defined <code>CURLOPT_*</code> constant.</li>
  369. * <li><code>returnCurlHandle</code> - <code>boolean</code> - Optional - A private toggle specifying that the cURL handle be returned rather than actually completing the request. This toggle is useful for manually managed batch requests.</li></ul>
  370. * @return CFResponse A <CFResponse> object containing a parsed HTTP response.
  371. */
  public function run_job_flow($name, $instances, $opt = null)
  {
      // Start from the caller's options, or an empty set when none were given.
      $opt = $opt ? $opt : array();

      // Required parameter: the job flow name.
      $opt['Name'] = $name;

      // Collapse the nested instance-group list first, so that it sits under a
      // 'member' key before the whole Instances structure is expanded below.
      if (isset($instances['InstanceGroups']))
      {
          $groups = $instances['InstanceGroups'];
          if (!is_array($groups))
          {
              $groups = array($groups);
          }
          $instances['InstanceGroups'] = CFComplexType::map(array(
              'member' => $groups
          ));
      }

      // Required parameter: the Instances structure, expanded by
      // CFComplexType::map() (defined elsewhere) with 'member' as list marker.
      $instance_spec = is_array($instances) ? $instances : array($instances);
      $opt = array_merge($opt, CFComplexType::map(array(
          'Instances' => $instance_spec
      ), 'member'));

      // Optional list parameters: expand each one that is present, then drop
      // the original unexpanded key.
      foreach (array('Steps', 'BootstrapActions') as $list_key)
      {
          if (isset($opt[$list_key]))
          {
              $opt = array_merge($opt, CFComplexType::map(array(
                  $list_key => $opt[$list_key]
              ), 'member'));
              unset($opt[$list_key]);
          }
      }

      return $this->authenticate('RunJobFlow', $opt, $this->hostname);
  }
  405. /**
  406. *
  407. * ModifyInstanceGroups modifies the number of nodes and configuration settings of an instance group. The input parameters include the new
  408. * target instance count for the group and the instance group ID. The call will either succeed or fail atomically.
  409. *
  410. * @param array $opt (Optional) An associative array of parameters that can have the following keys: <ul>
  411. * <li><code>InstanceGroups</code> - <code>array</code> - Optional - Instance groups to change. <ul>
  412. * <li><code>x</code> - <code>array</code> - This represents a simple array index. <ul>
  413. * <li><code>InstanceGroupId</code> - <code>string</code> - Required - Unique ID of the instance group to expand or shrink. </li>
  414. * <li><code>InstanceCount</code> - <code>integer</code> - Required - Target size for the instance group. </li>
  415. * </ul></li>
  416. * </ul></li>
  417. * <li><code>curlopts</code> - <code>array</code> - Optional - A set of values to pass directly into <code>curl_setopt()</code>, where the key is a pre-defined <code>CURLOPT_*</code> constant.</li>
  418. * <li><code>returnCurlHandle</code> - <code>boolean</code> - Optional - A private toggle specifying that the cURL handle be returned rather than actually completing the request. This toggle is useful for manually managed batch requests.</li></ul>
  419. * @return CFResponse A <CFResponse> object containing a parsed HTTP response.
  420. */
  public function modify_instance_groups($opt = null)
  {
      // Start from the caller's options, or an empty set when none were given.
      $opt = $opt ? $opt : array();

      // Optional parameter: expand the instance-group list via
      // CFComplexType::map() (defined elsewhere) with 'member' as list marker,
      // then drop the original unexpanded key.
      if (isset($opt['InstanceGroups']))
      {
          $group_params = CFComplexType::map(array(
              'InstanceGroups' => $opt['InstanceGroups']
          ), 'member');
          $opt = array_merge($opt, $group_params);
          unset($opt['InstanceGroups']);
      }

      return $this->authenticate('ModifyInstanceGroups', $opt, $this->hostname);
  }
  434. }
  435. /*%******************************************************************************************%*/
  436. // EXCEPTIONS
  /**
   * Exception type thrown by <AmazonEMR>, for example when the constructor
   * cannot find an access key or secret key. Adds nothing to <Exception>.
   */
  class EMR_Exception extends Exception
  {
  }