package cloud.localstack.awssdkv2;

import static org.junit.Assert.assertNotNull;

import java.util.*;

import cloud.localstack.LocalstackTestRunner;
import cloud.localstack.docker.annotation.LocalstackDockerProperties;
import org.junit.Test;
import org.junit.runner.RunWith;
import software.amazon.awssdk.services.emr.EmrClient;
import software.amazon.awssdk.services.emr.model.*;
11+
12+ @ RunWith (LocalstackTestRunner .class )
13+ @ LocalstackDockerProperties (ignoreDockerRunErrors = true )
14+ public class EMRJobFlowTest {
15+ public static List <Application > getStandardApplications () {
16+ return Arrays .asList (
17+ Application .builder ().name ("Ganglia" ).version ("3.7.2" ).build (),
18+ Application .builder ().name ("Hive" ).version ("2.3.7" ).build (),
19+ Application .builder ().name ("Livy" ).version ("0.7.0" ).build (),
20+ Application .builder ().name ("Spark" ).version ("2.4.7" ).build ()
21+ );
22+ }
23+
24+ public static RunJobFlowResponse buildEMRCluster (EmrClient client , String name , String logFolder ) {
25+ HadoopJarStepConfig debugStep = HadoopJarStepConfig
26+ .builder ()
27+ .jar ("command-runner.jar" )
28+ .args ("state-pusher-script" )
29+ .build ();
30+
31+ StepConfig debug = StepConfig .builder ()
32+ .name ("Enable Debugging" )
33+ .actionOnFailure (ActionOnFailure .TERMINATE_JOB_FLOW )
34+ .hadoopJarStep (debugStep )
35+ .build ();
36+
37+ RunJobFlowRequest request = RunJobFlowRequest .builder ()
38+ .name (name )
39+ .releaseLabel ("emr-5.32.1" )
40+ .steps (debug )
41+ .applications (getStandardApplications ())
42+ .logUri (logFolder )
43+ .instances (JobFlowInstancesConfig .builder ()
44+ .instanceCount (3 )
45+ .keepJobFlowAliveWhenNoSteps (true )
46+ .masterInstanceType ("m4.large" )
47+ .slaveInstanceType ("m4.large" )
48+ .build ())
49+ .build ();
50+
51+ return client .runJobFlow (request );
52+ }
53+
54+ public static AddJobFlowStepsResponse submitJob (EmrClient client , String jobId , String jarFile , String className ) {
55+ HadoopJarStepConfig sparkStepConfigJob = HadoopJarStepConfig .builder ()
56+ .jar ("command-runner.jar" )
57+ .args ("spark-submit" , "--executor-memory" , "1g" , "--class" , className , jarFile )
58+ .build ();
59+
60+ StepConfig sparkStep = StepConfig .builder ()
61+ .name ("Spark Step" )
62+ .actionOnFailure (ActionOnFailure .CONTINUE )
63+ .hadoopJarStep (sparkStepConfigJob )
64+ .build ();
65+
66+ AddJobFlowStepsRequest request = AddJobFlowStepsRequest .builder ()
67+ .jobFlowId (jobId )
68+ .steps (Arrays .asList (sparkStep ))
69+ .build ();
70+
71+ return client .addJobFlowSteps (request );
72+ }
73+
74+ @ Test
75+ public void testJobFlow () {
76+ EmrClient client = TestUtils .getClientEMRV2 ();
77+ String jobId = buildEMRCluster (client , "test" , "/tmp" ).jobFlowId ();
78+ // TODO: upload JAR file to S3 - currently only submitting the job without checking the result
79+ submitJob (client , jobId , "s3://test.jar" , "Test" );
80+ }
81+
82+ }