web-dev-qa-db-ja.com

AWS EMR上のSparkで発生する奇妙なエラー

S3上のParquetデータからデータフレームを作成し、count()メソッドを呼び出してレコード数を出力する、非常に単純なPySparkスクリプトがあります。

AWS EMRクラスターでスクリプトを実行すると、次の奇妙なWARN情報が表示されます。

17/12/04 14:20:26 WARN ServletHandler: 
javax.servlet.ServletException: java.util.NoSuchElementException: None.get
    at org.glassfish.jersey.servlet.WebComponent.serviceImpl(WebComponent.java:489)
    at org.glassfish.jersey.servlet.WebComponent.service(WebComponent.java:427)
    at org.glassfish.jersey.servlet.ServletContainer.service(ServletContainer.java:388)
    at org.glassfish.jersey.servlet.ServletContainer.service(ServletContainer.java:341)
    at org.glassfish.jersey.servlet.ServletContainer.service(ServletContainer.java:228)
    at org.spark_project.jetty.servlet.ServletHolder.handle(ServletHolder.java:845)
    at org.spark_project.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1689)
    at org.apache.hadoop.yarn.server.webproxy.amfilter.AmIpFilter.doFilter(AmIpFilter.java:164)
    at org.spark_project.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1676)
    at org.spark_project.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:581)
    at org.spark_project.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1180)
    at org.spark_project.jetty.servlet.ServletHandler.doScope(ServletHandler.java:511)
    at org.spark_project.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1112)
    at org.spark_project.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141)
    at org.spark_project.jetty.server.handler.gzip.GzipHandler.handle(GzipHandler.java:461)
    at org.spark_project.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:213)
    at org.spark_project.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134)
    at org.spark_project.jetty.server.Server.handle(Server.java:524)
    at org.spark_project.jetty.server.HttpChannel.handle(HttpChannel.java:319)
    at org.spark_project.jetty.server.HttpConnection.onFillable(HttpConnection.java:253)
    at org.spark_project.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:273)
    at org.spark_project.jetty.io.FillInterest.fillable(FillInterest.java:95)
    at org.spark_project.jetty.io.SelectChannelEndPoint$2.run(SelectChannelEndPoint.java:93)
    at org.spark_project.jetty.util.thread.strategy.ExecuteProduceConsume.executeProduceConsume(ExecuteProduceConsume.java:303)
    at org.spark_project.jetty.util.thread.strategy.ExecuteProduceConsume.produceConsume(ExecuteProduceConsume.java:148)
    at org.spark_project.jetty.util.thread.strategy.ExecuteProduceConsume.run(ExecuteProduceConsume.java:136)
    at org.spark_project.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:671)
    at org.spark_project.jetty.util.thread.QueuedThreadPool$2.run(QueuedThreadPool.java:589)
    at java.lang.Thread.run(Thread.java:748)
Caused by: java.util.NoSuchElementException: None.get
    at scala.None$.get(Option.scala:347)
    at scala.None$.get(Option.scala:345)
    at org.apache.spark.status.api.v1.MetricHelper.submetricQuantiles(AllStagesResource.scala:313)
    at org.apache.spark.status.api.v1.AllStagesResource$$anon$1.build(AllStagesResource.scala:178)
    at org.apache.spark.status.api.v1.AllStagesResource$.taskMetricDistributions(AllStagesResource.scala:181)
    at org.apache.spark.status.api.v1.OneStageResource$$anonfun$taskSummary$1.apply(OneStageResource.scala:71)
    at org.apache.spark.status.api.v1.OneStageResource$$anonfun$taskSummary$1.apply(OneStageResource.scala:62)
    at org.apache.spark.status.api.v1.OneStageResource$$anonfun$withStageAttempt$1.apply(OneStageResource.scala:130)
    at org.apache.spark.status.api.v1.OneStageResource$$anonfun$withStageAttempt$1.apply(OneStageResource.scala:126)
    at org.apache.spark.status.api.v1.OneStageResource.withStage(OneStageResource.scala:97)
    at org.apache.spark.status.api.v1.OneStageResource.withStageAttempt(OneStageResource.scala:126)
    at org.apache.spark.status.api.v1.OneStageResource.taskSummary(OneStageResource.scala:62)
    at sun.reflect.GeneratedMethodAccessor153.invoke(Unknown Source)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.glassfish.jersey.server.model.internal.ResourceMethodInvocationHandlerFactory$1.invoke(ResourceMethodInvocationHandlerFactory.java:81)
    at org.glassfish.jersey.server.model.internal.AbstractJavaResourceMethodDispatcher$1.run(AbstractJavaResourceMethodDispatcher.java:144)
    at org.glassfish.jersey.server.model.internal.AbstractJavaResourceMethodDispatcher.invoke(AbstractJavaResourceMethodDispatcher.java:161)
    at org.glassfish.jersey.server.model.internal.JavaResourceMethodDispatcherProvider$TypeOutInvoker.doDispatch(JavaResourceMethodDispatcherProvider.java:205)
    at org.glassfish.jersey.server.model.internal.AbstractJavaResourceMethodDispatcher.dispatch(AbstractJavaResourceMethodDispatcher.java:99)
    at org.glassfish.jersey.server.model.ResourceMethodInvoker.invoke(ResourceMethodInvoker.java:389)
    at org.glassfish.jersey.server.model.ResourceMethodInvoker.apply(ResourceMethodInvoker.java:347)
    at org.glassfish.jersey.server.model.ResourceMethodInvoker.apply(ResourceMethodInvoker.java:102)
    at org.glassfish.jersey.server.ServerRuntime$2.run(ServerRuntime.java:326)
    at org.glassfish.jersey.internal.Errors$1.call(Errors.java:271)
    at org.glassfish.jersey.internal.Errors$1.call(Errors.java:267)
    at org.glassfish.jersey.internal.Errors.process(Errors.java:315)
    at org.glassfish.jersey.internal.Errors.process(Errors.java:297)
    at org.glassfish.jersey.internal.Errors.process(Errors.java:267)
    at org.glassfish.jersey.process.internal.RequestScope.runInScope(RequestScope.java:317)
    at org.glassfish.jersey.server.ServerRuntime.process(ServerRuntime.java:305)
    at org.glassfish.jersey.server.ApplicationHandler.handle(ApplicationHandler.java:1154)
    at org.glassfish.jersey.servlet.WebComponent.serviceImpl(WebComponent.java:473)
    ... 28 more
17/12/04 14:20:26 WARN HttpChannel: //ip-172-31-81-10.ec2.internal:4040/api/v1/applications/application_1512395256824_0002/stages/3/0/taskSummary?proxyapproved=true
javax.servlet.ServletException: java.util.NoSuchElementException: None.get
    at org.glassfish.jersey.servlet.WebComponent.serviceImpl(WebComponent.java:489)
    at org.glassfish.jersey.servlet.WebComponent.service(WebComponent.java:427)
    at org.glassfish.jersey.servlet.ServletContainer.service(ServletContainer.java:388)
    at org.glassfish.jersey.servlet.ServletContainer.service(ServletContainer.java:341)
    at org.glassfish.jersey.servlet.ServletContainer.service(ServletContainer.java:228)
    at org.spark_project.jetty.servlet.ServletHolder.handle(ServletHolder.java:845)
    at org.spark_project.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1689)
    at org.apache.hadoop.yarn.server.webproxy.amfilter.AmIpFilter.doFilter(AmIpFilter.java:164)
    at org.spark_project.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1676)
    at org.spark_project.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:581)
    at org.spark_project.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1180)
    at org.spark_project.jetty.servlet.ServletHandler.doScope(ServletHandler.java:511)
    at org.spark_project.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1112)
    at org.spark_project.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141)
    at org.spark_project.jetty.server.handler.gzip.GzipHandler.handle(GzipHandler.java:461)
    at org.spark_project.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:213)
    at org.spark_project.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134)
    at org.spark_project.jetty.server.Server.handle(Server.java:524)
    at org.spark_project.jetty.server.HttpChannel.handle(HttpChannel.java:319)
    at org.spark_project.jetty.server.HttpConnection.onFillable(HttpConnection.java:253)
    at org.spark_project.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:273)
    at org.spark_project.jetty.io.FillInterest.fillable(FillInterest.java:95)
    at org.spark_project.jetty.io.SelectChannelEndPoint$2.run(SelectChannelEndPoint.java:93)
    at org.spark_project.jetty.util.thread.strategy.ExecuteProduceConsume.executeProduceConsume(ExecuteProduceConsume.java:303)
    at org.spark_project.jetty.util.thread.strategy.ExecuteProduceConsume.produceConsume(ExecuteProduceConsume.java:148)
    at org.spark_project.jetty.util.thread.strategy.ExecuteProduceConsume.run(ExecuteProduceConsume.java:136)
    at org.spark_project.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:671)
    at org.spark_project.jetty.util.thread.QueuedThreadPool$2.run(QueuedThreadPool.java:589)
    at java.lang.Thread.run(Thread.java:748)
Caused by: java.util.NoSuchElementException: None.get
    at scala.None$.get(Option.scala:347)
    at scala.None$.get(Option.scala:345)
    at org.apache.spark.status.api.v1.MetricHelper.submetricQuantiles(AllStagesResource.scala:313)
    at org.apache.spark.status.api.v1.AllStagesResource$$anon$1.build(AllStagesResource.scala:178)
    at org.apache.spark.status.api.v1.AllStagesResource$.taskMetricDistributions(AllStagesResource.scala:181)
    at org.apache.spark.status.api.v1.OneStageResource$$anonfun$taskSummary$1.apply(OneStageResource.scala:71)
    at org.apache.spark.status.api.v1.OneStageResource$$anonfun$taskSummary$1.apply(OneStageResource.scala:62)
    at org.apache.spark.status.api.v1.OneStageResource$$anonfun$withStageAttempt$1.apply(OneStageResource.scala:130)
    at org.apache.spark.status.api.v1.OneStageResource$$anonfun$withStageAttempt$1.apply(OneStageResource.scala:126)
    at org.apache.spark.status.api.v1.OneStageResource.withStage(OneStageResource.scala:97)
    at org.apache.spark.status.api.v1.OneStageResource.withStageAttempt(OneStageResource.scala:126)
    at org.apache.spark.status.api.v1.OneStageResource.taskSummary(OneStageResource.scala:62)
    at sun.reflect.GeneratedMethodAccessor153.invoke(Unknown Source)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.glassfish.jersey.server.model.internal.ResourceMethodInvocationHandlerFactory$1.invoke(ResourceMethodInvocationHandlerFactory.java:81)
    at org.glassfish.jersey.server.model.internal.AbstractJavaResourceMethodDispatcher$1.run(AbstractJavaResourceMethodDispatcher.java:144)
    at org.glassfish.jersey.server.model.internal.AbstractJavaResourceMethodDispatcher.invoke(AbstractJavaResourceMethodDispatcher.java:161)
    at org.glassfish.jersey.server.model.internal.JavaResourceMethodDispatcherProvider$TypeOutInvoker.doDispatch(JavaResourceMethodDispatcherProvider.java:205)
    at org.glassfish.jersey.server.model.internal.AbstractJavaResourceMethodDispatcher.dispatch(AbstractJavaResourceMethodDispatcher.java:99)
    at org.glassfish.jersey.server.model.ResourceMethodInvoker.invoke(ResourceMethodInvoker.java:389)
    at org.glassfish.jersey.server.model.ResourceMethodInvoker.apply(ResourceMethodInvoker.java:347)
    at org.glassfish.jersey.server.model.ResourceMethodInvoker.apply(ResourceMethodInvoker.java:102)
    at org.glassfish.jersey.server.ServerRuntime$2.run(ServerRuntime.java:326)
    at org.glassfish.jersey.internal.Errors$1.call(Errors.java:271)
    at org.glassfish.jersey.internal.Errors$1.call(Errors.java:267)
    at org.glassfish.jersey.internal.Errors.process(Errors.java:315)
    at org.glassfish.jersey.internal.Errors.process(Errors.java:297)
    at org.glassfish.jersey.internal.Errors.process(Errors.java:267)
    at org.glassfish.jersey.process.internal.RequestScope.runInScope(RequestScope.java:317)
    at org.glassfish.jersey.server.ServerRuntime.process(ServerRuntime.java:305)
    at org.glassfish.jersey.server.ApplicationHandler.handle(ApplicationHandler.java:1154)
    at org.glassfish.jersey.servlet.WebComponent.serviceImpl(WebComponent.java:473)

しかし、ジョブ自体は失敗していないようです。カウントは正常に返されました。

なぜこの警告が発生するのか、またどうすれば解消できるのか、ご存知の方はいらっしゃいますか?

ありがとう

23
seiya

これらの警告メッセージは、次の行を/etc/spark/conf/log4j.propertiesに追加することで抑制できました。

log4j.logger.org.spark_project.jetty.server.HttpChannel=ERROR

log4j.logger.org.spark_project.jetty.servlet.ServletHandler=ERROR

パフォーマンスにもジョブの安定性にも影響はありませんでした。おかげで、ログがとても読みやすくなりました :)

17
Guy Cohen

AWS EMRでSparkを使用している場合、この問題は次のEMRリリースで実行するSparkジョブで発生します。

  • emr-5.10.0
  • emr-5.11.0
  • emr-5.11.1
  • emr-5.12.0

単純にemr-5.9.0にダウングレードすると、この問題を解決するのに役立ちます。

それが役に立てば幸い。

6
pdm