linkedin · akshayrai · Feb 28, 2017 · Feb 25, 2017
diff --git a/app-conf/FetcherConf.xml b/app-conf/FetcherConf.xml
@@ -45,6 +45,9 @@
     files, but also increase the risk of OutOfMemory error. The default heap size of Dr. Elephant
     is 1024MB. To increase this, e.g. to 2048MB, run this before start.sh:
       export OPTS="-mem 2048"
+
+    To work properly, this fetcher should use the same timezone with the job history server.
+    If not set, the local timezone will be used.
   -->
   <!--
   <fetcher>
@@ -53,6 +56,7 @@
     <params>
       <sampling_enabled>false</sampling_enabled>
       <history_log_size_limit_in_mb>500</history_log_size_limit_in_mb>
+      <history_server_time_zone>PST</history_server_time_zone>
     </params>
   </fetcher>
   -->

diff --git a/app/com/linkedin/drelephant/mapreduce/fetchers/MapReduceFSFetcherHadoop2.java b/app/com/linkedin/drelephant/mapreduce/fetchers/MapReduceFSFetcherHadoop2.java
@@ -46,6 +46,7 @@
 import java.util.List;
 import java.util.Map;
 import java.util.Properties;
+import java.util.TimeZone;
 
 /**
  * This class implements the Fetcher for MapReduce Applications on Hadoop2
@@ -57,6 +58,7 @@ public class MapReduceFSFetcherHadoop2 extends MapReduceFetcher {
   private static final Logger logger = Logger.getLogger(MapReduceFSFetcherHadoop2.class);
 
   private static final String LOG_SIZE_XML_FIELD = "history_log_size_limit_in_mb";
+  private static final String HISTORY_SERVER_TIME_ZONE_XML_FIELD = "history_server_time_zone";
   private static final String TIMESTAMP_DIR_FORMAT = "%04d" + File.separator + "%02d" + File.separator + "%02d";
   private static final int SERIAL_NUMBER_DIRECTORY_DIGITS = 6;
   protected static final double DEFALUT_MAX_LOG_SIZE_IN_MB = 500;
@@ -65,6 +67,7 @@ public class MapReduceFSFetcherHadoop2 extends MapReduceFetcher {
   private String _historyLocation;
   private String _intermediateHistoryLocation;
   private double _maxLogSizeInMB;
+  private TimeZone _timeZone;
 
   public MapReduceFSFetcherHadoop2(FetcherConfigurationData fetcherConfData) throws IOException {
     super(fetcherConfData);
@@ -78,6 +81,10 @@ public MapReduceFSFetcherHadoop2(FetcherConfigurationData fetcherConfData) throw
     }
     logger.info("The history log limit of MapReduce application is set to " + _maxLogSizeInMB + " MB");
 
+    String timeZoneStr = fetcherConfData.getParamMap().get(HISTORY_SERVER_TIME_ZONE_XML_FIELD);
+    _timeZone = timeZoneStr == null ? TimeZone.getDefault() : TimeZone.getTimeZone(timeZoneStr);
+    logger.info("Using timezone: " + _timeZone.getID());
+
     Configuration conf = new Configuration();
     this._fs = FileSystem.get(conf);
     this._historyLocation = conf.get("mapreduce.jobhistory.done-dir");
@@ -94,6 +101,10 @@ public double getMaxLogSizeInMB() {
     return _maxLogSizeInMB;
   }
 
+  public TimeZone getTimeZone() {
+    return _timeZone;
+  }
+
   /**
    * The location of a job history file is in format: {done-dir}/yyyy/mm/dd/{serialPart}.
    * yyyy/mm/dd is the year, month and date of the finish time.
@@ -112,7 +123,7 @@ public double getMaxLogSizeInMB() {
    */
   protected String getHistoryDir(AnalyticJob job) {
     // generate the date part
-    Calendar timestamp = Calendar.getInstance();
+    Calendar timestamp = Calendar.getInstance(_timeZone);
     timestamp.setTimeInMillis(job.getFinishTime());
     String datePart = String.format(TIMESTAMP_DIR_FORMAT,
             timestamp.get(Calendar.YEAR),

diff --git a/test/com/linkedin/drelephant/mapreduce/fetchers/MapReduceFSFetcherHadoop2Test.java b/test/com/linkedin/drelephant/mapreduce/fetchers/MapReduceFSFetcherHadoop2Test.java
@@ -41,6 +41,7 @@
 import java.util.List;
 import java.util.Map;
 import java.util.HashMap;
+import java.util.TimeZone;
 
 public class MapReduceFSFetcherHadoop2Test {
 
@@ -77,6 +78,7 @@ public void testFetcherDefaultConfig() {
               fetcherConf.getFetchersConfigurationData().get(0));
       Assert.assertFalse("Sampling should be disabled in default", fetcher.isSamplingEnabled());
       Assert.assertEquals(fetcher.DEFALUT_MAX_LOG_SIZE_IN_MB, fetcher.getMaxLogSizeInMB(), 0.0001);
+      Assert.assertEquals(TimeZone.getDefault(), fetcher.getTimeZone());
 
       List<Object> list = new ArrayList<Object>();
       int listLen = fetcher.MAX_SAMPLE_SIZE * 2;
@@ -98,6 +100,7 @@ public void testFetcherConfig() {
               fetcherConf.getFetchersConfigurationData().get(0));
       Assert.assertTrue("Failed to enable sampling", fetcher.isSamplingEnabled());
       Assert.assertEquals(200d, fetcher.getMaxLogSizeInMB(), 0.0001);
+      Assert.assertEquals(TimeZone.getTimeZone("PST"), fetcher.getTimeZone());
 
       List<Object> list = new ArrayList<Object>();
       int listLen = fetcher.MAX_SAMPLE_SIZE * 2;
@@ -119,6 +122,7 @@ public void testFetcherEmptyConf() {
               fetcherConf.getFetchersConfigurationData().get(0));
       Assert.assertFalse("Sampling should be disabled in default", fetcher.isSamplingEnabled());
       Assert.assertEquals(fetcher.DEFALUT_MAX_LOG_SIZE_IN_MB, fetcher.getMaxLogSizeInMB(), 0.0001);
+      Assert.assertEquals(TimeZone.getDefault(), fetcher.getTimeZone());
 
       List<Object> list = new ArrayList<Object>();
       int listLen = fetcher.MAX_SAMPLE_SIZE * 2;

diff --git a/test/resources/configurations/fetcher/FetcherConfTest10.xml b/test/resources/configurations/fetcher/FetcherConfTest10.xml
@@ -22,6 +22,7 @@
     <params>
       <sampling_enabled>true</sampling_enabled>
       <history_log_size_limit_in_mb>200</history_log_size_limit_in_mb>
+      <history_server_time_zone>PST</history_server_time_zone>
     </params>
   </fetcher>
 </fetchers>