From d30c85f92fd202c5c1fc92c764fa4f1bf36997b9 Mon Sep 17 00:00:00 2001
From: Peter Powers <pmpowers@usgs.gov>
Date: Thu, 30 Nov 2017 13:00:15 -0700
Subject: [PATCH] deagg gmm/source components 266; export updates 268

---
 .gitignore                                    |  1 +
 .../earthquake/nshmp/calc/CalcConfig.java     |  4 +-
 .../usgs/earthquake/nshmp/calc/DataType.java  |  2 +-
 .../earthquake/nshmp/calc/DeaggExport.java    | 19 +++-
 .../earthquake/nshmp/calc/Deaggregation.java  | 90 +++++++++++--------
 .../earthquake/nshmp/calc/EqRateExport.java   | 25 +++---
 .../earthquake/nshmp/calc/HazardExport.java   | 61 ++++++++-----
 7 files changed, 120 insertions(+), 82 deletions(-)

diff --git a/.gitignore b/.gitignore
index 2511358d2..e1d3dbf31 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,6 +7,7 @@ build
 classes
 tmp
 curves*/
+hazout*/
 eq-prob*/
 eq-rate*/
 Scratch*.java
diff --git a/src/gov/usgs/earthquake/nshmp/calc/CalcConfig.java b/src/gov/usgs/earthquake/nshmp/calc/CalcConfig.java
index bdd48dd59..483d9d89a 100644
--- a/src/gov/usgs/earthquake/nshmp/calc/CalcConfig.java
+++ b/src/gov/usgs/earthquake/nshmp/calc/CalcConfig.java
@@ -63,7 +63,7 @@ public final class CalcConfig {
   static final String FILE_NAME = "config.json";
   private static final String ID = CalcConfig.class.getSimpleName();
   private static final String STATE_ERROR = "%s %s not set";
-  static final String DEFAULT_OUT = "curves";
+  static final String DEFAULT_OUT = "hazout";
 
   /**
    * The resource from which {@code this} was derived. This field may be empty.
@@ -865,7 +865,7 @@ public final class CalcConfig {
     /**
      * The directory to write any results to.
      *
-     * <p><b>Default:</b> {@code "curves"} for hazard and deaggregation
+     * <p><b>Default:</b> {@code "hazout"} for hazard and deaggregation
      * calculations; {@code "eq-rate"} or {@code "eq-prob"} for rate
      * calculations.
      */
diff --git a/src/gov/usgs/earthquake/nshmp/calc/DataType.java b/src/gov/usgs/earthquake/nshmp/calc/DataType.java
index 73929b15f..c01365c37 100644
--- a/src/gov/usgs/earthquake/nshmp/calc/DataType.java
+++ b/src/gov/usgs/earthquake/nshmp/calc/DataType.java
@@ -12,7 +12,7 @@ import gov.usgs.earthquake.nshmp.gmm.Gmm;
  */
 public enum DataType {
 
-  /** Total hazard curves or magnitude-frequencey distributions, etc. */
+  /** Total hazard curves or magnitude-frequency distributions, etc. */
   TOTAL,
 
   /** {@linkplain Gmm Ground motion model} specific data. */
diff --git a/src/gov/usgs/earthquake/nshmp/calc/DeaggExport.java b/src/gov/usgs/earthquake/nshmp/calc/DeaggExport.java
index 095e3d622..c6e76dc3b 100644
--- a/src/gov/usgs/earthquake/nshmp/calc/DeaggExport.java
+++ b/src/gov/usgs/earthquake/nshmp/calc/DeaggExport.java
@@ -54,6 +54,9 @@ final class DeaggExport {
   final SummaryElements summary;
   final List<JsonContributor> sources;
 
+  private static final String DEAGG_DATA = "data.csv";
+  private static final String DEAGG_SUMMARY = "summary.txt";
+
   /*
    * All component DeaggDatasets require data from the final total DeaggDataset
    * to correctly calculate contributions and represent summary data that is not
@@ -78,13 +81,21 @@ final class DeaggExport {
   }
 
   void toFile(Path dir, String site) throws IOException {
-    Path dataPath = dir.resolve(site + "-data.csv");
-    Files.write(dataPath, data.toString().getBytes(UTF_8));
-    Path summaryPath = dir.resolve(site + "-summary.txt");
+    Path siteDir = dir.resolve(site);
+    Files.createDirectories(siteDir);
+    Path dataPath = siteDir.resolve(DEAGG_DATA);
+    Files.write(
+        dataPath,
+        data.toString().getBytes(UTF_8),
+        WRITE);
+    Path summaryPath = siteDir.resolve(DEAGG_SUMMARY);
     String summaryString = summaryStringBuilder()
         .append(DATASET_SEPARATOR)
         .toString();
-    Files.write(summaryPath, summaryString.getBytes(UTF_8), WRITE);
+    Files.write(
+        summaryPath,
+        summaryString.getBytes(UTF_8),
+        WRITE);
   }
 
   @Override
diff --git a/src/gov/usgs/earthquake/nshmp/calc/Deaggregation.java b/src/gov/usgs/earthquake/nshmp/calc/Deaggregation.java
index 5eba28f13..9c4b6198f 100644
--- a/src/gov/usgs/earthquake/nshmp/calc/Deaggregation.java
+++ b/src/gov/usgs/earthquake/nshmp/calc/Deaggregation.java
@@ -16,6 +16,7 @@ import java.util.Map.Entry;
 
 import gov.usgs.earthquake.nshmp.data.Interpolator;
 import gov.usgs.earthquake.nshmp.data.XySequence;
+import gov.usgs.earthquake.nshmp.eq.model.SourceType;
 import gov.usgs.earthquake.nshmp.gmm.Gmm;
 import gov.usgs.earthquake.nshmp.gmm.Imt;
 
@@ -38,7 +39,7 @@ public final class Deaggregation {
    * Deaggregate on probability of occurrence instead of exceedance.
    * -------------------------------------------------------------------------
    * Revisit precision issues associated with integer based return period;
-   * 2%in50 years os really 0.00040405414, not 1/2475 = 0.0004040404
+   * 2%in50 years is really 0.00040405414, not 1/2475 = 0.0004040404
    * -------------------------------------------------------------------------
    * -------------------------------------------------------------------------
    * One of the difficulties with deaggregation is deciding how to specify
@@ -181,19 +182,23 @@ public final class Deaggregation {
     final DeaggConfig config;
     final DeaggDataset totalDataset;
     final Map<Gmm, DeaggDataset> gmmDatasets;
+    final Map<SourceType, DeaggDataset> typeDatasets;
 
     ImtDeagg(Hazard hazard, DeaggConfig config) {
       this.config = config;
 
       /*
-       * Datasets are combined as follows: For each HazardCurveSet/SourceSet
-       * deaggregation is performed across all relevant Gmms. These are
-       * preserved in a ListMultimap for output of deaggregation by Gmm. It's
-       * too much work to consolidate the ListMultimap and keep track of all the
-       * nested DeaggContributors, so a list is maintained of datasets per
-       * SourceSet, the total across all Gmms that result from each call to
+       * Datasets are combined as follows:
+       *
+       * For each HazardCurveSet (SourceSet), deaggregation is performed across
+       * all relevant Gmms. The results are preserved in ListMultimaps for
+       * output of deaggregation by Gmm and by SourceType. It's too much work
+       * to consolidate the ListMultimaps on the fly and track all the nested
+       * DeaggContributors, so per-Gmm and per-SourceType lists of datasets are
+       * kept, the latter holding the total across all Gmms from each call to
        * deaggregate(). The combination of multiple datasets for single
-       * SourceSets is straightforward.
+       * SourceSets is then straightforward via static consolidators in
+       * DeaggDataset.
        */
 
       int sourceSetCount = hazard.sourceSetCurves.size();
@@ -201,7 +206,10 @@ public final class Deaggregation {
           .enumKeys(Gmm.class)
           .arrayListValues(sourceSetCount)
           .build();
-      List<DeaggDataset> totalDatasetList = new ArrayList<>(sourceSetCount);
+      ListMultimap<SourceType, DeaggDataset> typeDatasetLists = MultimapBuilder
+          .enumKeys(SourceType.class)
+          .arrayListValues(sourceSetCount)
+          .build();
 
       for (HazardCurveSet curveSet : hazard.sourceSetCurves.values()) {
         XySequence sourceSetCurve = curveSet.totalCurves.get(config.imt);
@@ -216,7 +224,8 @@ public final class Deaggregation {
             config,
             hazard.site);
         gmmDatasetLists.putAll(Multimaps.forMap(sourceSetDatasets));
-        totalDatasetList.add(SOURCE_CONSOLIDATOR.apply(sourceSetDatasets.values()));
+        DeaggDataset sourceSetTotal = SOURCE_CONSOLIDATOR.apply(sourceSetDatasets.values());
+        typeDatasetLists.put(curveSet.sourceSet.type(), sourceSetTotal);
       }
 
       /* Combine SourceSets across Gmms. */
@@ -224,29 +233,24 @@ public final class Deaggregation {
           Multimaps.asMap(gmmDatasetLists),
           SOURCE_SET_CONSOLIDATOR));
 
+      /* Combine SourceSets across SourceTypes. */
+      typeDatasets = Maps.immutableEnumMap(Maps.transformValues(
+          Multimaps.asMap(typeDatasetLists),
+          SOURCE_SET_CONSOLIDATOR));
+
       /* Combine SourceSet totals. */
-      totalDataset = SOURCE_SET_CONSOLIDATOR.apply(totalDatasetList);
+      totalDataset = SOURCE_SET_CONSOLIDATOR.apply(typeDatasets.values());
     }
 
+    private static final String TOTAL_COMPONENT = "Total";
+    private static final String GMM_COMPONENT = "GMM: ";
+    private static final String TYPE_COMPONENT = "Source Type: ";
+
     @Override
     public String toString() {
       StringBuilder sb = new StringBuilder();
       sb.append(NEWLINE);
-      DeaggExport export = new DeaggExport(
-          totalDataset,
-          totalDataset,
-          config,
-          "Total",
-          false);
-      sb.append(export.toString());
-      sb.append(NEWLINE);
-      for (Entry<Gmm, DeaggDataset> ddEntry : gmmDatasets.entrySet()) {
-        export = new DeaggExport(
-            totalDataset,
-            ddEntry.getValue(),
-            config,
-            ddEntry.getKey().toString(),
-            false);
+      for (DeaggExport export : buildExports(false)) {
         sb.append(export.toString());
         sb.append(NEWLINE);
       }
@@ -259,25 +263,39 @@ public final class Deaggregation {
      * object prior to serialization.
      */
     Object toJson() {
-      List<DeaggExport> jsonDeaggs = new ArrayList<>();
+      return buildExports(true);
+    }
+
+    private List<DeaggExport> buildExports(boolean json) {
+      List<DeaggExport> exports = new ArrayList<>();
       DeaggExport total = new DeaggExport(
           totalDataset,
           totalDataset,
           config,
-          "Total",
-          true);
-      jsonDeaggs.add(total);
-      for (Entry<Gmm, DeaggDataset> ddEntry : gmmDatasets.entrySet()) {
+          TOTAL_COMPONENT,
+          json);
+      exports.add(total);
+      for (Entry<Gmm, DeaggDataset> gmmEntry : gmmDatasets.entrySet()) {
         DeaggExport gmm = new DeaggExport(
             totalDataset,
-            ddEntry.getValue(),
+            gmmEntry.getValue(),
             config,
-            ddEntry.getKey().toString(),
-            true);
-        jsonDeaggs.add(gmm);
+            GMM_COMPONENT + gmmEntry.getKey().toString(),
+            json);
+        exports.add(gmm);
       }
-      return jsonDeaggs;
+      for (Entry<SourceType, DeaggDataset> typeEntry : typeDatasets.entrySet()) {
+        DeaggExport type = new DeaggExport(
+            totalDataset,
+            typeEntry.getValue(),
+            config,
+            TYPE_COMPONENT + typeEntry.getKey().toString(),
+            json);
+        exports.add(type);
+      }
+      return exports;
     }
+
   }
 
 }
diff --git a/src/gov/usgs/earthquake/nshmp/calc/EqRateExport.java b/src/gov/usgs/earthquake/nshmp/calc/EqRateExport.java
index 8eeffe969..be7dc990b 100644
--- a/src/gov/usgs/earthquake/nshmp/calc/EqRateExport.java
+++ b/src/gov/usgs/earthquake/nshmp/calc/EqRateExport.java
@@ -15,7 +15,6 @@ import java.io.IOException;
 import java.nio.file.Files;
 import java.nio.file.OpenOption;
 import java.nio.file.Path;
-import java.nio.file.Paths;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.List;
@@ -38,9 +37,12 @@ public final class EqRateExport {
 
   private static final String RATE_FORMAT = "%.8g";
   private static final String PROB_FORMAT = "%.2f";
+  private static final String RATE_FILE = "rates.csv";
+  private static final String PROB_FILE = "probs.csv";
 
   private final Logger log;
   private final Path dir;
+  private final String file;
   private final String valueFormat;
   private final CalcConfig config;
   private final boolean exportSource;
@@ -62,7 +64,8 @@ public final class EqRateExport {
     boolean rates = config.rate.valueFormat == ValueFormat.ANNUAL_RATE;
 
     this.log = log;
-    this.dir = HazardExport.createOutputDir(updateOutDir(config.output.directory, rates));
+    this.dir = HazardExport.createOutputDir(config.output.directory);
+    this.file = rates ? RATE_FILE : PROB_FILE;
     this.valueFormat = rates ? RATE_FORMAT : PROB_FORMAT;
     this.config = config;
     this.exportSource = config.output.dataTypes.contains(DataType.SOURCE);
@@ -75,14 +78,6 @@ public final class EqRateExport {
     this.totalWatch = Stopwatch.createStarted();
   }
 
-  /* If config output is 'curves', change to 'eq-rate' or 'eq-prob'. */
-  static Path updateOutDir(Path dir, boolean rates) {
-    if (dir.toString().equals(CalcConfig.DEFAULT_OUT)) {
-      return (rates ? Paths.get("eq-rate") : Paths.get("eq-prob"));
-    }
-    return dir;
-  }
-
   /**
    * Create a new results handler.
    * 
@@ -242,15 +237,15 @@ public final class EqRateExport {
       }
 
       /* write/append */
-      Path totalFile = dir.resolve("total" + HazardExport.TEXT_SUFFIX);
+      Path totalFile = dir.resolve(file);
       Files.write(totalFile, totalLines, US_ASCII, options);
       if (exportSource) {
-        Path typeDir = dir.resolve("source");
-        Files.createDirectories(typeDir);
+        Path parentDir = dir.resolve(HazardExport.TYPE_DIR);
         for (Entry<SourceType, List<String>> typeEntry : typeLines.entrySet()) {
           SourceType type = typeEntry.getKey();
-          String filename = type.toString();
-          Path typeFile = typeDir.resolve(filename + HazardExport.TEXT_SUFFIX);
+          Path typeDir = parentDir.resolve(type.name());
+          Files.createDirectories(typeDir);
+          Path typeFile = typeDir.resolve(file);
           Files.write(typeFile, typeEntry.getValue(), US_ASCII, options);
         }
       }
diff --git a/src/gov/usgs/earthquake/nshmp/calc/HazardExport.java b/src/gov/usgs/earthquake/nshmp/calc/HazardExport.java
index 52bc1b12a..8b40d42bd 100644
--- a/src/gov/usgs/earthquake/nshmp/calc/HazardExport.java
+++ b/src/gov/usgs/earthquake/nshmp/calc/HazardExport.java
@@ -58,8 +58,9 @@ public final class HazardExport {
 
   static final String DEAGG_DIR = "deagg";
   static final String GMM_DIR = "gmm";
-  static final String BINARY_SUFFIX = ".bin";
-  static final String TEXT_SUFFIX = ".csv";
+  static final String TYPE_DIR = "source";
+  static final String CURVE_FILE_ASCII = "curves.csv";
+  static final String CURVE_FILE_BINARY = "curves.bin";
   static final String RATE_FMT = "%.8e";
 
   static final OpenOption[] WRITE = new OpenOption[] {
@@ -396,42 +397,42 @@ public final class HazardExport {
 
       Path imtDir = dir.resolve(imt.name());
       Files.createDirectories(imtDir);
-      Path totalFile = imtDir.resolve("total" + TEXT_SUFFIX);
+      Path totalFile = imtDir.resolve(CURVE_FILE_ASCII);
       Files.write(totalFile, totalEntry.getValue(), US_ASCII, options);
 
       Metadata meta = null;
 
       if (exportBinary) {
         meta = metaMap.get(imt);
-        Path totalBinFile = imtDir.resolve("total" + BINARY_SUFFIX);
+        Path totalBinFile = imtDir.resolve(CURVE_FILE_BINARY);
         writeBinaryBatch(totalBinFile, meta, totalCurves.get(imt));
       }
 
       if (exportSource) {
-        Path typeDir = imtDir.resolve("source");
-        Files.createDirectories(typeDir);
+        Path typeParent = imtDir.resolve(TYPE_DIR);
         for (Entry<SourceType, List<String>> typeEntry : typeLines.get(imt).entrySet()) {
           SourceType type = typeEntry.getKey();
-          String filename = type.toString();
-          Path typeFile = typeDir.resolve(filename + TEXT_SUFFIX);
+          Path typeDir = typeParent.resolve(type.name());
+          Files.createDirectories(typeDir);
+          Path typeFile = typeDir.resolve(CURVE_FILE_ASCII);
           Files.write(typeFile, typeEntry.getValue(), US_ASCII, options);
           if (exportBinary) {
-            Path typeBinFile = typeDir.resolve(filename + BINARY_SUFFIX);
+            Path typeBinFile = typeDir.resolve(CURVE_FILE_BINARY);
             writeBinaryBatch(typeBinFile, meta, typeCurves.get(imt).get(type));
           }
         }
       }
 
       if (exportGmm) {
-        Path gmmDir = imtDir.resolve("gmm");
-        Files.createDirectories(gmmDir);
+        Path gmmParent = imtDir.resolve(GMM_DIR);
         for (Entry<Gmm, List<String>> gmmEntry : gmmLines.get(imt).entrySet()) {
           Gmm gmm = gmmEntry.getKey();
-          String filename = gmm.name();
-          Path gmmFile = gmmDir.resolve(filename + TEXT_SUFFIX);
+          Path gmmDir = gmmParent.resolve(gmm.name());
+          Files.createDirectories(gmmDir);
+          Path gmmFile = gmmDir.resolve(CURVE_FILE_ASCII);
           Files.write(gmmFile, gmmEntry.getValue(), US_ASCII, options);
           if (exportBinary) {
-            Path gmmBinFile = gmmDir.resolve(filename + BINARY_SUFFIX);
+            Path gmmBinFile = gmmDir.resolve(CURVE_FILE_BINARY);
             writeBinaryBatch(gmmBinFile, meta, gmmCurves.get(imt).get(gmm));
           }
         }
@@ -445,31 +446,43 @@ public final class HazardExport {
   private void writeDeaggs() throws IOException {
 
     /*
-     * Writing of Hazard results will have already created necessary Imt
-     * directories.
+     * Writing of Hazard results will have already created the necessary Imt,
+     * Gmm, and SourceType directories; site directories are created on export.
      */
     for (Deaggregation deagg : deaggs) {
       String name = namedSites ? deagg.site.name : lonLatStr(deagg.site.location);
       for (Entry<Imt, ImtDeagg> imtEntry : deagg.deaggs.entrySet()) {
 
         /* Write total dataset. */
-        Path imtDir = dir.resolve(imtEntry.getKey().name());
-        Path imtDeaggDir = imtDir.resolve(DEAGG_DIR);
-        Files.createDirectories(imtDeaggDir);
         ImtDeagg imtDeagg = imtEntry.getValue();
         DeaggDataset ddTotal = imtDeagg.totalDataset;
         DeaggConfig dc = imtDeagg.config;
         DeaggExport exporter = new DeaggExport(ddTotal, ddTotal, dc, "Total", false);
-        exporter.toFile(imtDeaggDir, name);
+        Path imtDir = dir.resolve(imtEntry.getKey().name());
+        Path totalDir = imtDir.resolve(DEAGG_DIR);
+        Files.createDirectories(totalDir);
+        exporter.toFile(totalDir, name);
+
+        if (exportSource) {
+          for (Entry<SourceType, DeaggDataset> typeEntry : imtDeagg.typeDatasets.entrySet()) {
+            SourceType type = typeEntry.getKey();
+            Path typeDir = imtDir.resolve(TYPE_DIR)
+                .resolve(type.name())
+                .resolve(DEAGG_DIR);
+            DeaggDataset ddType = typeEntry.getValue();
+            exporter = new DeaggExport(ddTotal, ddType, dc, type.toString(), false);
+            exporter.toFile(typeDir, name);
+          }
+        }
 
         if (exportGmm) {
           for (Entry<Gmm, DeaggDataset> gmmEntry : imtDeagg.gmmDatasets.entrySet()) {
+            Gmm gmm = gmmEntry.getKey();
             Path gmmDir = imtDir.resolve(GMM_DIR)
-                .resolve(DEAGG_DIR)
-                .resolve(gmmEntry.getKey().name());
-            Files.createDirectories(gmmDir);
+                .resolve(gmm.name())
+                .resolve(DEAGG_DIR);
             DeaggDataset ddGmm = gmmEntry.getValue();
-            exporter = new DeaggExport(ddTotal, ddGmm, dc, gmmEntry.getKey().toString(), false);
+            exporter = new DeaggExport(ddTotal, ddGmm, dc, gmm.toString(), false);
             exporter.toFile(gmmDir, name);
           }
         }
-- 
GitLab