Testing Framework for Hull #875

Merged · 12 commits · Aug 7, 2024
8 changes: 7 additions & 1 deletion extras/CMakeLists.txt
@@ -53,4 +53,10 @@ if(BUILD_TEST_CGAL)
target_link_libraries(perfTestCGAL manifold CGAL::CGAL CGAL::CGAL_Core Boost::thread)
target_compile_options(perfTestCGAL PRIVATE ${MANIFOLD_FLAGS})
target_compile_features(perfTestCGAL PUBLIC cxx_std_17)
endif()

add_executable(testHullPerformance test_hull_performance.cpp)
target_compile_definitions(testHullPerformance PRIVATE CGAL_USE_GMPXX)
target_link_libraries(testHullPerformance manifold meshIO samples CGAL::CGAL CGAL::CGAL_Core Boost::thread)
target_compile_options(testHullPerformance PRIVATE ${MANIFOLD_FLAGS})
target_compile_features(testHullPerformance PUBLIC cxx_std_17)
endif()
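
For a quick local check, the new target should build with something like the following (a sketch, assuming an out-of-source build/ directory; the target sits inside the BUILD_TEST_CGAL block, so CGAL, GMP and Boost must be discoverable on your system):

# Sketch: configure with the CGAL test targets enabled, then build the hull benchmark.
cmake -B build -DBUILD_TEST_CGAL=ON
cmake --build build --target testHullPerformance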
Binary file added extras/Thingi10K/raw_meshes/233198.stl
242 changes: 242 additions & 0 deletions extras/merge_and_stats.py
@@ -0,0 +1,242 @@
import pandas as pd


# MERGING THE DATA


filenames = []
def parse_csv_and_merge(csv_files, output_file='merged_data.csv'):
"""
Merges CSV files, handling multiline entries and various error conditions.

Args:
csv_files (list): List of tuples containing (filename, implementation_name).
output_file (str, optional): Name of the output CSV file. Defaults to 'merged_data.csv'.
"""

merged_data = {}
is_multiline = False
curr_file = ""
for file, implementation in csv_files:
print(f"Starting File : {file}")
try:
df = pd.read_csv(file)
except FileNotFoundError:
print(f"Error: File '{file}' not found. Skipping...")
continue

for i, row in df.iterrows():
if is_multiline:
# Continuing a multiline record (started by a 'Before standard algorithm call' line)
if 'After standard algorithm call' in row.values[0]:
is_multiline = True
continue
elif row.values[1] == "Error":
row.fillna(0, inplace=True)
row['Status'] = 'Error'
row.values[0] = curr_file
row.values[1] = 0
is_multiline = False
filename = row['Filename']
if filename not in merged_data:
merged_data[filename] = row.to_dict()
else:
for col in df.columns:
if col != 'Filename' and not pd.isna(row[col]):
merged_data[filename][col+"_"+implementation] = row[col]
elif row.values[0] == "Invalid Output by algorithm":
is_multiline = True
continue
else:
is_multiline = False
# Shifting the row one cell to the right so the remembered filename
# lands in the first column (the trailing column keeps its value).
prev_item = curr_file
filenames.append(curr_file)
temp_len = row.values.size
for j in range(1, temp_len):
temp_item = row.values[j-1]
row.values[j-1] = prev_item
prev_item = temp_item
filename = row['Filename']
if filename not in merged_data:
merged_data[filename] = row.to_dict()
else:
for col in df.columns:
if col != 'Filename' and not pd.isna(row[col]):
merged_data[filename][col+"_"+implementation] = row[col]
else:
# Handling single-line entries or first line of multiline entries
# Checking for timeout or error
if pd.isna(row['VolManifold']):
if row['VolHull'] == "Timeout":
row['VolHull'] = 0
row['VolManifold'] = 0
row.fillna(0, inplace=True)
row['Status'] = 'Timeout'
elif 'Error' in row['Status']:
row.fillna(0, inplace=True)
row['Status'] = 'Error'
elif row['VolHull'] == "Error":
row.fillna(0, inplace=True)
row['Status'] = 'Error'
filename = row['Filename']
if filename not in merged_data:
merged_data[filename] = row.to_dict()
else:
for col in df.columns:
if col != 'Filename' and not pd.isna(row[col]):
merged_data[filename][col+"_"+implementation] = row[col]
continue
# Detecting the start of a multiline record
if 'Before standard algorithm call' in row.values[1]:
if row.values[2] == "Timeout":
row.fillna(0, inplace=True)
row['Status'] = 'Timeout'
row['VolHull'] = 0
row['VolManifold'] = 0
filename = row['Filename']
if filename not in merged_data:
merged_data[filename] = row.to_dict()
else:
for col in df.columns:
if col != 'Filename' and not pd.isna(row[col]):
merged_data[filename][col+"_"+implementation] = row[col]
continue
is_multiline = True
curr_file = row.values[0]
else:
if (row['VolManifold']=="timeout: the monitored command dumped core"):
row.fillna(0, inplace=True)
row['VolManifold']=0
row['VolHull'] = 0
row['Status'] = 'Error'
filename = row['Filename']
if filename not in merged_data:
merged_data[filename] = row.to_dict()
else:
# print(merged_data[filename])
for col in df.columns:
if col != 'Filename' and not pd.isna(row[col]):
merged_data[filename][col+"_"+implementation] = row[col]


if not merged_data:
print("Warning: No valid data found in any CSV files.")
return

# Creating df from the dictionary to store the merged data
merged_data = pd.DataFrame.from_dict(merged_data, orient='index')

merged_data.to_csv(output_file, index=False)
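
# Example of the resulting schema: a mesh present in both Hull1.csv and
# CGAL.csv keeps the first file's columns unsuffixed (VolHull, Time, ...)
# and gains suffixed copies from the second (VolHull_CGAL, Time_CGAL, ...).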

csv_files = [('Hull1.csv', 'hull1'), ('CGAL.csv', 'CGAL')]
parse_csv_and_merge(csv_files)


# NORMALIZE THE DATA


file_path = 'merged_data.csv'
df = pd.read_csv(file_path)

time_columns = [col for col in df.columns if 'Time' in col]
for col in time_columns:
df[col] = df[col].str.replace(' sec', '').astype(float)

# List of base columns to normalize against
base_columns = ['VolManifold', 'VolHull', 'AreaManifold', 'AreaHull', 'ManifoldTri', 'HullTri', 'Time']
# List of suffixes to normalize
suffixes = ['_CGAL']
# Normalize the columns and check for zero base values

for base in base_columns:
base_col = base
if base_col in df.columns:
for suffix in suffixes:
col_name = f"{base}{suffix}"
if col_name in df.columns:
# Checking if base column is zero and suffix column is not zero
zero_base_nonzero_suffix = (df[base_col] == 0) & (df[col_name] != 0)
if zero_base_nonzero_suffix.any():
raise ValueError(f"Error: {base_col} is zero while {col_name} is not zero in row(s): {df[zero_base_nonzero_suffix].index.tolist()}")

# Setting col_name column in df to 1 if both are zero
both_zero = (df[base_col] == 0) & (df[col_name] == 0)
df.loc[both_zero, col_name] = 1

# Normalizing the column while handling division by zero
df[col_name] = df[col_name] / df[base_col].replace({0: 1})

df[base_col] = 1.0


df.to_csv('normalized_output.csv', index=False)
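
# Illustrative example of the normalization above (made-up numbers): a row
# with VolHull = 2.0 and VolHull_CGAL = 2.5 becomes VolHull = 1.0 and
# VolHull_CGAL = 1.25, i.e. every metric is expressed relative to the base
# implementation's value.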


# CALCULATE STATISTICS ON NORMALIZED OUTPUT



file_path = 'normalized_output.csv'
df = pd.read_csv(file_path)

# Columns for statistics calculation
columns = ['VolHull', 'AreaHull', 'HullTri', 'Time']
# Columns suffixes to use
suffixes = ['', '_CGAL']

# Function to calculate statistics for each base and implementation
def calculate_stats(column, status, suffix):
filtered_df = df[(df['Status' + suffix] == status) & ~df[column].isnull()]
success_count = filtered_df.shape[0]

if success_count > 0:
mean_val = filtered_df[column].mean()
median_val = filtered_df[column].median()
mode_val = filtered_df[column].mode().iloc[0] if not filtered_df[column].mode().empty else None
max_val = filtered_df[column].max()
min_val = filtered_df[column].min()
else:
mean_val = median_val = mode_val = max_val = min_val = None

return mean_val, median_val, mode_val, max_val, min_val, success_count
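
# For example, calculate_stats('Time_CGAL', 'Success', '_CGAL') aggregates the
# normalized CGAL timings over rows whose Status_CGAL is 'Success'.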

stats_dict = {}

# Calculating stats for each column and their suffixes
for base in columns:
for suffix in suffixes:
col_name = f"{base}{suffix}"
if col_name in df.columns:
mean_val, median_val, mode_val, max_val, min_val, success_count = calculate_stats(col_name, 'Success', suffix)
stats_dict[col_name] = {
'mean': mean_val,
'median': median_val,
'mode': mode_val,
'max': max_val,
'min': min_val,
'Success_Count': success_count
}

# Converting the stats dictionary to a df for better visualization
stats_df = pd.DataFrame(stats_dict).T

stats_df.to_csv('statistics_output.csv')

print("Statistics calculation complete. Output saved to 'statistics_output.csv'.")
print(stats_df)
2 changes: 1 addition & 1 deletion extras/perf_test_cgal.cpp
@@ -40,7 +40,7 @@ typedef CGAL::SM_Vertex_index Vertex;
void manifoldToCGALSurfaceMesh(Manifold &manifold, TriangleMesh &cgalMesh) {
auto maniMesh = manifold.GetMesh();

const int n = maniMesh.vertPos.size();
const size_t n = maniMesh.vertPos.size();
std::vector<Vertex> vertices(n);
for (size_t i = 0; i < n; i++) {
auto &vert = maniMesh.vertPos[i];
41 changes: 41 additions & 0 deletions extras/run.sh
@@ -0,0 +1,41 @@
#!/bin/bash

# Run from the manifold/extras directory as: ./run.sh {path_to_dataset_folder} {name_of_csv} {implementation (Hull, Hull_CGAL)}
# example ./run.sh ./Thingi10K/raw_meshes/ Hull4.csv Hull

# Checking if the correct number of arguments is provided
if [ "$#" -ne 3 ]; then
echo "Usage: $0 <input_folder> <output.csv> <Implementation>"
exit 1
fi

EXECUTABLE="../build/extras/testHullPerformance"
INPUT_FOLDER=$1
OUTPUT_CSV=$2
IMPLEMENTATION=$3
TIME_LIMIT=10m # Time limit per mesh (timeout duration format)
RAM_LIMIT=6000 # Memory limit in MB

# Initializing the headers
echo "Filename,VolManifold,VolHull,AreaManifold,AreaHull,ManifoldTri,HullTri,Time,Status," > $OUTPUT_CSV

# Iterate over all files in the input folder
for INPUT_FILE in "$INPUT_FOLDER"/*; do
FILE_NAME=$(basename "$INPUT_FILE")

# Running the executable under the time and memory limits and capturing its output
OUTPUT=$(ulimit -v $((RAM_LIMIT * 1024)); timeout $TIME_LIMIT $EXECUTABLE "Input" "$IMPLEMENTATION" "0" "$INPUT_FILE" 2>&1)
STATUS=$?

# Checking if the EXECUTABLE timed out
if [ $STATUS -eq 124 ]; then
STATUS="Timeout"
elif [ $STATUS -ne 0 ]; then
STATUS="Error"
else
STATUS="Success"
fi

# Adding the result to the output file
echo "\"$FILE_NAME\",$OUTPUT,\"$STATUS\"" >> $OUTPUT_CSV
done
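
Taken together with merge_and_stats.py above, a full benchmark pass might look like the following (a sketch: it assumes the executable was built as in extras/CMakeLists.txt and uses the CSV names that merge_and_stats.py hard-codes):

./run.sh ./Thingi10K/raw_meshes/ Hull1.csv Hull
./run.sh ./Thingi10K/raw_meshes/ CGAL.csv Hull_CGAL
python merge_and_stats.py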