Testing Framework for Hull #875

Merged · 12 commits · Aug 7, 2024
8 changes: 7 additions & 1 deletion extras/CMakeLists.txt
@@ -53,4 +53,10 @@ if(BUILD_TEST_CGAL)
target_link_libraries(perfTestCGAL manifold CGAL::CGAL CGAL::CGAL_Core Boost::thread)
target_compile_options(perfTestCGAL PRIVATE ${MANIFOLD_FLAGS})
target_compile_features(perfTestCGAL PUBLIC cxx_std_17)
endif()

add_executable(testHullPerformance test_hull_performance.cpp)
target_compile_definitions(testHullPerformance PRIVATE CGAL_USE_GMPXX)
target_link_libraries(testHullPerformance manifold meshIO samples CGAL::CGAL CGAL::CGAL_Core Boost::thread)
target_compile_options(testHullPerformance PRIVATE ${MANIFOLD_FLAGS})
target_compile_features(testHullPerformance PUBLIC cxx_std_17)
endif()
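
For a quick local check, the new target should build with something like the following (a sketch, assuming an out-of-source build/ directory; the target sits inside the BUILD_TEST_CGAL block, so CGAL, GMP and Boost must be discoverable on your system):

# Sketch: configure with the CGAL test targets enabled, then build the hull benchmark.
cmake -B build -DBUILD_TEST_CGAL=ON
cmake --build build --target testHullPerformance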
Binary file added extras/Thingi10K/raw_meshes/233198.stl
242 changes: 242 additions & 0 deletions extras/merge_and_stats.py
@@ -0,0 +1,242 @@
import pandas as pd


# MERGING THE DATA


filenames = []
def parse_csv_and_merge(csv_files, output_file='merged_data.csv'):
"""
Merges CSV files, handling multiline entries and various error conditions.

Args:
csv_files (list): List of tuples containing (filename, implementation_name).
output_file (str, optional): Name of the output CSV file. Defaults to 'merged_data.csv'.
"""

merged_data = {}
is_multiline = False
curr_file = ""
for file, implementation in csv_files:
print(f"Starting File : {file}")
try:
df = pd.read_csv(file)
except FileNotFoundError:
print(f"Error: File '{file}' not found. Skipping...")
continue

for i, row in df.iterrows():
if is_multiline:
# Continuing a multiline record (started by a 'Before standard algorithm call' line)
if 'After standard algorithm call' in row.values[0]:
is_multiline = True
continue
elif row.values[1] == "Error":
row.fillna(0, inplace=True)
row['Status'] = 'Error'
row.values[0] = curr_file
row.values[1] = 0
is_multiline = False
filename = row['Filename']
if filename not in merged_data:
merged_data[filename] = row.to_dict()
else:
for col in df.columns:
if col != 'Filename' and not pd.isna(row[col]):
merged_data[filename][col+"_"+implementation] = row[col]
elif row.values[0] == "Invalid Output by algorithm":
is_multiline = True
continue
else:
is_multiline = False
# Shifting the row one cell to the right so the remembered filename
# lands in the first column (the trailing column keeps its value).
prev_item = curr_file
filenames.append(curr_file)
temp_len = row.values.size
for j in range(1, temp_len):
temp_item = row.values[j-1]
row.values[j-1] = prev_item
prev_item = temp_item
filename = row['Filename']
if filename not in merged_data:
merged_data[filename] = row.to_dict()
else:
for col in df.columns:
if col != 'Filename' and not pd.isna(row[col]):
merged_data[filename][col+"_"+implementation] = row[col]
else:
# Handling single-line entries or first line of multiline entries
# Checking for timeout or error
if pd.isna(row['VolManifold']):
if row['VolHull'] == "Timeout":
row['VolHull'] = 0
row['VolManifold'] = 0
row.fillna(0, inplace=True)
row['Status'] = 'Timeout'
elif 'Error' in row['Status']:
row.fillna(0, inplace=True)
row['Status'] = 'Error'
elif row['VolHull'] == "Error":
row.fillna(0, inplace=True)
row['Status'] = 'Error'
filename = row['Filename']
if filename not in merged_data:
merged_data[filename] = row.to_dict()
else:
for col in df.columns:
if col != 'Filename' and not pd.isna(row[col]):
merged_data[filename][col+"_"+implementation] = row[col]
continue
# Detecting the start of a multiline record
if 'Before standard algorithm call' in row.values[1]:
if row.values[2] == "Timeout":
row.fillna(0, inplace=True)
row['Status'] = 'Timeout'
row['VolHull'] = 0
row['VolManifold'] = 0
filename = row['Filename']
if filename not in merged_data:
merged_data[filename] = row.to_dict()
else:
for col in df.columns:
if col != 'Filename' and not pd.isna(row[col]):
merged_data[filename][col+"_"+implementation] = row[col]
continue
is_multiline = True
curr_file = row.values[0]
else:
if (row['VolManifold']=="timeout: the monitored command dumped core"):
row.fillna(0, inplace=True)
row['VolManifold']=0
row['VolHull'] = 0
row['Status'] = 'Error'
filename = row['Filename']
if filename not in merged_data:
merged_data[filename] = row.to_dict()
else:
# print(merged_data[filename])
for col in df.columns:
if col != 'Filename' and not pd.isna(row[col]):
merged_data[filename][col+"_"+implementation] = row[col]


if not merged_data:
print("Warning: No valid data found in any CSV files.")
return

# Creating df from the dictionary to store the merged data
merged_data = pd.DataFrame.from_dict(merged_data, orient='index')

merged_data.to_csv(output_file, index=False)
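
# Example of the resulting schema: a mesh present in both Hull1.csv and
# CGAL.csv keeps the first file's columns unsuffixed (VolHull, Time, ...)
# and gains suffixed copies from the second (VolHull_CGAL, Time_CGAL, ...).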

csv_files = [('Hull1.csv', 'hull1'), ('CGAL.csv', 'CGAL')]
parse_csv_and_merge(csv_files)


# NORMALIZE THE DATA


file_path = 'merged_data.csv'
df = pd.read_csv(file_path)

time_columns = [col for col in df.columns if 'Time' in col]
for col in time_columns:
df[col] = df[col].str.replace(' sec', '').astype(float)

# List of base columns to normalize against
base_columns = ['VolManifold', 'VolHull', 'AreaManifold', 'AreaHull', 'ManifoldTri', 'HullTri', 'Time']
# List of suffixes to normalize
suffixes = ['_CGAL']
# Normalize the columns and check for zero base values

for base in base_columns:
base_col = base
if base_col in df.columns:
for suffix in suffixes:
col_name = f"{base}{suffix}"
if col_name in df.columns:
# Checking if base column is zero and suffix column is not zero
zero_base_nonzero_suffix = (df[base_col] == 0) & (df[col_name] != 0)
if zero_base_nonzero_suffix.any():
raise ValueError(f"Error: {base_col} is zero while {col_name} is not zero in row(s): {df[zero_base_nonzero_suffix].index.tolist()}")

# Setting col_name column in df to 1 if both are zero
both_zero = (df[base_col] == 0) & (df[col_name] == 0)
df.loc[both_zero, col_name] = 1

# Normalizing the column while handling division by zero
df[col_name] = df[col_name] / df[base_col].replace({0: 1})

df[base_col] = 1.0


df.to_csv('normalized_output.csv', index=False)
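
# Illustrative example of the normalization above (made-up numbers): a row
# with VolHull = 2.0 and VolHull_CGAL = 2.5 becomes VolHull = 1.0 and
# VolHull_CGAL = 1.25, i.e. every metric is expressed relative to the base
# implementation's value.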


# CALCULATE STATISTICS ON NORMALIZED OUTPUT



file_path = 'normalized_output.csv'
df = pd.read_csv(file_path)

# Columns for statistics calculation
columns = ['VolHull', 'AreaHull', 'HullTri', 'Time']
# Columns suffixes to use
suffixes = ['', '_CGAL']

# Function to calculate statistics for each base and implementation
def calculate_stats(column, status, suffix):
filtered_df = df[(df['Status' + suffix] == status) & ~df[column].isnull()]
success_count = filtered_df.shape[0]

if success_count > 0:
mean_val = filtered_df[column].mean()
median_val = filtered_df[column].median()
mode_val = filtered_df[column].mode().iloc[0] if not filtered_df[column].mode().empty else None
max_val = filtered_df[column].max()
min_val = filtered_df[column].min()
else:
mean_val = median_val = mode_val = max_val = min_val = None

return mean_val, median_val, mode_val, max_val, min_val, success_count
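
# For example, calculate_stats('Time_CGAL', 'Success', '_CGAL') aggregates the
# normalized CGAL timings over rows whose Status_CGAL is 'Success'.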

stats_dict = {}

# Calculating stats for each column and their suffixes
for base in columns:
for suffix in suffixes:
col_name = f"{base}{suffix}"
if col_name in df.columns:
mean_val, median_val, mode_val, max_val, min_val, success_count = calculate_stats(col_name, 'Success', suffix)
stats_dict[col_name] = {
'mean': mean_val,
'median': median_val,
'mode': mode_val,
'max': max_val,
'min': min_val,
'Success_Count': success_count
}

# Converting the stats dictionary to a df for better visualization
stats_df = pd.DataFrame(stats_dict).T

stats_df.to_csv('statistics_output.csv')

print("Statistics calculation complete. Output saved to 'statistics_output.csv'.")
print(stats_df)
2 changes: 1 addition & 1 deletion extras/perf_test_cgal.cpp
@@ -40,7 +40,7 @@ typedef CGAL::SM_Vertex_index Vertex;
void manifoldToCGALSurfaceMesh(Manifold &manifold, TriangleMesh &cgalMesh) {
auto maniMesh = manifold.GetMesh();

const int n = maniMesh.vertPos.size();
const size_t n = maniMesh.vertPos.size();
std::vector<Vertex> vertices(n);
for (size_t i = 0; i < n; i++) {
auto &vert = maniMesh.vertPos[i];
41 changes: 41 additions & 0 deletions extras/run.sh
@@ -0,0 +1,41 @@
#!/bin/bash

# Run from the manifold/extras directory as: ./run.sh {path_to_dataset_folder} {name_of_csv} {implementation (Hull, Hull_CGAL)}
# example ./run.sh ./Thingi10K/raw_meshes/ Hull4.csv Hull

# Checking if the correct number of arguments is provided
if [ "$#" -ne 3 ]; then
echo "Usage: $0 <input_folder> <output.csv> <Implementation>"
exit 1
fi

EXECUTABLE="../build/extras/testHullPerformance"
INPUT_FOLDER=$1
OUTPUT_CSV=$2
IMPLEMENTATION=$3
TIME_LIMIT=10m # Time limit per mesh (timeout duration format)
RAM_LIMIT=6000 # Memory limit in MB

# Initializing the headers
echo "Filename,VolManifold,VolHull,AreaManifold,AreaHull,ManifoldTri,HullTri,Time,Status," > $OUTPUT_CSV

# Iterate over all files in the input folder
for INPUT_FILE in "$INPUT_FOLDER"/*; do
FILE_NAME=$(basename "$INPUT_FILE")

# Running the executable under the time and memory limits and capturing its output
OUTPUT=$(ulimit -v $((RAM_LIMIT * 1024)); timeout $TIME_LIMIT $EXECUTABLE "Input" "$IMPLEMENTATION" "0" "$INPUT_FILE" 2>&1)
STATUS=$?

# Checking if the EXECUTABLE timed out
if [ $STATUS -eq 124 ]; then
STATUS="Timeout"
elif [ $STATUS -ne 0 ]; then
STATUS="Error"
else
STATUS="Success"
fi

# Adding the result to the output file
echo "\"$FILE_NAME\",$OUTPUT,\"$STATUS\"" >> $OUTPUT_CSV
done
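
Taken together with merge_and_stats.py above, a full benchmark pass might look like the following (a sketch: it assumes the executable was built as in extras/CMakeLists.txt and uses the CSV names that merge_and_stats.py hard-codes):

./run.sh ./Thingi10K/raw_meshes/ Hull1.csv Hull
./run.sh ./Thingi10K/raw_meshes/ CGAL.csv Hull_CGAL
python merge_and_stats.py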