import numpy as np
from pandas import DataFrame

from precog import constants
from precog.protocol import Challenge
from precog.utils.cm_data import CMData
from precog.utils.general import pd_to_dict, rank
def calc_rewards(
    self,
    responses: List[Challenge],
) -> np.ndarray:
    """Score miner responses against CoinMetrics reference prices and return rewards.

    Only predictions old enough to have matured are evaluated: the window spans
    ``[now - (EVALUATION_WINDOW_HOURS + PREDICTION_FUTURE_HOURS),
    now - PREDICTION_FUTURE_HOURS]``. Each miner gets a point-error score, an
    interval-error score, and a completeness ratio; the final reward is the
    decayed rank-weight average of the two scores scaled by completeness.

    Args:
        responses: One Challenge response per UID in ``self.available_uids``,
            all sharing the same request timestamp.

    Returns:
        np.ndarray of rewards, one per UID, ordered like ``self.available_uids``.
    """
    evaluation_window_hours = constants.EVALUATION_WINDOW_HOURS
    prediction_future_hours = constants.PREDICTION_FUTURE_HOURS
    prediction_interval_minutes = constants.PREDICTION_INTERVAL_MINUTES

    # Number of predictions a fully-participating miner submits in the window.
    expected_timepoints = evaluation_window_hours * 60 / prediction_interval_minutes

    # preallocate
    point_errors = []
    interval_errors = []
    completeness_scores = []
    decay = 0.9
    weights = np.linspace(0, len(self.available_uids) - 1, len(self.available_uids))
    decayed_weights = decay**weights
    timestamp = responses[0].timestamp
    bt.logging.debug(f"Calculating rewards for timestamp: {timestamp}")
    cm = CMData()
    # Adjust time window to look at predictions that have had time to mature:
    # start (evaluation_window + prediction) hours ago, end prediction_future_hours
    # ago so that every prediction in the window has matured.
    start_time: str = to_str(get_before(timestamp=timestamp, hours=evaluation_window_hours + prediction_future_hours))
    end_time: str = to_str(to_datetime(get_before(timestamp=timestamp, hours=prediction_future_hours)))

    # Query CM API for sample standard deviation of the 1s residuals
    historical_price_data: DataFrame = cm.get_CM_ReferenceRate(
        assets="BTC", start=start_time, end=end_time, frequency="1s"
    )
    cm_data = pd_to_dict(historical_price_data)

    for uid, response in zip(self.available_uids, responses):
        current_miner = self.MinerHistory[uid]
        self.MinerHistory[uid].add_prediction(response.timestamp, response.prediction, response.interval)
        # Get predictions from the evaluation window that have had time to mature
        prediction_dict, interval_dict = current_miner.format_predictions(
            reference_timestamp=get_before(timestamp, hours=prediction_future_hours),
            hours=evaluation_window_hours,
        )

        # Shift each prediction forward by the prediction horizon so it is
        # compared against the price at the time it was predicting.
        mature_time_dict = mature_dictionary(prediction_dict, hours=prediction_future_hours)

        preds, price, aligned_pred_timestamps = align_timepoints(mature_time_dict, cm_data)

        num_predictions = len(preds) if preds is not None else 0

        # Ensure a maximum ratio of 1.0
        completeness_ratio = min(num_predictions / expected_timepoints, 1.0)
        completeness_scores.append(completeness_ratio)
        bt.logging.debug(
            f"UID: {uid} | Completeness: {completeness_ratio:.2f} ({num_predictions}/{expected_timepoints})"
        )

        inters, interval_prices, aligned_int_timestamps = align_timepoints(interval_dict, cm_data)

        # Penalize miners with missing predictions by increasing their point error
        if num_predictions == 0:
            point_errors.append(np.inf)  # Maximum penalty for no predictions
        else:
            # Divide by completeness so incomplete prediction sets score worse;
            # completeness_ratio > 0 here because num_predictions > 0.
            point_errors.append(point_error(preds, price) / completeness_ratio)

        # Guard None/empty before np.isnan: align_timepoints can yield no
        # aligned interval data, and np.isnan(None) raises TypeError.
        if inters is None or len(inters) == 0 or any([np.isnan(inters).any(), np.isnan(interval_prices).any()]):
            interval_errors.append(0)
        else:
            # Interval score is scaled down by completeness: lower score for incomplete sets.
            interval_errors.append(interval_error(inters, interval_prices) * completeness_ratio)

        bt.logging.debug(f"UID: {uid} | point_errors: {point_errors[-1]} | interval_errors: {interval_errors[-1]}")

    point_ranks = rank(np.array(point_errors))
    interval_ranks = rank(-np.array(interval_errors))  # 1 is best, 0 is worst, so flip it

    base_rewards = (decayed_weights[point_ranks] + decayed_weights[interval_ranks]) / 2

    # Scale the final rewards by the completeness score so partial participation earns less.
    rewards = base_rewards * np.array(completeness_scores)

    return rewards