@@ -29,18 +29,18 @@ struct BeamHypotheses
29
29
{
30
30
// clang-format off
31
31
32
- // BS: batch_size, BM: beam_width, MSL: max_seq_length
33
- // %%: parameter name when dynamic_decoder.forward() / gather_tree() are called in [ generation.py] (python workflow)
32
+ // MBS: max_batch_size, BS: batch_size, BM: beam_width, MSL: max_seq_length
33
+ // %%: parameter name in file generation.py (python workflow)
34
34
35
35
// Candidate beam: a beam that has generated end_id or whose sequence length has reached MSL
36
- // Candidate-Beam-Array (CBA): The arrays (size: BM*2) to place the candidate beams and related information
36
+ // Candidate-Beam-Array (CBA): The arrays to place the candidate beams and related information
37
37
38
38
// Scalar values
39
39
bool bReturnNormedScore{false }; // return normed_score / cum_log_probs, useless yet
40
- int nBatchSize{0 }; //
40
+ int nMaxBatchSize{0 }; // max batch size by model configuration
41
+ int nBatchSize{0 }; // batch size by runtime input data
41
42
int nBeamWidth{0 }; //
42
43
int nIte{0 }; // index of local_batch, always be 0 when pp_size==1
43
- int nBatchSizeLocal{0 }; //
44
44
int nMaxSeqLen{0 }; //
45
45
int nVocabSize{0 }; // vocab_size_padded
46
46
@@ -54,8 +54,9 @@ struct BeamHypotheses
54
54
int const * endIds{nullptr }; // [BS, BM] %% self.end_ids
55
55
56
56
// Pointers for output
57
- int * outputIds{nullptr }; // [BS, BM, MSL] %% self.output_ids
58
- float * logProbs{nullptr }; // [MSL, BS, BM] %% self.log_probs_tiled
57
+ int * outputIds{nullptr }; // [BS, BM, MSL] %% self.output_ids only used in gather_tree
58
+ float * logProbs{nullptr }; // [BS, BM, MSL] %% self.log_probs only used in gather_tree
59
+ float * logProbsTiled{nullptr }; // [MSL, MBS, BM] %% self.log_probs_tiled
59
60
int * sequenceLengths{nullptr }; // [BS, BM] %% self.sequence_length_buffer
60
61
float * cumLogProbs{nullptr }; // [BS, BM] %% self.cum_log_probs
61
62
@@ -65,8 +66,8 @@ struct BeamHypotheses
65
66
int * sequenceLengthsCBA{nullptr }; // [BS, BM*2] %% self.beam_hyps_seq_len_cba
66
67
float * cumLogProbsCBA{nullptr }; // [BS, BM*2] %% self.beam_hyps_cum_log_probs_cba
67
68
float * normedScoresCBA{nullptr }; // [BS, BM*2] %% self.beam_hyps_normed_scores_cba
68
- int * numBeamsCBA{nullptr }; // [BS] %% self.beam_hyps_num_beams number of beams in CBA
69
- float * minNormedScoresCBA{nullptr }; // [BS] %% self.beam_hyps_min_normed_scores worst score in CBA
69
+ int * numBeamsCBA{nullptr }; // [BS] %% self.beam_hyps_num_beams number of beams in CBA
70
+ float * minNormedScoresCBA{nullptr }; // [BS] %% self.beam_hyps_min_normed_scores worst score in CBA
70
71
71
72
// Pointers related to the beam search process; they are initialized in these two functions:
72
73
// [gptDecoder.cpp] GptDecoder<T>::forward or [dynamicDecodeOp.cpp] FtDynamicDecode<T>::forward
0 commit comments