megatron/inference/api.py (+1 −1)

@@ -38,7 +38,7 @@ def generate_and_post_process(model,
                               temperature=1.0,
                               add_BOS=False,
                               use_eod_token_for_early_termination=True):
-    """Run inferecne and post-process outputs, i.e., detokenize,
+    """Run inference and post-process outputs, i.e., detokenize,
     move to cpu and convert to list."""

     # Main inference.

megatron/inference/forward_step.py (+2 −1)

@@ -61,7 +61,8 @@ class ForwardStep:
                                                     max_sequence_len)

         # Pipelining arguments.
         args = get_args()
-        self.pipeline_size_larger_than_one = args.pipeline_model_parallel_size
+        self.pipeline_size_larger_than_one = (
+            args.pipeline_model_parallel_size > 1)
         # Threshold of pipelining.
         self.pipelining_batch_x_seqlen = \
             args.inference_batch_times_seqlen_threshold