[2022-08-01 01:27:38,652][fairseq_cli.train][INFO] - {'_name': None, 'common': {'_name': None, 'no_progress_bar': False, 'log_interval': 200, 'log_format': 'json', 'log_file': None, 'aim_repo': None, 'aim_run_hash': None, 'tensorboard_logdir': None, 'wandb_project': None, 'azureml_logging': False, 'seed': 1, 'cpu': False, 'tpu': False, 'bf16': False, 'memory_efficient_bf16': False, 'fp16': True, 'memory_efficient_fp16': False, 'fp16_no_flatten_grads': False, 'fp16_init_scale': 128, 'fp16_scale_window': None, 'fp16_scale_tolerance': 0.0, 'on_cpu_convert_precision': False, 'min_loss_scale': 0.0001, 'threshold_loss_scale': None, 'amp': False, 'amp_batch_retries': 2, 'amp_init_scale': 128, 'amp_scale_window': None, 'user_dir': None, 'empty_cache_freq': 0, 'all_gather_list_size': 16384, 'model_parallel_size': 1, 'quantization_config_path': None, 'profile': False, 'reset_logging': False, 'suppress_crashes': False, 'use_plasma_view': False, 'plasma_path': '/tmp/plasma'}, 'common_eval': {'_name': None, 'path': None, 'post_process': None, 'quiet': False, 'model_overrides': '{}', 'results_path': None}, 'distributed_training': {'_name': None, 'distributed_world_size': 8, 'distributed_num_procs': 8, 'distributed_rank': 0, 'distributed_backend': 'nccl', 'distributed_init_method': 'tcp://localhost:19896', 'distributed_port': -1, 'device_id': 0, 'distributed_no_spawn': False, 'ddp_backend': 'no_c10d', 'ddp_comm_hook': 'none', 'bucket_cap_mb': 25, 'fix_batches_to_gpus': False, 'find_unused_parameters': False, 'gradient_as_bucket_view': False, 'fast_stat_sync': False, 'heartbeat_timeout': -1, 'broadcast_buffers': False, 'slowmo_momentum': None, 'slowmo_base_algorithm': 'localsgd', 'localsgd_frequency': 3, 'nprocs_per_node': 8, 'pipeline_model_parallel': False, 'pipeline_balance': None, 'pipeline_devices': None, 'pipeline_chunks': 0, 'pipeline_encoder_balance': None, 'pipeline_encoder_devices': None, 'pipeline_decoder_balance': None, 'pipeline_decoder_devices': None, 'pipeline_checkpoint': 'never', 'zero_sharding': 'none', 'fp16': True, 'memory_efficient_fp16': False, 'tpu': False, 'no_reshard_after_forward': False, 'fp32_reduce_scatter': False, 'cpu_offload': False, 'use_sharded_state': False, 'not_fsdp_flatten_parameters': False}, 'dataset': {'_name': None, 'num_workers': 1, 'skip_invalid_size_inputs_valid_test': True, 'max_tokens': None, 'batch_size': 32, 'required_batch_size_multiple': 8, 'required_seq_len_multiple': 1, 'dataset_impl': None, 'data_buffer_size': 10, 'train_subset': 'train', 'valid_subset': 'valid', 'combine_valid_subsets': None, 'ignore_unused_valid_subsets': True, 'validate_interval': 5, 'validate_interval_updates': 50000, 'validate_after_updates': 0, 'fixed_validation_seed': None, 'disable_validation': False, 'max_tokens_valid': None, 'batch_size_valid': 32, 'max_valid_steps': None, 'curriculum': 0, 'gen_subset': 'test', 'num_shards': 1, 'shard_id': 0, 'grouped_shuffling': False, 'update_epoch_batch_itr': False, 'update_ordered_indices_seed': False}, 'optimization': {'_name': None, 'max_epoch': 0, 'max_update': 1000000, 'stop_time_hours': 0.0, 'clip_norm': 5.0, 'sentence_avg': False, 'update_freq': [1], 'lr': [0.001], 'stop_min_lr': -1.0, 'use_bmuf': False, 'skip_remainder_batch': False}, 'checkpoint': {'_name': None, 'save_dir': 'bert/adan2/', 'restore_file': 'checkpoint_last.pt', 'continue_once': None, 'finetune_from_model': None, 'reset_dataloader': False, 'reset_lr_scheduler': False, 'reset_meters': False, 'reset_optimizer': False, 'optimizer_overrides': '{}', 'save_interval': 5, 'save_interval_updates': 50000, 'keep_interval_updates': -1, 'keep_interval_updates_pattern': -1, 'keep_last_epochs': -1, 'keep_best_checkpoints': -1, 'no_save': False, 'no_epoch_checkpoints': True, 'no_last_checkpoints': False, 'no_save_optimizer_state': False, 'best_checkpoint_metric': 'loss', 'maximize_best_checkpoint_metric': False, 'patience': -1, 'checkpoint_suffix': '', 'checkpoint_shard_count': 1, 'load_checkpoint_on_all_dp_ranks': False, 'write_checkpoints_asynchronously': False, 'model_parallel_size': 1}, 'bmuf': {'_name': None, 'block_lr': 1.0, 'block_momentum': 0.875, 'global_sync_iter': 50, 'warmup_iterations': 500, 'use_nbm': False, 'average_sync': False, 'distributed_world_size': 8}, 'generation': {'_name': None, 'beam': 5, 'nbest': 1, 'max_len_a': 0.0, 'max_len_b': 200, 'min_len': 1, 'match_source_len': False, 'unnormalized': False, 'no_early_stop': False, 'no_beamable_mm': False, 'lenpen': 1.0, 'unkpen': 0.0, 'replace_unk': None, 'sacrebleu': False, 'score_reference': False, 'prefix_size': 0, 'no_repeat_ngram_size': 0, 'sampling': False, 'sampling_topk': -1, 'sampling_topp': -1.0, 'constraints': None, 'temperature': 1.0, 'diverse_beam_groups': -1, 'diverse_beam_strength': 0.5, 'diversity_rate': -1.0, 'print_alignment': None, 'print_step': False, 'lm_path': None, 'lm_weight': 0.0, 'iter_decode_eos_penalty': 0.0, 'iter_decode_max_iter': 10, 'iter_decode_force_max_iter': False, 'iter_decode_with_beam': 1, 'iter_decode_with_external_reranker': False, 'retain_iter_history': False, 'retain_dropout': False, 'retain_dropout_modules': None, 'decoding_format': None, 'no_seed_provided': False, 'eos_token': None}, 'eval_lm': {'_name': None, 'output_word_probs': False, 'output_word_stats': False, 'context_window': 0, 'softmax_batch': 9223372036854775807}, 'interactive': {'_name': None, 'buffer_size': 0, 'input': '-'}, 'model': {'_name': 'roberta', 'max_positions': 512, 'dropout': 0.1, 'attention_dropout': 0.1}, 'task': {'_name': 'masked_lm', 'data': '/dataset/common/bert-corpus-0729/', 'sample_break_mode': complete, 'tokens_per_sample': 512, 'mask_prob': 0.15, 'leave_unmasked_prob': 0.1, 'random_token_prob': 0.1, 'freq_weighted_replacement': False, 'mask_whole_words': False, 'mask_multiple_length': 1, 'mask_stdev': 0.0, 'shorten_method': none, 'shorten_data_split_list': '', 'seed': 1, 'include_target_tokens': False}, 'criterion': {'_name': 'masked_lm', 'tpu': False}, 'optimizer': {'_name': 'adan', 'adan_betas': '(0.98,0.92,0.99)', 'adan_eps': 1e-08, 'weight_decay': 0.02, 'no_prox': False, 'fp16_adan_stats': False, 'tpu': False, 'lr': [0.001]}, 'lr_scheduler': {'_name': 'polynomial_decay', 'warmup_updates': 10000, 'force_anneal': None, 'end_learning_rate': 0.0, 'power': 1.0, 'total_num_update': 1000000.0, 'lr': [0.001]}, 'scoring': None, 'bpe': None, 'tokenizer': None, 'ema': {'_name': None, 'store_ema': False, 'ema_decay': 0.9999, 'ema_start_update': 0, 'ema_seed_model': None, 'ema_update_freq': 1, 'ema_fp32': False}, 'job_logging_cfg': {'version': 1, 'formatters': {'simple': {'format': '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'}}, 'handlers': {'console': {'class': 'logging.StreamHandler', 'formatter': 'simple', 'stream': 'ext://sys.stdout'}, 'file': {'class': 'logging.FileHandler', 'formatter': 'simple', 'filename': 'hydra_train.log'}}, 'root': {'level': 'INFO', 'handlers': ['console', 'file']}, 'disable_existing_loggers': False}} [2022-08-01 01:27:38,705][fairseq.tasks.masked_lm][INFO] - dictionary: 50264 types [2022-08-01 01:27:47,160][fairseq_cli.train][INFO] - RobertaModel( (encoder): RobertaEncoder( (sentence_encoder): TransformerEncoder( (dropout_module): FairseqDropout() (embed_tokens): Embedding(50265, 768, padding_idx=1) (embed_positions): LearnedPositionalEmbedding(514, 768, padding_idx=1) (layernorm_embedding): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) (layers): ModuleList( (0): TransformerEncoderLayerBase( (self_attn): MultiheadAttention( (dropout_module): FairseqDropout() (k_proj): Linear(in_features=768, out_features=768, bias=True) (v_proj): Linear(in_features=768, out_features=768, bias=True) (q_proj): Linear(in_features=768, out_features=768, bias=True) (out_proj): Linear(in_features=768, out_features=768, bias=True) ) (self_attn_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) (dropout_module): FairseqDropout() (activation_dropout_module): FairseqDropout() (fc1): Linear(in_features=768, out_features=3072, bias=True) (fc2): Linear(in_features=3072, out_features=768, bias=True) (final_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) ) (1): TransformerEncoderLayerBase( (self_attn): MultiheadAttention( (dropout_module): FairseqDropout() (k_proj): Linear(in_features=768, out_features=768, bias=True) (v_proj): Linear(in_features=768, out_features=768, bias=True) (q_proj): Linear(in_features=768, out_features=768, bias=True) (out_proj): Linear(in_features=768, out_features=768, bias=True) ) (self_attn_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) (dropout_module): FairseqDropout() (activation_dropout_module): FairseqDropout() (fc1): Linear(in_features=768, out_features=3072, bias=True) (fc2): Linear(in_features=3072, out_features=768, bias=True) (final_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) ) (2): TransformerEncoderLayerBase( (self_attn): MultiheadAttention( (dropout_module): FairseqDropout() (k_proj): Linear(in_features=768, out_features=768, bias=True) (v_proj): Linear(in_features=768, out_features=768, bias=True) (q_proj): Linear(in_features=768, out_features=768, bias=True) (out_proj): Linear(in_features=768, out_features=768, bias=True) ) (self_attn_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) (dropout_module): FairseqDropout() (activation_dropout_module): FairseqDropout() (fc1): Linear(in_features=768, out_features=3072, bias=True) (fc2): Linear(in_features=3072, out_features=768, bias=True) (final_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) ) (3): TransformerEncoderLayerBase( (self_attn): MultiheadAttention( (dropout_module): FairseqDropout() (k_proj): Linear(in_features=768, out_features=768, bias=True) (v_proj): Linear(in_features=768, out_features=768, bias=True) (q_proj): Linear(in_features=768, out_features=768, bias=True) (out_proj): Linear(in_features=768, out_features=768, bias=True) ) (self_attn_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) (dropout_module): FairseqDropout() (activation_dropout_module): FairseqDropout() (fc1): Linear(in_features=768, out_features=3072, bias=True) (fc2): Linear(in_features=3072, out_features=768, bias=True) (final_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) ) (4): TransformerEncoderLayerBase( (self_attn): MultiheadAttention( (dropout_module): FairseqDropout() (k_proj): Linear(in_features=768, out_features=768, bias=True) (v_proj): Linear(in_features=768, out_features=768, bias=True) (q_proj): Linear(in_features=768, out_features=768, bias=True) (out_proj): Linear(in_features=768, out_features=768, bias=True) ) (self_attn_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) (dropout_module): FairseqDropout() (activation_dropout_module): FairseqDropout() (fc1): Linear(in_features=768, out_features=3072, bias=True) (fc2): Linear(in_features=3072, out_features=768, bias=True) (final_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) ) (5): TransformerEncoderLayerBase( (self_attn): MultiheadAttention( (dropout_module): FairseqDropout() (k_proj): Linear(in_features=768, out_features=768, bias=True) (v_proj): Linear(in_features=768, out_features=768, bias=True) (q_proj): Linear(in_features=768, out_features=768, bias=True) (out_proj): Linear(in_features=768, out_features=768, bias=True) ) (self_attn_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) (dropout_module): FairseqDropout() (activation_dropout_module): FairseqDropout() (fc1): Linear(in_features=768, out_features=3072, bias=True) (fc2): Linear(in_features=3072, out_features=768, bias=True) (final_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) ) (6): TransformerEncoderLayerBase( (self_attn): MultiheadAttention( (dropout_module): FairseqDropout() (k_proj): Linear(in_features=768, out_features=768, bias=True) (v_proj): Linear(in_features=768, out_features=768, bias=True) (q_proj): Linear(in_features=768, out_features=768, bias=True) (out_proj): Linear(in_features=768, out_features=768, bias=True) ) (self_attn_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) (dropout_module): FairseqDropout() (activation_dropout_module): FairseqDropout() (fc1): Linear(in_features=768, out_features=3072, bias=True) (fc2): Linear(in_features=3072, out_features=768, bias=True) (final_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) ) (7): TransformerEncoderLayerBase( (self_attn): MultiheadAttention( (dropout_module): FairseqDropout() (k_proj): Linear(in_features=768, out_features=768, bias=True) (v_proj): Linear(in_features=768, out_features=768, bias=True) (q_proj): Linear(in_features=768, out_features=768, bias=True) (out_proj): Linear(in_features=768, out_features=768, bias=True) ) (self_attn_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) (dropout_module): FairseqDropout() (activation_dropout_module): FairseqDropout() (fc1): Linear(in_features=768, out_features=3072, bias=True) (fc2): Linear(in_features=3072, out_features=768, bias=True) (final_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) ) (8): TransformerEncoderLayerBase( (self_attn): MultiheadAttention( (dropout_module): FairseqDropout() (k_proj): Linear(in_features=768, out_features=768, bias=True) (v_proj): Linear(in_features=768, out_features=768, bias=True) (q_proj): Linear(in_features=768, out_features=768, bias=True) (out_proj): Linear(in_features=768, out_features=768, bias=True) ) (self_attn_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) (dropout_module): FairseqDropout() (activation_dropout_module): FairseqDropout() (fc1): Linear(in_features=768, out_features=3072, bias=True) (fc2): Linear(in_features=3072, out_features=768, bias=True) (final_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) ) (9): TransformerEncoderLayerBase( (self_attn): MultiheadAttention( (dropout_module): FairseqDropout() (k_proj): Linear(in_features=768, out_features=768, bias=True) (v_proj): Linear(in_features=768, out_features=768, bias=True) (q_proj): Linear(in_features=768, out_features=768, bias=True) (out_proj): Linear(in_features=768, out_features=768, bias=True) ) (self_attn_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) (dropout_module): FairseqDropout() (activation_dropout_module): FairseqDropout() (fc1): Linear(in_features=768, out_features=3072, bias=True) (fc2): Linear(in_features=3072, out_features=768, bias=True) (final_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) ) (10): TransformerEncoderLayerBase( (self_attn): MultiheadAttention( (dropout_module): FairseqDropout() (k_proj): Linear(in_features=768, out_features=768, bias=True) (v_proj): Linear(in_features=768, out_features=768, bias=True) (q_proj): Linear(in_features=768, out_features=768, bias=True) (out_proj): Linear(in_features=768, out_features=768, bias=True) ) (self_attn_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) (dropout_module): FairseqDropout() (activation_dropout_module): FairseqDropout() (fc1): Linear(in_features=768, out_features=3072, bias=True) (fc2): Linear(in_features=3072, out_features=768, bias=True) (final_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) ) (11): TransformerEncoderLayerBase( (self_attn): MultiheadAttention( (dropout_module): FairseqDropout() (k_proj): Linear(in_features=768, out_features=768, bias=True) (v_proj): Linear(in_features=768, out_features=768, bias=True) (q_proj): Linear(in_features=768, out_features=768, bias=True) (out_proj): Linear(in_features=768, out_features=768, bias=True) ) (self_attn_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) (dropout_module): FairseqDropout() (activation_dropout_module): FairseqDropout() (fc1): Linear(in_features=768, out_features=3072, bias=True) (fc2): Linear(in_features=3072, out_features=768, bias=True) (final_layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) ) ) ) (lm_head): RobertaLMHead( (dense): Linear(in_features=768, out_features=768, bias=True) (layer_norm): FusedLayerNorm(torch.Size([768]), eps=1e-05, elementwise_affine=True) ) ) (classification_heads): ModuleDict() ) [2022-08-01 01:27:47,162][fairseq_cli.train][INFO] - task: MaskedLMTask [2022-08-01 01:27:47,162][fairseq_cli.train][INFO] - model: RobertaModel [2022-08-01 01:27:47,162][fairseq_cli.train][INFO] - criterion: MaskedLmLoss [2022-08-01 01:27:47,163][fairseq_cli.train][INFO] - num. shared model params: 209,714,265 (num. trained: 209,714,265) [2022-08-01 01:27:47,164][fairseq_cli.train][INFO] - num. expert model params: 0 (num. trained: 0) [2022-08-01 01:27:47,252][fairseq.data.data_utils][INFO] - loaded 1,066,112 examples from: /dataset/common/bert-corpus-0729/valid [2022-08-01 01:27:47,284][fairseq.tasks.masked_lm][INFO] - loaded 67780 blocks from: /dataset/common/bert-corpus-0729/valid [2022-08-01 01:28:02,749][torch.distributed.distributed_c10d][INFO] - Added key: store_based_barrier_key:2 to store for rank: 0 [2022-08-01 01:28:02,770][torch.distributed.distributed_c10d][INFO] - Rank 0: Completed store-based barrier for key:store_based_barrier_key:2 with 8 nodes. [2022-08-01 01:28:02,771][fairseq.trainer][INFO] - detected shared parameter: encoder.sentence_encoder.embed_tokens.weight <- encoder.lm_head.weight [2022-08-01 01:28:07,678][fairseq.utils][INFO] - ***********************CUDA enviroments for all 8 workers*********************** [2022-08-01 01:28:07,678][fairseq.utils][INFO] - rank 0: capabilities = 8.0 ; total memory = 39.587 GB ; name = A100-SXM4-40GB [2022-08-01 01:28:07,678][fairseq.utils][INFO] - rank 1: capabilities = 8.0 ; total memory = 39.587 GB ; name = A100-SXM4-40GB [2022-08-01 01:28:07,678][fairseq.utils][INFO] - rank 2: capabilities = 8.0 ; total memory = 39.587 GB ; name = A100-SXM4-40GB [2022-08-01 01:28:07,678][fairseq.utils][INFO] - rank 3: capabilities = 8.0 ; total memory = 39.587 GB ; name = A100-SXM4-40GB [2022-08-01 01:28:07,678][fairseq.utils][INFO] - rank 4: capabilities = 8.0 ; total memory = 39.587 GB ; name = A100-SXM4-40GB [2022-08-01 01:28:07,678][fairseq.utils][INFO] - rank 5: capabilities = 8.0 ; total memory = 39.587 GB ; name = A100-SXM4-40GB [2022-08-01 01:28:07,678][fairseq.utils][INFO] - rank 6: capabilities = 8.0 ; total memory = 39.587 GB ; name = A100-SXM4-40GB [2022-08-01 01:28:07,678][fairseq.utils][INFO] - rank 7: capabilities = 8.0 ; total memory = 39.587 GB ; name = A100-SXM4-40GB [2022-08-01 01:28:07,678][fairseq.utils][INFO] - ***********************CUDA enviroments for all 8 workers*********************** [2022-08-01 01:28:07,678][fairseq_cli.train][INFO] - training on 8 devices (GPUs/TPUs) [2022-08-01 01:28:07,678][fairseq_cli.train][INFO] - max tokens per device = None and max sentences per device = 32 [2022-08-01 01:28:07,680][fairseq.trainer][INFO] - Preparing to load checkpoint bert/adan2/checkpoint_last.pt [2022-08-01 01:28:07,680][fairseq.trainer][INFO] - No existing checkpoint found bert/adan2/checkpoint_last.pt [2022-08-01 01:28:07,680][fairseq.trainer][INFO] - loading train data for epoch 1 [2022-08-01 01:28:10,423][fairseq.data.data_utils][INFO] - loaded 214,960,826 examples from: /dataset/common/bert-corpus-0729/train [2022-08-01 01:28:14,723][fairseq.tasks.masked_lm][INFO] - loaded 13244396 blocks from: /dataset/common/bert-corpus-0729/train [2022-08-01 01:28:19,456][fairseq.tasks.fairseq_task][WARNING] - 63,646 samples have invalid sizes and will be skipped, max_positions=512, first few sample ids=[11407155, 8928673, 3941655, 2058309, 2058002, 11308513, 1003447, 2725530, 13205669, 7271248] [2022-08-01 01:28:28,999][fairseq.data.iterators][INFO] - grouped total_num_itrs = 51488 [2022-08-01 01:28:29,002][fairseq.trainer][INFO] - begin training epoch 1 [2022-08-01 01:28:29,002][fairseq_cli.train][INFO] - Start iterating over samples [2022-08-01 01:28:38,650][fairseq.modules.cross_entropy][INFO] - using fused cross entropy [2022-08-01 01:28:59,309][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 64.0 [2022-08-01 01:29:52,104][train_inner][INFO] - {"epoch": 1, "update": 0.004, "loss": "14.125", "ppl": "17863.8", "wps": "360757", "ups": "3.05", "wpb": "118068", "bsz": "256", "num_updates": "200", "lr": "2e-05", "gnorm": "2.326", "clip": "0", "loss_scale": "64", "train_wall": "73", "gb_free": "19.9", "wall": "104"} [2022-08-01 01:30:57,103][train_inner][INFO] - {"epoch": 1, "update": 0.008, "loss": "11.592", "ppl": "3087.5", "wps": "363489", "ups": "3.08", "wpb": "118132", "bsz": "256", "num_updates": "400", "lr": "4e-05", "gnorm": "0.807", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "19.9", "wall": "169"} [2022-08-01 01:32:02,116][train_inner][INFO] - {"epoch": 1, "update": 0.012, "loss": "10.661", "ppl": "1618.86", "wps": "364315", "ups": "3.08", "wpb": "118423", "bsz": "256", "num_updates": "600", "lr": "6e-05", "gnorm": "0.546", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "26.3", "wall": "234"} [2022-08-01 01:33:06,831][train_inner][INFO] - {"epoch": 1, "update": 0.016, "loss": "10.338", "ppl": "1294.13", "wps": "363299", "ups": "3.09", "wpb": "117553", "bsz": "256", "num_updates": "800", "lr": "8e-05", "gnorm": "0.604", "clip": "0", "loss_scale": "64", "train_wall": "64", "gb_free": "19.8", "wall": "299"} [2022-08-01 01:34:11,556][train_inner][INFO] - {"epoch": 1, "update": 0.019, "loss": "10.123", "ppl": "1115.05", "wps": "366500", "ups": "3.09", "wpb": "118607", "bsz": "256", "num_updates": "1000", "lr": "0.0001", "gnorm": "0.648", "clip": "0", "loss_scale": "64", "train_wall": "64", "gb_free": "20.1", "wall": "364"} [2022-08-01 01:35:16,599][train_inner][INFO] - {"epoch": 1, "update": 0.023, "loss": "9.956", "ppl": "993.35", "wps": "363026", "ups": "3.07", "wpb": "118060", "bsz": "256", "num_updates": "1200", "lr": "0.00012", "gnorm": "0.689", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "20.2", "wall": "429"} [2022-08-01 01:36:22,235][train_inner][INFO] - {"epoch": 1, "update": 0.027, "loss": "9.822", "ppl": "905.4", "wps": "359906", "ups": "3.05", "wpb": "118112", "bsz": "256", "num_updates": "1400", "lr": "0.00014", "gnorm": "0.735", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "19.8", "wall": "495"} [2022-08-01 01:37:26,957][train_inner][INFO] - {"epoch": 1, "update": 0.031, "loss": "9.719", "ppl": "842.51", "wps": "365887", "ups": "3.09", "wpb": "118403", "bsz": "256", "num_updates": "1600", "lr": "0.00016", "gnorm": "0.755", "clip": "0", "loss_scale": "64", "train_wall": "64", "gb_free": "26.8", "wall": "559"} [2022-08-01 01:38:32,064][train_inner][INFO] - {"epoch": 1, "update": 0.035, "loss": "9.622", "ppl": "788.07", "wps": "362895", "ups": "3.07", "wpb": "118133", "bsz": "256", "num_updates": "1800", "lr": "0.00018", "gnorm": "0.78", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "21", "wall": "624"} [2022-08-01 01:39:37,046][train_inner][INFO] - {"epoch": 1, "update": 0.039, "loss": "9.546", "ppl": "747.46", "wps": "365269", "ups": "3.08", "wpb": "118677", "bsz": "256", "num_updates": "2000", "lr": "0.0002", "gnorm": "0.771", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "19.7", "wall": "689"} [2022-08-01 01:40:41,879][train_inner][INFO] - {"epoch": 1, "update": 0.043, "loss": "9.482", "ppl": "715.2", "wps": "363413", "ups": "3.08", "wpb": "117804", "bsz": "256", "num_updates": "2200", "lr": "0.00022", "gnorm": "0.791", "clip": "0", "loss_scale": "128", "train_wall": "64", "gb_free": "21", "wall": "754"} [2022-08-01 01:41:46,995][train_inner][INFO] - {"epoch": 1, "update": 0.047, "loss": "9.422", "ppl": "685.78", "wps": "364787", "ups": "3.07", "wpb": "118766", "bsz": "256", "num_updates": "2400", "lr": "0.00024", "gnorm": "0.79", "clip": "0", "loss_scale": "128", "train_wall": "65", "gb_free": "23.3", "wall": "819"} [2022-08-01 01:42:51,832][train_inner][INFO] - {"epoch": 1, "update": 0.051, "loss": "9.368", "ppl": "660.99", "wps": "364640", "ups": "3.08", "wpb": "118210", "bsz": "256", "num_updates": "2600", "lr": "0.00026", "gnorm": "0.81", "clip": "0", "loss_scale": "128", "train_wall": "64", "gb_free": "19.7", "wall": "884"} [2022-08-01 01:43:56,819][train_inner][INFO] - {"epoch": 1, "update": 0.054, "loss": "9.219", "ppl": "596", "wps": "362311", "ups": "3.08", "wpb": "117726", "bsz": "256", "num_updates": "2800", "lr": "0.00028", "gnorm": "0.908", "clip": "0", "loss_scale": "128", "train_wall": "65", "gb_free": "19.9", "wall": "949"} [2022-08-01 01:44:08,096][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 64.0 [2022-08-01 01:45:02,131][train_inner][INFO] - {"epoch": 1, "update": 0.058, "loss": "9.106", "ppl": "551.07", "wps": "363424", "ups": "3.06", "wpb": "118677", "bsz": "256", "num_updates": "3000", "lr": "0.0003", "gnorm": "1.065", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "21.3", "wall": "1014"} [2022-08-01 01:46:07,222][train_inner][INFO] - {"epoch": 1, "update": 0.062, "loss": "9.014", "ppl": "517.14", "wps": "365921", "ups": "3.07", "wpb": "119090", "bsz": "256", "num_updates": "3200", "lr": "0.00032", "gnorm": "1.186", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "27.9", "wall": "1080"} [2022-08-01 01:47:12,058][train_inner][INFO] - {"epoch": 1, "update": 0.066, "loss": "8.936", "ppl": "489.75", "wps": "365284", "ups": "3.08", "wpb": "118416", "bsz": "256", "num_updates": "3400", "lr": "0.00034", "gnorm": "1.253", "clip": "0", "loss_scale": "64", "train_wall": "64", "gb_free": "20.8", "wall": "1144"} [2022-08-01 01:48:16,904][train_inner][INFO] - {"epoch": 1, "update": 0.07, "loss": "8.856", "ppl": "463.33", "wps": "366286", "ups": "3.08", "wpb": "118758", "bsz": "256", "num_updates": "3600", "lr": "0.00036", "gnorm": "1.347", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "19.7", "wall": "1209"} [2022-08-01 01:49:22,903][train_inner][INFO] - {"epoch": 1, "update": 0.074, "loss": "8.759", "ppl": "433.3", "wps": "357149", "ups": "3.03", "wpb": "117857", "bsz": "256", "num_updates": "3800", "lr": "0.00038", "gnorm": "1.436", "clip": "0", "loss_scale": "64", "train_wall": "66", "gb_free": "20.3", "wall": "1275"} [2022-08-01 01:50:27,582][train_inner][INFO] - {"epoch": 1, "update": 0.078, "loss": "8.653", "ppl": "402.42", "wps": "367566", "ups": "3.09", "wpb": "118866", "bsz": "256", "num_updates": "4000", "lr": "0.0004", "gnorm": "1.581", "clip": "0", "loss_scale": "64", "train_wall": "64", "gb_free": "23.1", "wall": "1340"} [2022-08-01 01:51:33,516][train_inner][INFO] - {"epoch": 1, "update": 0.082, "loss": "8.525", "ppl": "368.4", "wps": "359342", "ups": "3.03", "wpb": "118462", "bsz": "256", "num_updates": "4200", "lr": "0.00042", "gnorm": "1.715", "clip": "0", "loss_scale": "64", "train_wall": "66", "gb_free": "19.9", "wall": "1406"} [2022-08-01 01:52:38,588][train_inner][INFO] - {"epoch": 1, "update": 0.085, "loss": "8.361", "ppl": "328.71", "wps": "363183", "ups": "3.07", "wpb": "118164", "bsz": "256", "num_updates": "4400", "lr": "0.00044", "gnorm": "1.935", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "26.1", "wall": "1471"} [2022-08-01 01:53:04,858][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 [2022-08-01 01:53:43,888][train_inner][INFO] - {"epoch": 1, "update": 0.089, "loss": "8.007", "ppl": "257.24", "wps": "362078", "ups": "3.06", "wpb": "118216", "bsz": "256", "num_updates": "4600", "lr": "0.00046", "gnorm": "2.246", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "24.3", "wall": "1536"} [2022-08-01 01:54:49,049][train_inner][INFO] - {"epoch": 1, "update": 0.093, "loss": "7.418", "ppl": "170.98", "wps": "363305", "ups": "3.07", "wpb": "118367", "bsz": "256", "num_updates": "4800", "lr": "0.00048", "gnorm": "2.344", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.8", "wall": "1601"} [2022-08-01 01:55:53,662][train_inner][INFO] - {"epoch": 1, "update": 0.097, "loss": "6.793", "ppl": "110.92", "wps": "366968", "ups": "3.1", "wpb": "118553", "bsz": "256", "num_updates": "5000", "lr": "0.0005", "gnorm": "1.904", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "19.7", "wall": "1666"} [2022-08-01 01:56:58,565][train_inner][INFO] - {"epoch": 1, "update": 0.101, "loss": "5.971", "ppl": "62.74", "wps": "364983", "ups": "3.08", "wpb": "118440", "bsz": "256", "num_updates": "5200", "lr": "0.00052", "gnorm": "1.458", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "19.9", "wall": "1731"} [2022-08-01 01:58:03,416][train_inner][INFO] - {"epoch": 1, "update": 0.105, "loss": "5.461", "ppl": "44.04", "wps": "363252", "ups": "3.08", "wpb": "117785", "bsz": "256", "num_updates": "5400", "lr": "0.00054", "gnorm": "1.353", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "25.5", "wall": "1796"} [2022-08-01 01:59:08,586][train_inner][INFO] - {"epoch": 1, "update": 0.109, "loss": "5.127", "ppl": "34.94", "wps": "363059", "ups": "3.07", "wpb": "118300", "bsz": "256", "num_updates": "5600", "lr": "0.00056", "gnorm": "1.246", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "21", "wall": "1861"} [2022-08-01 02:00:15,025][train_inner][INFO] - {"epoch": 1, "update": 0.113, "loss": "4.887", "ppl": "29.6", "wps": "357821", "ups": "3.01", "wpb": "118863", "bsz": "256", "num_updates": "5800", "lr": "0.00058", "gnorm": "1.17", "clip": "0", "loss_scale": "32", "train_wall": "66", "gb_free": "20", "wall": "1927"} [2022-08-01 02:01:20,318][train_inner][INFO] - {"epoch": 1, "update": 0.117, "loss": "4.723", "ppl": "26.41", "wps": "361443", "ups": "3.06", "wpb": "117997", "bsz": "256", "num_updates": "6000", "lr": "0.0006", "gnorm": "1.116", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "21.3", "wall": "1993"} [2022-08-01 02:02:25,331][train_inner][INFO] - {"epoch": 1, "update": 0.12, "loss": "4.588", "ppl": "24.05", "wps": "364245", "ups": "3.08", "wpb": "118402", "bsz": "256", "num_updates": "6200", "lr": "0.00062", "gnorm": "1.079", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.9", "wall": "2058"} [2022-08-01 02:03:30,606][train_inner][INFO] - {"epoch": 1, "update": 0.124, "loss": "4.46", "ppl": "22.01", "wps": "362584", "ups": "3.06", "wpb": "118336", "bsz": "256", "num_updates": "6400", "lr": "0.00064", "gnorm": "1.044", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "23.7", "wall": "2123"} [2022-08-01 02:04:35,712][train_inner][INFO] - {"epoch": 1, "update": 0.128, "loss": "4.371", "ppl": "20.7", "wps": "363604", "ups": "3.07", "wpb": "118362", "bsz": "256", "num_updates": "6600", "lr": "0.00066", "gnorm": "1.016", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "23.2", "wall": "2188"} [2022-08-01 02:05:40,591][train_inner][INFO] - {"epoch": 1, "update": 0.132, "loss": "4.282", "ppl": "19.45", "wps": "363942", "ups": "3.08", "wpb": "118060", "bsz": "256", "num_updates": "6800", "lr": "0.00068", "gnorm": "0.987", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "25.4", "wall": "2253"} [2022-08-01 02:06:45,664][train_inner][INFO] - {"epoch": 1, "update": 0.136, "loss": "4.202", "ppl": "18.4", "wps": "363267", "ups": "3.07", "wpb": "118192", "bsz": "256", "num_updates": "7000", "lr": "0.0007", "gnorm": "0.968", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "21.3", "wall": "2318"} [2022-08-01 02:07:50,591][train_inner][INFO] - {"epoch": 1, "update": 0.14, "loss": "4.138", "ppl": "17.6", "wps": "364663", "ups": "3.08", "wpb": "118381", "bsz": "256", "num_updates": "7200", "lr": "0.00072", "gnorm": "0.947", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "20", "wall": "2383"} [2022-08-01 02:08:55,941][train_inner][INFO] - {"epoch": 1, "update": 0.144, "loss": "4.074", "ppl": "16.85", "wps": "362326", "ups": "3.06", "wpb": "118388", "bsz": "256", "num_updates": "7400", "lr": "0.00074", "gnorm": "0.933", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "20.5", "wall": "2448"} [2022-08-01 02:10:01,077][train_inner][INFO] - {"epoch": 1, "update": 0.148, "loss": "4.023", "ppl": "16.26", "wps": "365803", "ups": "3.07", "wpb": "119133", "bsz": "256", "num_updates": "7600", "lr": "0.00076", "gnorm": "0.907", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "19.8", "wall": "2513"} [2022-08-01 02:11:05,800][train_inner][INFO] - {"epoch": 1, "update": 0.152, "loss": "3.982", "ppl": "15.81", "wps": "364478", "ups": "3.09", "wpb": "117943", "bsz": "256", "num_updates": "7800", "lr": "0.00078", "gnorm": "0.902", "clip": "0", "loss_scale": "64", "train_wall": "64", "gb_free": "24.3", "wall": "2578"} [2022-08-01 02:12:11,192][train_inner][INFO] - {"epoch": 1, "update": 0.155, "loss": "3.931", "ppl": "15.25", "wps": "361977", "ups": "3.06", "wpb": "118350", "bsz": "256", "num_updates": "8000", "lr": "0.0008", "gnorm": "0.881", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "21.4", "wall": "2644"} [2022-08-01 02:13:16,096][train_inner][INFO] - {"epoch": 1, "update": 0.159, "loss": "3.891", "ppl": "14.84", "wps": "364880", "ups": "3.08", "wpb": "118409", "bsz": "256", "num_updates": "8200", "lr": "0.00082", "gnorm": "0.877", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "20.1", "wall": "2708"} [2022-08-01 02:14:21,136][train_inner][INFO] - {"epoch": 1, "update": 0.163, "loss": "3.861", "ppl": "14.53", "wps": "361933", "ups": "3.08", "wpb": "117698", "bsz": "256", "num_updates": "8400", "lr": "0.00084", "gnorm": "0.86", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "20.3", "wall": "2773"} [2022-08-01 02:15:26,472][train_inner][INFO] - {"epoch": 1, "update": 0.167, "loss": "3.819", "ppl": "14.11", "wps": "363868", "ups": "3.06", "wpb": "118866", "bsz": "256", "num_updates": "8600", "lr": "0.00086", "gnorm": "0.847", "clip": "0", "loss_scale": "128", "train_wall": "65", "gb_free": "21.9", "wall": "2839"} [2022-08-01 02:16:31,524][train_inner][INFO] - {"epoch": 1, "update": 0.171, "loss": "3.795", "ppl": "13.88", "wps": "363190", "ups": "3.07", "wpb": "118125", "bsz": "256", "num_updates": "8800", "lr": "0.00088", "gnorm": "0.84", "clip": "0", "loss_scale": "128", "train_wall": "65", "gb_free": "26.5", "wall": "2904"} [2022-08-01 02:17:36,473][train_inner][INFO] - {"epoch": 1, "update": 0.175, "loss": "3.765", "ppl": "13.6", "wps": "365548", "ups": "3.08", "wpb": "118708", "bsz": "256", "num_updates": "9000", "lr": "0.0009", "gnorm": "0.83", "clip": "0", "loss_scale": "128", "train_wall": "65", "gb_free": "19.9", "wall": "2969"} [2022-08-01 02:18:41,597][train_inner][INFO] - {"epoch": 1, "update": 0.179, "loss": "3.736", "ppl": "13.32", "wps": "364441", "ups": "3.07", "wpb": "118668", "bsz": "256", "num_updates": "9200", "lr": "0.00092", "gnorm": "0.831", "clip": "0", "loss_scale": "128", "train_wall": "65", "gb_free": "22", "wall": "3034"} [2022-08-01 02:19:46,540][train_inner][INFO] - {"epoch": 1, "update": 0.183, "loss": "3.709", "ppl": "13.08", "wps": "363658", "ups": "3.08", "wpb": "118083", "bsz": "256", "num_updates": "9400", "lr": "0.00094", "gnorm": "0.819", "clip": "0", "loss_scale": "128", "train_wall": "65", "gb_free": "20", "wall": "3099"} [2022-08-01 02:20:52,340][train_inner][INFO] - {"epoch": 1, "update": 0.187, "loss": "3.685", "ppl": "12.86", "wps": "358726", "ups": "3.04", "wpb": "118018", "bsz": "256", "num_updates": "9600", "lr": "0.00096", "gnorm": "0.816", "clip": "0", "loss_scale": "128", "train_wall": "65", "gb_free": "22.6", "wall": "3165"} [2022-08-01 02:21:58,374][train_inner][INFO] - {"epoch": 1, "update": 0.19, "loss": "3.658", "ppl": "12.62", "wps": "358406", "ups": "3.03", "wpb": "118334", "bsz": "256", "num_updates": "9800", "lr": "0.00098", "gnorm": "0.806", "clip": "0", "loss_scale": "128", "train_wall": "66", "gb_free": "25.9", "wall": "3231"} [2022-08-01 02:23:04,138][train_inner][INFO] - {"epoch": 1, "update": 0.194, "loss": "3.646", "ppl": "12.52", "wps": "361683", "ups": "3.04", "wpb": "118927", "bsz": "256", "num_updates": "10000", "lr": "0.001", "gnorm": "0.801", "clip": "0", "loss_scale": "128", "train_wall": "65", "gb_free": "26.9", "wall": "3296"} [2022-08-01 02:24:09,151][train_inner][INFO] - {"epoch": 1, "update": 0.198, "loss": "3.626", "ppl": "12.35", "wps": "364313", "ups": "3.08", "wpb": "118424", "bsz": "256", "num_updates": "10200", "lr": "0.000999798", "gnorm": "0.794", "clip": "0", "loss_scale": "128", "train_wall": "65", "gb_free": "21.1", "wall": "3361"} [2022-08-01 02:24:59,671][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 64.0 [2022-08-01 02:25:14,067][train_inner][INFO] - {"epoch": 1, "update": 0.202, "loss": "3.61", "ppl": "12.21", "wps": "362650", "ups": "3.08", "wpb": "117707", "bsz": "256", "num_updates": "10400", "lr": "0.000999596", "gnorm": "0.793", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "20.1", "wall": "3426"} [2022-08-01 02:26:19,932][train_inner][INFO] - {"epoch": 1, "update": 0.206, "loss": "3.584", "ppl": "11.99", "wps": "359211", "ups": "3.04", "wpb": "118297", "bsz": "256", "num_updates": "10600", "lr": "0.000999394", "gnorm": "0.784", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "20", "wall": "3492"} [2022-08-01 02:27:25,207][train_inner][INFO] - {"epoch": 1, "update": 0.21, "loss": "3.557", "ppl": "11.77", "wps": "361972", "ups": "3.06", "wpb": "118137", "bsz": "256", "num_updates": "10800", "lr": "0.000999192", "gnorm": "0.775", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "20.1", "wall": "3558"} [2022-08-01 02:28:29,867][train_inner][INFO] - {"epoch": 1, "update": 0.214, "loss": "3.536", "ppl": "11.6", "wps": "367024", "ups": "3.09", "wpb": "118656", "bsz": "256", "num_updates": "11000", "lr": "0.00099899", "gnorm": "0.771", "clip": "0", "loss_scale": "64", "train_wall": "64", "gb_free": "20.2", "wall": "3622"} [2022-08-01 02:29:34,915][train_inner][INFO] - {"epoch": 1, "update": 0.218, "loss": "3.528", "ppl": "11.54", "wps": "363294", "ups": "3.07", "wpb": "118157", "bsz": "256", "num_updates": "11200", "lr": "0.000998788", "gnorm": "0.772", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "21.6", "wall": "3687"} [2022-08-01 02:30:39,998][train_inner][INFO] - {"epoch": 1, "update": 0.221, "loss": "3.508", "ppl": "11.37", "wps": "362272", "ups": "3.07", "wpb": "117886", "bsz": "256", "num_updates": "11400", "lr": "0.000998586", "gnorm": "0.766", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "24.3", "wall": "3752"} [2022-08-01 02:31:44,763][train_inner][INFO] - {"epoch": 1, "update": 0.225, "loss": "3.473", "ppl": "11.1", "wps": "366090", "ups": "3.09", "wpb": "118548", "bsz": "256", "num_updates": "11600", "lr": "0.000998384", "gnorm": "0.76", "clip": "0", "loss_scale": "64", "train_wall": "64", "gb_free": "21", "wall": "3817"} [2022-08-01 02:32:49,468][train_inner][INFO] - {"epoch": 1, "update": 0.229, "loss": "3.47", "ppl": "11.08", "wps": "366042", "ups": "3.09", "wpb": "118422", "bsz": "256", "num_updates": "11800", "lr": "0.000998182", "gnorm": "0.759", "clip": "0", "loss_scale": "64", "train_wall": "64", "gb_free": "20.3", "wall": "3882"} [2022-08-01 02:33:54,422][train_inner][INFO] - {"epoch": 1, "update": 0.233, "loss": "3.451", "ppl": "10.93", "wps": "364707", "ups": "3.08", "wpb": "118445", "bsz": "256", "num_updates": "12000", "lr": "0.00099798", "gnorm": "0.756", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "23.9", "wall": "3947"} [2022-08-01 02:34:40,225][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 [2022-08-01 02:35:00,221][train_inner][INFO] - {"epoch": 1, "update": 0.237, "loss": "3.432", "ppl": "10.79", "wps": "359656", "ups": "3.04", "wpb": "118323", "bsz": "256", "num_updates": "12200", "lr": "0.000997778", "gnorm": "0.768", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.9", "wall": "4013"} [2022-08-01 02:36:05,287][train_inner][INFO] - {"epoch": 1, "update": 0.241, "loss": "3.408", "ppl": "10.61", "wps": "364885", "ups": "3.07", "wpb": "118707", "bsz": "256", "num_updates": "12400", "lr": "0.000997576", "gnorm": "0.758", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "19.7", "wall": "4078"} [2022-08-01 02:37:10,239][train_inner][INFO] - {"epoch": 1, "update": 0.245, "loss": "3.404", "ppl": "10.59", "wps": "365271", "ups": "3.08", "wpb": "118623", "bsz": "256", "num_updates": "12600", "lr": "0.000997374", "gnorm": "0.752", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20", "wall": "4143"} [2022-08-01 02:38:15,090][train_inner][INFO] - {"epoch": 1, "update": 0.249, "loss": "3.379", "ppl": "10.4", "wps": "365640", "ups": "3.08", "wpb": "118558", "bsz": "256", "num_updates": "12800", "lr": "0.000997172", "gnorm": "0.746", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "19.9", "wall": "4207"} [2022-08-01 02:39:20,229][train_inner][INFO] - {"epoch": 1, "update": 0.253, "loss": "3.37", "ppl": "10.34", "wps": "364435", "ups": "3.07", "wpb": "118694", "bsz": "256", "num_updates": "13000", "lr": "0.00099697", "gnorm": "0.747", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20", "wall": "4273"} [2022-08-01 02:40:25,298][train_inner][INFO] - {"epoch": 1, "update": 0.256, "loss": "3.358", "ppl": "10.25", "wps": "364627", "ups": "3.07", "wpb": "118627", "bsz": "256", "num_updates": "13200", "lr": "0.000996768", "gnorm": "0.747", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "19.8", "wall": "4338"} [2022-08-01 02:41:30,600][train_inner][INFO] - {"epoch": 1, "update": 0.26, "loss": "3.342", "ppl": "10.14", "wps": "363175", "ups": "3.06", "wpb": "118579", "bsz": "256", "num_updates": "13400", "lr": "0.000996566", "gnorm": "0.743", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.2", "wall": "4403"} [2022-08-01 02:42:35,453][train_inner][INFO] - {"epoch": 1, "update": 0.264, "loss": "3.326", "ppl": "10.03", "wps": "366632", "ups": "3.08", "wpb": "118884", "bsz": "256", "num_updates": "13600", "lr": "0.000996364", "gnorm": "0.746", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "19.8", "wall": "4468"} [2022-08-01 02:43:39,936][train_inner][INFO] - {"epoch": 1, "update": 0.268, "loss": "3.318", "ppl": "9.97", "wps": "366951", "ups": "3.1", "wpb": "118308", "bsz": "256", "num_updates": "13800", "lr": "0.000996162", "gnorm": "0.744", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "23", "wall": "4532"} [2022-08-01 02:44:44,731][train_inner][INFO] - {"epoch": 1, "update": 0.272, "loss": "3.307", "ppl": "9.9", "wps": "363851", "ups": "3.09", "wpb": "117876", "bsz": "256", "num_updates": "14000", "lr": "0.00099596", "gnorm": "0.745", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20", "wall": "4597"} [2022-08-01 02:45:49,941][train_inner][INFO] - {"epoch": 1, "update": 0.276, "loss": "3.299", "ppl": "9.84", "wps": "363765", "ups": "3.07", "wpb": "118604", "bsz": "256", "num_updates": "14200", "lr": "0.000995758", "gnorm": "0.743", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "23.7", "wall": "4662"} [2022-08-01 02:46:54,916][train_inner][INFO] - {"epoch": 1, "update": 0.28, "loss": "3.282", "ppl": "9.72", "wps": "365032", "ups": "3.08", "wpb": "118588", "bsz": "256", "num_updates": "14400", "lr": "0.000995556", "gnorm": "0.74", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "21", "wall": "4727"} [2022-08-01 02:46:56,797][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 [2022-08-01 02:48:00,086][train_inner][INFO] - {"epoch": 1, "update": 0.284, "loss": "3.272", "ppl": "9.66", "wps": "361694", "ups": "3.07", "wpb": "117856", "bsz": "256", "num_updates": "14600", "lr": "0.000995354", "gnorm": "0.74", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.8", "wall": "4792"} [2022-08-01 02:49:05,447][train_inner][INFO] - {"epoch": 1, "update": 0.288, "loss": "3.26", "ppl": "9.58", "wps": "364066", "ups": "3.06", "wpb": "118976", "bsz": "256", "num_updates": "14800", "lr": "0.000995152", "gnorm": "0.738", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "23.8", "wall": "4858"} [2022-08-01 02:50:10,199][train_inner][INFO] - {"epoch": 1, "update": 0.291, "loss": "3.256", "ppl": "9.55", "wps": "367160", "ups": "3.09", "wpb": "118871", "bsz": "256", "num_updates": "15000", "lr": "0.000994949", "gnorm": "0.737", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "26.9", "wall": "4923"} [2022-08-01 02:51:15,132][train_inner][INFO] - {"epoch": 1, "update": 0.295, "loss": "3.245", "ppl": "9.48", "wps": "364320", "ups": "3.08", "wpb": "118279", "bsz": "256", "num_updates": "15200", "lr": "0.000994747", "gnorm": "0.735", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "21.4", "wall": "4987"} [2022-08-01 02:52:20,309][train_inner][INFO] - {"epoch": 1, "update": 0.299, "loss": "3.238", "ppl": "9.43", "wps": "362960", "ups": "3.07", "wpb": "118282", "bsz": "256", "num_updates": "15400", "lr": "0.000994545", "gnorm": "0.734", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.1", "wall": "5053"} [2022-08-01 02:53:24,456][train_inner][INFO] - {"epoch": 1, "update": 0.303, "loss": "3.229", "ppl": "9.37", "wps": "368870", "ups": "3.12", "wpb": "118307", "bsz": "256", "num_updates": "15600", "lr": "0.000994343", "gnorm": "0.737", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "22.4", "wall": "5117"} [2022-08-01 02:54:29,288][train_inner][INFO] - {"epoch": 1, "update": 0.307, "loss": "3.218", "ppl": "9.31", "wps": "366493", "ups": "3.08", "wpb": "118800", "bsz": "256", "num_updates": "15800", "lr": "0.000994141", "gnorm": "0.732", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "19.7", "wall": "5182"} [2022-08-01 02:55:34,014][train_inner][INFO] - {"epoch": 1, "update": 0.311, "loss": "3.209", "ppl": "9.25", "wps": "365178", "ups": "3.09", "wpb": "118181", "bsz": "256", "num_updates": "16000", "lr": "0.000993939", "gnorm": "0.736", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "19.9", "wall": "5246"} [2022-08-01 02:56:38,770][train_inner][INFO] - {"epoch": 1, "update": 0.315, "loss": "3.199", "ppl": "9.19", "wps": "366690", "ups": "3.09", "wpb": "118725", "bsz": "256", "num_updates": "16200", "lr": "0.000993737", "gnorm": "0.735", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20", "wall": "5311"} [2022-08-01 02:57:43,710][train_inner][INFO] - {"epoch": 1, "update": 0.319, "loss": "3.202", "ppl": "9.2", "wps": "360894", "ups": "3.08", "wpb": "117180", "bsz": "256", "num_updates": "16400", "lr": "0.000993535", "gnorm": "0.738", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.2", "wall": "5376"} [2022-08-01 02:58:48,710][train_inner][INFO] - {"epoch": 1, "update": 0.323, "loss": "3.182", "ppl": "9.07", "wps": "365526", "ups": "3.08", "wpb": "118794", "bsz": "256", "num_updates": "16600", "lr": "0.000993333", "gnorm": "0.731", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "19.9", "wall": "5441"} [2022-08-01 02:59:01,684][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 [2022-08-01 02:59:54,100][train_inner][INFO] - {"epoch": 1, "update": 0.326, "loss": "3.183", "ppl": "9.08", "wps": "359853", "ups": "3.06", "wpb": "117653", "bsz": "256", "num_updates": "16800", "lr": "0.000993131", "gnorm": "0.735", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.2", "wall": "5506"} [2022-08-01 03:00:58,610][train_inner][INFO] - {"epoch": 1, "update": 0.33, "loss": "3.17", "ppl": "9", "wps": "365707", "ups": "3.1", "wpb": "117957", "bsz": "256", "num_updates": "17000", "lr": "0.000992929", "gnorm": "0.728", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "23.8", "wall": "5571"} [2022-08-01 03:02:03,338][train_inner][INFO] - {"epoch": 1, "update": 0.334, "loss": "3.155", "ppl": "8.91", "wps": "364933", "ups": "3.09", "wpb": "118104", "bsz": "256", "num_updates": "17200", "lr": "0.000992727", "gnorm": "0.733", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "21.4", "wall": "5636"} [2022-08-01 03:03:08,531][train_inner][INFO] - {"epoch": 1, "update": 0.338, "loss": "3.148", "ppl": "8.86", "wps": "363216", "ups": "3.07", "wpb": "118394", "bsz": "256", "num_updates": "17400", "lr": "0.000992525", "gnorm": "0.722", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.4", "wall": "5701"} [2022-08-01 03:04:13,658][train_inner][INFO] - {"epoch": 1, "update": 0.342, "loss": "3.143", "ppl": "8.84", "wps": "363554", "ups": "3.07", "wpb": "118385", "bsz": "256", "num_updates": "17600", "lr": "0.000992323", "gnorm": "0.726", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20", "wall": "5766"} [2022-08-01 03:05:18,568][train_inner][INFO] - {"epoch": 1, "update": 0.346, "loss": "3.133", "ppl": "8.77", "wps": "364258", "ups": "3.08", "wpb": "118218", "bsz": "256", "num_updates": "17800", "lr": "0.000992121", "gnorm": "0.73", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.7", "wall": "5831"} [2022-08-01 03:06:23,065][train_inner][INFO] - {"epoch": 1, "update": 0.35, "loss": "3.136", "ppl": "8.79", "wps": "364792", "ups": "3.1", "wpb": "117638", "bsz": "256", "num_updates": "18000", "lr": "0.000991919", "gnorm": "0.736", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "21.7", "wall": "5895"} [2022-08-01 03:07:28,092][train_inner][INFO] - {"epoch": 1, "update": 0.354, "loss": "3.126", "ppl": "8.73", "wps": "364173", "ups": "3.08", "wpb": "118404", "bsz": "256", "num_updates": "18200", "lr": "0.000991717", "gnorm": "0.726", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "19.8", "wall": "5960"} [2022-08-01 03:08:33,179][train_inner][INFO] - {"epoch": 1, "update": 0.358, "loss": "3.116", "ppl": "8.67", "wps": "364669", "ups": "3.07", "wpb": "118674", "bsz": "256", "num_updates": "18400", "lr": "0.000991515", "gnorm": "0.722", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "19.8", "wall": "6026"} [2022-08-01 03:09:38,037][train_inner][INFO] - {"epoch": 1, "update": 0.361, "loss": "3.115", "ppl": "8.67", "wps": "365815", "ups": "3.08", "wpb": "118629", "bsz": "256", "num_updates": "18600", "lr": "0.000991313", "gnorm": "0.722", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "25.4", "wall": "6090"} [2022-08-01 03:10:38,330][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 [2022-08-01 03:10:43,263][train_inner][INFO] - {"epoch": 1, "update": 0.365, "loss": "3.108", "ppl": "8.62", "wps": "362499", "ups": "3.07", "wpb": "118218", "bsz": "256", "num_updates": "18800", "lr": "0.000991111", "gnorm": "0.722", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.3", "wall": "6156"} [2022-08-01 03:11:48,239][train_inner][INFO] - {"epoch": 1, "update": 0.369, "loss": "3.098", "ppl": "8.56", "wps": "365594", "ups": "3.08", "wpb": "118774", "bsz": "256", "num_updates": "19000", "lr": "0.000990909", "gnorm": "0.725", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "22.8", "wall": "6221"} [2022-08-01 03:12:52,791][train_inner][INFO] - {"epoch": 1, "update": 0.373, "loss": "3.085", "ppl": "8.48", "wps": "366524", "ups": "3.1", "wpb": "118297", "bsz": "256", "num_updates": "19200", "lr": "0.000990707", "gnorm": "0.718", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20", "wall": "6285"} [2022-08-01 03:13:57,490][train_inner][INFO] - {"epoch": 1, "update": 0.377, "loss": "3.09", "ppl": "8.51", "wps": "364214", "ups": "3.09", "wpb": "117820", "bsz": "256", "num_updates": "19400", "lr": "0.000990505", "gnorm": "0.717", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "19.7", "wall": "6350"} [2022-08-01 03:15:02,391][train_inner][INFO] - {"epoch": 1, "update": 0.381, "loss": "3.084", "ppl": "8.48", "wps": "364037", "ups": "3.08", "wpb": "118130", "bsz": "256", "num_updates": "19600", "lr": "0.000990303", "gnorm": "0.718", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.6", "wall": "6415"} [2022-08-01 03:16:07,196][train_inner][INFO] - {"epoch": 1, "update": 0.385, "loss": "3.083", "ppl": "8.48", "wps": "363889", "ups": "3.09", "wpb": "117908", "bsz": "256", "num_updates": "19800", "lr": "0.000990101", "gnorm": "0.717", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "21.7", "wall": "6480"} [2022-08-01 03:17:12,182][train_inner][INFO] - {"epoch": 1, "update": 0.389, "loss": "3.067", "ppl": "8.38", "wps": "365063", "ups": "3.08", "wpb": "118617", "bsz": "256", "num_updates": "20000", "lr": "0.000989899", "gnorm": "0.712", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "21.3", "wall": "6545"} [2022-08-01 03:18:17,329][train_inner][INFO] - {"epoch": 1, "update": 0.392, "loss": "3.064", "ppl": "8.37", "wps": "362315", "ups": "3.07", "wpb": "118017", "bsz": "256", "num_updates": "20200", "lr": "0.000989697", "gnorm": "0.713", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "21.7", "wall": "6610"} [2022-08-01 03:19:22,442][train_inner][INFO] - {"epoch": 1, "update": 0.396, "loss": "3.057", "ppl": "8.33", "wps": "363082", "ups": "3.07", "wpb": "118207", "bsz": "256", "num_updates": "20400", "lr": "0.000989495", "gnorm": "0.719", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20", "wall": "6675"} [2022-08-01 03:20:27,714][train_inner][INFO] - {"epoch": 1, "update": 0.4, "loss": "3.05", "ppl": "8.28", "wps": "362843", "ups": "3.06", "wpb": "118414", "bsz": "256", "num_updates": "20600", "lr": "0.000989293", "gnorm": "0.718", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "28.2", "wall": "6740"} [2022-08-01 03:21:32,570][train_inner][INFO] - {"epoch": 1, "update": 0.404, "loss": "3.048", "ppl": "8.27", "wps": "366310", "ups": "3.08", "wpb": "118785", "bsz": "256", "num_updates": "20800", "lr": "0.000989091", "gnorm": "0.711", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20.9", "wall": "6805"} [2022-08-01 03:22:38,762][train_inner][INFO] - {"epoch": 1, "update": 0.408, "loss": "3.043", "ppl": "8.24", "wps": "358502", "ups": "3.02", "wpb": "118648", "bsz": "256", "num_updates": "21000", "lr": "0.000988889", "gnorm": "0.711", "clip": "0", "loss_scale": "64", "train_wall": "66", "gb_free": "20.3", "wall": "6871"} [2022-08-01 03:23:43,695][train_inner][INFO] - {"epoch": 1, "update": 0.412, "loss": "3.038", "ppl": "8.21", "wps": "364573", "ups": "3.08", "wpb": "118363", "bsz": "256", "num_updates": "21200", "lr": "0.000988687", "gnorm": "0.708", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "19.9", "wall": "6936"} [2022-08-01 03:24:02,937][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 [2022-08-01 03:24:49,056][train_inner][INFO] - {"epoch": 1, "update": 0.416, "loss": "3.038", "ppl": "8.21", "wps": "360575", "ups": "3.06", "wpb": "117836", "bsz": "256", "num_updates": "21400", "lr": "0.000988485", "gnorm": "0.708", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "22", "wall": "7001"} [2022-08-01 03:25:53,640][train_inner][INFO] - {"epoch": 1, "update": 0.42, "loss": "3.036", "ppl": "8.2", "wps": "365382", "ups": "3.1", "wpb": "117988", "bsz": "256", "num_updates": "21600", "lr": "0.000988283", "gnorm": "0.71", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20.5", "wall": "7066"} [2022-08-01 03:26:58,821][train_inner][INFO] - {"epoch": 1, "update": 0.424, "loss": "3.022", "ppl": "8.12", "wps": "364667", "ups": "3.07", "wpb": "118845", "bsz": "256", "num_updates": "21800", "lr": "0.000988081", "gnorm": "0.706", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "23.2", "wall": "7131"} [2022-08-01 03:28:03,811][train_inner][INFO] - {"epoch": 1, "update": 0.427, "loss": "3.022", "ppl": "8.13", "wps": "362993", "ups": "3.08", "wpb": "117953", "bsz": "256", "num_updates": "22000", "lr": "0.000987879", "gnorm": "0.704", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "23.4", "wall": "7196"} [2022-08-01 03:29:08,578][train_inner][INFO] - {"epoch": 1, "update": 0.431, "loss": "3.009", "ppl": "8.05", "wps": "365777", "ups": "3.09", "wpb": "118450", "bsz": "256", "num_updates": "22200", "lr": "0.000987677", "gnorm": "0.702", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20", "wall": "7261"} [2022-08-01 03:30:13,239][train_inner][INFO] - {"epoch": 1, "update": 0.435, "loss": "3.007", "ppl": "8.04", "wps": "365155", "ups": "3.09", "wpb": "118055", "bsz": "256", "num_updates": "22400", "lr": "0.000987475", "gnorm": "0.702", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "19.8", "wall": "7326"} [2022-08-01 03:31:18,031][train_inner][INFO] - {"epoch": 1, "update": 0.439, "loss": "3.003", "ppl": "8.02", "wps": "366872", "ups": "3.09", "wpb": "118851", "bsz": "256", "num_updates": "22600", "lr": "0.000987273", "gnorm": "0.698", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20.6", "wall": "7390"} [2022-08-01 03:32:23,020][train_inner][INFO] - {"epoch": 1, "update": 0.443, "loss": "2.997", "ppl": "7.98", "wps": "363770", "ups": "3.08", "wpb": "118204", "bsz": "256", "num_updates": "22800", "lr": "0.000987071", "gnorm": "0.694", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "23.4", "wall": "7455"} [2022-08-01 03:33:27,835][train_inner][INFO] - {"epoch": 1, "update": 0.447, "loss": "2.988", "ppl": "7.93", "wps": "366242", "ups": "3.09", "wpb": "118687", "bsz": "256", "num_updates": "23000", "lr": "0.000986869", "gnorm": "0.697", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20.5", "wall": "7520"} [2022-08-01 03:34:32,856][train_inner][INFO] - {"epoch": 1, "update": 0.451, "loss": "2.989", "ppl": "7.94", "wps": "364354", "ups": "3.08", "wpb": "118452", "bsz": "256", "num_updates": "23200", "lr": "0.000986667", "gnorm": "0.697", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "21.2", "wall": "7585"} [2022-08-01 03:35:37,960][train_inner][INFO] - {"epoch": 1, "update": 0.455, "loss": "2.985", "ppl": "7.92", "wps": "364288", "ups": "3.07", "wpb": "118582", "bsz": "256", "num_updates": "23400", "lr": "0.000986465", "gnorm": "0.692", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "19.7", "wall": "7650"} [2022-08-01 03:36:43,047][train_inner][INFO] - {"epoch": 1, "update": 0.459, "loss": "2.98", "ppl": "7.89", "wps": "364196", "ups": "3.07", "wpb": "118520", "bsz": "256", "num_updates": "23600", "lr": "0.000986263", "gnorm": "0.696", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "20.8", "wall": "7715"} [2022-08-01 03:37:14,594][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 [2022-08-01 03:37:48,269][train_inner][INFO] - {"epoch": 1, "update": 0.462, "loss": "2.973", "ppl": "7.85", "wps": "363912", "ups": "3.07", "wpb": "118674", "bsz": "256", "num_updates": "23800", "lr": "0.000986061", "gnorm": "0.69", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "26.7", "wall": "7781"} [2022-08-01 03:38:54,158][train_inner][INFO] - {"epoch": 1, "update": 0.466, "loss": "2.974", "ppl": "7.86", "wps": "358955", "ups": "3.04", "wpb": "118254", "bsz": "256", "num_updates": "24000", "lr": "0.000985859", "gnorm": "0.69", "clip": "0", "loss_scale": "32", "train_wall": "66", "gb_free": "19.8", "wall": "7846"} [2022-08-01 03:39:58,991][train_inner][INFO] - {"epoch": 1, "update": 0.47, "loss": "2.971", "ppl": "7.84", "wps": "364458", "ups": "3.08", "wpb": "118143", "bsz": "255.9", "num_updates": "24200", "lr": "0.000985657", "gnorm": "0.687", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "19.7", "wall": "7911"} [2022-08-01 03:41:03,671][train_inner][INFO] - {"epoch": 1, "update": 0.474, "loss": "2.968", "ppl": "7.82", "wps": "365722", "ups": "3.09", "wpb": "118273", "bsz": "256", "num_updates": "24400", "lr": "0.000985455", "gnorm": "0.69", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "28.2", "wall": "7976"} [2022-08-01 03:42:08,447][train_inner][INFO] - {"epoch": 1, "update": 0.478, "loss": "2.966", "ppl": "7.81", "wps": "364137", "ups": "3.09", "wpb": "117934", "bsz": "256", "num_updates": "24600", "lr": "0.000985253", "gnorm": "0.7", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20.6", "wall": "8041"} [2022-08-01 03:43:13,386][train_inner][INFO] - {"epoch": 1, "update": 0.482, "loss": "2.955", "ppl": "7.75", "wps": "364784", "ups": "3.08", "wpb": "118441", "bsz": "256", "num_updates": "24800", "lr": "0.000985051", "gnorm": "0.686", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "19.7", "wall": "8106"} [2022-08-01 03:44:19,247][train_inner][INFO] - {"epoch": 1, "update": 0.486, "loss": "2.951", "ppl": "7.73", "wps": "359210", "ups": "3.04", "wpb": "118289", "bsz": "256", "num_updates": "25000", "lr": "0.000984848", "gnorm": "0.683", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "22.2", "wall": "8172"} [2022-08-01 03:45:25,238][train_inner][INFO] - {"epoch": 1, "update": 0.49, "loss": "2.951", "ppl": "7.73", "wps": "357625", "ups": "3.03", "wpb": "117998", "bsz": "256", "num_updates": "25200", "lr": "0.000984646", "gnorm": "0.683", "clip": "0", "loss_scale": "32", "train_wall": "66", "gb_free": "25.6", "wall": "8238"} [2022-08-01 03:46:30,142][train_inner][INFO] - {"epoch": 1, "update": 0.494, "loss": "2.949", "ppl": "7.72", "wps": "363780", "ups": "3.08", "wpb": "118051", "bsz": "256", "num_updates": "25400", "lr": "0.000984444", "gnorm": "0.685", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.2", "wall": "8302"} [2022-08-01 03:47:35,064][train_inner][INFO] - {"epoch": 1, "update": 0.497, "loss": "2.942", "ppl": "7.68", "wps": "365141", "ups": "3.08", "wpb": "118528", "bsz": "256", "num_updates": "25600", "lr": "0.000984242", "gnorm": "0.687", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "19.9", "wall": "8367"} [2022-08-01 03:48:41,037][train_inner][INFO] - {"epoch": 1, "update": 0.501, "loss": "2.941", "ppl": "7.68", "wps": "357528", "ups": "3.03", "wpb": "117933", "bsz": "256", "num_updates": "25800", "lr": "0.00098404", "gnorm": "0.685", "clip": "0", "loss_scale": "64", "train_wall": "66", "gb_free": "21.7", "wall": "8433"} [2022-08-01 03:49:45,572][train_inner][INFO] - {"epoch": 1, "update": 0.505, "loss": "2.937", "ppl": "7.66", "wps": "365847", "ups": "3.1", "wpb": "118049", "bsz": "256", "num_updates": "26000", "lr": "0.000983838", "gnorm": "0.681", "clip": "0", "loss_scale": "64", "train_wall": "64", "gb_free": "22", "wall": "8498"} [2022-08-01 03:50:41,271][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 [2022-08-01 03:50:50,730][train_inner][INFO] - {"epoch": 1, "update": 0.509, "loss": "2.935", "ppl": "7.65", "wps": "362514", "ups": "3.07", "wpb": "118101", "bsz": "256", "num_updates": "26200", "lr": "0.000983636", "gnorm": "0.677", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "21.5", "wall": "8563"} [2022-08-01 03:51:55,818][train_inner][INFO] - {"epoch": 1, "update": 0.513, "loss": "2.929", "ppl": "7.61", "wps": "364139", "ups": "3.07", "wpb": "118505", "bsz": "256", "num_updates": "26400", "lr": "0.000983434", "gnorm": "0.678", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "22.6", "wall": "8628"} [2022-08-01 03:53:00,516][train_inner][INFO] - {"epoch": 1, "update": 0.517, "loss": "2.923", "ppl": "7.59", "wps": "365396", "ups": "3.09", "wpb": "118200", "bsz": "256", "num_updates": "26600", "lr": "0.000983232", "gnorm": "0.677", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20.8", "wall": "8693"} [2022-08-01 03:54:05,300][train_inner][INFO] - {"epoch": 1, "update": 0.521, "loss": "2.916", "ppl": "7.55", "wps": "364843", "ups": "3.09", "wpb": "118177", "bsz": "256", "num_updates": "26800", "lr": "0.00098303", "gnorm": "0.674", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "24.2", "wall": "8758"} [2022-08-01 03:55:10,320][train_inner][INFO] - {"epoch": 1, "update": 0.525, "loss": "2.926", "ppl": "7.6", "wps": "363429", "ups": "3.08", "wpb": "118150", "bsz": "256", "num_updates": "27000", "lr": "0.000982828", "gnorm": "0.671", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "21.7", "wall": "8823"} [2022-08-01 03:56:15,765][train_inner][INFO] - {"epoch": 1, "update": 0.528, "loss": "2.917", "ppl": "7.55", "wps": "361701", "ups": "3.06", "wpb": "118356", "bsz": "256", "num_updates": "27200", "lr": "0.000982626", "gnorm": "0.672", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "21.9", "wall": "8888"} [2022-08-01 03:57:21,196][train_inner][INFO] - {"epoch": 1, "update": 0.532, "loss": "2.913", "ppl": "7.53", "wps": "361064", "ups": "3.06", "wpb": "118122", "bsz": "256", "num_updates": "27400", "lr": "0.000982424", "gnorm": "0.673", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.5", "wall": "8954"} [2022-08-01 03:58:26,150][train_inner][INFO] - {"epoch": 1, "update": 0.536, "loss": "2.906", "ppl": "7.5", "wps": "363081", "ups": "3.08", "wpb": "117915", "bsz": "256", "num_updates": "27600", "lr": "0.000982222", "gnorm": "0.668", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "22.3", "wall": "9018"} [2022-08-01 03:59:31,344][train_inner][INFO] - {"epoch": 1, "update": 0.54, "loss": "2.896", "ppl": "7.44", "wps": "365156", "ups": "3.07", "wpb": "119029", "bsz": "256", "num_updates": "27800", "lr": "0.00098202", "gnorm": "0.669", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "21.1", "wall": "9084"} [2022-08-01 04:00:37,306][train_inner][INFO] - {"epoch": 1, "update": 0.544, "loss": "2.897", "ppl": "7.45", "wps": "357880", "ups": "3.03", "wpb": "118029", "bsz": "256", "num_updates": "28000", "lr": "0.000981818", "gnorm": "0.665", "clip": "0", "loss_scale": "32", "train_wall": "66", "gb_free": "20", "wall": "9150"} [2022-08-01 04:01:42,562][train_inner][INFO] - {"epoch": 1, "update": 0.548, "loss": "2.888", "ppl": "7.4", "wps": "363964", "ups": "3.07", "wpb": "118745", "bsz": "256", "num_updates": "28200", "lr": "0.000981616", "gnorm": "0.663", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.8", "wall": "9215"} [2022-08-01 04:02:47,260][train_inner][INFO] - {"epoch": 1, "update": 0.552, "loss": "2.893", "ppl": "7.43", "wps": "366210", "ups": "3.09", "wpb": "118464", "bsz": "256", "num_updates": "28400", "lr": "0.000981414", "gnorm": "0.663", "clip": "0", "loss_scale": "64", "train_wall": "64", "gb_free": "20.6", "wall": "9280"} [2022-08-01 04:03:53,436][train_inner][INFO] - {"epoch": 1, "update": 0.556, "loss": "2.892", "ppl": "7.42", "wps": "357328", "ups": "3.02", "wpb": "118230", "bsz": "256", "num_updates": "28600", "lr": "0.000981212", "gnorm": "0.658", "clip": "0", "loss_scale": "64", "train_wall": "66", "gb_free": "26.7", "wall": "9346"} [2022-08-01 04:04:24,997][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 [2022-08-01 04:04:58,651][train_inner][INFO] - {"epoch": 1, "update": 0.56, "loss": "2.891", "ppl": "7.42", "wps": "362865", "ups": "3.07", "wpb": "118320", "bsz": "256", "num_updates": "28800", "lr": "0.00098101", "gnorm": "0.662", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "21.5", "wall": "9411"} [2022-08-01 04:06:03,854][train_inner][INFO] - {"epoch": 1, "update": 0.563, "loss": "2.881", "ppl": "7.37", "wps": "362225", "ups": "3.07", "wpb": "118090", "bsz": "256", "num_updates": "29000", "lr": "0.000980808", "gnorm": "0.661", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "23.8", "wall": "9476"} [2022-08-01 04:07:01,207][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-01 04:07:08,929][train_inner][INFO] - {"epoch": 1, "update": 0.567, "loss": "2.877", "ppl": "7.35", "wps": "362262", "ups": "3.07", "wpb": "117869", "bsz": "256", "num_updates": "29200", "lr": "0.000980606", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21", "wall": "9541"} [2022-08-01 04:08:13,834][train_inner][INFO] - {"epoch": 1, "update": 0.571, "loss": "2.884", "ppl": "7.38", "wps": "363391", "ups": "3.08", "wpb": "117928", "bsz": "256", "num_updates": "29400", "lr": "0.000980404", "gnorm": "0.663", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.4", "wall": "9606"} [2022-08-01 04:09:18,585][train_inner][INFO] - {"epoch": 1, "update": 0.575, "loss": "2.869", "ppl": "7.3", "wps": "366461", "ups": "3.09", "wpb": "118642", "bsz": "256", "num_updates": "29600", "lr": "0.000980202", "gnorm": "0.654", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.9", "wall": "9671"} [2022-08-01 04:10:23,632][train_inner][INFO] - {"epoch": 1, "update": 0.579, "loss": "2.873", "ppl": "7.32", "wps": "363877", "ups": "3.07", "wpb": "118343", "bsz": "256", "num_updates": "29800", "lr": "0.00098", "gnorm": "0.652", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "9736"} [2022-08-01 04:11:28,594][train_inner][INFO] - {"epoch": 1, "update": 0.583, "loss": "2.87", "ppl": "7.31", "wps": "365756", "ups": "3.08", "wpb": "118800", "bsz": "256", "num_updates": "30000", "lr": "0.000979798", "gnorm": "0.656", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "9801"} [2022-08-01 04:12:33,635][train_inner][INFO] - {"epoch": 1, "update": 0.587, "loss": "2.865", "ppl": "7.29", "wps": "362940", "ups": "3.08", "wpb": "118028", "bsz": "256", "num_updates": "30200", "lr": "0.000979596", "gnorm": "0.656", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.4", "wall": "9866"} [2022-08-01 04:13:38,492][train_inner][INFO] - {"epoch": 1, "update": 0.591, "loss": "2.875", "ppl": "7.33", "wps": "364018", "ups": "3.08", "wpb": "118044", "bsz": "256", "num_updates": "30400", "lr": "0.000979394", "gnorm": "0.653", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "9931"} [2022-08-01 04:14:43,306][train_inner][INFO] - {"epoch": 1, "update": 0.595, "loss": "2.868", "ppl": "7.3", "wps": "364588", "ups": "3.09", "wpb": "118151", "bsz": "256", "num_updates": "30600", "lr": "0.000979192", "gnorm": "0.657", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.9", "wall": "9996"} [2022-08-01 04:15:49,120][train_inner][INFO] - {"epoch": 1, "update": 0.598, "loss": "2.851", "ppl": "7.22", "wps": "361280", "ups": "3.04", "wpb": "118884", "bsz": "256", "num_updates": "30800", "lr": "0.00097899", "gnorm": "0.65", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.8", "wall": "10061"} [2022-08-01 04:16:54,330][train_inner][INFO] - {"epoch": 1, "update": 0.602, "loss": "2.861", "ppl": "7.26", "wps": "363370", "ups": "3.07", "wpb": "118476", "bsz": "256", "num_updates": "31000", "lr": "0.000978788", "gnorm": "0.65", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.1", "wall": "10127"} [2022-08-01 04:17:58,862][train_inner][INFO] - {"epoch": 1, "update": 0.606, "loss": "2.851", "ppl": "7.21", "wps": "365286", "ups": "3.1", "wpb": "117860", "bsz": "256", "num_updates": "31200", "lr": "0.000978586", "gnorm": "0.651", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.3", "wall": "10191"} [2022-08-01 04:19:03,684][train_inner][INFO] - {"epoch": 1, "update": 0.61, "loss": "2.852", "ppl": "7.22", "wps": "364354", "ups": "3.09", "wpb": "118090", "bsz": "256", "num_updates": "31400", "lr": "0.000978384", "gnorm": "0.649", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "24.4", "wall": "10256"} [2022-08-01 04:20:08,600][train_inner][INFO] - {"epoch": 1, "update": 0.614, "loss": "2.85", "ppl": "7.21", "wps": "363322", "ups": "3.08", "wpb": "117927", "bsz": "256", "num_updates": "31600", "lr": "0.000978182", "gnorm": "0.648", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "19.8", "wall": "10321"} [2022-08-01 04:21:14,614][train_inner][INFO] - {"epoch": 1, "update": 0.618, "loss": "2.845", "ppl": "7.19", "wps": "357344", "ups": "3.03", "wpb": "117946", "bsz": "256", "num_updates": "31800", "lr": "0.00097798", "gnorm": "0.645", "clip": "0", "loss_scale": "32", "train_wall": "66", "gb_free": "20", "wall": "10387"} [2022-08-01 04:22:19,551][train_inner][INFO] - {"epoch": 1, "update": 0.622, "loss": "2.836", "ppl": "7.14", "wps": "366321", "ups": "3.08", "wpb": "118938", "bsz": "256", "num_updates": "32000", "lr": "0.000977778", "gnorm": "0.638", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "19.8", "wall": "10452"} [2022-08-01 04:23:24,647][train_inner][INFO] - {"epoch": 1, "update": 0.626, "loss": "2.841", "ppl": "7.17", "wps": "365214", "ups": "3.07", "wpb": "118868", "bsz": "256", "num_updates": "32200", "lr": "0.000977576", "gnorm": "0.638", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "25.9", "wall": "10517"} [2022-08-01 04:24:30,501][train_inner][INFO] - {"epoch": 1, "update": 0.63, "loss": "2.838", "ppl": "7.15", "wps": "357703", "ups": "3.04", "wpb": "117780", "bsz": "256", "num_updates": "32400", "lr": "0.000977374", "gnorm": "0.64", "clip": "0", "loss_scale": "32", "train_wall": "66", "gb_free": "21.3", "wall": "10583"} [2022-08-01 04:25:35,211][train_inner][INFO] - {"epoch": 1, "update": 0.633, "loss": "2.835", "ppl": "7.13", "wps": "365928", "ups": "3.09", "wpb": "118392", "bsz": "256", "num_updates": "32600", "lr": "0.000977172", "gnorm": "0.644", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "26.5", "wall": "10648"} [2022-08-01 04:26:39,986][train_inner][INFO] - {"epoch": 1, "update": 0.637, "loss": "2.826", "ppl": "7.09", "wps": "364437", "ups": "3.09", "wpb": "118032", "bsz": "256", "num_updates": "32800", "lr": "0.00097697", "gnorm": "0.638", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "23", "wall": "10712"} [2022-08-01 04:27:45,778][train_inner][INFO] - {"epoch": 1, "update": 0.641, "loss": "2.833", "ppl": "7.13", "wps": "359856", "ups": "3.04", "wpb": "118376", "bsz": "256", "num_updates": "33000", "lr": "0.000976768", "gnorm": "0.636", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "23.5", "wall": "10778"} [2022-08-01 04:28:50,339][train_inner][INFO] - {"epoch": 1, "update": 0.645, "loss": "2.829", "ppl": "7.11", "wps": "365848", "ups": "3.1", "wpb": "118095", "bsz": "256", "num_updates": "33200", "lr": "0.000976566", "gnorm": "0.638", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "21.8", "wall": "10843"} [2022-08-01 04:29:31,239][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 [2022-08-01 04:29:55,836][train_inner][INFO] - {"epoch": 1, "update": 0.649, "loss": "2.82", "ppl": "7.06", "wps": "360395", "ups": "3.05", "wpb": "118022", "bsz": "256", "num_updates": "33400", "lr": "0.000976364", "gnorm": "0.634", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "23", "wall": "10908"} [2022-08-01 04:31:00,298][train_inner][INFO] - {"epoch": 1, "update": 0.653, "loss": "2.825", "ppl": "7.08", "wps": "366254", "ups": "3.1", "wpb": "118048", "bsz": "256", "num_updates": "33600", "lr": "0.000976162", "gnorm": "0.632", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "21.7", "wall": "10973"} [2022-08-01 04:32:05,355][train_inner][INFO] - {"epoch": 1, "update": 0.657, "loss": "2.82", "ppl": "7.06", "wps": "365442", "ups": "3.07", "wpb": "118871", "bsz": "256", "num_updates": "33800", "lr": "0.00097596", "gnorm": "0.632", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "19.8", "wall": "11038"} [2022-08-01 04:33:10,454][train_inner][INFO] - {"epoch": 1, "update": 0.661, "loss": "2.822", "ppl": "7.07", "wps": "363173", "ups": "3.07", "wpb": "118208", "bsz": "256", "num_updates": "34000", "lr": "0.000975758", "gnorm": "0.631", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20", "wall": "11103"} [2022-08-01 04:34:15,389][train_inner][INFO] - {"epoch": 1, "update": 0.665, "loss": "2.82", "ppl": "7.06", "wps": "363641", "ups": "3.08", "wpb": "118064", "bsz": "256", "num_updates": "34200", "lr": "0.000975556", "gnorm": "0.638", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "27.4", "wall": "11168"} [2022-08-01 04:35:20,068][train_inner][INFO] - {"epoch": 1, "update": 0.668, "loss": "2.813", "ppl": "7.03", "wps": "365465", "ups": "3.09", "wpb": "118187", "bsz": "256", "num_updates": "34400", "lr": "0.000975354", "gnorm": "0.632", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "26.5", "wall": "11232"} [2022-08-01 04:36:24,776][train_inner][INFO] - {"epoch": 1, "update": 0.672, "loss": "2.808", "ppl": "7", "wps": "364928", "ups": "3.09", "wpb": "118067", "bsz": "256", "num_updates": "34600", "lr": "0.000975152", "gnorm": "0.63", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20.1", "wall": "11297"} [2022-08-01 04:37:29,694][train_inner][INFO] - {"epoch": 1, "update": 0.676, "loss": "2.804", "ppl": "6.98", "wps": "365627", "ups": "3.08", "wpb": "118676", "bsz": "256", "num_updates": "34800", "lr": "0.000974949", "gnorm": "0.629", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20", "wall": "11362"} [2022-08-01 04:38:34,327][train_inner][INFO] - {"epoch": 1, "update": 0.68, "loss": "2.807", "ppl": "7", "wps": "363080", "ups": "3.09", "wpb": "117334", "bsz": "256", "num_updates": "35000", "lr": "0.000974747", "gnorm": "0.634", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "23.8", "wall": "11427"} [2022-08-01 04:39:39,464][train_inner][INFO] - {"epoch": 1, "update": 0.684, "loss": "2.799", "ppl": "6.96", "wps": "365199", "ups": "3.07", "wpb": "118936", "bsz": "256", "num_updates": "35200", "lr": "0.000974545", "gnorm": "0.622", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20", "wall": "11492"} [2022-08-01 04:40:44,942][train_inner][INFO] - {"epoch": 1, "update": 0.688, "loss": "2.8", "ppl": "6.96", "wps": "359993", "ups": "3.05", "wpb": "117858", "bsz": "256", "num_updates": "35400", "lr": "0.000974343", "gnorm": "0.627", "clip": "0", "loss_scale": "64", "train_wall": "65", "gb_free": "25.6", "wall": "11557"} [2022-08-01 04:41:49,732][train_inner][INFO] - {"epoch": 1, "update": 0.692, "loss": "2.798", "ppl": "6.96", "wps": "367557", "ups": "3.09", "wpb": "119068", "bsz": "256", "num_updates": "35600", "lr": "0.000974141", "gnorm": "0.624", "clip": "0", "loss_scale": "64", "train_wall": "64", "gb_free": "20.6", "wall": "11622"} [2022-08-01 04:41:57,123][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 [2022-08-01 04:42:55,343][train_inner][INFO] - {"epoch": 1, "update": 0.696, "loss": "2.799", "ppl": "6.96", "wps": "361820", "ups": "3.05", "wpb": "118694", "bsz": "256", "num_updates": "35800", "lr": "0.000973939", "gnorm": "0.626", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "28.2", "wall": "11688"} [2022-08-01 04:44:00,366][train_inner][INFO] - {"epoch": 1, "update": 0.699, "loss": "2.8", "ppl": "6.96", "wps": "363132", "ups": "3.08", "wpb": "118059", "bsz": "256", "num_updates": "36000", "lr": "0.000973737", "gnorm": "0.625", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.2", "wall": "11753"} [2022-08-01 04:45:05,218][train_inner][INFO] - {"epoch": 1, "update": 0.703, "loss": "2.795", "ppl": "6.94", "wps": "364988", "ups": "3.08", "wpb": "118350", "bsz": "256", "num_updates": "36200", "lr": "0.000973535", "gnorm": "0.624", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20.1", "wall": "11818"} [2022-08-01 04:46:10,315][train_inner][INFO] - {"epoch": 1, "update": 0.707, "loss": "2.79", "ppl": "6.91", "wps": "365211", "ups": "3.07", "wpb": "118869", "bsz": "256", "num_updates": "36400", "lr": "0.000973333", "gnorm": "0.619", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.4", "wall": "11883"} [2022-08-01 04:47:14,836][train_inner][INFO] - {"epoch": 1, "update": 0.711, "loss": "2.783", "ppl": "6.88", "wps": "367250", "ups": "3.1", "wpb": "118475", "bsz": "256", "num_updates": "36600", "lr": "0.000973131", "gnorm": "0.621", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "23.4", "wall": "11947"} [2022-08-01 04:47:38,407][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-01 04:48:20,121][train_inner][INFO] - {"epoch": 1, "update": 0.715, "loss": "2.782", "ppl": "6.88", "wps": "362329", "ups": "3.06", "wpb": "118272", "bsz": "256", "num_updates": "36800", "lr": "0.000972929", "gnorm": "0.618", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "12012"} [2022-08-01 04:49:24,785][train_inner][INFO] - {"epoch": 1, "update": 0.719, "loss": "2.792", "ppl": "6.93", "wps": "364407", "ups": "3.09", "wpb": "117820", "bsz": "256", "num_updates": "37000", "lr": "0.000972727", "gnorm": "0.621", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.8", "wall": "12077"} [2022-08-01 04:50:29,917][train_inner][INFO] - {"epoch": 1, "update": 0.723, "loss": "2.786", "ppl": "6.9", "wps": "362397", "ups": "3.07", "wpb": "118016", "bsz": "256", "num_updates": "37200", "lr": "0.000972525", "gnorm": "0.621", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.3", "wall": "12142"} [2022-08-01 04:51:34,631][train_inner][INFO] - {"epoch": 1, "update": 0.727, "loss": "2.78", "ppl": "6.87", "wps": "366528", "ups": "3.09", "wpb": "118594", "bsz": "256", "num_updates": "37400", "lr": "0.000972323", "gnorm": "0.618", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.5", "wall": "12207"} [2022-08-01 04:52:39,743][train_inner][INFO] - {"epoch": 1, "update": 0.731, "loss": "2.776", "ppl": "6.85", "wps": "362510", "ups": "3.07", "wpb": "118017", "bsz": "256", "num_updates": "37600", "lr": "0.000972121", "gnorm": "0.618", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.5", "wall": "12272"} [2022-08-01 04:53:44,996][train_inner][INFO] - {"epoch": 1, "update": 0.734, "loss": "2.776", "ppl": "6.85", "wps": "364390", "ups": "3.07", "wpb": "118886", "bsz": "256", "num_updates": "37800", "lr": "0.000971919", "gnorm": "0.62", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "12337"} [2022-08-01 04:54:45,235][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-01 04:54:50,807][train_inner][INFO] - {"epoch": 1, "update": 0.738, "loss": "2.785", "ppl": "6.89", "wps": "358905", "ups": "3.04", "wpb": "118099", "bsz": "256", "num_updates": "38000", "lr": "0.000971717", "gnorm": "0.621", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.7", "wall": "12403"} [2022-08-01 04:55:55,491][train_inner][INFO] - {"epoch": 1, "update": 0.742, "loss": "2.775", "ppl": "6.84", "wps": "366347", "ups": "3.09", "wpb": "118483", "bsz": "256", "num_updates": "38200", "lr": "0.000971515", "gnorm": "0.62", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.9", "wall": "12468"} [2022-08-01 04:57:00,689][train_inner][INFO] - {"epoch": 1, "update": 0.746, "loss": "2.771", "ppl": "6.83", "wps": "363887", "ups": "3.07", "wpb": "118621", "bsz": "256", "num_updates": "38400", "lr": "0.000971313", "gnorm": "0.615", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.2", "wall": "12533"} [2022-08-01 04:58:05,015][train_inner][INFO] - {"epoch": 1, "update": 0.75, "loss": "2.774", "ppl": "6.84", "wps": "365856", "ups": "3.11", "wpb": "117669", "bsz": "256", "num_updates": "38600", "lr": "0.000971111", "gnorm": "0.626", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.5", "wall": "12597"} [2022-08-01 04:59:09,866][train_inner][INFO] - {"epoch": 1, "update": 0.754, "loss": "2.77", "ppl": "6.82", "wps": "363516", "ups": "3.08", "wpb": "117869", "bsz": "256", "num_updates": "38800", "lr": "0.000970909", "gnorm": "0.62", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23.4", "wall": "12662"} [2022-08-01 05:00:15,020][train_inner][INFO] - {"epoch": 1, "update": 0.758, "loss": "2.77", "ppl": "6.82", "wps": "364870", "ups": "3.07", "wpb": "118862", "bsz": "255.9", "num_updates": "39000", "lr": "0.000970707", "gnorm": "0.612", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "12727"} [2022-08-01 05:01:19,394][train_inner][INFO] - {"epoch": 1, "update": 0.762, "loss": "2.774", "ppl": "6.84", "wps": "365249", "ups": "3.11", "wpb": "117560", "bsz": "256", "num_updates": "39200", "lr": "0.000970505", "gnorm": "0.625", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.3", "wall": "12792"} [2022-08-01 05:02:24,164][train_inner][INFO] - {"epoch": 1, "update": 0.766, "loss": "2.762", "ppl": "6.79", "wps": "362437", "ups": "3.09", "wpb": "117372", "bsz": "256", "num_updates": "39400", "lr": "0.000970303", "gnorm": "0.613", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.7", "wall": "12856"} [2022-08-01 05:03:28,845][train_inner][INFO] - {"epoch": 1, "update": 0.769, "loss": "2.766", "ppl": "6.8", "wps": "363354", "ups": "3.09", "wpb": "117509", "bsz": "256", "num_updates": "39600", "lr": "0.000970101", "gnorm": "0.615", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.5", "wall": "12921"} [2022-08-01 05:04:34,151][train_inner][INFO] - {"epoch": 1, "update": 0.773, "loss": "2.749", "ppl": "6.72", "wps": "361769", "ups": "3.06", "wpb": "118127", "bsz": "256", "num_updates": "39800", "lr": "0.000969899", "gnorm": "0.611", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.1", "wall": "12986"} [2022-08-01 05:05:38,644][train_inner][INFO] - {"epoch": 1, "update": 0.777, "loss": "2.763", "ppl": "6.79", "wps": "366563", "ups": "3.1", "wpb": "118202", "bsz": "256", "num_updates": "40000", "lr": "0.000969697", "gnorm": "0.615", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.7", "wall": "13051"} [2022-08-01 05:06:43,170][train_inner][INFO] - {"epoch": 1, "update": 0.781, "loss": "2.765", "ppl": "6.8", "wps": "366018", "ups": "3.1", "wpb": "118086", "bsz": "256", "num_updates": "40200", "lr": "0.000969495", "gnorm": "0.609", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.4", "wall": "13115"} [2022-08-01 05:07:47,947][train_inner][INFO] - {"epoch": 1, "update": 0.785, "loss": "2.758", "ppl": "6.76", "wps": "363980", "ups": "3.09", "wpb": "117887", "bsz": "256", "num_updates": "40400", "lr": "0.000969293", "gnorm": "0.611", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.9", "wall": "13180"} [2022-08-01 05:08:52,455][train_inner][INFO] - {"epoch": 1, "update": 0.789, "loss": "2.754", "ppl": "6.74", "wps": "366622", "ups": "3.1", "wpb": "118249", "bsz": "256", "num_updates": "40600", "lr": "0.000969091", "gnorm": "0.609", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20", "wall": "13245"} [2022-08-01 05:09:57,272][train_inner][INFO] - {"epoch": 1, "update": 0.793, "loss": "2.749", "ppl": "6.72", "wps": "364363", "ups": "3.09", "wpb": "118083", "bsz": "256", "num_updates": "40800", "lr": "0.000968889", "gnorm": "0.61", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.1", "wall": "13310"} [2022-08-01 05:11:02,467][train_inner][INFO] - {"epoch": 1, "update": 0.797, "loss": "2.754", "ppl": "6.75", "wps": "362213", "ups": "3.07", "wpb": "118071", "bsz": "256", "num_updates": "41000", "lr": "0.000968687", "gnorm": "0.607", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21", "wall": "13375"} [2022-08-01 05:12:07,607][train_inner][INFO] - {"epoch": 1, "update": 0.801, "loss": "2.752", "ppl": "6.74", "wps": "361983", "ups": "3.07", "wpb": "117895", "bsz": "256", "num_updates": "41200", "lr": "0.000968485", "gnorm": "0.616", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.4", "wall": "13440"} [2022-08-01 05:13:12,448][train_inner][INFO] - {"epoch": 1, "update": 0.804, "loss": "2.738", "ppl": "6.67", "wps": "364532", "ups": "3.08", "wpb": "118182", "bsz": "256", "num_updates": "41400", "lr": "0.000968283", "gnorm": "0.604", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "26.1", "wall": "13505"} [2022-08-01 05:14:17,363][train_inner][INFO] - {"epoch": 1, "update": 0.808, "loss": "2.737", "ppl": "6.67", "wps": "365332", "ups": "3.08", "wpb": "118574", "bsz": "256", "num_updates": "41600", "lr": "0.000968081", "gnorm": "0.603", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "13570"} [2022-08-01 05:15:22,260][train_inner][INFO] - {"epoch": 1, "update": 0.812, "loss": "2.741", "ppl": "6.69", "wps": "364164", "ups": "3.08", "wpb": "118166", "bsz": "256", "num_updates": "41800", "lr": "0.000967879", "gnorm": "0.604", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.3", "wall": "13635"} [2022-08-01 05:16:27,352][train_inner][INFO] - {"epoch": 1, "update": 0.816, "loss": "2.747", "ppl": "6.71", "wps": "362769", "ups": "3.07", "wpb": "118064", "bsz": "256", "num_updates": "42000", "lr": "0.000967677", "gnorm": "0.602", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "13700"} [2022-08-01 05:17:32,474][train_inner][INFO] - {"epoch": 1, "update": 0.82, "loss": "2.745", "ppl": "6.7", "wps": "361393", "ups": "3.07", "wpb": "117672", "bsz": "256", "num_updates": "42200", "lr": "0.000967475", "gnorm": "0.609", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "22.6", "wall": "13765"} [2022-08-01 05:18:37,402][train_inner][INFO] - {"epoch": 1, "update": 0.824, "loss": "2.741", "ppl": "6.68", "wps": "364074", "ups": "3.08", "wpb": "118191", "bsz": "256", "num_updates": "42400", "lr": "0.000967273", "gnorm": "0.602", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.2", "wall": "13830"} [2022-08-01 05:19:42,553][train_inner][INFO] - {"epoch": 1, "update": 0.828, "loss": "2.735", "ppl": "6.66", "wps": "361989", "ups": "3.07", "wpb": "117918", "bsz": "256", "num_updates": "42600", "lr": "0.000967071", "gnorm": "0.602", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.4", "wall": "13895"} [2022-08-01 05:20:12,599][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-01 05:20:47,779][train_inner][INFO] - {"epoch": 1, "update": 0.832, "loss": "2.735", "ppl": "6.66", "wps": "362147", "ups": "3.07", "wpb": "118105", "bsz": "256", "num_updates": "42800", "lr": "0.000966869", "gnorm": "0.605", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21", "wall": "13960"} [2022-08-01 05:21:52,854][train_inner][INFO] - {"epoch": 1, "update": 0.835, "loss": "2.729", "ppl": "6.63", "wps": "364339", "ups": "3.07", "wpb": "118546", "bsz": "256", "num_updates": "43000", "lr": "0.000966667", "gnorm": "0.602", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "14025"} [2022-08-01 05:22:57,882][train_inner][INFO] - {"epoch": 1, "update": 0.839, "loss": "2.734", "ppl": "6.66", "wps": "364818", "ups": "3.08", "wpb": "118617", "bsz": "256", "num_updates": "43200", "lr": "0.000966465", "gnorm": "0.607", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.8", "wall": "14090"} [2022-08-01 05:24:02,692][train_inner][INFO] - {"epoch": 1, "update": 0.843, "loss": "2.723", "ppl": "6.6", "wps": "366654", "ups": "3.09", "wpb": "118812", "bsz": "256", "num_updates": "43400", "lr": "0.000966263", "gnorm": "0.605", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.7", "wall": "14155"} [2022-08-01 05:25:07,268][train_inner][INFO] - {"epoch": 1, "update": 0.847, "loss": "2.724", "ppl": "6.61", "wps": "368516", "ups": "3.1", "wpb": "118984", "bsz": "256", "num_updates": "43600", "lr": "0.000966061", "gnorm": "0.596", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.4", "wall": "14220"} [2022-08-01 05:26:12,284][train_inner][INFO] - {"epoch": 1, "update": 0.851, "loss": "2.727", "ppl": "6.62", "wps": "364200", "ups": "3.08", "wpb": "118393", "bsz": "256", "num_updates": "43800", "lr": "0.000965859", "gnorm": "0.599", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.1", "wall": "14285"} [2022-08-01 05:27:17,105][train_inner][INFO] - {"epoch": 1, "update": 0.855, "loss": "2.726", "ppl": "6.62", "wps": "364202", "ups": "3.09", "wpb": "118038", "bsz": "256", "num_updates": "44000", "lr": "0.000965657", "gnorm": "0.601", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "14349"} [2022-08-01 05:28:21,942][train_inner][INFO] - {"epoch": 1, "update": 0.859, "loss": "2.723", "ppl": "6.6", "wps": "365282", "ups": "3.08", "wpb": "118417", "bsz": "256", "num_updates": "44200", "lr": "0.000965455", "gnorm": "0.6", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "25.3", "wall": "14414"} [2022-08-01 05:29:26,801][train_inner][INFO] - {"epoch": 1, "update": 0.863, "loss": "2.73", "ppl": "6.63", "wps": "364988", "ups": "3.08", "wpb": "118361", "bsz": "256", "num_updates": "44400", "lr": "0.000965253", "gnorm": "0.597", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "14479"} [2022-08-01 05:30:31,463][train_inner][INFO] - {"epoch": 1, "update": 0.867, "loss": "2.717", "ppl": "6.58", "wps": "365176", "ups": "3.09", "wpb": "118064", "bsz": "256", "num_updates": "44600", "lr": "0.000965051", "gnorm": "0.599", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.3", "wall": "14544"} [2022-08-01 05:31:36,295][train_inner][INFO] - {"epoch": 1, "update": 0.87, "loss": "2.718", "ppl": "6.58", "wps": "365488", "ups": "3.08", "wpb": "118475", "bsz": "256", "num_updates": "44800", "lr": "0.000964848", "gnorm": "0.599", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "25.3", "wall": "14609"} [2022-08-01 05:32:06,658][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-01 05:32:41,320][train_inner][INFO] - {"epoch": 1, "update": 0.874, "loss": "2.722", "ppl": "6.6", "wps": "364470", "ups": "3.08", "wpb": "118495", "bsz": "256", "num_updates": "45000", "lr": "0.000964646", "gnorm": "0.598", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.2", "wall": "14674"} [2022-08-01 05:33:46,128][train_inner][INFO] - {"epoch": 1, "update": 0.878, "loss": "2.709", "ppl": "6.54", "wps": "366899", "ups": "3.09", "wpb": "118890", "bsz": "256", "num_updates": "45200", "lr": "0.000964444", "gnorm": "0.598", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.2", "wall": "14738"} [2022-08-01 05:34:50,499][train_inner][INFO] - {"epoch": 1, "update": 0.882, "loss": "2.713", "ppl": "6.56", "wps": "367221", "ups": "3.11", "wpb": "118190", "bsz": "256", "num_updates": "45400", "lr": "0.000964242", "gnorm": "0.6", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22", "wall": "14803"} [2022-08-01 05:35:55,285][train_inner][INFO] - {"epoch": 1, "update": 0.886, "loss": "2.708", "ppl": "6.54", "wps": "366760", "ups": "3.09", "wpb": "118802", "bsz": "256", "num_updates": "45600", "lr": "0.00096404", "gnorm": "0.597", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.3", "wall": "14868"} [2022-08-01 05:37:00,197][train_inner][INFO] - {"epoch": 1, "update": 0.89, "loss": "2.709", "ppl": "6.54", "wps": "365361", "ups": "3.08", "wpb": "118580", "bsz": "256", "num_updates": "45800", "lr": "0.000963838", "gnorm": "0.596", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "14933"} [2022-08-01 05:38:05,372][train_inner][INFO] - {"epoch": 1, "update": 0.894, "loss": "2.71", "ppl": "6.54", "wps": "364238", "ups": "3.07", "wpb": "118693", "bsz": "256", "num_updates": "46000", "lr": "0.000963636", "gnorm": "0.592", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "14998"} [2022-08-01 05:39:10,433][train_inner][INFO] - {"epoch": 1, "update": 0.898, "loss": "2.71", "ppl": "6.54", "wps": "364881", "ups": "3.07", "wpb": "118697", "bsz": "256", "num_updates": "46200", "lr": "0.000963434", "gnorm": "0.596", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "25.3", "wall": "15063"} [2022-08-01 05:40:15,260][train_inner][INFO] - {"epoch": 1, "update": 0.902, "loss": "2.701", "ppl": "6.5", "wps": "365900", "ups": "3.09", "wpb": "118598", "bsz": "256", "num_updates": "46400", "lr": "0.000963232", "gnorm": "0.597", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.3", "wall": "15128"} [2022-08-01 05:41:19,775][train_inner][INFO] - {"epoch": 1, "update": 0.905, "loss": "2.707", "ppl": "6.53", "wps": "366495", "ups": "3.1", "wpb": "118222", "bsz": "256", "num_updates": "46600", "lr": "0.00096303", "gnorm": "0.594", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.9", "wall": "15192"} [2022-08-01 05:42:24,695][train_inner][INFO] - {"epoch": 1, "update": 0.909, "loss": "2.705", "ppl": "6.52", "wps": "363773", "ups": "3.08", "wpb": "118078", "bsz": "256", "num_updates": "46800", "lr": "0.000962828", "gnorm": "0.595", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.4", "wall": "15257"} [2022-08-01 05:43:29,632][train_inner][INFO] - {"epoch": 1, "update": 0.913, "loss": "2.709", "ppl": "6.54", "wps": "363754", "ups": "3.08", "wpb": "118104", "bsz": "256", "num_updates": "47000", "lr": "0.000962626", "gnorm": "0.601", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20", "wall": "15322"} [2022-08-01 05:44:34,359][train_inner][INFO] - {"epoch": 1, "update": 0.917, "loss": "2.702", "ppl": "6.51", "wps": "364418", "ups": "3.09", "wpb": "117936", "bsz": "256", "num_updates": "47200", "lr": "0.000962424", "gnorm": "0.596", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20", "wall": "15387"} [2022-08-01 05:45:39,370][train_inner][INFO] - {"epoch": 1, "update": 0.921, "loss": "2.705", "ppl": "6.52", "wps": "363650", "ups": "3.08", "wpb": "118205", "bsz": "256", "num_updates": "47400", "lr": "0.000962222", "gnorm": "0.594", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "21.1", "wall": "15452"} [2022-08-01 05:46:43,550][train_inner][INFO] - {"epoch": 1, "update": 0.925, "loss": "2.707", "ppl": "6.53", "wps": "366931", "ups": "3.12", "wpb": "117748", "bsz": "256", "num_updates": "47600", "lr": "0.00096202", "gnorm": "0.595", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "21.9", "wall": "15516"} [2022-08-01 05:47:48,709][train_inner][INFO] - {"epoch": 1, "update": 0.929, "loss": "2.696", "ppl": "6.48", "wps": "363664", "ups": "3.07", "wpb": "118477", "bsz": "256", "num_updates": "47800", "lr": "0.000961818", "gnorm": "0.594", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20", "wall": "15581"} [2022-08-01 05:48:53,823][train_inner][INFO] - {"epoch": 1, "update": 0.933, "loss": "2.689", "ppl": "6.45", "wps": "364724", "ups": "3.07", "wpb": "118741", "bsz": "256", "num_updates": "48000", "lr": "0.000961616", "gnorm": "0.59", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20", "wall": "15646"} [2022-08-01 05:49:58,778][train_inner][INFO] - {"epoch": 1, "update": 0.937, "loss": "2.696", "ppl": "6.48", "wps": "363485", "ups": "3.08", "wpb": "118048", "bsz": "256", "num_updates": "48200", "lr": "0.000961414", "gnorm": "0.592", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.3", "wall": "15711"} [2022-08-01 05:51:03,602][train_inner][INFO] - {"epoch": 1, "update": 0.94, "loss": "2.704", "ppl": "6.52", "wps": "364041", "ups": "3.09", "wpb": "117982", "bsz": "256", "num_updates": "48400", "lr": "0.000961212", "gnorm": "0.596", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "19.8", "wall": "15776"} [2022-08-01 05:51:47,961][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-01 05:51:52,437][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-01 05:52:08,678][train_inner][INFO] - {"epoch": 1, "update": 0.944, "loss": "2.69", "ppl": "6.45", "wps": "363203", "ups": "3.07", "wpb": "118177", "bsz": "256", "num_updates": "48600", "lr": "0.00096101", "gnorm": "0.612", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "15841"} [2022-08-01 05:53:13,378][train_inner][INFO] - {"epoch": 1, "update": 0.948, "loss": "2.694", "ppl": "6.47", "wps": "366091", "ups": "3.09", "wpb": "118430", "bsz": "256", "num_updates": "48800", "lr": "0.000960808", "gnorm": "0.589", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.9", "wall": "15906"} [2022-08-01 05:54:18,607][train_inner][INFO] - {"epoch": 1, "update": 0.952, "loss": "2.687", "ppl": "6.44", "wps": "363076", "ups": "3.07", "wpb": "118413", "bsz": "256", "num_updates": "49000", "lr": "0.000960606", "gnorm": "0.594", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.3", "wall": "15971"} [2022-08-01 05:55:22,885][train_inner][INFO] - {"epoch": 1, "update": 0.956, "loss": "2.687", "ppl": "6.44", "wps": "366915", "ups": "3.11", "wpb": "117920", "bsz": "256", "num_updates": "49200", "lr": "0.000960404", "gnorm": "0.59", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20", "wall": "16035"} [2022-08-01 05:56:27,829][train_inner][INFO] - {"epoch": 1, "update": 0.96, "loss": "2.689", "ppl": "6.45", "wps": "365616", "ups": "3.08", "wpb": "118722", "bsz": "256", "num_updates": "49400", "lr": "0.000960202", "gnorm": "0.589", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "16100"} [2022-08-01 05:57:32,897][train_inner][INFO] - {"epoch": 1, "update": 0.964, "loss": "2.689", "ppl": "6.45", "wps": "361562", "ups": "3.07", "wpb": "117629", "bsz": "256", "num_updates": "49600", "lr": "0.00096", "gnorm": "0.595", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "16165"} [2022-08-01 05:58:37,776][train_inner][INFO] - {"epoch": 1, "update": 0.968, "loss": "2.683", "ppl": "6.42", "wps": "363716", "ups": "3.08", "wpb": "117986", "bsz": "256", "num_updates": "49800", "lr": "0.000959798", "gnorm": "0.592", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.1", "wall": "16230"} [2022-08-01 05:59:42,386][train_inner][INFO] - {"epoch": 1, "update": 0.972, "loss": "2.682", "ppl": "6.42", "wps": "366933", "ups": "3.1", "wpb": "118537", "bsz": "256", "num_updates": "50000", "lr": "0.000959596", "gnorm": "0.587", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "24.4", "wall": "16295"} [2022-08-01 05:59:42,388][fairseq_cli.train][INFO] - begin validation on "valid" subset [2022-08-01 05:59:42,395][fairseq.tasks.fairseq_task][WARNING] - 576 samples have invalid sizes and will be skipped, max_positions=512, first few sample ids=[2086, 18643, 32310, 47209, 49755, 37591, 48266, 19397, 19415, 38832] [2022-08-01 06:00:04,987][valid][INFO] - {"epoch": 1, "valid_loss": "2.572", "valid_ppl": "5.95", "valid_wps": "1.61125e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "50000"} [2022-08-01 06:00:04,991][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 50000 updates [2022-08-01 06:00:04,992][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_1_50000.pt [2022-08-01 06:00:20,027][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_1_50000.pt [2022-08-01 06:00:51,853][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/adan2/checkpoint_1_50000.pt (epoch 1 @ 50000 updates, score 2.572) (writing took 46.862448069266975 seconds) [2022-08-01 06:01:56,965][train_inner][INFO] - {"epoch": 1, "update": 0.975, "loss": "2.686", "ppl": "6.43", "wps": "175494", "ups": "1.49", "wpb": "118088", "bsz": "256", "num_updates": "50200", "lr": "0.000959394", "gnorm": "0.593", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.5", "wall": "16429"} [2022-08-01 06:03:01,973][train_inner][INFO] - {"epoch": 1, "update": 0.979, "loss": "2.681", "ppl": "6.41", "wps": "363864", "ups": "3.08", "wpb": "118268", "bsz": "256", "num_updates": "50400", "lr": "0.000959192", "gnorm": "0.593", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.7", "wall": "16494"} [2022-08-01 06:04:06,738][train_inner][INFO] - {"epoch": 1, "update": 0.983, "loss": "2.677", "ppl": "6.4", "wps": "364088", "ups": "3.09", "wpb": "117900", "bsz": "256", "num_updates": "50600", "lr": "0.00095899", "gnorm": "0.592", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21", "wall": "16559"} [2022-08-01 06:05:12,408][train_inner][INFO] - {"epoch": 1, "update": 0.987, "loss": "2.686", "ppl": "6.44", "wps": "360770", "ups": "3.05", "wpb": "118457", "bsz": "256", "num_updates": "50800", "lr": "0.000958788", "gnorm": "0.586", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.9", "wall": "16625"} [2022-08-01 06:06:17,255][train_inner][INFO] - {"epoch": 1, "update": 0.991, "loss": "2.683", "ppl": "6.42", "wps": "364123", "ups": "3.08", "wpb": "118060", "bsz": "256", "num_updates": "51000", "lr": "0.000958586", "gnorm": "0.589", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20", "wall": "16690"} [2022-08-01 06:07:23,352][train_inner][INFO] - {"epoch": 1, "update": 0.995, "loss": "2.678", "ppl": "6.4", "wps": "359124", "ups": "3.03", "wpb": "118684", "bsz": "256", "num_updates": "51200", "lr": "0.000958384", "gnorm": "0.588", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "21.9", "wall": "16756"} [2022-08-01 06:08:28,291][train_inner][INFO] - {"epoch": 1, "update": 0.999, "loss": "2.679", "ppl": "6.4", "wps": "364100", "ups": "3.08", "wpb": "118219", "bsz": "256", "num_updates": "51400", "lr": "0.000958182", "gnorm": "0.586", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.3", "wall": "16821"} [2022-08-01 06:08:50,004][fairseq_cli.train][INFO] - end of epoch 1 (average epoch stats below) [2022-08-01 06:08:50,005][train][INFO] - {"epoch": 1, "train_loss": "3.688", "train_ppl": "12.89", "train_wps": "362334", "train_ups": "3.06", "train_wpb": "118298", "train_bsz": "256", "train_num_updates": "51467", "train_lr": "0.000958114", "train_gnorm": "0.744", "train_clip": "0", "train_loss_scale": "16", "train_train_wall": "16649", "train_gb_free": "22.8", "train_wall": "16842"} [2022-08-01 06:08:50,093][fairseq.data.iterators][INFO] - grouped total_num_itrs = 51488 [2022-08-01 06:08:50,096][fairseq.trainer][INFO] - begin training epoch 2 [2022-08-01 06:08:50,096][fairseq_cli.train][INFO] - Start iterating over samples [2022-08-01 06:09:43,489][train_inner][INFO] - {"epoch": 2, "update": 1.003, "loss": "2.674", "ppl": "6.38", "wps": "312913", "ups": "2.66", "wpb": "117650", "bsz": "255.4", "num_updates": "51600", "lr": "0.00095798", "gnorm": "0.594", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "21.6", "wall": "16896"} [2022-08-01 06:10:48,189][train_inner][INFO] - {"epoch": 2, "update": 1.006, "loss": "2.673", "ppl": "6.38", "wps": "365726", "ups": "3.09", "wpb": "118311", "bsz": "256", "num_updates": "51800", "lr": "0.000957778", "gnorm": "0.588", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "28.2", "wall": "16961"} [2022-08-01 06:11:52,801][train_inner][INFO] - {"epoch": 2, "update": 1.01, "loss": "2.672", "ppl": "6.38", "wps": "364625", "ups": "3.1", "wpb": "117793", "bsz": "256", "num_updates": "52000", "lr": "0.000957576", "gnorm": "0.588", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.7", "wall": "17025"} [2022-08-01 06:12:57,721][train_inner][INFO] - {"epoch": 2, "update": 1.014, "loss": "2.664", "ppl": "6.34", "wps": "365380", "ups": "3.08", "wpb": "118602", "bsz": "256", "num_updates": "52200", "lr": "0.000957374", "gnorm": "0.589", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.7", "wall": "17090"} [2022-08-01 06:14:02,828][train_inner][INFO] - {"epoch": 2, "update": 1.018, "loss": "2.66", "ppl": "6.32", "wps": "363574", "ups": "3.07", "wpb": "118354", "bsz": "256", "num_updates": "52400", "lr": "0.000957172", "gnorm": "0.586", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.3", "wall": "17155"} [2022-08-01 06:15:07,520][train_inner][INFO] - {"epoch": 2, "update": 1.022, "loss": "2.666", "ppl": "6.35", "wps": "364571", "ups": "3.09", "wpb": "117923", "bsz": "256", "num_updates": "52600", "lr": "0.00095697", "gnorm": "0.591", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20", "wall": "17220"} [2022-08-01 06:16:12,548][train_inner][INFO] - {"epoch": 2, "update": 1.026, "loss": "2.667", "ppl": "6.35", "wps": "362542", "ups": "3.08", "wpb": "117876", "bsz": "256", "num_updates": "52800", "lr": "0.000956768", "gnorm": "0.589", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "21.8", "wall": "17285"} [2022-08-01 06:17:17,295][train_inner][INFO] - {"epoch": 2, "update": 1.03, "loss": "2.657", "ppl": "6.31", "wps": "366310", "ups": "3.09", "wpb": "118584", "bsz": "256", "num_updates": "53000", "lr": "0.000956566", "gnorm": "0.586", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20.2", "wall": "17350"} [2022-08-01 06:18:22,004][train_inner][INFO] - {"epoch": 2, "update": 1.034, "loss": "2.659", "ppl": "6.31", "wps": "366596", "ups": "3.09", "wpb": "118610", "bsz": "256", "num_updates": "53200", "lr": "0.000956364", "gnorm": "0.583", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "21.2", "wall": "17414"} [2022-08-01 06:19:27,932][train_inner][INFO] - {"epoch": 2, "update": 1.038, "loss": "2.666", "ppl": "6.35", "wps": "360244", "ups": "3.03", "wpb": "118746", "bsz": "256", "num_updates": "53400", "lr": "0.000956162", "gnorm": "0.587", "clip": "0", "loss_scale": "32", "train_wall": "66", "gb_free": "21.6", "wall": "17480"} [2022-08-01 06:20:32,781][train_inner][INFO] - {"epoch": 2, "update": 1.041, "loss": "2.662", "ppl": "6.33", "wps": "362976", "ups": "3.08", "wpb": "117681", "bsz": "256", "num_updates": "53600", "lr": "0.00095596", "gnorm": "0.589", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20.3", "wall": "17545"} [2022-08-01 06:20:40,154][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-01 06:21:20,235][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-01 06:21:38,163][train_inner][INFO] - {"epoch": 2, "update": 1.045, "loss": "2.66", "ppl": "6.32", "wps": "362479", "ups": "3.06", "wpb": "118498", "bsz": "256", "num_updates": "53800", "lr": "0.000955758", "gnorm": "0.588", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.6", "wall": "17610"} [2022-08-01 06:22:42,712][train_inner][INFO] - {"epoch": 2, "update": 1.049, "loss": "2.662", "ppl": "6.33", "wps": "366301", "ups": "3.1", "wpb": "118220", "bsz": "256", "num_updates": "54000", "lr": "0.000955556", "gnorm": "0.593", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.9", "wall": "17675"} [2022-08-01 06:23:47,830][train_inner][INFO] - {"epoch": 2, "update": 1.053, "loss": "2.653", "ppl": "6.29", "wps": "363954", "ups": "3.07", "wpb": "118498", "bsz": "256", "num_updates": "54200", "lr": "0.000955354", "gnorm": "0.583", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.8", "wall": "17740"} [2022-08-01 06:24:52,296][train_inner][INFO] - {"epoch": 2, "update": 1.057, "loss": "2.651", "ppl": "6.28", "wps": "368792", "ups": "3.1", "wpb": "118871", "bsz": "256", "num_updates": "54400", "lr": "0.000955152", "gnorm": "0.585", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "24.5", "wall": "17805"} [2022-08-01 06:25:57,353][train_inner][INFO] - {"epoch": 2, "update": 1.061, "loss": "2.656", "ppl": "6.3", "wps": "364467", "ups": "3.07", "wpb": "118554", "bsz": "256", "num_updates": "54600", "lr": "0.000954949", "gnorm": "0.583", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.7", "wall": "17870"} [2022-08-01 06:27:02,071][train_inner][INFO] - {"epoch": 2, "update": 1.065, "loss": "2.659", "ppl": "6.31", "wps": "367567", "ups": "3.09", "wpb": "118939", "bsz": "256", "num_updates": "54800", "lr": "0.000954747", "gnorm": "0.58", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.8", "wall": "17934"} [2022-08-01 06:28:07,076][train_inner][INFO] - {"epoch": 2, "update": 1.069, "loss": "2.653", "ppl": "6.29", "wps": "363708", "ups": "3.08", "wpb": "118213", "bsz": "256", "num_updates": "55000", "lr": "0.000954545", "gnorm": "0.581", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.4", "wall": "17999"} [2022-08-01 06:29:11,707][train_inner][INFO] - {"epoch": 2, "update": 1.073, "loss": "2.651", "ppl": "6.28", "wps": "364310", "ups": "3.09", "wpb": "117726", "bsz": "256", "num_updates": "55200", "lr": "0.000954343", "gnorm": "0.583", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.3", "wall": "18064"} [2022-08-01 06:30:16,308][train_inner][INFO] - {"epoch": 2, "update": 1.076, "loss": "2.651", "ppl": "6.28", "wps": "365568", "ups": "3.1", "wpb": "118080", "bsz": "256", "num_updates": "55400", "lr": "0.000954141", "gnorm": "0.585", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23.3", "wall": "18129"} [2022-08-01 06:31:20,859][train_inner][INFO] - {"epoch": 2, "update": 1.08, "loss": "2.653", "ppl": "6.29", "wps": "367066", "ups": "3.1", "wpb": "118471", "bsz": "256", "num_updates": "55600", "lr": "0.000953939", "gnorm": "0.593", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.7", "wall": "18193"} [2022-08-01 06:32:25,588][train_inner][INFO] - {"epoch": 2, "update": 1.084, "loss": "2.661", "ppl": "6.32", "wps": "364839", "ups": "3.09", "wpb": "118076", "bsz": "256", "num_updates": "55800", "lr": "0.000953737", "gnorm": "0.588", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "18258"} [2022-08-01 06:33:29,902][train_inner][INFO] - {"epoch": 2, "update": 1.088, "loss": "2.652", "ppl": "6.29", "wps": "367557", "ups": "3.11", "wpb": "118194", "bsz": "256", "num_updates": "56000", "lr": "0.000953535", "gnorm": "0.583", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.9", "wall": "18322"} [2022-08-01 06:34:34,717][train_inner][INFO] - {"epoch": 2, "update": 1.092, "loss": "2.641", "ppl": "6.24", "wps": "366871", "ups": "3.09", "wpb": "118891", "bsz": "256", "num_updates": "56200", "lr": "0.000953333", "gnorm": "0.581", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.9", "wall": "18387"} [2022-08-01 06:35:39,781][train_inner][INFO] - {"epoch": 2, "update": 1.096, "loss": "2.644", "ppl": "6.25", "wps": "363573", "ups": "3.07", "wpb": "118275", "bsz": "256", "num_updates": "56400", "lr": "0.000953131", "gnorm": "0.587", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "18452"} [2022-08-01 06:36:44,694][train_inner][INFO] - {"epoch": 2, "update": 1.1, "loss": "2.639", "ppl": "6.23", "wps": "364582", "ups": "3.08", "wpb": "118330", "bsz": "256", "num_updates": "56600", "lr": "0.000952929", "gnorm": "0.582", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.6", "wall": "18517"} [2022-08-01 06:37:49,982][train_inner][INFO] - {"epoch": 2, "update": 1.104, "loss": "2.647", "ppl": "6.26", "wps": "362119", "ups": "3.06", "wpb": "118207", "bsz": "256", "num_updates": "56800", "lr": "0.000952727", "gnorm": "0.596", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.9", "wall": "18582"} [2022-08-01 06:38:54,678][train_inner][INFO] - {"epoch": 2, "update": 1.108, "loss": "2.639", "ppl": "6.23", "wps": "366008", "ups": "3.09", "wpb": "118395", "bsz": "256", "num_updates": "57000", "lr": "0.000952525", "gnorm": "0.583", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23", "wall": "18647"} [2022-08-01 06:39:59,351][train_inner][INFO] - {"epoch": 2, "update": 1.111, "loss": "2.651", "ppl": "6.28", "wps": "364708", "ups": "3.09", "wpb": "117931", "bsz": "256", "num_updates": "57200", "lr": "0.000952323", "gnorm": "0.589", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.5", "wall": "18712"} [2022-08-01 06:41:03,989][train_inner][INFO] - {"epoch": 2, "update": 1.115, "loss": "2.642", "ppl": "6.24", "wps": "365393", "ups": "3.09", "wpb": "118090", "bsz": "256", "num_updates": "57400", "lr": "0.000952121", "gnorm": "0.585", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.4", "wall": "18776"} [2022-08-01 06:42:08,986][train_inner][INFO] - {"epoch": 2, "update": 1.119, "loss": "2.636", "ppl": "6.22", "wps": "366180", "ups": "3.08", "wpb": "119001", "bsz": "256", "num_updates": "57600", "lr": "0.000951919", "gnorm": "0.592", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "18841"} [2022-08-01 06:43:13,943][train_inner][INFO] - {"epoch": 2, "update": 1.123, "loss": "2.635", "ppl": "6.21", "wps": "367540", "ups": "3.08", "wpb": "119370", "bsz": "256", "num_updates": "57800", "lr": "0.000951717", "gnorm": "0.587", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "18906"} [2022-08-01 06:43:57,774][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-01 06:44:19,256][train_inner][INFO] - {"epoch": 2, "update": 1.127, "loss": "2.639", "ppl": "6.23", "wps": "362031", "ups": "3.06", "wpb": "118225", "bsz": "256", "num_updates": "58000", "lr": "0.000951515", "gnorm": "0.586", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "18972"} [2022-08-01 06:45:24,460][train_inner][INFO] - {"epoch": 2, "update": 1.131, "loss": "2.637", "ppl": "6.22", "wps": "361222", "ups": "3.07", "wpb": "117766", "bsz": "256", "num_updates": "58200", "lr": "0.000951313", "gnorm": "0.585", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.5", "wall": "19037"} [2022-08-01 06:46:29,111][train_inner][INFO] - {"epoch": 2, "update": 1.135, "loss": "2.641", "ppl": "6.24", "wps": "364936", "ups": "3.09", "wpb": "117965", "bsz": "256", "num_updates": "58400", "lr": "0.000951111", "gnorm": "0.584", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.9", "wall": "19101"} [2022-08-01 06:47:33,982][train_inner][INFO] - {"epoch": 2, "update": 1.139, "loss": "2.634", "ppl": "6.21", "wps": "364570", "ups": "3.08", "wpb": "118250", "bsz": "256", "num_updates": "58600", "lr": "0.000950909", "gnorm": "0.586", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.1", "wall": "19166"} [2022-08-01 06:48:38,839][train_inner][INFO] - {"epoch": 2, "update": 1.142, "loss": "2.635", "ppl": "6.21", "wps": "363855", "ups": "3.08", "wpb": "117990", "bsz": "256", "num_updates": "58800", "lr": "0.000950707", "gnorm": "0.584", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.5", "wall": "19231"} [2022-08-01 06:49:30,424][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-01 06:49:43,424][train_inner][INFO] - {"epoch": 2, "update": 1.146, "loss": "2.637", "ppl": "6.22", "wps": "364492", "ups": "3.1", "wpb": "117702", "bsz": "256", "num_updates": "59000", "lr": "0.000950505", "gnorm": "0.585", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.8", "wall": "19296"} [2022-08-01 06:50:48,418][train_inner][INFO] - {"epoch": 2, "update": 1.15, "loss": "2.633", "ppl": "6.2", "wps": "364987", "ups": "3.08", "wpb": "118610", "bsz": "256", "num_updates": "59200", "lr": "0.000950303", "gnorm": "0.583", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "19361"} [2022-08-01 06:51:16,563][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 [2022-08-01 06:51:53,594][train_inner][INFO] - {"epoch": 2, "update": 1.154, "loss": "2.63", "ppl": "6.19", "wps": "362574", "ups": "3.07", "wpb": "118153", "bsz": "256", "num_updates": "59400", "lr": "0.000950101", "gnorm": "0.584", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.3", "wall": "19426"} [2022-08-01 06:52:58,453][train_inner][INFO] - {"epoch": 2, "update": 1.158, "loss": "2.63", "ppl": "6.19", "wps": "365884", "ups": "3.08", "wpb": "118654", "bsz": "256", "num_updates": "59600", "lr": "0.000949899", "gnorm": "0.584", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20", "wall": "19491"} [2022-08-01 06:54:03,075][train_inner][INFO] - {"epoch": 2, "update": 1.162, "loss": "2.629", "ppl": "6.18", "wps": "367569", "ups": "3.09", "wpb": "118763", "bsz": "256", "num_updates": "59800", "lr": "0.000949697", "gnorm": "0.583", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.2", "wall": "19555"} [2022-08-01 06:55:07,762][train_inner][INFO] - {"epoch": 2, "update": 1.166, "loss": "2.628", "ppl": "6.18", "wps": "365643", "ups": "3.09", "wpb": "118261", "bsz": "256", "num_updates": "60000", "lr": "0.000949495", "gnorm": "0.585", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22.3", "wall": "19620"} [2022-08-01 06:56:12,744][train_inner][INFO] - {"epoch": 2, "update": 1.17, "loss": "2.623", "ppl": "6.16", "wps": "367475", "ups": "3.08", "wpb": "119394", "bsz": "256", "num_updates": "60200", "lr": "0.000949293", "gnorm": "0.583", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.8", "wall": "19685"} [2022-08-01 06:57:17,968][train_inner][INFO] - {"epoch": 2, "update": 1.174, "loss": "2.623", "ppl": "6.16", "wps": "364274", "ups": "3.07", "wpb": "118796", "bsz": "256", "num_updates": "60400", "lr": "0.000949091", "gnorm": "0.58", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "24.3", "wall": "19750"} [2022-08-01 06:58:22,817][train_inner][INFO] - {"epoch": 2, "update": 1.177, "loss": "2.633", "ppl": "6.2", "wps": "363967", "ups": "3.08", "wpb": "118013", "bsz": "256", "num_updates": "60600", "lr": "0.000948889", "gnorm": "0.582", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.3", "wall": "19815"} [2022-08-01 06:59:27,341][train_inner][INFO] - {"epoch": 2, "update": 1.181, "loss": "2.626", "ppl": "6.17", "wps": "367055", "ups": "3.1", "wpb": "118417", "bsz": "256", "num_updates": "60800", "lr": "0.000948687", "gnorm": "0.583", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.8", "wall": "19880"} [2022-08-01 07:00:31,888][train_inner][INFO] - {"epoch": 2, "update": 1.185, "loss": "2.624", "ppl": "6.17", "wps": "366518", "ups": "3.1", "wpb": "118287", "bsz": "256", "num_updates": "61000", "lr": "0.000948485", "gnorm": "0.584", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20", "wall": "19944"} [2022-08-01 07:01:36,853][train_inner][INFO] - {"epoch": 2, "update": 1.189, "loss": "2.628", "ppl": "6.18", "wps": "363608", "ups": "3.08", "wpb": "118107", "bsz": "256", "num_updates": "61200", "lr": "0.000948283", "gnorm": "0.584", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.2", "wall": "20009"} [2022-08-01 07:02:41,453][train_inner][INFO] - {"epoch": 2, "update": 1.193, "loss": "2.629", "ppl": "6.18", "wps": "364928", "ups": "3.1", "wpb": "117870", "bsz": "256", "num_updates": "61400", "lr": "0.000948081", "gnorm": "0.583", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.3", "wall": "20074"} [2022-08-01 07:03:46,362][train_inner][INFO] - {"epoch": 2, "update": 1.197, "loss": "2.617", "ppl": "6.14", "wps": "364661", "ups": "3.08", "wpb": "118349", "bsz": "256", "num_updates": "61600", "lr": "0.000947879", "gnorm": "0.58", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.7", "wall": "20139"} [2022-08-01 07:04:50,932][train_inner][INFO] - {"epoch": 2, "update": 1.201, "loss": "2.625", "ppl": "6.17", "wps": "366632", "ups": "3.1", "wpb": "118365", "bsz": "256", "num_updates": "61800", "lr": "0.000947677", "gnorm": "0.584", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.6", "wall": "20203"} [2022-08-01 07:05:55,992][train_inner][INFO] - {"epoch": 2, "update": 1.205, "loss": "2.618", "ppl": "6.14", "wps": "363784", "ups": "3.07", "wpb": "118338", "bsz": "256", "num_updates": "62000", "lr": "0.000947475", "gnorm": "0.59", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "20268"} [2022-08-01 07:07:00,700][train_inner][INFO] - {"epoch": 2, "update": 1.209, "loss": "2.624", "ppl": "6.16", "wps": "367470", "ups": "3.09", "wpb": "118888", "bsz": "256", "num_updates": "62200", "lr": "0.000947273", "gnorm": "0.583", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.8", "wall": "20333"} [2022-08-01 07:08:05,395][train_inner][INFO] - {"epoch": 2, "update": 1.212, "loss": "2.614", "ppl": "6.12", "wps": "367091", "ups": "3.09", "wpb": "118744", "bsz": "256", "num_updates": "62400", "lr": "0.000947071", "gnorm": "0.582", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.5", "wall": "20398"} [2022-08-01 07:09:10,088][train_inner][INFO] - {"epoch": 2, "update": 1.216, "loss": "2.615", "ppl": "6.12", "wps": "366082", "ups": "3.09", "wpb": "118413", "bsz": "256", "num_updates": "62600", "lr": "0.000946869", "gnorm": "0.584", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21", "wall": "20462"} [2022-08-01 07:10:15,069][train_inner][INFO] - {"epoch": 2, "update": 1.22, "loss": "2.618", "ppl": "6.14", "wps": "366199", "ups": "3.08", "wpb": "118977", "bsz": "256", "num_updates": "62800", "lr": "0.000946667", "gnorm": "0.585", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "20527"} [2022-08-01 07:11:20,222][train_inner][INFO] - {"epoch": 2, "update": 1.224, "loss": "2.614", "ppl": "6.12", "wps": "364782", "ups": "3.07", "wpb": "118833", "bsz": "255.9", "num_updates": "63000", "lr": "0.000946465", "gnorm": "0.586", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "20593"} [2022-08-01 07:12:25,181][train_inner][INFO] - {"epoch": 2, "update": 1.228, "loss": "2.618", "ppl": "6.14", "wps": "363641", "ups": "3.08", "wpb": "118106", "bsz": "256", "num_updates": "63200", "lr": "0.000946263", "gnorm": "0.584", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.9", "wall": "20658"} [2022-08-01 07:13:29,981][train_inner][INFO] - {"epoch": 2, "update": 1.232, "loss": "2.612", "ppl": "6.11", "wps": "366254", "ups": "3.09", "wpb": "118665", "bsz": "256", "num_updates": "63400", "lr": "0.000946061", "gnorm": "0.581", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.9", "wall": "20722"} [2022-08-01 07:14:34,610][train_inner][INFO] - {"epoch": 2, "update": 1.236, "loss": "2.609", "ppl": "6.1", "wps": "366563", "ups": "3.09", "wpb": "118450", "bsz": "256", "num_updates": "63600", "lr": "0.000945859", "gnorm": "0.582", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "26.2", "wall": "20787"} [2022-08-01 07:15:39,563][train_inner][INFO] - {"epoch": 2, "update": 1.24, "loss": "2.615", "ppl": "6.13", "wps": "363531", "ups": "3.08", "wpb": "118062", "bsz": "256", "num_updates": "63800", "lr": "0.000945657", "gnorm": "0.589", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "20852"} [2022-08-01 07:16:44,496][train_inner][INFO] - {"epoch": 2, "update": 1.244, "loss": "2.614", "ppl": "6.12", "wps": "364089", "ups": "3.08", "wpb": "118205", "bsz": "256", "num_updates": "64000", "lr": "0.000945455", "gnorm": "0.583", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.4", "wall": "20917"} [2022-08-01 07:17:49,616][train_inner][INFO] - {"epoch": 2, "update": 1.247, "loss": "2.611", "ppl": "6.11", "wps": "361215", "ups": "3.07", "wpb": "117608", "bsz": "256", "num_updates": "64200", "lr": "0.000945253", "gnorm": "0.585", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.4", "wall": "20982"} [2022-08-01 07:18:54,460][train_inner][INFO] - {"epoch": 2, "update": 1.251, "loss": "2.614", "ppl": "6.12", "wps": "361416", "ups": "3.08", "wpb": "117178", "bsz": "256", "num_updates": "64400", "lr": "0.000945051", "gnorm": "0.584", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21", "wall": "21047"} [2022-08-01 07:19:59,374][train_inner][INFO] - {"epoch": 2, "update": 1.255, "loss": "2.609", "ppl": "6.1", "wps": "363558", "ups": "3.08", "wpb": "117997", "bsz": "256", "num_updates": "64600", "lr": "0.000944848", "gnorm": "0.582", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "21112"} [2022-08-01 07:21:04,133][train_inner][INFO] - {"epoch": 2, "update": 1.259, "loss": "2.615", "ppl": "6.13", "wps": "363300", "ups": "3.09", "wpb": "117634", "bsz": "256", "num_updates": "64800", "lr": "0.000944646", "gnorm": "0.587", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.9", "wall": "21176"} [2022-08-01 07:22:09,746][train_inner][INFO] - {"epoch": 2, "update": 1.263, "loss": "2.607", "ppl": "6.09", "wps": "361010", "ups": "3.05", "wpb": "118432", "bsz": "256", "num_updates": "65000", "lr": "0.000944444", "gnorm": "0.582", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.5", "wall": "21242"} [2022-08-01 07:23:14,979][train_inner][INFO] - {"epoch": 2, "update": 1.267, "loss": "2.606", "ppl": "6.09", "wps": "364255", "ups": "3.07", "wpb": "118806", "bsz": "256", "num_updates": "65200", "lr": "0.000944242", "gnorm": "0.583", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "21307"} [2022-08-01 07:24:19,313][train_inner][INFO] - {"epoch": 2, "update": 1.271, "loss": "2.601", "ppl": "6.07", "wps": "367759", "ups": "3.11", "wpb": "118296", "bsz": "256", "num_updates": "65400", "lr": "0.00094404", "gnorm": "0.582", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "21372"} [2022-08-01 07:25:24,275][train_inner][INFO] - {"epoch": 2, "update": 1.275, "loss": "2.608", "ppl": "6.1", "wps": "364452", "ups": "3.08", "wpb": "118376", "bsz": "256", "num_updates": "65600", "lr": "0.000943838", "gnorm": "0.594", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "25.3", "wall": "21437"} [2022-08-01 07:25:58,757][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-01 07:26:29,411][train_inner][INFO] - {"epoch": 2, "update": 1.278, "loss": "2.601", "ppl": "6.07", "wps": "363952", "ups": "3.07", "wpb": "118530", "bsz": "256", "num_updates": "65800", "lr": "0.000943636", "gnorm": "0.581", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "21502"} [2022-08-01 07:27:34,175][train_inner][INFO] - {"epoch": 2, "update": 1.282, "loss": "2.606", "ppl": "6.09", "wps": "364663", "ups": "3.09", "wpb": "118085", "bsz": "256", "num_updates": "66000", "lr": "0.000943434", "gnorm": "0.58", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "25.6", "wall": "21566"} [2022-08-01 07:28:39,278][train_inner][INFO] - {"epoch": 2, "update": 1.286, "loss": "2.601", "ppl": "6.07", "wps": "364681", "ups": "3.07", "wpb": "118706", "bsz": "256", "num_updates": "66200", "lr": "0.000943232", "gnorm": "0.581", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.6", "wall": "21632"} [2022-08-01 07:29:45,180][train_inner][INFO] - {"epoch": 2, "update": 1.29, "loss": "2.605", "ppl": "6.08", "wps": "357707", "ups": "3.03", "wpb": "117867", "bsz": "256", "num_updates": "66400", "lr": "0.00094303", "gnorm": "0.583", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "23.1", "wall": "21698"} [2022-08-01 07:30:50,076][train_inner][INFO] - {"epoch": 2, "update": 1.294, "loss": "2.602", "ppl": "6.07", "wps": "362944", "ups": "3.08", "wpb": "117766", "bsz": "256", "num_updates": "66600", "lr": "0.000942828", "gnorm": "0.592", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "21762"} [2022-08-01 07:31:54,776][train_inner][INFO] - {"epoch": 2, "update": 1.298, "loss": "2.602", "ppl": "6.07", "wps": "364216", "ups": "3.09", "wpb": "117822", "bsz": "256", "num_updates": "66800", "lr": "0.000942626", "gnorm": "0.584", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.9", "wall": "21827"} [2022-08-01 07:32:59,685][train_inner][INFO] - {"epoch": 2, "update": 1.302, "loss": "2.596", "ppl": "6.04", "wps": "367756", "ups": "3.08", "wpb": "119352", "bsz": "256", "num_updates": "67000", "lr": "0.000942424", "gnorm": "0.579", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "27.2", "wall": "21892"} [2022-08-01 07:34:04,425][train_inner][INFO] - {"epoch": 2, "update": 1.306, "loss": "2.594", "ppl": "6.04", "wps": "365476", "ups": "3.09", "wpb": "118302", "bsz": "256", "num_updates": "67200", "lr": "0.000942222", "gnorm": "0.584", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.3", "wall": "21957"} [2022-08-01 07:35:09,263][train_inner][INFO] - {"epoch": 2, "update": 1.31, "loss": "2.591", "ppl": "6.03", "wps": "366092", "ups": "3.08", "wpb": "118683", "bsz": "256", "num_updates": "67400", "lr": "0.00094202", "gnorm": "0.581", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.3", "wall": "22022"} [2022-08-01 07:36:14,075][train_inner][INFO] - {"epoch": 2, "update": 1.313, "loss": "2.596", "ppl": "6.05", "wps": "364662", "ups": "3.09", "wpb": "118170", "bsz": "256", "num_updates": "67600", "lr": "0.000941818", "gnorm": "0.584", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.1", "wall": "22086"} [2022-08-01 07:36:26,576][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-01 07:37:18,908][train_inner][INFO] - {"epoch": 2, "update": 1.317, "loss": "2.604", "ppl": "6.08", "wps": "364984", "ups": "3.08", "wpb": "118313", "bsz": "256", "num_updates": "67800", "lr": "0.000941616", "gnorm": "0.586", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.8", "wall": "22151"} [2022-08-01 07:38:23,212][train_inner][INFO] - {"epoch": 2, "update": 1.321, "loss": "2.592", "ppl": "6.03", "wps": "369343", "ups": "3.11", "wpb": "118752", "bsz": "256", "num_updates": "68000", "lr": "0.000941414", "gnorm": "0.581", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.2", "wall": "22216"} [2022-08-01 07:39:28,308][train_inner][INFO] - {"epoch": 2, "update": 1.325, "loss": "2.597", "ppl": "6.05", "wps": "363637", "ups": "3.07", "wpb": "118353", "bsz": "256", "num_updates": "68200", "lr": "0.000941212", "gnorm": "0.588", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.6", "wall": "22281"} [2022-08-01 07:40:33,588][train_inner][INFO] - {"epoch": 2, "update": 1.329, "loss": "2.596", "ppl": "6.04", "wps": "363641", "ups": "3.06", "wpb": "118692", "bsz": "256", "num_updates": "68400", "lr": "0.00094101", "gnorm": "0.58", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.8", "wall": "22346"} [2022-08-01 07:41:39,238][train_inner][INFO] - {"epoch": 2, "update": 1.333, "loss": "2.595", "ppl": "6.04", "wps": "359568", "ups": "3.05", "wpb": "118026", "bsz": "256", "num_updates": "68600", "lr": "0.000940808", "gnorm": "0.595", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.8", "wall": "22412"} [2022-08-01 07:42:44,035][train_inner][INFO] - {"epoch": 2, "update": 1.337, "loss": "2.588", "ppl": "6.01", "wps": "365914", "ups": "3.09", "wpb": "118539", "bsz": "256", "num_updates": "68800", "lr": "0.000940606", "gnorm": "0.583", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "25.2", "wall": "22476"} [2022-08-01 07:43:49,363][train_inner][INFO] - {"epoch": 2, "update": 1.341, "loss": "2.59", "ppl": "6.02", "wps": "360753", "ups": "3.06", "wpb": "117834", "bsz": "256", "num_updates": "69000", "lr": "0.000940404", "gnorm": "0.595", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.6", "wall": "22542"} [2022-08-01 07:44:54,389][train_inner][INFO] - {"epoch": 2, "update": 1.345, "loss": "2.583", "ppl": "5.99", "wps": "365542", "ups": "3.08", "wpb": "118847", "bsz": "256", "num_updates": "69200", "lr": "0.000940202", "gnorm": "0.591", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22", "wall": "22607"} [2022-08-01 07:45:59,592][train_inner][INFO] - {"epoch": 2, "update": 1.348, "loss": "2.588", "ppl": "6.01", "wps": "365367", "ups": "3.07", "wpb": "119113", "bsz": "256", "num_updates": "69400", "lr": "0.00094", "gnorm": "0.582", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24", "wall": "22672"} [2022-08-01 07:47:04,848][train_inner][INFO] - {"epoch": 2, "update": 1.352, "loss": "2.597", "ppl": "6.05", "wps": "362663", "ups": "3.06", "wpb": "118328", "bsz": "256", "num_updates": "69600", "lr": "0.000939798", "gnorm": "0.588", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.3", "wall": "22737"} [2022-08-01 07:48:09,936][train_inner][INFO] - {"epoch": 2, "update": 1.356, "loss": "2.588", "ppl": "6.01", "wps": "364810", "ups": "3.07", "wpb": "118722", "bsz": "256", "num_updates": "69800", "lr": "0.000939596", "gnorm": "0.591", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "22802"} [2022-08-01 07:49:14,508][train_inner][INFO] - {"epoch": 2, "update": 1.36, "loss": "2.588", "ppl": "6.01", "wps": "365987", "ups": "3.1", "wpb": "118162", "bsz": "256", "num_updates": "70000", "lr": "0.000939394", "gnorm": "0.585", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.8", "wall": "22867"} [2022-08-01 07:49:38,908][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-01 07:50:19,521][train_inner][INFO] - {"epoch": 2, "update": 1.364, "loss": "2.588", "ppl": "6.01", "wps": "364807", "ups": "3.08", "wpb": "118584", "bsz": "256", "num_updates": "70200", "lr": "0.000939192", "gnorm": "0.585", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.1", "wall": "22932"} [2022-08-01 07:51:24,361][train_inner][INFO] - {"epoch": 2, "update": 1.368, "loss": "2.579", "ppl": "5.97", "wps": "364875", "ups": "3.08", "wpb": "118292", "bsz": "256", "num_updates": "70400", "lr": "0.00093899", "gnorm": "0.582", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.6", "wall": "22997"} [2022-08-01 07:52:29,678][train_inner][INFO] - {"epoch": 2, "update": 1.372, "loss": "2.593", "ppl": "6.03", "wps": "362199", "ups": "3.06", "wpb": "118287", "bsz": "256", "num_updates": "70600", "lr": "0.000938788", "gnorm": "0.582", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "23062"} [2022-08-01 07:53:34,652][train_inner][INFO] - {"epoch": 2, "update": 1.376, "loss": "2.58", "ppl": "5.98", "wps": "363378", "ups": "3.08", "wpb": "118049", "bsz": "256", "num_updates": "70800", "lr": "0.000938586", "gnorm": "0.586", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "23127"} [2022-08-01 07:54:39,485][train_inner][INFO] - {"epoch": 2, "update": 1.38, "loss": "2.591", "ppl": "6.03", "wps": "363717", "ups": "3.08", "wpb": "117902", "bsz": "256", "num_updates": "71000", "lr": "0.000938384", "gnorm": "0.592", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "24.2", "wall": "23192"} [2022-08-01 07:55:44,382][train_inner][INFO] - {"epoch": 2, "update": 1.383, "loss": "2.586", "ppl": "6.01", "wps": "363498", "ups": "3.08", "wpb": "117949", "bsz": "256", "num_updates": "71200", "lr": "0.000938182", "gnorm": "0.592", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.7", "wall": "23257"} [2022-08-01 07:56:49,245][train_inner][INFO] - {"epoch": 2, "update": 1.387, "loss": "2.589", "ppl": "6.02", "wps": "362436", "ups": "3.08", "wpb": "117540", "bsz": "256", "num_updates": "71400", "lr": "0.00093798", "gnorm": "0.588", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.7", "wall": "23322"} [2022-08-01 07:57:53,816][train_inner][INFO] - {"epoch": 2, "update": 1.391, "loss": "2.586", "ppl": "6", "wps": "365958", "ups": "3.1", "wpb": "118150", "bsz": "256", "num_updates": "71600", "lr": "0.000937778", "gnorm": "0.587", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.3", "wall": "23386"} [2022-08-01 07:58:59,582][train_inner][INFO] - {"epoch": 2, "update": 1.395, "loss": "2.58", "ppl": "5.98", "wps": "359777", "ups": "3.04", "wpb": "118304", "bsz": "256", "num_updates": "71800", "lr": "0.000937576", "gnorm": "0.584", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.5", "wall": "23452"} [2022-08-01 08:00:05,390][train_inner][INFO] - {"epoch": 2, "update": 1.399, "loss": "2.587", "ppl": "6.01", "wps": "359149", "ups": "3.04", "wpb": "118174", "bsz": "256", "num_updates": "72000", "lr": "0.000937374", "gnorm": "0.585", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "23518"} [2022-08-01 08:01:09,957][train_inner][INFO] - {"epoch": 2, "update": 1.403, "loss": "2.582", "ppl": "5.99", "wps": "365965", "ups": "3.1", "wpb": "118145", "bsz": "256", "num_updates": "72200", "lr": "0.000937172", "gnorm": "0.583", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.7", "wall": "23582"} [2022-08-01 08:02:14,849][train_inner][INFO] - {"epoch": 2, "update": 1.407, "loss": "2.584", "ppl": "6", "wps": "364448", "ups": "3.08", "wpb": "118247", "bsz": "256", "num_updates": "72400", "lr": "0.00093697", "gnorm": "0.582", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "28.2", "wall": "23647"} [2022-08-01 08:02:18,952][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-01 08:03:20,148][train_inner][INFO] - {"epoch": 2, "update": 1.411, "loss": "2.578", "ppl": "5.97", "wps": "363675", "ups": "3.06", "wpb": "118736", "bsz": "256", "num_updates": "72600", "lr": "0.000936768", "gnorm": "0.588", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.7", "wall": "23712"} [2022-08-01 08:04:25,206][train_inner][INFO] - {"epoch": 2, "update": 1.415, "loss": "2.58", "ppl": "5.98", "wps": "363871", "ups": "3.07", "wpb": "118362", "bsz": "256", "num_updates": "72800", "lr": "0.000936566", "gnorm": "0.587", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.6", "wall": "23778"} [2022-08-01 08:05:30,237][train_inner][INFO] - {"epoch": 2, "update": 1.418, "loss": "2.579", "ppl": "5.97", "wps": "363089", "ups": "3.08", "wpb": "118058", "bsz": "256", "num_updates": "73000", "lr": "0.000936364", "gnorm": "0.597", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "23843"} [2022-08-01 08:06:34,958][train_inner][INFO] - {"epoch": 2, "update": 1.422, "loss": "2.57", "ppl": "5.94", "wps": "366351", "ups": "3.09", "wpb": "118552", "bsz": "256", "num_updates": "73200", "lr": "0.000936162", "gnorm": "0.585", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.8", "wall": "23907"} [2022-08-01 08:07:39,882][train_inner][INFO] - {"epoch": 2, "update": 1.426, "loss": "2.571", "ppl": "5.94", "wps": "364598", "ups": "3.08", "wpb": "118354", "bsz": "256", "num_updates": "73400", "lr": "0.00093596", "gnorm": "0.585", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "23972"} [2022-08-01 08:08:45,369][train_inner][INFO] - {"epoch": 2, "update": 1.43, "loss": "2.569", "ppl": "5.93", "wps": "363060", "ups": "3.05", "wpb": "118876", "bsz": "256", "num_updates": "73600", "lr": "0.000935758", "gnorm": "0.586", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.5", "wall": "24038"} [2022-08-01 08:09:50,210][train_inner][INFO] - {"epoch": 2, "update": 1.434, "loss": "2.576", "ppl": "5.96", "wps": "363203", "ups": "3.08", "wpb": "117752", "bsz": "256", "num_updates": "73800", "lr": "0.000935556", "gnorm": "0.591", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23.1", "wall": "24103"} [2022-08-01 08:10:54,827][train_inner][INFO] - {"epoch": 2, "update": 1.438, "loss": "2.575", "ppl": "5.96", "wps": "366350", "ups": "3.1", "wpb": "118361", "bsz": "256", "num_updates": "74000", "lr": "0.000935354", "gnorm": "0.586", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.9", "wall": "24167"} [2022-08-01 08:12:00,567][train_inner][INFO] - {"epoch": 2, "update": 1.442, "loss": "2.571", "ppl": "5.94", "wps": "359076", "ups": "3.04", "wpb": "118026", "bsz": "256", "num_updates": "74200", "lr": "0.000935152", "gnorm": "0.586", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.3", "wall": "24233"} [2022-08-01 08:13:06,239][train_inner][INFO] - {"epoch": 2, "update": 1.446, "loss": "2.577", "ppl": "5.96", "wps": "359472", "ups": "3.05", "wpb": "118035", "bsz": "256", "num_updates": "74400", "lr": "0.000934949", "gnorm": "0.59", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.7", "wall": "24299"} [2022-08-01 08:14:11,248][train_inner][INFO] - {"epoch": 2, "update": 1.449, "loss": "2.573", "ppl": "5.95", "wps": "363409", "ups": "3.08", "wpb": "118123", "bsz": "256", "num_updates": "74600", "lr": "0.000934747", "gnorm": "0.586", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "24364"} [2022-08-01 08:15:16,399][train_inner][INFO] - {"epoch": 2, "update": 1.453, "loss": "2.568", "ppl": "5.93", "wps": "363771", "ups": "3.07", "wpb": "118498", "bsz": "256", "num_updates": "74800", "lr": "0.000934545", "gnorm": "0.584", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.5", "wall": "24429"} [2022-08-01 08:16:21,505][train_inner][INFO] - {"epoch": 2, "update": 1.457, "loss": "2.567", "ppl": "5.93", "wps": "365674", "ups": "3.07", "wpb": "119036", "bsz": "256", "num_updates": "75000", "lr": "0.000934343", "gnorm": "0.585", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.3", "wall": "24494"} [2022-08-01 08:17:26,172][train_inner][INFO] - {"epoch": 2, "update": 1.461, "loss": "2.569", "ppl": "5.93", "wps": "365356", "ups": "3.09", "wpb": "118131", "bsz": "256", "num_updates": "75200", "lr": "0.000934141", "gnorm": "0.583", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.4", "wall": "24558"} [2022-08-01 08:18:30,887][train_inner][INFO] - {"epoch": 2, "update": 1.465, "loss": "2.57", "ppl": "5.94", "wps": "364874", "ups": "3.09", "wpb": "118062", "bsz": "255.9", "num_updates": "75400", "lr": "0.000933939", "gnorm": "0.588", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "24623"} [2022-08-01 08:19:35,823][train_inner][INFO] - {"epoch": 2, "update": 1.469, "loss": "2.57", "ppl": "5.94", "wps": "365448", "ups": "3.08", "wpb": "118653", "bsz": "256", "num_updates": "75600", "lr": "0.000933737", "gnorm": "0.583", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.2", "wall": "24688"} [2022-08-01 08:20:40,779][train_inner][INFO] - {"epoch": 2, "update": 1.473, "loss": "2.569", "ppl": "5.93", "wps": "362488", "ups": "3.08", "wpb": "117726", "bsz": "256", "num_updates": "75800", "lr": "0.000933535", "gnorm": "0.587", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.4", "wall": "24753"} [2022-08-01 08:21:45,604][train_inner][INFO] - {"epoch": 2, "update": 1.477, "loss": "2.565", "ppl": "5.92", "wps": "366040", "ups": "3.09", "wpb": "118643", "bsz": "256", "num_updates": "76000", "lr": "0.000933333", "gnorm": "0.587", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.2", "wall": "24818"} [2022-08-01 08:22:50,618][train_inner][INFO] - {"epoch": 2, "update": 1.481, "loss": "2.561", "ppl": "5.9", "wps": "365922", "ups": "3.08", "wpb": "118947", "bsz": "256", "num_updates": "76200", "lr": "0.000933131", "gnorm": "0.585", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.7", "wall": "24883"} [2022-08-01 08:23:55,494][train_inner][INFO] - {"epoch": 2, "update": 1.484, "loss": "2.572", "ppl": "5.95", "wps": "363344", "ups": "3.08", "wpb": "117861", "bsz": "256", "num_updates": "76400", "lr": "0.000932929", "gnorm": "0.589", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "24948"} [2022-08-01 08:25:00,161][train_inner][INFO] - {"epoch": 2, "update": 1.488, "loss": "2.577", "ppl": "5.97", "wps": "363490", "ups": "3.09", "wpb": "117526", "bsz": "256", "num_updates": "76600", "lr": "0.000932727", "gnorm": "0.588", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "21.9", "wall": "25012"} [2022-08-01 08:26:05,153][train_inner][INFO] - {"epoch": 2, "update": 1.492, "loss": "2.565", "ppl": "5.92", "wps": "364308", "ups": "3.08", "wpb": "118384", "bsz": "256", "num_updates": "76800", "lr": "0.000932525", "gnorm": "0.59", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "22", "wall": "25077"} [2022-08-01 08:27:10,147][train_inner][INFO] - {"epoch": 2, "update": 1.496, "loss": "2.556", "ppl": "5.88", "wps": "365484", "ups": "3.08", "wpb": "118771", "bsz": "256", "num_updates": "77000", "lr": "0.000932323", "gnorm": "0.587", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "23.4", "wall": "25142"} [2022-08-01 08:28:15,304][train_inner][INFO] - {"epoch": 2, "update": 1.5, "loss": "2.562", "ppl": "5.91", "wps": "364590", "ups": "3.07", "wpb": "118776", "bsz": "256", "num_updates": "77200", "lr": "0.000932121", "gnorm": "0.584", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "24.1", "wall": "25208"} [2022-08-01 08:29:20,295][train_inner][INFO] - {"epoch": 2, "update": 1.504, "loss": "2.561", "ppl": "5.9", "wps": "365514", "ups": "3.08", "wpb": "118774", "bsz": "256", "num_updates": "77400", "lr": "0.000931919", "gnorm": "0.584", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.9", "wall": "25273"} [2022-08-01 08:30:25,341][train_inner][INFO] - {"epoch": 2, "update": 1.508, "loss": "2.557", "ppl": "5.88", "wps": "364122", "ups": "3.07", "wpb": "118422", "bsz": "256", "num_updates": "77600", "lr": "0.000931717", "gnorm": "0.585", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "21.2", "wall": "25338"} [2022-08-01 08:31:30,269][train_inner][INFO] - {"epoch": 2, "update": 1.512, "loss": "2.564", "ppl": "5.91", "wps": "362522", "ups": "3.08", "wpb": "117687", "bsz": "256", "num_updates": "77800", "lr": "0.000931515", "gnorm": "0.588", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "22.9", "wall": "25403"} [2022-08-01 08:32:06,578][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-01 08:32:35,233][train_inner][INFO] - {"epoch": 2, "update": 1.516, "loss": "2.564", "ppl": "5.92", "wps": "363412", "ups": "3.08", "wpb": "118043", "bsz": "256", "num_updates": "78000", "lr": "0.000931313", "gnorm": "0.588", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "27.9", "wall": "25468"} [2022-08-01 08:33:40,343][train_inner][INFO] - {"epoch": 2, "update": 1.519, "loss": "2.561", "ppl": "5.9", "wps": "363092", "ups": "3.07", "wpb": "118203", "bsz": "256", "num_updates": "78200", "lr": "0.000931111", "gnorm": "0.588", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "25.7", "wall": "25533"} [2022-08-01 08:34:45,158][train_inner][INFO] - {"epoch": 2, "update": 1.523, "loss": "2.561", "ppl": "5.9", "wps": "364713", "ups": "3.09", "wpb": "118193", "bsz": "256", "num_updates": "78400", "lr": "0.000930909", "gnorm": "0.586", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.5", "wall": "25597"} [2022-08-01 08:35:50,240][train_inner][INFO] - {"epoch": 2, "update": 1.527, "loss": "2.557", "ppl": "5.89", "wps": "363994", "ups": "3.07", "wpb": "118445", "bsz": "256", "num_updates": "78600", "lr": "0.000930707", "gnorm": "0.586", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.8", "wall": "25663"} [2022-08-01 08:36:54,708][train_inner][INFO] - {"epoch": 2, "update": 1.531, "loss": "2.56", "ppl": "5.9", "wps": "366525", "ups": "3.1", "wpb": "118144", "bsz": "256", "num_updates": "78800", "lr": "0.000930505", "gnorm": "0.59", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.1", "wall": "25727"} [2022-08-01 08:37:59,549][train_inner][INFO] - {"epoch": 2, "update": 1.535, "loss": "2.555", "ppl": "5.88", "wps": "365402", "ups": "3.08", "wpb": "118463", "bsz": "256", "num_updates": "79000", "lr": "0.000930303", "gnorm": "0.586", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.7", "wall": "25792"} [2022-08-01 08:39:04,235][train_inner][INFO] - {"epoch": 2, "update": 1.539, "loss": "2.556", "ppl": "5.88", "wps": "366662", "ups": "3.09", "wpb": "118587", "bsz": "256", "num_updates": "79200", "lr": "0.000930101", "gnorm": "0.587", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.4", "wall": "25857"} [2022-08-01 08:40:08,950][train_inner][INFO] - {"epoch": 2, "update": 1.543, "loss": "2.556", "ppl": "5.88", "wps": "365273", "ups": "3.09", "wpb": "118191", "bsz": "256", "num_updates": "79400", "lr": "0.000929899", "gnorm": "0.592", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "27.2", "wall": "25921"} [2022-08-01 08:41:13,777][train_inner][INFO] - {"epoch": 2, "update": 1.547, "loss": "2.562", "ppl": "5.91", "wps": "365768", "ups": "3.09", "wpb": "118558", "bsz": "256", "num_updates": "79600", "lr": "0.000929697", "gnorm": "0.603", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.1", "wall": "25986"} [2022-08-01 08:42:18,784][train_inner][INFO] - {"epoch": 2, "update": 1.55, "loss": "2.557", "ppl": "5.88", "wps": "364318", "ups": "3.08", "wpb": "118413", "bsz": "256", "num_updates": "79800", "lr": "0.000929495", "gnorm": "0.587", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.4", "wall": "26051"} [2022-08-01 08:43:23,416][train_inner][INFO] - {"epoch": 2, "update": 1.554, "loss": "2.547", "ppl": "5.85", "wps": "366833", "ups": "3.09", "wpb": "118544", "bsz": "256", "num_updates": "80000", "lr": "0.000929293", "gnorm": "0.588", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20.1", "wall": "26116"} [2022-08-01 08:44:28,210][train_inner][INFO] - {"epoch": 2, "update": 1.558, "loss": "2.557", "ppl": "5.88", "wps": "364566", "ups": "3.09", "wpb": "118107", "bsz": "256", "num_updates": "80200", "lr": "0.000929091", "gnorm": "0.586", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20.6", "wall": "26181"} [2022-08-01 08:45:33,354][train_inner][INFO] - {"epoch": 2, "update": 1.562, "loss": "2.558", "ppl": "5.89", "wps": "362727", "ups": "3.07", "wpb": "118147", "bsz": "256", "num_updates": "80400", "lr": "0.000928889", "gnorm": "0.59", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.8", "wall": "26246"} [2022-08-01 08:46:38,611][train_inner][INFO] - {"epoch": 2, "update": 1.566, "loss": "2.549", "ppl": "5.85", "wps": "363237", "ups": "3.06", "wpb": "118516", "bsz": "256", "num_updates": "80600", "lr": "0.000928687", "gnorm": "0.586", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "26.1", "wall": "26311"} [2022-08-01 08:47:43,947][train_inner][INFO] - {"epoch": 2, "update": 1.57, "loss": "2.543", "ppl": "5.83", "wps": "363959", "ups": "3.06", "wpb": "118897", "bsz": "256", "num_updates": "80800", "lr": "0.000928485", "gnorm": "0.588", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.4", "wall": "26376"} [2022-08-01 08:48:48,951][train_inner][INFO] - {"epoch": 2, "update": 1.574, "loss": "2.559", "ppl": "5.89", "wps": "365121", "ups": "3.08", "wpb": "118669", "bsz": "256", "num_updates": "81000", "lr": "0.000928283", "gnorm": "0.59", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "23.4", "wall": "26441"} [2022-08-01 08:49:53,947][train_inner][INFO] - {"epoch": 2, "update": 1.578, "loss": "2.547", "ppl": "5.84", "wps": "362969", "ups": "3.08", "wpb": "117956", "bsz": "256", "num_updates": "81200", "lr": "0.000928081", "gnorm": "0.593", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "21.8", "wall": "26506"} [2022-08-01 08:50:58,882][train_inner][INFO] - {"epoch": 2, "update": 1.582, "loss": "2.552", "ppl": "5.86", "wps": "363379", "ups": "3.08", "wpb": "117979", "bsz": "256", "num_updates": "81400", "lr": "0.000927879", "gnorm": "0.589", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.3", "wall": "26571"} [2022-08-01 08:51:25,575][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-01 08:52:03,772][train_inner][INFO] - {"epoch": 2, "update": 1.585, "loss": "2.55", "ppl": "5.86", "wps": "364672", "ups": "3.08", "wpb": "118316", "bsz": "256", "num_updates": "81600", "lr": "0.000927677", "gnorm": "0.59", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.6", "wall": "26636"} [2022-08-01 08:53:08,654][train_inner][INFO] - {"epoch": 2, "update": 1.589, "loss": "2.555", "ppl": "5.88", "wps": "362901", "ups": "3.08", "wpb": "117728", "bsz": "256", "num_updates": "81800", "lr": "0.000927475", "gnorm": "0.589", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.7", "wall": "26701"} [2022-08-01 08:54:13,588][train_inner][INFO] - {"epoch": 2, "update": 1.593, "loss": "2.548", "ppl": "5.85", "wps": "363675", "ups": "3.08", "wpb": "118072", "bsz": "256", "num_updates": "82000", "lr": "0.000927273", "gnorm": "0.591", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.6", "wall": "26766"} [2022-08-01 08:55:18,314][train_inner][INFO] - {"epoch": 2, "update": 1.597, "loss": "2.547", "ppl": "5.84", "wps": "366099", "ups": "3.09", "wpb": "118478", "bsz": "256", "num_updates": "82200", "lr": "0.000927071", "gnorm": "0.588", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.1", "wall": "26831"} [2022-08-01 08:56:23,364][train_inner][INFO] - {"epoch": 2, "update": 1.601, "loss": "2.554", "ppl": "5.87", "wps": "362264", "ups": "3.07", "wpb": "117825", "bsz": "256", "num_updates": "82400", "lr": "0.000926869", "gnorm": "0.589", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "26896"} [2022-08-01 08:57:27,837][train_inner][INFO] - {"epoch": 2, "update": 1.605, "loss": "2.548", "ppl": "5.85", "wps": "366118", "ups": "3.1", "wpb": "118021", "bsz": "256", "num_updates": "82600", "lr": "0.000926667", "gnorm": "0.591", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20", "wall": "26960"} [2022-08-01 08:58:33,068][train_inner][INFO] - {"epoch": 2, "update": 1.609, "loss": "2.545", "ppl": "5.83", "wps": "362488", "ups": "3.07", "wpb": "118226", "bsz": "256", "num_updates": "82800", "lr": "0.000926465", "gnorm": "0.591", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.5", "wall": "27025"} [2022-08-01 08:59:38,205][train_inner][INFO] - {"epoch": 2, "update": 1.613, "loss": "2.543", "ppl": "5.83", "wps": "364113", "ups": "3.07", "wpb": "118584", "bsz": "256", "num_updates": "83000", "lr": "0.000926263", "gnorm": "0.59", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24", "wall": "27091"} [2022-08-01 09:00:43,154][train_inner][INFO] - {"epoch": 2, "update": 1.617, "loss": "2.542", "ppl": "5.82", "wps": "365641", "ups": "3.08", "wpb": "118739", "bsz": "256", "num_updates": "83200", "lr": "0.000926061", "gnorm": "0.591", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.5", "wall": "27155"} [2022-08-01 09:01:47,835][train_inner][INFO] - {"epoch": 2, "update": 1.62, "loss": "2.545", "ppl": "5.83", "wps": "365988", "ups": "3.09", "wpb": "118361", "bsz": "256", "num_updates": "83400", "lr": "0.000925859", "gnorm": "0.588", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.9", "wall": "27220"} [2022-08-01 09:02:52,678][train_inner][INFO] - {"epoch": 2, "update": 1.624, "loss": "2.546", "ppl": "5.84", "wps": "366841", "ups": "3.08", "wpb": "118934", "bsz": "256", "num_updates": "83600", "lr": "0.000925657", "gnorm": "0.587", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "23.8", "wall": "27285"} [2022-08-01 09:03:57,090][train_inner][INFO] - {"epoch": 2, "update": 1.628, "loss": "2.552", "ppl": "5.86", "wps": "365447", "ups": "3.11", "wpb": "117694", "bsz": "256", "num_updates": "83800", "lr": "0.000925455", "gnorm": "0.592", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "21.9", "wall": "27349"} [2022-08-01 09:04:46,483][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-01 09:05:01,989][train_inner][INFO] - {"epoch": 2, "update": 1.632, "loss": "2.547", "ppl": "5.85", "wps": "361633", "ups": "3.08", "wpb": "117347", "bsz": "256", "num_updates": "84000", "lr": "0.000925253", "gnorm": "0.593", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "27414"} [2022-08-01 09:06:06,785][train_inner][INFO] - {"epoch": 2, "update": 1.636, "loss": "2.539", "ppl": "5.81", "wps": "367432", "ups": "3.09", "wpb": "119039", "bsz": "256", "num_updates": "84200", "lr": "0.000925051", "gnorm": "0.587", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.8", "wall": "27479"} [2022-08-01 09:07:11,365][train_inner][INFO] - {"epoch": 2, "update": 1.64, "loss": "2.546", "ppl": "5.84", "wps": "364692", "ups": "3.1", "wpb": "117711", "bsz": "256", "num_updates": "84400", "lr": "0.000924848", "gnorm": "0.589", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.4", "wall": "27544"} [2022-08-01 09:08:15,890][train_inner][INFO] - {"epoch": 2, "update": 1.644, "loss": "2.539", "ppl": "5.81", "wps": "367411", "ups": "3.1", "wpb": "118533", "bsz": "256", "num_updates": "84600", "lr": "0.000924646", "gnorm": "0.587", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.7", "wall": "27608"} [2022-08-01 09:09:21,782][train_inner][INFO] - {"epoch": 2, "update": 1.648, "loss": "2.536", "ppl": "5.8", "wps": "358888", "ups": "3.04", "wpb": "118238", "bsz": "256", "num_updates": "84800", "lr": "0.000924444", "gnorm": "0.587", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "20.1", "wall": "27674"} [2022-08-01 09:10:26,863][train_inner][INFO] - {"epoch": 2, "update": 1.652, "loss": "2.541", "ppl": "5.82", "wps": "365117", "ups": "3.07", "wpb": "118810", "bsz": "256", "num_updates": "85000", "lr": "0.000924242", "gnorm": "0.589", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.9", "wall": "27739"} [2022-08-01 09:11:31,830][train_inner][INFO] - {"epoch": 2, "update": 1.655, "loss": "2.544", "ppl": "5.83", "wps": "363156", "ups": "3.08", "wpb": "117963", "bsz": "256", "num_updates": "85200", "lr": "0.00092404", "gnorm": "0.591", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "27.6", "wall": "27804"} [2022-08-01 09:12:36,231][train_inner][INFO] - {"epoch": 2, "update": 1.659, "loss": "2.541", "ppl": "5.82", "wps": "366058", "ups": "3.11", "wpb": "117872", "bsz": "256", "num_updates": "85400", "lr": "0.000923838", "gnorm": "0.595", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.9", "wall": "27869"} [2022-08-01 09:13:41,028][train_inner][INFO] - {"epoch": 2, "update": 1.663, "loss": "2.537", "ppl": "5.8", "wps": "364099", "ups": "3.09", "wpb": "117959", "bsz": "256", "num_updates": "85600", "lr": "0.000923636", "gnorm": "0.59", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.5", "wall": "27933"} [2022-08-01 09:14:45,675][train_inner][INFO] - {"epoch": 2, "update": 1.667, "loss": "2.543", "ppl": "5.83", "wps": "363570", "ups": "3.09", "wpb": "117517", "bsz": "256", "num_updates": "85800", "lr": "0.000923434", "gnorm": "0.593", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.7", "wall": "27998"} [2022-08-01 09:15:50,304][train_inner][INFO] - {"epoch": 2, "update": 1.671, "loss": "2.537", "ppl": "5.8", "wps": "365349", "ups": "3.09", "wpb": "118059", "bsz": "256", "num_updates": "86000", "lr": "0.000923232", "gnorm": "0.594", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "24.7", "wall": "28063"} [2022-08-01 09:16:02,411][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-01 09:16:03,368][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-01 09:16:56,147][train_inner][INFO] - {"epoch": 2, "update": 1.675, "loss": "2.536", "ppl": "5.8", "wps": "361400", "ups": "3.04", "wpb": "118978", "bsz": "256", "num_updates": "86200", "lr": "0.00092303", "gnorm": "0.588", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.9", "wall": "28128"} [2022-08-01 09:18:00,844][train_inner][INFO] - {"epoch": 2, "update": 1.679, "loss": "2.535", "ppl": "5.79", "wps": "366150", "ups": "3.09", "wpb": "118443", "bsz": "256", "num_updates": "86400", "lr": "0.000922828", "gnorm": "0.589", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.1", "wall": "28193"} [2022-08-01 09:19:05,714][train_inner][INFO] - {"epoch": 2, "update": 1.683, "loss": "2.54", "ppl": "5.82", "wps": "363101", "ups": "3.08", "wpb": "117770", "bsz": "256", "num_updates": "86600", "lr": "0.000922626", "gnorm": "0.59", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "28258"} [2022-08-01 09:19:56,022][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 [2022-08-01 09:20:10,644][train_inner][INFO] - {"epoch": 2, "update": 1.687, "loss": "2.536", "ppl": "5.8", "wps": "363920", "ups": "3.08", "wpb": "118146", "bsz": "256", "num_updates": "86800", "lr": "0.000922424", "gnorm": "0.606", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.2", "wall": "28323"} [2022-08-01 09:21:15,850][train_inner][INFO] - {"epoch": 2, "update": 1.69, "loss": "2.529", "ppl": "5.77", "wps": "365823", "ups": "3.07", "wpb": "119269", "bsz": "256", "num_updates": "87000", "lr": "0.000922222", "gnorm": "0.586", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.2", "wall": "28388"} [2022-08-01 09:22:20,981][train_inner][INFO] - {"epoch": 2, "update": 1.694, "loss": "2.542", "ppl": "5.83", "wps": "362970", "ups": "3.07", "wpb": "118201", "bsz": "256", "num_updates": "87200", "lr": "0.00092202", "gnorm": "0.61", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "28.2", "wall": "28453"} [2022-08-01 09:23:25,423][train_inner][INFO] - {"epoch": 2, "update": 1.698, "loss": "2.537", "ppl": "5.8", "wps": "367176", "ups": "3.1", "wpb": "118305", "bsz": "256", "num_updates": "87400", "lr": "0.000921818", "gnorm": "0.595", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "23.3", "wall": "28518"} [2022-08-01 09:24:30,368][train_inner][INFO] - {"epoch": 2, "update": 1.702, "loss": "2.534", "ppl": "5.79", "wps": "364656", "ups": "3.08", "wpb": "118412", "bsz": "256", "num_updates": "87600", "lr": "0.000921616", "gnorm": "0.591", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.8", "wall": "28583"} [2022-08-01 09:25:35,399][train_inner][INFO] - {"epoch": 2, "update": 1.706, "loss": "2.532", "ppl": "5.78", "wps": "363644", "ups": "3.08", "wpb": "118238", "bsz": "256", "num_updates": "87800", "lr": "0.000921414", "gnorm": "0.591", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.9", "wall": "28648"} [2022-08-01 09:26:40,163][train_inner][INFO] - {"epoch": 2, "update": 1.71, "loss": "2.533", "ppl": "5.79", "wps": "364227", "ups": "3.09", "wpb": "117942", "bsz": "256", "num_updates": "88000", "lr": "0.000921212", "gnorm": "0.591", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22.1", "wall": "28712"} [2022-08-01 09:27:45,225][train_inner][INFO] - {"epoch": 2, "update": 1.714, "loss": "2.533", "ppl": "5.79", "wps": "362805", "ups": "3.07", "wpb": "118023", "bsz": "256", "num_updates": "88200", "lr": "0.00092101", "gnorm": "0.593", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23", "wall": "28778"} [2022-08-01 09:28:49,854][train_inner][INFO] - {"epoch": 2, "update": 1.718, "loss": "2.531", "ppl": "5.78", "wps": "366160", "ups": "3.09", "wpb": "118320", "bsz": "256", "num_updates": "88400", "lr": "0.000920808", "gnorm": "0.597", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22.7", "wall": "28842"} [2022-08-01 09:29:54,976][train_inner][INFO] - {"epoch": 2, "update": 1.721, "loss": "2.537", "ppl": "5.8", "wps": "364949", "ups": "3.07", "wpb": "118829", "bsz": "256", "num_updates": "88600", "lr": "0.000920606", "gnorm": "0.591", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.7", "wall": "28907"} [2022-08-01 09:31:00,288][train_inner][INFO] - {"epoch": 2, "update": 1.725, "loss": "2.521", "ppl": "5.74", "wps": "364738", "ups": "3.06", "wpb": "119107", "bsz": "256", "num_updates": "88800", "lr": "0.000920404", "gnorm": "0.587", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.6", "wall": "28973"} [2022-08-01 09:32:05,222][train_inner][INFO] - {"epoch": 2, "update": 1.729, "loss": "2.538", "ppl": "5.81", "wps": "363083", "ups": "3.08", "wpb": "117881", "bsz": "256", "num_updates": "89000", "lr": "0.000920202", "gnorm": "0.593", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23", "wall": "29038"} [2022-08-01 09:33:10,417][train_inner][INFO] - {"epoch": 2, "update": 1.733, "loss": "2.531", "ppl": "5.78", "wps": "362186", "ups": "3.07", "wpb": "118063", "bsz": "256", "num_updates": "89200", "lr": "0.00092", "gnorm": "0.595", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "29103"} [2022-08-01 09:34:14,964][train_inner][INFO] - {"epoch": 2, "update": 1.737, "loss": "2.541", "ppl": "5.82", "wps": "364733", "ups": "3.1", "wpb": "117709", "bsz": "256", "num_updates": "89400", "lr": "0.000919798", "gnorm": "0.596", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23.3", "wall": "29167"} [2022-08-01 09:34:54,822][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 [2022-08-01 09:35:19,987][train_inner][INFO] - {"epoch": 2, "update": 1.741, "loss": "2.535", "ppl": "5.79", "wps": "362625", "ups": "3.08", "wpb": "117893", "bsz": "256", "num_updates": "89600", "lr": "0.000919596", "gnorm": "0.592", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.8", "wall": "29232"} [2022-08-01 09:36:24,890][train_inner][INFO] - {"epoch": 2, "update": 1.745, "loss": "2.532", "ppl": "5.78", "wps": "362330", "ups": "3.08", "wpb": "117582", "bsz": "256", "num_updates": "89800", "lr": "0.000919394", "gnorm": "0.595", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.4", "wall": "29297"} [2022-08-01 09:37:29,835][train_inner][INFO] - {"epoch": 2, "update": 1.749, "loss": "2.528", "ppl": "5.77", "wps": "363643", "ups": "3.08", "wpb": "118081", "bsz": "256", "num_updates": "90000", "lr": "0.000919192", "gnorm": "0.591", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.6", "wall": "29362"} [2022-08-01 09:38:34,932][train_inner][INFO] - {"epoch": 2, "update": 1.753, "loss": "2.524", "ppl": "5.75", "wps": "363712", "ups": "3.07", "wpb": "118381", "bsz": "256", "num_updates": "90200", "lr": "0.00091899", "gnorm": "0.595", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.7", "wall": "29427"} [2022-08-01 09:39:40,250][train_inner][INFO] - {"epoch": 2, "update": 1.756, "loss": "2.522", "ppl": "5.74", "wps": "362500", "ups": "3.06", "wpb": "118387", "bsz": "256", "num_updates": "90400", "lr": "0.000918788", "gnorm": "0.592", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.1", "wall": "29493"} [2022-08-01 09:40:44,980][train_inner][INFO] - {"epoch": 2, "update": 1.76, "loss": "2.522", "ppl": "5.75", "wps": "365738", "ups": "3.09", "wpb": "118370", "bsz": "256", "num_updates": "90600", "lr": "0.000918586", "gnorm": "0.595", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.7", "wall": "29557"} [2022-08-01 09:41:49,535][train_inner][INFO] - {"epoch": 2, "update": 1.764, "loss": "2.524", "ppl": "5.75", "wps": "364682", "ups": "3.1", "wpb": "117709", "bsz": "256", "num_updates": "90800", "lr": "0.000918384", "gnorm": "0.598", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.9", "wall": "29622"} [2022-08-01 09:42:54,407][train_inner][INFO] - {"epoch": 2, "update": 1.768, "loss": "2.525", "ppl": "5.76", "wps": "364777", "ups": "3.08", "wpb": "118317", "bsz": "256", "num_updates": "91000", "lr": "0.000918182", "gnorm": "0.594", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.5", "wall": "29687"} [2022-08-01 09:43:59,313][train_inner][INFO] - {"epoch": 2, "update": 1.772, "loss": "2.529", "ppl": "5.77", "wps": "364279", "ups": "3.08", "wpb": "118219", "bsz": "256", "num_updates": "91200", "lr": "0.00091798", "gnorm": "0.606", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.9", "wall": "29752"} [2022-08-01 09:45:04,430][train_inner][INFO] - {"epoch": 2, "update": 1.776, "loss": "2.519", "ppl": "5.73", "wps": "364850", "ups": "3.07", "wpb": "118788", "bsz": "256", "num_updates": "91400", "lr": "0.000917778", "gnorm": "0.589", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.4", "wall": "29817"} [2022-08-01 09:46:09,294][train_inner][INFO] - {"epoch": 2, "update": 1.78, "loss": "2.526", "ppl": "5.76", "wps": "364856", "ups": "3.08", "wpb": "118328", "bsz": "256", "num_updates": "91600", "lr": "0.000917576", "gnorm": "0.597", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.4", "wall": "29882"} [2022-08-01 09:47:14,281][train_inner][INFO] - {"epoch": 2, "update": 1.784, "loss": "2.526", "ppl": "5.76", "wps": "363981", "ups": "3.08", "wpb": "118269", "bsz": "256", "num_updates": "91800", "lr": "0.000917374", "gnorm": "0.594", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.5", "wall": "29947"} [2022-08-01 09:48:19,186][train_inner][INFO] - {"epoch": 2, "update": 1.788, "loss": "2.516", "ppl": "5.72", "wps": "365327", "ups": "3.08", "wpb": "118556", "bsz": "256", "num_updates": "92000", "lr": "0.000917172", "gnorm": "0.593", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "30012"} [2022-08-01 09:48:59,107][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 [2022-08-01 09:48:59,397][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 [2022-08-01 09:48:59,695][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 [2022-08-01 09:49:24,927][train_inner][INFO] - {"epoch": 2, "update": 1.791, "loss": "2.522", "ppl": "5.74", "wps": "360157", "ups": "3.04", "wpb": "118384", "bsz": "256", "num_updates": "92200", "lr": "0.00091697", "gnorm": "0.759", "clip": "0.5", "loss_scale": "1", "train_wall": "65", "gb_free": "23.2", "wall": "30077"} [2022-08-01 09:50:30,183][train_inner][INFO] - {"epoch": 2, "update": 1.795, "loss": "2.515", "ppl": "5.72", "wps": "362003", "ups": "3.06", "wpb": "118112", "bsz": "256", "num_updates": "92400", "lr": "0.000916768", "gnorm": "0.602", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.6", "wall": "30143"} [2022-08-01 09:51:35,207][train_inner][INFO] - {"epoch": 2, "update": 1.799, "loss": "2.528", "ppl": "5.77", "wps": "361620", "ups": "3.08", "wpb": "117570", "bsz": "256", "num_updates": "92600", "lr": "0.000916566", "gnorm": "0.615", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.4", "wall": "30208"} [2022-08-01 09:52:39,819][train_inner][INFO] - {"epoch": 2, "update": 1.803, "loss": "2.519", "ppl": "5.73", "wps": "365528", "ups": "3.1", "wpb": "118086", "bsz": "256", "num_updates": "92800", "lr": "0.000916364", "gnorm": "0.592", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "19.7", "wall": "30272"} [2022-08-01 09:53:44,380][train_inner][INFO] - {"epoch": 2, "update": 1.807, "loss": "2.517", "ppl": "5.72", "wps": "368288", "ups": "3.1", "wpb": "118883", "bsz": "256", "num_updates": "93000", "lr": "0.000916162", "gnorm": "0.591", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "22.3", "wall": "30337"} [2022-08-01 09:54:49,023][train_inner][INFO] - {"epoch": 2, "update": 1.811, "loss": "2.517", "ppl": "5.73", "wps": "366920", "ups": "3.09", "wpb": "118592", "bsz": "256", "num_updates": "93200", "lr": "0.00091596", "gnorm": "0.611", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "22.2", "wall": "30401"} [2022-08-01 09:55:53,764][train_inner][INFO] - {"epoch": 2, "update": 1.815, "loss": "2.521", "ppl": "5.74", "wps": "364705", "ups": "3.09", "wpb": "118055", "bsz": "256", "num_updates": "93400", "lr": "0.000915758", "gnorm": "0.593", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20.1", "wall": "30466"} [2022-08-01 09:56:58,868][train_inner][INFO] - {"epoch": 2, "update": 1.819, "loss": "2.521", "ppl": "5.74", "wps": "362654", "ups": "3.07", "wpb": "118051", "bsz": "256", "num_updates": "93600", "lr": "0.000915556", "gnorm": "0.597", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.1", "wall": "30531"} [2022-08-01 09:58:03,309][train_inner][INFO] - {"epoch": 2, "update": 1.823, "loss": "2.518", "ppl": "5.73", "wps": "368470", "ups": "3.1", "wpb": "118721", "bsz": "256", "num_updates": "93800", "lr": "0.000915354", "gnorm": "0.592", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "21.8", "wall": "30596"} [2022-08-01 09:59:08,329][train_inner][INFO] - {"epoch": 2, "update": 1.826, "loss": "2.516", "ppl": "5.72", "wps": "363549", "ups": "3.08", "wpb": "118189", "bsz": "256", "num_updates": "94000", "lr": "0.000915152", "gnorm": "0.596", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.6", "wall": "30661"} [2022-08-01 10:00:13,494][train_inner][INFO] - {"epoch": 2, "update": 1.83, "loss": "2.519", "ppl": "5.73", "wps": "364395", "ups": "3.07", "wpb": "118726", "bsz": "256", "num_updates": "94200", "lr": "0.000914949", "gnorm": "0.594", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.7", "wall": "30726"} [2022-08-01 10:01:18,454][train_inner][INFO] - {"epoch": 2, "update": 1.834, "loss": "2.509", "ppl": "5.69", "wps": "366053", "ups": "3.08", "wpb": "118894", "bsz": "256", "num_updates": "94400", "lr": "0.000914747", "gnorm": "0.588", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.3", "wall": "30791"} [2022-08-01 10:02:23,852][train_inner][INFO] - {"epoch": 2, "update": 1.838, "loss": "2.518", "ppl": "5.73", "wps": "362933", "ups": "3.06", "wpb": "118672", "bsz": "256", "num_updates": "94600", "lr": "0.000914545", "gnorm": "0.607", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.7", "wall": "30856"} [2022-08-01 10:03:28,799][train_inner][INFO] - {"epoch": 2, "update": 1.842, "loss": "2.516", "ppl": "5.72", "wps": "363959", "ups": "3.08", "wpb": "118189", "bsz": "256", "num_updates": "94800", "lr": "0.000914343", "gnorm": "0.592", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "25.1", "wall": "30921"} [2022-08-01 10:04:34,548][train_inner][INFO] - {"epoch": 2, "update": 1.846, "loss": "2.521", "ppl": "5.74", "wps": "359623", "ups": "3.04", "wpb": "118222", "bsz": "256", "num_updates": "95000", "lr": "0.000914141", "gnorm": "0.594", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.4", "wall": "30987"} [2022-08-01 10:05:39,305][train_inner][INFO] - {"epoch": 2, "update": 1.85, "loss": "2.52", "ppl": "5.74", "wps": "365359", "ups": "3.09", "wpb": "118298", "bsz": "256", "num_updates": "95200", "lr": "0.000913939", "gnorm": "0.599", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.1", "wall": "31052"} [2022-08-01 10:06:44,321][train_inner][INFO] - {"epoch": 2, "update": 1.854, "loss": "2.519", "ppl": "5.73", "wps": "363019", "ups": "3.08", "wpb": "118007", "bsz": "256", "num_updates": "95400", "lr": "0.000913737", "gnorm": "0.596", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "23.9", "wall": "31117"} [2022-08-01 10:07:49,157][train_inner][INFO] - {"epoch": 2, "update": 1.858, "loss": "2.51", "ppl": "5.7", "wps": "365628", "ups": "3.08", "wpb": "118529", "bsz": "256", "num_updates": "95600", "lr": "0.000913535", "gnorm": "0.592", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "27.2", "wall": "31181"} [2022-08-01 10:08:53,845][train_inner][INFO] - {"epoch": 2, "update": 1.861, "loss": "2.509", "ppl": "5.69", "wps": "365311", "ups": "3.09", "wpb": "118154", "bsz": "256", "num_updates": "95800", "lr": "0.000913333", "gnorm": "0.618", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.7", "wall": "31246"} [2022-08-01 10:09:58,438][train_inner][INFO] - {"epoch": 2, "update": 1.865, "loss": "2.51", "ppl": "5.7", "wps": "366046", "ups": "3.1", "wpb": "118217", "bsz": "256", "num_updates": "96000", "lr": "0.000913131", "gnorm": "0.598", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.9", "wall": "31311"} [2022-08-01 10:11:03,052][train_inner][INFO] - {"epoch": 2, "update": 1.869, "loss": "2.517", "ppl": "5.72", "wps": "365471", "ups": "3.1", "wpb": "118072", "bsz": "256", "num_updates": "96200", "lr": "0.000912929", "gnorm": "0.595", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "25.9", "wall": "31375"} [2022-08-01 10:12:07,358][train_inner][INFO] - {"epoch": 2, "update": 1.873, "loss": "2.516", "ppl": "5.72", "wps": "366208", "ups": "3.11", "wpb": "117745", "bsz": "256", "num_updates": "96400", "lr": "0.000912727", "gnorm": "0.602", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.9", "wall": "31440"} [2022-08-01 10:13:12,279][train_inner][INFO] - {"epoch": 2, "update": 1.877, "loss": "2.515", "ppl": "5.72", "wps": "364214", "ups": "3.08", "wpb": "118224", "bsz": "256", "num_updates": "96600", "lr": "0.000912525", "gnorm": "0.594", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.7", "wall": "31505"} [2022-08-01 10:14:17,378][train_inner][INFO] - {"epoch": 2, "update": 1.881, "loss": "2.512", "ppl": "5.7", "wps": "363472", "ups": "3.07", "wpb": "118306", "bsz": "256", "num_updates": "96800", "lr": "0.000912323", "gnorm": "0.594", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.6", "wall": "31570"} [2022-08-01 10:15:22,497][train_inner][INFO] - {"epoch": 2, "update": 1.885, "loss": "2.516", "ppl": "5.72", "wps": "362784", "ups": "3.07", "wpb": "118119", "bsz": "256", "num_updates": "97000", "lr": "0.000912121", "gnorm": "0.597", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.5", "wall": "31635"} [2022-08-01 10:16:26,834][train_inner][INFO] - {"epoch": 2, "update": 1.889, "loss": "2.521", "ppl": "5.74", "wps": "366178", "ups": "3.11", "wpb": "117794", "bsz": "256", "num_updates": "97200", "lr": "0.000911919", "gnorm": "0.595", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "23.6", "wall": "31699"} [2022-08-01 10:17:31,515][train_inner][INFO] - {"epoch": 2, "update": 1.892, "loss": "2.51", "ppl": "5.7", "wps": "365395", "ups": "3.09", "wpb": "118168", "bsz": "256", "num_updates": "97400", "lr": "0.000911717", "gnorm": "0.596", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.1", "wall": "31764"} [2022-08-01 10:18:36,291][train_inner][INFO] - {"epoch": 2, "update": 1.896, "loss": "2.51", "ppl": "5.7", "wps": "366071", "ups": "3.09", "wpb": "118561", "bsz": "256", "num_updates": "97600", "lr": "0.000911515", "gnorm": "0.593", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.3", "wall": "31829"} [2022-08-01 10:19:41,211][train_inner][INFO] - {"epoch": 2, "update": 1.9, "loss": "2.503", "ppl": "5.67", "wps": "364143", "ups": "3.08", "wpb": "118200", "bsz": "256", "num_updates": "97800", "lr": "0.000911313", "gnorm": "0.596", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "27.5", "wall": "31894"} [2022-08-01 10:20:47,037][train_inner][INFO] - {"epoch": 2, "update": 1.904, "loss": "2.514", "ppl": "5.71", "wps": "359252", "ups": "3.04", "wpb": "118238", "bsz": "256", "num_updates": "98000", "lr": "0.000911111", "gnorm": "0.595", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.8", "wall": "31959"} [2022-08-01 10:21:52,224][train_inner][INFO] - {"epoch": 2, "update": 1.908, "loss": "2.506", "ppl": "5.68", "wps": "364909", "ups": "3.07", "wpb": "118936", "bsz": "256", "num_updates": "98200", "lr": "0.000910909", "gnorm": "0.604", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "24.7", "wall": "32025"} [2022-08-01 10:22:56,917][train_inner][INFO] - {"epoch": 2, "update": 1.912, "loss": "2.506", "ppl": "5.68", "wps": "365818", "ups": "3.09", "wpb": "118326", "bsz": "256", "num_updates": "98400", "lr": "0.000910707", "gnorm": "0.597", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "25.5", "wall": "32089"} [2022-08-01 10:24:02,844][train_inner][INFO] - {"epoch": 2, "update": 1.916, "loss": "2.513", "ppl": "5.71", "wps": "358190", "ups": "3.03", "wpb": "118071", "bsz": "256", "num_updates": "98600", "lr": "0.000910505", "gnorm": "0.595", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "20.2", "wall": "32155"} [2022-08-01 10:24:18,785][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 [2022-08-01 10:25:07,851][train_inner][INFO] - {"epoch": 2, "update": 1.92, "loss": "2.506", "ppl": "5.68", "wps": "364086", "ups": "3.08", "wpb": "118339", "bsz": "256", "num_updates": "98800", "lr": "0.000910303", "gnorm": "0.6", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23", "wall": "32220"} [2022-08-01 10:26:12,680][train_inner][INFO] - {"epoch": 2, "update": 1.924, "loss": "2.51", "ppl": "5.7", "wps": "363755", "ups": "3.09", "wpb": "117908", "bsz": "256", "num_updates": "99000", "lr": "0.000910101", "gnorm": "0.598", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "28.2", "wall": "32285"} [2022-08-01 10:27:17,364][train_inner][INFO] - {"epoch": 2, "update": 1.927, "loss": "2.51", "ppl": "5.7", "wps": "364835", "ups": "3.09", "wpb": "117994", "bsz": "256", "num_updates": "99200", "lr": "0.000909899", "gnorm": "0.603", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.3", "wall": "32350"} [2022-08-01 10:28:22,328][train_inner][INFO] - {"epoch": 2, "update": 1.931, "loss": "2.502", "ppl": "5.67", "wps": "364380", "ups": "3.08", "wpb": "118357", "bsz": "256", "num_updates": "99400", "lr": "0.000909697", "gnorm": "0.595", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.1", "wall": "32415"} [2022-08-01 10:29:26,856][train_inner][INFO] - {"epoch": 2, "update": 1.935, "loss": "2.511", "ppl": "5.7", "wps": "365606", "ups": "3.1", "wpb": "117958", "bsz": "256", "num_updates": "99600", "lr": "0.000909495", "gnorm": "0.618", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.3", "wall": "32479"} [2022-08-01 10:30:31,701][train_inner][INFO] - {"epoch": 2, "update": 1.939, "loss": "2.509", "ppl": "5.69", "wps": "366086", "ups": "3.08", "wpb": "118692", "bsz": "256", "num_updates": "99800", "lr": "0.000909293", "gnorm": "0.594", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.7", "wall": "32544"} [2022-08-01 10:31:36,493][train_inner][INFO] - {"epoch": 2, "update": 1.943, "loss": "2.502", "ppl": "5.67", "wps": "363825", "ups": "3.09", "wpb": "117864", "bsz": "256", "num_updates": "100000", "lr": "0.000909091", "gnorm": "0.595", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "24.6", "wall": "32609"} [2022-08-01 10:31:36,495][fairseq_cli.train][INFO] - begin validation on "valid" subset [2022-08-01 10:31:59,089][valid][INFO] - {"epoch": 2, "valid_loss": "2.395", "valid_ppl": "5.26", "valid_wps": "1.56106e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "100000", "valid_best_loss": "2.395"} [2022-08-01 10:31:59,092][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 100000 updates [2022-08-01 10:31:59,093][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_2_100000.pt [2022-08-01 10:32:09,733][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_2_100000.pt [2022-08-01 10:32:41,346][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/adan2/checkpoint_2_100000.pt (epoch 2 @ 100000 updates, score 2.395) (writing took 42.25377578288317 seconds) [2022-08-01 10:33:45,958][train_inner][INFO] - {"epoch": 2, "update": 1.947, "loss": "2.501", "ppl": "5.66", "wps": "182320", "ups": "1.54", "wpb": "118019", "bsz": "256", "num_updates": "100200", "lr": "0.000908889", "gnorm": "0.596", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22.8", "wall": "32738"} [2022-08-01 10:34:51,120][train_inner][INFO] - {"epoch": 2, "update": 1.951, "loss": "2.505", "ppl": "5.68", "wps": "363674", "ups": "3.07", "wpb": "118487", "bsz": "256", "num_updates": "100400", "lr": "0.000908687", "gnorm": "0.595", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.7", "wall": "32803"} [2022-08-01 10:35:55,492][train_inner][INFO] - {"epoch": 2, "update": 1.955, "loss": "2.506", "ppl": "5.68", "wps": "366376", "ups": "3.11", "wpb": "117920", "bsz": "256", "num_updates": "100600", "lr": "0.000908485", "gnorm": "0.598", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.4", "wall": "32868"} [2022-08-01 10:37:00,416][train_inner][INFO] - {"epoch": 2, "update": 1.959, "loss": "2.505", "ppl": "5.68", "wps": "364490", "ups": "3.08", "wpb": "118319", "bsz": "256", "num_updates": "100800", "lr": "0.000908283", "gnorm": "0.597", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.5", "wall": "32933"} [2022-08-01 10:38:05,263][train_inner][INFO] - {"epoch": 2, "update": 1.962, "loss": "2.5", "ppl": "5.66", "wps": "365392", "ups": "3.08", "wpb": "118471", "bsz": "256", "num_updates": "101000", "lr": "0.000908081", "gnorm": "0.595", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.7", "wall": "32998"} [2022-08-01 10:39:09,771][train_inner][INFO] - {"epoch": 2, "update": 1.966, "loss": "2.496", "ppl": "5.64", "wps": "367455", "ups": "3.1", "wpb": "118517", "bsz": "256", "num_updates": "101200", "lr": "0.000907879", "gnorm": "0.598", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.7", "wall": "33062"} [2022-08-01 10:40:14,512][train_inner][INFO] - {"epoch": 2, "update": 1.97, "loss": "2.506", "ppl": "5.68", "wps": "365253", "ups": "3.09", "wpb": "118234", "bsz": "256", "num_updates": "101400", "lr": "0.000907677", "gnorm": "0.596", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.5", "wall": "33127"} [2022-08-01 10:41:19,451][train_inner][INFO] - {"epoch": 2, "update": 1.974, "loss": "2.501", "ppl": "5.66", "wps": "364842", "ups": "3.08", "wpb": "118461", "bsz": "256", "num_updates": "101600", "lr": "0.000907475", "gnorm": "0.595", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "33192"} [2022-08-01 10:42:24,035][train_inner][INFO] - {"epoch": 2, "update": 1.978, "loss": "2.496", "ppl": "5.64", "wps": "369076", "ups": "3.1", "wpb": "119179", "bsz": "256", "num_updates": "101800", "lr": "0.000907273", "gnorm": "0.595", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23.5", "wall": "33256"} [2022-08-01 10:42:33,714][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 [2022-08-01 10:43:29,268][train_inner][INFO] - {"epoch": 2, "update": 1.982, "loss": "2.503", "ppl": "5.67", "wps": "363910", "ups": "3.07", "wpb": "118694", "bsz": "256", "num_updates": "102000", "lr": "0.000907071", "gnorm": "0.598", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.7", "wall": "33322"} [2022-08-01 10:44:33,761][train_inner][INFO] - {"epoch": 2, "update": 1.986, "loss": "2.498", "ppl": "5.65", "wps": "366765", "ups": "3.1", "wpb": "118268", "bsz": "256", "num_updates": "102200", "lr": "0.000906869", "gnorm": "0.596", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.7", "wall": "33386"} [2022-08-01 10:45:03,556][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 [2022-08-01 10:45:38,685][train_inner][INFO] - {"epoch": 2, "update": 1.99, "loss": "2.501", "ppl": "5.66", "wps": "363851", "ups": "3.08", "wpb": "118112", "bsz": "256", "num_updates": "102400", "lr": "0.000906667", "gnorm": "0.6", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "24.3", "wall": "33451"} [2022-08-01 10:46:43,644][train_inner][INFO] - {"epoch": 2, "update": 1.994, "loss": "2.509", "ppl": "5.69", "wps": "363544", "ups": "3.08", "wpb": "118077", "bsz": "256", "num_updates": "102600", "lr": "0.000906465", "gnorm": "0.598", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.7", "wall": "33516"} [2022-08-01 10:47:48,400][train_inner][INFO] - {"epoch": 2, "update": 1.997, "loss": "2.501", "ppl": "5.66", "wps": "365799", "ups": "3.09", "wpb": "118436", "bsz": "256", "num_updates": "102800", "lr": "0.000906263", "gnorm": "0.628", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.6", "wall": "33581"} [2022-08-01 10:48:31,410][fairseq_cli.train][INFO] - end of epoch 2 (average epoch stats below) [2022-08-01 10:48:31,410][train][INFO] - {"epoch": 2, "train_loss": "2.571", "train_ppl": "5.94", "train_wps": "362805", "train_ups": "3.07", "train_wpb": "118299", "train_bsz": "256", "train_num_updates": "102933", "train_lr": "0.000906128", "train_gnorm": "0.591", "train_clip": "0", "train_loss_scale": "2", "train_train_wall": "16616", "train_gb_free": "24.3", "train_wall": "33624"} [2022-08-01 10:48:31,496][fairseq.data.iterators][INFO] - grouped total_num_itrs = 51488 [2022-08-01 10:48:31,499][fairseq.trainer][INFO] - begin training epoch 3 [2022-08-01 10:48:31,499][fairseq_cli.train][INFO] - Start iterating over samples [2022-08-01 10:49:03,480][train_inner][INFO] - {"epoch": 3, "update": 2.001, "loss": "2.502", "ppl": "5.67", "wps": "313018", "ups": "2.66", "wpb": "117505", "bsz": "255.4", "num_updates": "103000", "lr": "0.000906061", "gnorm": "0.601", "clip": "0", "loss_scale": "2", "train_wall": "66", "gb_free": "24.3", "wall": "33656"} [2022-08-01 10:50:08,385][train_inner][INFO] - {"epoch": 3, "update": 2.005, "loss": "2.501", "ppl": "5.66", "wps": "364367", "ups": "3.08", "wpb": "118245", "bsz": "256", "num_updates": "103200", "lr": "0.000905859", "gnorm": "0.597", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.8", "wall": "33721"} [2022-08-01 10:51:13,107][train_inner][INFO] - {"epoch": 3, "update": 2.009, "loss": "2.501", "ppl": "5.66", "wps": "364309", "ups": "3.09", "wpb": "117892", "bsz": "256", "num_updates": "103400", "lr": "0.000905657", "gnorm": "0.597", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "23.6", "wall": "33785"} [2022-08-01 10:52:18,314][train_inner][INFO] - {"epoch": 3, "update": 2.013, "loss": "2.489", "ppl": "5.62", "wps": "362558", "ups": "3.07", "wpb": "118205", "bsz": "256", "num_updates": "103600", "lr": "0.000905455", "gnorm": "0.597", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.1", "wall": "33851"} [2022-08-01 10:53:23,474][train_inner][INFO] - {"epoch": 3, "update": 2.017, "loss": "2.498", "ppl": "5.65", "wps": "364216", "ups": "3.07", "wpb": "118660", "bsz": "256", "num_updates": "103800", "lr": "0.000905253", "gnorm": "0.598", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "22.9", "wall": "33916"} [2022-08-01 10:53:51,941][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 [2022-08-01 10:53:52,861][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5 [2022-08-01 10:53:53,155][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.25 [2022-08-01 10:54:28,989][train_inner][INFO] - {"epoch": 3, "update": 2.021, "loss": "2.588", "ppl": "6.01", "wps": "359404", "ups": "3.05", "wpb": "117730", "bsz": "256", "num_updates": "104000", "lr": "0.000905051", "gnorm": "1.373", "clip": "1", "loss_scale": "0.25", "train_wall": "65", "gb_free": "21.1", "wall": "33981"} [2022-08-01 10:55:34,246][train_inner][INFO] - {"epoch": 3, "update": 2.025, "loss": "2.502", "ppl": "5.67", "wps": "362296", "ups": "3.06", "wpb": "118210", "bsz": "256", "num_updates": "104200", "lr": "0.000904848", "gnorm": "0.707", "clip": "0.5", "loss_scale": "0.25", "train_wall": "65", "gb_free": "21.9", "wall": "34047"} [2022-08-01 10:56:40,105][train_inner][INFO] - {"epoch": 3, "update": 2.029, "loss": "2.5", "ppl": "5.66", "wps": "358775", "ups": "3.04", "wpb": "118142", "bsz": "256", "num_updates": "104400", "lr": "0.000904646", "gnorm": "0.604", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "19.8", "wall": "34112"} [2022-08-01 10:57:45,153][train_inner][INFO] - {"epoch": 3, "update": 2.032, "loss": "2.497", "ppl": "5.65", "wps": "364337", "ups": "3.07", "wpb": "118494", "bsz": "256", "num_updates": "104600", "lr": "0.000904444", "gnorm": "0.598", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "26.1", "wall": "34177"} [2022-08-01 10:58:26,175][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.125 [2022-08-01 10:58:50,380][train_inner][INFO] - {"epoch": 3, "update": 2.036, "loss": "2.496", "ppl": "5.64", "wps": "362480", "ups": "3.07", "wpb": "118215", "bsz": "256", "num_updates": "104800", "lr": "0.000904242", "gnorm": "1.175", "clip": "0.5", "loss_scale": "0.125", "train_wall": "65", "gb_free": "23.7", "wall": "34243"} [2022-08-01 10:59:55,048][train_inner][INFO] - {"epoch": 3, "update": 2.04, "loss": "2.504", "ppl": "5.67", "wps": "365693", "ups": "3.09", "wpb": "118242", "bsz": "256", "num_updates": "105000", "lr": "0.00090404", "gnorm": "0.932", "clip": "1.5", "loss_scale": "0.125", "train_wall": "64", "gb_free": "20.1", "wall": "34307"} [2022-08-01 11:00:59,880][train_inner][INFO] - {"epoch": 3, "update": 2.044, "loss": "2.488", "ppl": "5.61", "wps": "365421", "ups": "3.08", "wpb": "118453", "bsz": "256", "num_updates": "105200", "lr": "0.000903838", "gnorm": "0.597", "clip": "0", "loss_scale": "0.125", "train_wall": "64", "gb_free": "22.4", "wall": "34372"} [2022-08-01 11:02:04,868][train_inner][INFO] - {"epoch": 3, "update": 2.048, "loss": "2.487", "ppl": "5.61", "wps": "364463", "ups": "3.08", "wpb": "118428", "bsz": "256", "num_updates": "105400", "lr": "0.000903636", "gnorm": "0.595", "clip": "0", "loss_scale": "0.125", "train_wall": "65", "gb_free": "24.4", "wall": "34437"} [2022-08-01 11:03:09,514][train_inner][INFO] - {"epoch": 3, "update": 2.052, "loss": "2.487", "ppl": "5.61", "wps": "367293", "ups": "3.09", "wpb": "118718", "bsz": "256", "num_updates": "105600", "lr": "0.000903434", "gnorm": "0.597", "clip": "0", "loss_scale": "0.125", "train_wall": "64", "gb_free": "23.5", "wall": "34502"} [2022-08-01 11:04:14,371][train_inner][INFO] - {"epoch": 3, "update": 2.056, "loss": "2.491", "ppl": "5.62", "wps": "363877", "ups": "3.08", "wpb": "117999", "bsz": "256", "num_updates": "105800", "lr": "0.000903232", "gnorm": "0.6", "clip": "0", "loss_scale": "0.125", "train_wall": "65", "gb_free": "26.4", "wall": "34567"} [2022-08-01 11:05:20,489][train_inner][INFO] - {"epoch": 3, "update": 2.06, "loss": "2.488", "ppl": "5.61", "wps": "358229", "ups": "3.02", "wpb": "118425", "bsz": "256", "num_updates": "106000", "lr": "0.00090303", "gnorm": "0.597", "clip": "0", "loss_scale": "0.125", "train_wall": "66", "gb_free": "28.2", "wall": "34633"} [2022-08-01 11:06:25,390][train_inner][INFO] - {"epoch": 3, "update": 2.064, "loss": "2.49", "ppl": "5.62", "wps": "365509", "ups": "3.08", "wpb": "118608", "bsz": "256", "num_updates": "106200", "lr": "0.000902828", "gnorm": "0.598", "clip": "0", "loss_scale": "0.125", "train_wall": "65", "gb_free": "21.6", "wall": "34698"} [2022-08-01 11:07:30,310][train_inner][INFO] - {"epoch": 3, "update": 2.067, "loss": "2.493", "ppl": "5.63", "wps": "363900", "ups": "3.08", "wpb": "118120", "bsz": "256", "num_updates": "106400", "lr": "0.000902626", "gnorm": "0.599", "clip": "0", "loss_scale": "0.125", "train_wall": "65", "gb_free": "21.7", "wall": "34763"} [2022-08-01 11:08:35,391][train_inner][INFO] - {"epoch": 3, "update": 2.071, "loss": "2.497", "ppl": "5.65", "wps": "362177", "ups": "3.07", "wpb": "117854", "bsz": "256", "num_updates": "106600", "lr": "0.000902424", "gnorm": "0.603", "clip": "0", "loss_scale": "0.125", "train_wall": "65", "gb_free": "20", "wall": "34828"} [2022-08-01 11:09:40,216][train_inner][INFO] - {"epoch": 3, "update": 2.075, "loss": "2.484", "ppl": "5.6", "wps": "366131", "ups": "3.09", "wpb": "118670", "bsz": "256", "num_updates": "106800", "lr": "0.000902222", "gnorm": "0.604", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "21", "wall": "34893"} [2022-08-01 11:10:45,122][train_inner][INFO] - {"epoch": 3, "update": 2.079, "loss": "2.484", "ppl": "5.59", "wps": "363443", "ups": "3.08", "wpb": "117946", "bsz": "256", "num_updates": "107000", "lr": "0.00090202", "gnorm": "0.601", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "27.1", "wall": "34957"} [2022-08-01 11:11:50,029][train_inner][INFO] - {"epoch": 3, "update": 2.083, "loss": "2.489", "ppl": "5.62", "wps": "366610", "ups": "3.08", "wpb": "118975", "bsz": "256", "num_updates": "107200", "lr": "0.000901818", "gnorm": "0.599", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "20.6", "wall": "35022"} [2022-08-01 11:12:54,891][train_inner][INFO] - {"epoch": 3, "update": 2.087, "loss": "2.484", "ppl": "5.59", "wps": "363795", "ups": "3.08", "wpb": "117982", "bsz": "256", "num_updates": "107400", "lr": "0.000901616", "gnorm": "0.603", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "21.5", "wall": "35087"} [2022-08-01 11:13:59,401][train_inner][INFO] - {"epoch": 3, "update": 2.091, "loss": "2.492", "ppl": "5.63", "wps": "364546", "ups": "3.1", "wpb": "117582", "bsz": "256", "num_updates": "107600", "lr": "0.000901414", "gnorm": "0.604", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "19.8", "wall": "35152"} [2022-08-01 11:15:04,444][train_inner][INFO] - {"epoch": 3, "update": 2.095, "loss": "2.492", "ppl": "5.62", "wps": "362518", "ups": "3.07", "wpb": "117895", "bsz": "256", "num_updates": "107800", "lr": "0.000901212", "gnorm": "0.604", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "24.3", "wall": "35217"} [2022-08-01 11:16:09,391][train_inner][INFO] - {"epoch": 3, "update": 2.098, "loss": "2.482", "ppl": "5.59", "wps": "364481", "ups": "3.08", "wpb": "118359", "bsz": "256", "num_updates": "108000", "lr": "0.00090101", "gnorm": "0.599", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "23.1", "wall": "35282"} [2022-08-01 11:17:14,394][train_inner][INFO] - {"epoch": 3, "update": 2.102, "loss": "2.484", "ppl": "5.59", "wps": "362609", "ups": "3.08", "wpb": "117851", "bsz": "256", "num_updates": "108200", "lr": "0.000900808", "gnorm": "0.599", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "22.1", "wall": "35347"} [2022-08-01 11:18:19,153][train_inner][INFO] - {"epoch": 3, "update": 2.106, "loss": "2.484", "ppl": "5.59", "wps": "366457", "ups": "3.09", "wpb": "118657", "bsz": "256", "num_updates": "108400", "lr": "0.000900606", "gnorm": "0.6", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "24.9", "wall": "35411"} [2022-08-01 11:19:23,750][train_inner][INFO] - {"epoch": 3, "update": 2.11, "loss": "2.481", "ppl": "5.58", "wps": "366603", "ups": "3.1", "wpb": "118404", "bsz": "256", "num_updates": "108600", "lr": "0.000900404", "gnorm": "0.603", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "19.7", "wall": "35476"} [2022-08-01 11:20:28,245][train_inner][INFO] - {"epoch": 3, "update": 2.114, "loss": "2.483", "ppl": "5.59", "wps": "368644", "ups": "3.1", "wpb": "118877", "bsz": "256", "num_updates": "108800", "lr": "0.000900202", "gnorm": "0.598", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "21.3", "wall": "35541"} [2022-08-01 11:21:32,914][train_inner][INFO] - {"epoch": 3, "update": 2.118, "loss": "2.478", "ppl": "5.57", "wps": "364762", "ups": "3.09", "wpb": "117942", "bsz": "256", "num_updates": "109000", "lr": "0.0009", "gnorm": "0.602", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20", "wall": "35605"} [2022-08-01 11:22:37,670][train_inner][INFO] - {"epoch": 3, "update": 2.122, "loss": "2.49", "ppl": "5.62", "wps": "365929", "ups": "3.09", "wpb": "118479", "bsz": "256", "num_updates": "109200", "lr": "0.000899798", "gnorm": "0.603", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20.4", "wall": "35670"} [2022-08-01 11:23:42,030][train_inner][INFO] - {"epoch": 3, "update": 2.126, "loss": "2.488", "ppl": "5.61", "wps": "366895", "ups": "3.11", "wpb": "118064", "bsz": "256", "num_updates": "109400", "lr": "0.000899596", "gnorm": "0.606", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20.2", "wall": "35734"} [2022-08-01 11:24:47,036][train_inner][INFO] - {"epoch": 3, "update": 2.13, "loss": "2.482", "ppl": "5.59", "wps": "366171", "ups": "3.08", "wpb": "119015", "bsz": "256", "num_updates": "109600", "lr": "0.000899394", "gnorm": "0.601", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "19.8", "wall": "35799"} [2022-08-01 11:25:51,976][train_inner][INFO] - {"epoch": 3, "update": 2.133, "loss": "2.49", "ppl": "5.62", "wps": "362875", "ups": "3.08", "wpb": "117824", "bsz": "256", "num_updates": "109800", "lr": "0.000899192", "gnorm": "0.607", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.2", "wall": "35864"} [2022-08-01 11:26:56,866][train_inner][INFO] - {"epoch": 3, "update": 2.137, "loss": "2.477", "ppl": "5.57", "wps": "364342", "ups": "3.08", "wpb": "118209", "bsz": "256", "num_updates": "110000", "lr": "0.00089899", "gnorm": "0.603", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "19.8", "wall": "35929"} [2022-08-01 11:28:01,769][train_inner][INFO] - {"epoch": 3, "update": 2.141, "loss": "2.477", "ppl": "5.57", "wps": "365186", "ups": "3.08", "wpb": "118506", "bsz": "256", "num_updates": "110200", "lr": "0.000898788", "gnorm": "0.603", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.6", "wall": "35994"} [2022-08-01 11:29:06,668][train_inner][INFO] - {"epoch": 3, "update": 2.145, "loss": "2.475", "ppl": "5.56", "wps": "364725", "ups": "3.08", "wpb": "118350", "bsz": "256", "num_updates": "110400", "lr": "0.000898586", "gnorm": "0.604", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "22.9", "wall": "36059"} [2022-08-01 11:30:11,980][train_inner][INFO] - {"epoch": 3, "update": 2.149, "loss": "2.478", "ppl": "5.57", "wps": "362818", "ups": "3.06", "wpb": "118479", "bsz": "256", "num_updates": "110600", "lr": "0.000898384", "gnorm": "0.603", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20", "wall": "36124"} [2022-08-01 11:31:16,939][train_inner][INFO] - {"epoch": 3, "update": 2.153, "loss": "2.479", "ppl": "5.57", "wps": "364714", "ups": "3.08", "wpb": "118456", "bsz": "256", "num_updates": "110800", "lr": "0.000898182", "gnorm": "0.605", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "24.1", "wall": "36189"} [2022-08-01 11:32:22,010][train_inner][INFO] - {"epoch": 3, "update": 2.157, "loss": "2.478", "ppl": "5.57", "wps": "364400", "ups": "3.07", "wpb": "118557", "bsz": "256", "num_updates": "111000", "lr": "0.00089798", "gnorm": "0.618", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.9", "wall": "36254"} [2022-08-01 11:33:27,014][train_inner][INFO] - {"epoch": 3, "update": 2.161, "loss": "2.48", "ppl": "5.58", "wps": "362542", "ups": "3.08", "wpb": "117832", "bsz": "256", "num_updates": "111200", "lr": "0.000897778", "gnorm": "0.606", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "24", "wall": "36319"} [2022-08-01 11:34:32,069][train_inner][INFO] - {"epoch": 3, "update": 2.165, "loss": "2.482", "ppl": "5.59", "wps": "364070", "ups": "3.07", "wpb": "118421", "bsz": "256", "num_updates": "111400", "lr": "0.000897576", "gnorm": "0.604", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "22", "wall": "36384"} [2022-08-01 11:35:37,854][train_inner][INFO] - {"epoch": 3, "update": 2.168, "loss": "2.486", "ppl": "5.6", "wps": "358534", "ups": "3.04", "wpb": "117929", "bsz": "256", "num_updates": "111600", "lr": "0.000897374", "gnorm": "0.605", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.3", "wall": "36450"} [2022-08-01 11:36:43,380][train_inner][INFO] - {"epoch": 3, "update": 2.172, "loss": "2.477", "ppl": "5.57", "wps": "361498", "ups": "3.05", "wpb": "118437", "bsz": "256", "num_updates": "111800", "lr": "0.000897172", "gnorm": "0.604", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.5", "wall": "36516"} [2022-08-01 11:37:48,459][train_inner][INFO] - {"epoch": 3, "update": 2.176, "loss": "2.482", "ppl": "5.59", "wps": "362201", "ups": "3.07", "wpb": "117856", "bsz": "256", "num_updates": "112000", "lr": "0.00089697", "gnorm": "0.62", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.4", "wall": "36581"} [2022-08-01 11:38:53,152][train_inner][INFO] - {"epoch": 3, "update": 2.18, "loss": "2.48", "ppl": "5.58", "wps": "365525", "ups": "3.09", "wpb": "118234", "bsz": "256", "num_updates": "112200", "lr": "0.000896768", "gnorm": "0.603", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20.9", "wall": "36645"} [2022-08-01 11:39:58,187][train_inner][INFO] - {"epoch": 3, "update": 2.184, "loss": "2.475", "ppl": "5.56", "wps": "364353", "ups": "3.08", "wpb": "118477", "bsz": "256", "num_updates": "112400", "lr": "0.000896566", "gnorm": "0.606", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "19.7", "wall": "36711"} [2022-08-01 11:41:02,664][train_inner][INFO] - {"epoch": 3, "update": 2.188, "loss": "2.478", "ppl": "5.57", "wps": "365613", "ups": "3.1", "wpb": "117865", "bsz": "256", "num_updates": "112600", "lr": "0.000896364", "gnorm": "0.606", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20.8", "wall": "36775"} [2022-08-01 11:42:07,773][train_inner][INFO] - {"epoch": 3, "update": 2.192, "loss": "2.48", "ppl": "5.58", "wps": "362926", "ups": "3.07", "wpb": "118148", "bsz": "256", "num_updates": "112800", "lr": "0.000896162", "gnorm": "0.612", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.2", "wall": "36840"} [2022-08-01 11:43:12,954][train_inner][INFO] - {"epoch": 3, "update": 2.196, "loss": "2.487", "ppl": "5.61", "wps": "361650", "ups": "3.07", "wpb": "117861", "bsz": "256", "num_updates": "113000", "lr": "0.00089596", "gnorm": "0.626", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "22.3", "wall": "36905"} [2022-08-01 11:44:17,631][train_inner][INFO] - {"epoch": 3, "update": 2.199, "loss": "2.479", "ppl": "5.57", "wps": "365457", "ups": "3.09", "wpb": "118182", "bsz": "256", "num_updates": "113200", "lr": "0.000895758", "gnorm": "0.61", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.5", "wall": "36970"} [2022-08-01 11:45:11,940][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 [2022-08-01 11:45:12,230][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5 [2022-08-01 11:45:12,840][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.25 [2022-08-01 11:45:13,121][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.125 [2022-08-01 11:45:23,523][train_inner][INFO] - {"epoch": 3, "update": 2.203, "loss": "2.515", "ppl": "5.72", "wps": "360268", "ups": "3.04", "wpb": "118691", "bsz": "256", "num_updates": "113400", "lr": "0.000895556", "gnorm": "1.209", "clip": "1", "loss_scale": "0.125", "train_wall": "66", "gb_free": "20.4", "wall": "37036"} [2022-08-01 11:46:28,097][train_inner][INFO] - {"epoch": 3, "update": 2.207, "loss": "2.482", "ppl": "5.58", "wps": "365575", "ups": "3.1", "wpb": "118033", "bsz": "256", "num_updates": "113600", "lr": "0.000895354", "gnorm": "0.612", "clip": "0", "loss_scale": "0.125", "train_wall": "64", "gb_free": "21.2", "wall": "37100"} [2022-08-01 11:47:32,945][train_inner][INFO] - {"epoch": 3, "update": 2.211, "loss": "2.482", "ppl": "5.59", "wps": "363016", "ups": "3.08", "wpb": "117701", "bsz": "256", "num_updates": "113800", "lr": "0.000895152", "gnorm": "0.607", "clip": "0", "loss_scale": "0.125", "train_wall": "64", "gb_free": "21.7", "wall": "37165"} [2022-08-01 11:48:37,853][train_inner][INFO] - {"epoch": 3, "update": 2.215, "loss": "2.479", "ppl": "5.57", "wps": "364335", "ups": "3.08", "wpb": "118241", "bsz": "256", "num_updates": "114000", "lr": "0.000894949", "gnorm": "0.613", "clip": "0", "loss_scale": "0.125", "train_wall": "65", "gb_free": "20", "wall": "37230"} [2022-08-01 11:49:42,880][train_inner][INFO] - {"epoch": 3, "update": 2.219, "loss": "2.478", "ppl": "5.57", "wps": "362811", "ups": "3.08", "wpb": "117959", "bsz": "256", "num_updates": "114200", "lr": "0.000894747", "gnorm": "0.611", "clip": "0", "loss_scale": "0.125", "train_wall": "65", "gb_free": "19.8", "wall": "37295"} [2022-08-01 11:50:47,847][train_inner][INFO] - {"epoch": 3, "update": 2.223, "loss": "2.472", "ppl": "5.55", "wps": "364122", "ups": "3.08", "wpb": "118278", "bsz": "256", "num_updates": "114400", "lr": "0.000894545", "gnorm": "0.605", "clip": "0", "loss_scale": "0.125", "train_wall": "65", "gb_free": "20", "wall": "37360"} [2022-08-01 11:51:52,527][train_inner][INFO] - {"epoch": 3, "update": 2.227, "loss": "2.471", "ppl": "5.54", "wps": "366565", "ups": "3.09", "wpb": "118545", "bsz": "256", "num_updates": "114600", "lr": "0.000894343", "gnorm": "0.608", "clip": "0", "loss_scale": "0.125", "train_wall": "64", "gb_free": "24", "wall": "37425"} [2022-08-01 11:52:57,370][train_inner][INFO] - {"epoch": 3, "update": 2.231, "loss": "2.477", "ppl": "5.57", "wps": "365223", "ups": "3.08", "wpb": "118410", "bsz": "256", "num_updates": "114800", "lr": "0.000894141", "gnorm": "0.608", "clip": "0", "loss_scale": "0.125", "train_wall": "64", "gb_free": "20.8", "wall": "37490"} [2022-08-01 11:54:02,133][train_inner][INFO] - {"epoch": 3, "update": 2.235, "loss": "2.476", "ppl": "5.56", "wps": "365608", "ups": "3.09", "wpb": "118387", "bsz": "256", "num_updates": "115000", "lr": "0.000893939", "gnorm": "0.611", "clip": "0", "loss_scale": "0.125", "train_wall": "64", "gb_free": "20.5", "wall": "37554"} [2022-08-01 11:55:07,059][train_inner][INFO] - {"epoch": 3, "update": 2.238, "loss": "2.479", "ppl": "5.57", "wps": "365535", "ups": "3.08", "wpb": "118662", "bsz": "256", "num_updates": "115200", "lr": "0.000893737", "gnorm": "0.61", "clip": "0", "loss_scale": "0.125", "train_wall": "65", "gb_free": "19.8", "wall": "37619"} [2022-08-01 11:56:12,102][train_inner][INFO] - {"epoch": 3, "update": 2.242, "loss": "2.473", "ppl": "5.55", "wps": "362685", "ups": "3.07", "wpb": "117948", "bsz": "255.9", "num_updates": "115400", "lr": "0.000893535", "gnorm": "0.611", "clip": "0", "loss_scale": "0.125", "train_wall": "65", "gb_free": "22.7", "wall": "37684"} [2022-08-01 11:57:17,211][train_inner][INFO] - {"epoch": 3, "update": 2.246, "loss": "2.471", "ppl": "5.55", "wps": "364470", "ups": "3.07", "wpb": "118649", "bsz": "256", "num_updates": "115600", "lr": "0.000893333", "gnorm": "0.615", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "24", "wall": "37750"} [2022-08-01 11:58:22,094][train_inner][INFO] - {"epoch": 3, "update": 2.25, "loss": "2.466", "ppl": "5.52", "wps": "366981", "ups": "3.08", "wpb": "119053", "bsz": "256", "num_updates": "115800", "lr": "0.000893131", "gnorm": "0.609", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "20.4", "wall": "37814"} [2022-08-01 11:59:27,146][train_inner][INFO] - {"epoch": 3, "update": 2.254, "loss": "2.469", "ppl": "5.54", "wps": "363453", "ups": "3.07", "wpb": "118215", "bsz": "256", "num_updates": "116000", "lr": "0.000892929", "gnorm": "0.619", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "25.2", "wall": "37879"} [2022-08-01 12:00:32,246][train_inner][INFO] - {"epoch": 3, "update": 2.258, "loss": "2.472", "ppl": "5.55", "wps": "363236", "ups": "3.07", "wpb": "118232", "bsz": "256", "num_updates": "116200", "lr": "0.000892727", "gnorm": "0.611", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "20.6", "wall": "37945"} [2022-08-01 12:01:37,201][train_inner][INFO] - {"epoch": 3, "update": 2.262, "loss": "2.473", "ppl": "5.55", "wps": "365851", "ups": "3.08", "wpb": "118818", "bsz": "256", "num_updates": "116400", "lr": "0.000892525", "gnorm": "0.612", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "19.8", "wall": "38010"} [2022-08-01 12:02:41,903][train_inner][INFO] - {"epoch": 3, "update": 2.266, "loss": "2.476", "ppl": "5.56", "wps": "365040", "ups": "3.09", "wpb": "118091", "bsz": "256", "num_updates": "116600", "lr": "0.000892323", "gnorm": "0.613", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "20.2", "wall": "38074"} [2022-08-01 12:03:47,120][train_inner][INFO] - {"epoch": 3, "update": 2.269, "loss": "2.472", "ppl": "5.55", "wps": "362454", "ups": "3.07", "wpb": "118189", "bsz": "256", "num_updates": "116800", "lr": "0.000892121", "gnorm": "0.612", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "22.9", "wall": "38139"} [2022-08-01 12:04:51,938][train_inner][INFO] - {"epoch": 3, "update": 2.273, "loss": "2.476", "ppl": "5.56", "wps": "363713", "ups": "3.09", "wpb": "117874", "bsz": "256", "num_updates": "117000", "lr": "0.000891919", "gnorm": "0.617", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "25.5", "wall": "38204"} [2022-08-01 12:05:56,538][train_inner][INFO] - {"epoch": 3, "update": 2.277, "loss": "2.477", "ppl": "5.57", "wps": "364520", "ups": "3.1", "wpb": "117738", "bsz": "256", "num_updates": "117200", "lr": "0.000891717", "gnorm": "0.62", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "21.4", "wall": "38269"} [2022-08-01 12:07:01,544][train_inner][INFO] - {"epoch": 3, "update": 2.281, "loss": "2.467", "ppl": "5.53", "wps": "365234", "ups": "3.08", "wpb": "118710", "bsz": "256", "num_updates": "117400", "lr": "0.000891515", "gnorm": "0.614", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "24.3", "wall": "38334"} [2022-08-01 12:08:06,262][train_inner][INFO] - {"epoch": 3, "update": 2.285, "loss": "2.469", "ppl": "5.54", "wps": "364709", "ups": "3.09", "wpb": "118012", "bsz": "256", "num_updates": "117600", "lr": "0.000891313", "gnorm": "0.616", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "19.7", "wall": "38399"} [2022-08-01 12:09:10,972][train_inner][INFO] - {"epoch": 3, "update": 2.289, "loss": "2.472", "ppl": "5.55", "wps": "366764", "ups": "3.09", "wpb": "118666", "bsz": "256", "num_updates": "117800", "lr": "0.000891111", "gnorm": "0.613", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20.1", "wall": "38463"} [2022-08-01 12:10:16,243][train_inner][INFO] - {"epoch": 3, "update": 2.293, "loss": "2.465", "ppl": "5.52", "wps": "363663", "ups": "3.06", "wpb": "118680", "bsz": "256", "num_updates": "118000", "lr": "0.000890909", "gnorm": "0.613", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "19.8", "wall": "38529"} [2022-08-01 12:11:21,345][train_inner][INFO] - {"epoch": 3, "update": 2.297, "loss": "2.47", "ppl": "5.54", "wps": "364575", "ups": "3.07", "wpb": "118671", "bsz": "256", "num_updates": "118200", "lr": "0.000890707", "gnorm": "0.615", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.2", "wall": "38594"} [2022-08-01 12:12:26,062][train_inner][INFO] - {"epoch": 3, "update": 2.301, "loss": "2.466", "ppl": "5.52", "wps": "365970", "ups": "3.09", "wpb": "118422", "bsz": "256", "num_updates": "118400", "lr": "0.000890505", "gnorm": "0.614", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "23", "wall": "38658"} [2022-08-01 12:13:30,926][train_inner][INFO] - {"epoch": 3, "update": 2.304, "loss": "2.468", "ppl": "5.53", "wps": "365667", "ups": "3.08", "wpb": "118590", "bsz": "256", "num_updates": "118600", "lr": "0.000890303", "gnorm": "0.611", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "22.6", "wall": "38723"} [2022-08-01 12:14:35,801][train_inner][INFO] - {"epoch": 3, "update": 2.308, "loss": "2.466", "ppl": "5.53", "wps": "365708", "ups": "3.08", "wpb": "118626", "bsz": "256", "num_updates": "118800", "lr": "0.000890101", "gnorm": "0.616", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "21.5", "wall": "38788"} [2022-08-01 12:15:41,137][train_inner][INFO] - {"epoch": 3, "update": 2.312, "loss": "2.463", "ppl": "5.52", "wps": "363718", "ups": "3.06", "wpb": "118817", "bsz": "256", "num_updates": "119000", "lr": "0.000889899", "gnorm": "0.615", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "21.2", "wall": "38853"} [2022-08-01 12:16:46,102][train_inner][INFO] - {"epoch": 3, "update": 2.316, "loss": "2.468", "ppl": "5.53", "wps": "363967", "ups": "3.08", "wpb": "118224", "bsz": "256", "num_updates": "119200", "lr": "0.000889697", "gnorm": "0.635", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.5", "wall": "38918"} [2022-08-01 12:17:50,926][train_inner][INFO] - {"epoch": 3, "update": 2.32, "loss": "2.468", "ppl": "5.53", "wps": "365942", "ups": "3.09", "wpb": "118608", "bsz": "256", "num_updates": "119400", "lr": "0.000889495", "gnorm": "0.617", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "19.7", "wall": "38983"} [2022-08-01 12:18:55,747][train_inner][INFO] - {"epoch": 3, "update": 2.324, "loss": "2.474", "ppl": "5.56", "wps": "363580", "ups": "3.09", "wpb": "117836", "bsz": "256", "num_updates": "119600", "lr": "0.000889293", "gnorm": "0.62", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "26.3", "wall": "39048"} [2022-08-01 12:20:00,766][train_inner][INFO] - {"epoch": 3, "update": 2.328, "loss": "2.47", "ppl": "5.54", "wps": "363184", "ups": "3.08", "wpb": "118067", "bsz": "256", "num_updates": "119800", "lr": "0.000889091", "gnorm": "0.618", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.5", "wall": "39113"} [2022-08-01 12:21:05,423][train_inner][INFO] - {"epoch": 3, "update": 2.332, "loss": "2.473", "ppl": "5.55", "wps": "364172", "ups": "3.09", "wpb": "117730", "bsz": "256", "num_updates": "120000", "lr": "0.000888889", "gnorm": "0.617", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20", "wall": "39178"} [2022-08-01 12:22:10,492][train_inner][INFO] - {"epoch": 3, "update": 2.336, "loss": "2.465", "ppl": "5.52", "wps": "362946", "ups": "3.07", "wpb": "118081", "bsz": "256", "num_updates": "120200", "lr": "0.000888687", "gnorm": "0.626", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.7", "wall": "39243"} [2022-08-01 12:23:14,877][train_inner][INFO] - {"epoch": 3, "update": 2.339, "loss": "2.466", "ppl": "5.52", "wps": "366543", "ups": "3.11", "wpb": "117998", "bsz": "256", "num_updates": "120400", "lr": "0.000888485", "gnorm": "0.62", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "21.3", "wall": "39307"} [2022-08-01 12:24:20,291][train_inner][INFO] - {"epoch": 3, "update": 2.343, "loss": "2.461", "ppl": "5.51", "wps": "362328", "ups": "3.06", "wpb": "118504", "bsz": "256", "num_updates": "120600", "lr": "0.000888283", "gnorm": "0.622", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "26.8", "wall": "39373"} [2022-08-01 12:25:24,842][train_inner][INFO] - {"epoch": 3, "update": 2.347, "loss": "2.468", "ppl": "5.53", "wps": "365696", "ups": "3.1", "wpb": "118030", "bsz": "256", "num_updates": "120800", "lr": "0.000888081", "gnorm": "0.622", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20.7", "wall": "39437"} [2022-08-01 12:26:30,113][train_inner][INFO] - {"epoch": 3, "update": 2.351, "loss": "2.458", "ppl": "5.5", "wps": "363602", "ups": "3.06", "wpb": "118662", "bsz": "256", "num_updates": "121000", "lr": "0.000887879", "gnorm": "0.619", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "19.7", "wall": "39502"} [2022-08-01 12:27:35,320][train_inner][INFO] - {"epoch": 3, "update": 2.355, "loss": "2.464", "ppl": "5.52", "wps": "363579", "ups": "3.07", "wpb": "118538", "bsz": "256", "num_updates": "121200", "lr": "0.000887677", "gnorm": "0.623", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "22", "wall": "39568"} [2022-08-01 12:28:40,013][train_inner][INFO] - {"epoch": 3, "update": 2.359, "loss": "2.475", "ppl": "5.56", "wps": "365325", "ups": "3.09", "wpb": "118167", "bsz": "256", "num_updates": "121400", "lr": "0.000887475", "gnorm": "0.625", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20.3", "wall": "39632"} [2022-08-01 12:29:44,781][train_inner][INFO] - {"epoch": 3, "update": 2.363, "loss": "2.459", "ppl": "5.5", "wps": "364842", "ups": "3.09", "wpb": "118149", "bsz": "256", "num_updates": "121600", "lr": "0.000887273", "gnorm": "0.621", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "23", "wall": "39697"} [2022-08-01 12:30:49,674][train_inner][INFO] - {"epoch": 3, "update": 2.367, "loss": "2.462", "ppl": "5.51", "wps": "364878", "ups": "3.08", "wpb": "118390", "bsz": "256", "num_updates": "121800", "lr": "0.000887071", "gnorm": "0.626", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.5", "wall": "39762"} [2022-08-01 12:31:54,369][train_inner][INFO] - {"epoch": 3, "update": 2.37, "loss": "2.461", "ppl": "5.5", "wps": "366016", "ups": "3.09", "wpb": "118395", "bsz": "256", "num_updates": "122000", "lr": "0.000886869", "gnorm": "0.628", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.1", "wall": "39827"} [2022-08-01 12:32:59,149][train_inner][INFO] - {"epoch": 3, "update": 2.374, "loss": "2.463", "ppl": "5.51", "wps": "365483", "ups": "3.09", "wpb": "118378", "bsz": "256", "num_updates": "122200", "lr": "0.000886667", "gnorm": "0.625", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "25.3", "wall": "39891"} [2022-08-01 12:34:03,790][train_inner][INFO] - {"epoch": 3, "update": 2.378, "loss": "2.46", "ppl": "5.5", "wps": "365235", "ups": "3.09", "wpb": "118045", "bsz": "256", "num_updates": "122400", "lr": "0.000886465", "gnorm": "0.623", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.4", "wall": "39956"} [2022-08-01 12:35:08,682][train_inner][INFO] - {"epoch": 3, "update": 2.382, "loss": "2.458", "ppl": "5.49", "wps": "364070", "ups": "3.08", "wpb": "118125", "bsz": "255.9", "num_updates": "122600", "lr": "0.000886263", "gnorm": "0.627", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.2", "wall": "40021"} [2022-08-01 12:36:13,473][train_inner][INFO] - {"epoch": 3, "update": 2.386, "loss": "2.464", "ppl": "5.52", "wps": "362520", "ups": "3.09", "wpb": "117439", "bsz": "256", "num_updates": "122800", "lr": "0.000886061", "gnorm": "0.631", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.3", "wall": "40086"} [2022-08-01 12:37:18,277][train_inner][INFO] - {"epoch": 3, "update": 2.39, "loss": "2.463", "ppl": "5.51", "wps": "364659", "ups": "3.09", "wpb": "118154", "bsz": "256", "num_updates": "123000", "lr": "0.000885859", "gnorm": "0.629", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "24.7", "wall": "40151"} [2022-08-01 12:38:23,278][train_inner][INFO] - {"epoch": 3, "update": 2.394, "loss": "2.457", "ppl": "5.49", "wps": "363852", "ups": "3.08", "wpb": "118252", "bsz": "256", "num_updates": "123200", "lr": "0.000885657", "gnorm": "0.626", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "24.2", "wall": "40216"} [2022-08-01 12:39:28,254][train_inner][INFO] - {"epoch": 3, "update": 2.398, "loss": "2.457", "ppl": "5.49", "wps": "365312", "ups": "3.08", "wpb": "118680", "bsz": "256", "num_updates": "123400", "lr": "0.000885455", "gnorm": "0.623", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.7", "wall": "40281"} [2022-08-01 12:40:32,892][train_inner][INFO] - {"epoch": 3, "update": 2.402, "loss": "2.457", "ppl": "5.49", "wps": "365320", "ups": "3.09", "wpb": "118066", "bsz": "256", "num_updates": "123600", "lr": "0.000885253", "gnorm": "0.63", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.2", "wall": "40345"} [2022-08-01 12:41:37,888][train_inner][INFO] - {"epoch": 3, "update": 2.405, "loss": "2.457", "ppl": "5.49", "wps": "365419", "ups": "3.08", "wpb": "118753", "bsz": "256", "num_updates": "123800", "lr": "0.000885051", "gnorm": "0.63", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.2", "wall": "40410"} [2022-08-01 12:42:42,834][train_inner][INFO] - {"epoch": 3, "update": 2.409, "loss": "2.459", "ppl": "5.5", "wps": "362359", "ups": "3.08", "wpb": "117667", "bsz": "256", "num_updates": "124000", "lr": "0.000884848", "gnorm": "0.628", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.8", "wall": "40475"} [2022-08-01 12:43:48,011][train_inner][INFO] - {"epoch": 3, "update": 2.413, "loss": "2.467", "ppl": "5.53", "wps": "360417", "ups": "3.07", "wpb": "117453", "bsz": "256", "num_updates": "124200", "lr": "0.000884646", "gnorm": "0.629", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "24.4", "wall": "40540"} [2022-08-01 12:44:52,875][train_inner][INFO] - {"epoch": 3, "update": 2.417, "loss": "2.458", "ppl": "5.49", "wps": "366859", "ups": "3.08", "wpb": "118977", "bsz": "256", "num_updates": "124400", "lr": "0.000884444", "gnorm": "0.623", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.4", "wall": "40605"} [2022-08-01 12:45:57,536][train_inner][INFO] - {"epoch": 3, "update": 2.421, "loss": "2.462", "ppl": "5.51", "wps": "366324", "ups": "3.09", "wpb": "118433", "bsz": "256", "num_updates": "124600", "lr": "0.000884242", "gnorm": "0.624", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.2", "wall": "40670"} [2022-08-01 12:47:02,194][train_inner][INFO] - {"epoch": 3, "update": 2.425, "loss": "2.457", "ppl": "5.49", "wps": "366877", "ups": "3.09", "wpb": "118606", "bsz": "256", "num_updates": "124800", "lr": "0.00088404", "gnorm": "0.634", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "25.8", "wall": "40735"} [2022-08-01 12:48:06,986][train_inner][INFO] - {"epoch": 3, "update": 2.429, "loss": "2.453", "ppl": "5.48", "wps": "365159", "ups": "3.09", "wpb": "118294", "bsz": "256", "num_updates": "125000", "lr": "0.000883838", "gnorm": "0.625", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22.3", "wall": "40799"} [2022-08-01 12:49:12,197][train_inner][INFO] - {"epoch": 3, "update": 2.433, "loss": "2.452", "ppl": "5.47", "wps": "366723", "ups": "3.07", "wpb": "119570", "bsz": "256", "num_updates": "125200", "lr": "0.000883636", "gnorm": "0.625", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.5", "wall": "40865"} [2022-08-01 12:50:17,190][train_inner][INFO] - {"epoch": 3, "update": 2.437, "loss": "2.452", "ppl": "5.47", "wps": "364539", "ups": "3.08", "wpb": "118461", "bsz": "256", "num_updates": "125400", "lr": "0.000883434", "gnorm": "0.63", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.6", "wall": "40930"} [2022-08-01 12:51:22,411][train_inner][INFO] - {"epoch": 3, "update": 2.44, "loss": "2.459", "ppl": "5.5", "wps": "363345", "ups": "3.07", "wpb": "118487", "bsz": "256", "num_updates": "125600", "lr": "0.000883232", "gnorm": "0.624", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "24", "wall": "40995"} [2022-08-01 12:52:27,038][train_inner][INFO] - {"epoch": 3, "update": 2.444, "loss": "2.463", "ppl": "5.51", "wps": "364014", "ups": "3.09", "wpb": "117623", "bsz": "256", "num_updates": "125800", "lr": "0.00088303", "gnorm": "0.635", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.8", "wall": "41059"} [2022-08-01 12:53:31,878][train_inner][INFO] - {"epoch": 3, "update": 2.448, "loss": "2.459", "ppl": "5.5", "wps": "364919", "ups": "3.08", "wpb": "118305", "bsz": "256", "num_updates": "126000", "lr": "0.000882828", "gnorm": "0.625", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.8", "wall": "41124"} [2022-08-01 12:54:37,186][train_inner][INFO] - {"epoch": 3, "update": 2.452, "loss": "2.454", "ppl": "5.48", "wps": "363431", "ups": "3.06", "wpb": "118673", "bsz": "256", "num_updates": "126200", "lr": "0.000882626", "gnorm": "0.627", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.5", "wall": "41190"} [2022-08-01 12:55:41,917][train_inner][INFO] - {"epoch": 3, "update": 2.456, "loss": "2.462", "ppl": "5.51", "wps": "363484", "ups": "3.09", "wpb": "117642", "bsz": "256", "num_updates": "126400", "lr": "0.000882424", "gnorm": "0.636", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.8", "wall": "41254"} [2022-08-01 12:56:45,741][train_inner][INFO] - {"epoch": 3, "update": 2.46, "loss": "2.454", "ppl": "5.48", "wps": "370140", "ups": "3.13", "wpb": "118118", "bsz": "256", "num_updates": "126600", "lr": "0.000882222", "gnorm": "0.626", "clip": "0", "loss_scale": "8", "train_wall": "63", "gb_free": "22.1", "wall": "41318"} [2022-08-01 12:57:50,394][train_inner][INFO] - {"epoch": 3, "update": 2.464, "loss": "2.459", "ppl": "5.5", "wps": "364485", "ups": "3.09", "wpb": "117824", "bsz": "256", "num_updates": "126800", "lr": "0.00088202", "gnorm": "0.636", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.3", "wall": "41383"} [2022-08-01 12:58:56,265][train_inner][INFO] - {"epoch": 3, "update": 2.468, "loss": "2.453", "ppl": "5.47", "wps": "360250", "ups": "3.04", "wpb": "118648", "bsz": "256", "num_updates": "127000", "lr": "0.000881818", "gnorm": "0.631", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "21.9", "wall": "41449"} [2022-08-01 13:00:00,979][train_inner][INFO] - {"epoch": 3, "update": 2.471, "loss": "2.452", "ppl": "5.47", "wps": "366514", "ups": "3.09", "wpb": "118590", "bsz": "256", "num_updates": "127200", "lr": "0.000881616", "gnorm": "0.629", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22", "wall": "41513"} [2022-08-01 13:01:05,739][train_inner][INFO] - {"epoch": 3, "update": 2.475, "loss": "2.46", "ppl": "5.5", "wps": "364049", "ups": "3.09", "wpb": "117878", "bsz": "256", "num_updates": "127400", "lr": "0.000881414", "gnorm": "0.633", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.6", "wall": "41578"} [2022-08-01 13:02:10,397][train_inner][INFO] - {"epoch": 3, "update": 2.479, "loss": "2.451", "ppl": "5.47", "wps": "366429", "ups": "3.09", "wpb": "118461", "bsz": "256", "num_updates": "127600", "lr": "0.000881212", "gnorm": "0.629", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.6", "wall": "41643"} [2022-08-01 13:03:15,214][train_inner][INFO] - {"epoch": 3, "update": 2.483, "loss": "2.456", "ppl": "5.49", "wps": "363294", "ups": "3.09", "wpb": "117736", "bsz": "256", "num_updates": "127800", "lr": "0.00088101", "gnorm": "0.634", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.7", "wall": "41708"} [2022-08-01 13:04:20,192][train_inner][INFO] - {"epoch": 3, "update": 2.487, "loss": "2.451", "ppl": "5.47", "wps": "365423", "ups": "3.08", "wpb": "118721", "bsz": "256", "num_updates": "128000", "lr": "0.000880808", "gnorm": "0.629", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.7", "wall": "41773"} [2022-08-01 13:05:24,934][train_inner][INFO] - {"epoch": 3, "update": 2.491, "loss": "2.457", "ppl": "5.49", "wps": "365927", "ups": "3.09", "wpb": "118451", "bsz": "256", "num_updates": "128200", "lr": "0.000880606", "gnorm": "0.631", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20", "wall": "41837"} [2022-08-01 13:06:29,486][train_inner][INFO] - {"epoch": 3, "update": 2.495, "loss": "2.445", "ppl": "5.44", "wps": "367012", "ups": "3.1", "wpb": "118456", "bsz": "256", "num_updates": "128400", "lr": "0.000880404", "gnorm": "0.628", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "41902"} [2022-08-01 13:07:34,395][train_inner][INFO] - {"epoch": 3, "update": 2.499, "loss": "2.45", "ppl": "5.46", "wps": "366682", "ups": "3.08", "wpb": "119003", "bsz": "256", "num_updates": "128600", "lr": "0.000880202", "gnorm": "0.63", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.9", "wall": "41967"} [2022-08-01 13:08:39,362][train_inner][INFO] - {"epoch": 3, "update": 2.503, "loss": "2.444", "ppl": "5.44", "wps": "363612", "ups": "3.08", "wpb": "118112", "bsz": "256", "num_updates": "128800", "lr": "0.00088", "gnorm": "0.632", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "42032"} [2022-08-01 13:09:44,262][train_inner][INFO] - {"epoch": 3, "update": 2.506, "loss": "2.445", "ppl": "5.44", "wps": "364237", "ups": "3.08", "wpb": "118192", "bsz": "256", "num_updates": "129000", "lr": "0.000879798", "gnorm": "0.63", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24", "wall": "42097"} [2022-08-01 13:10:49,063][train_inner][INFO] - {"epoch": 3, "update": 2.51, "loss": "2.459", "ppl": "5.5", "wps": "364854", "ups": "3.09", "wpb": "118213", "bsz": "256", "num_updates": "129200", "lr": "0.000879596", "gnorm": "0.636", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20", "wall": "42161"} [2022-08-01 13:11:53,770][train_inner][INFO] - {"epoch": 3, "update": 2.514, "loss": "2.451", "ppl": "5.47", "wps": "365174", "ups": "3.09", "wpb": "118144", "bsz": "256", "num_updates": "129400", "lr": "0.000879394", "gnorm": "0.633", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.5", "wall": "42226"} [2022-08-01 13:12:58,776][train_inner][INFO] - {"epoch": 3, "update": 2.518, "loss": "2.447", "ppl": "5.45", "wps": "364760", "ups": "3.08", "wpb": "118557", "bsz": "256", "num_updates": "129600", "lr": "0.000879192", "gnorm": "0.631", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "42291"} [2022-08-01 13:13:52,926][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-01 13:14:03,925][train_inner][INFO] - {"epoch": 3, "update": 2.522, "loss": "2.447", "ppl": "5.45", "wps": "361118", "ups": "3.07", "wpb": "117630", "bsz": "256", "num_updates": "129800", "lr": "0.00087899", "gnorm": "0.631", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.3", "wall": "42356"} [2022-08-01 13:15:08,812][train_inner][INFO] - {"epoch": 3, "update": 2.526, "loss": "2.458", "ppl": "5.49", "wps": "364326", "ups": "3.08", "wpb": "118199", "bsz": "256", "num_updates": "130000", "lr": "0.000878788", "gnorm": "0.634", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.7", "wall": "42421"} [2022-08-01 13:16:13,497][train_inner][INFO] - {"epoch": 3, "update": 2.53, "loss": "2.448", "ppl": "5.46", "wps": "365893", "ups": "3.09", "wpb": "118337", "bsz": "256", "num_updates": "130200", "lr": "0.000878586", "gnorm": "0.629", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "25.6", "wall": "42486"} [2022-08-01 13:17:18,093][train_inner][INFO] - {"epoch": 3, "update": 2.534, "loss": "2.449", "ppl": "5.46", "wps": "364837", "ups": "3.1", "wpb": "117834", "bsz": "256", "num_updates": "130400", "lr": "0.000878384", "gnorm": "0.634", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.3", "wall": "42550"} [2022-08-01 13:17:23,519][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-01 13:18:24,361][train_inner][INFO] - {"epoch": 3, "update": 2.538, "loss": "2.443", "ppl": "5.44", "wps": "357690", "ups": "3.02", "wpb": "118516", "bsz": "256", "num_updates": "130600", "lr": "0.000878182", "gnorm": "0.64", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "23.7", "wall": "42617"} [2022-08-01 13:19:29,072][train_inner][INFO] - {"epoch": 3, "update": 2.541, "loss": "2.445", "ppl": "5.45", "wps": "365136", "ups": "3.09", "wpb": "118140", "bsz": "256", "num_updates": "130800", "lr": "0.00087798", "gnorm": "0.64", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20", "wall": "42681"} [2022-08-01 13:20:34,061][train_inner][INFO] - {"epoch": 3, "update": 2.545, "loss": "2.445", "ppl": "5.44", "wps": "364871", "ups": "3.08", "wpb": "118562", "bsz": "256", "num_updates": "131000", "lr": "0.000877778", "gnorm": "0.635", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "42746"} [2022-08-01 13:21:39,161][train_inner][INFO] - {"epoch": 3, "update": 2.549, "loss": "2.445", "ppl": "5.44", "wps": "365121", "ups": "3.07", "wpb": "118844", "bsz": "256", "num_updates": "131200", "lr": "0.000877576", "gnorm": "0.632", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "42811"} [2022-08-01 13:22:44,010][train_inner][INFO] - {"epoch": 3, "update": 2.553, "loss": "2.448", "ppl": "5.46", "wps": "364805", "ups": "3.08", "wpb": "118286", "bsz": "256", "num_updates": "131400", "lr": "0.000877374", "gnorm": "0.634", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.5", "wall": "42876"} [2022-08-01 13:23:49,148][train_inner][INFO] - {"epoch": 3, "update": 2.557, "loss": "2.45", "ppl": "5.47", "wps": "363588", "ups": "3.07", "wpb": "118415", "bsz": "256", "num_updates": "131600", "lr": "0.000877172", "gnorm": "0.636", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.1", "wall": "42941"} [2022-08-01 13:24:53,844][train_inner][INFO] - {"epoch": 3, "update": 2.561, "loss": "2.447", "ppl": "5.45", "wps": "366328", "ups": "3.09", "wpb": "118496", "bsz": "256", "num_updates": "131800", "lr": "0.00087697", "gnorm": "0.638", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.9", "wall": "43006"} [2022-08-01 13:25:59,075][train_inner][INFO] - {"epoch": 3, "update": 2.565, "loss": "2.443", "ppl": "5.44", "wps": "364066", "ups": "3.07", "wpb": "118741", "bsz": "256", "num_updates": "132000", "lr": "0.000876768", "gnorm": "0.634", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.6", "wall": "43071"} [2022-08-01 13:26:04,195][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 [2022-08-01 13:27:04,438][train_inner][INFO] - {"epoch": 3, "update": 2.569, "loss": "2.45", "ppl": "5.47", "wps": "361260", "ups": "3.06", "wpb": "118064", "bsz": "256", "num_updates": "132200", "lr": "0.000876566", "gnorm": "0.638", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "26.3", "wall": "43137"} [2022-08-01 13:28:09,433][train_inner][INFO] - {"epoch": 3, "update": 2.573, "loss": "2.451", "ppl": "5.47", "wps": "362710", "ups": "3.08", "wpb": "117871", "bsz": "256", "num_updates": "132400", "lr": "0.000876364", "gnorm": "0.635", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20", "wall": "43202"} [2022-08-01 13:29:14,618][train_inner][INFO] - {"epoch": 3, "update": 2.576, "loss": "2.445", "ppl": "5.44", "wps": "363595", "ups": "3.07", "wpb": "118504", "bsz": "256", "num_updates": "132600", "lr": "0.000876162", "gnorm": "0.631", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.9", "wall": "43267"} [2022-08-01 13:30:19,657][train_inner][INFO] - {"epoch": 3, "update": 2.58, "loss": "2.444", "ppl": "5.44", "wps": "364038", "ups": "3.08", "wpb": "118380", "bsz": "256", "num_updates": "132800", "lr": "0.00087596", "gnorm": "0.634", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "24.1", "wall": "43332"} [2022-08-01 13:31:24,532][train_inner][INFO] - {"epoch": 3, "update": 2.584, "loss": "2.441", "ppl": "5.43", "wps": "364504", "ups": "3.08", "wpb": "118235", "bsz": "256", "num_updates": "133000", "lr": "0.000875758", "gnorm": "0.636", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23", "wall": "43397"} [2022-08-01 13:31:33,444][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 [2022-08-01 13:31:33,732][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 [2022-08-01 13:32:29,715][train_inner][INFO] - {"epoch": 3, "update": 2.588, "loss": "2.45", "ppl": "5.46", "wps": "362923", "ups": "3.07", "wpb": "118279", "bsz": "256", "num_updates": "133200", "lr": "0.000875556", "gnorm": "0.667", "clip": "0.5", "loss_scale": "1", "train_wall": "65", "gb_free": "20.9", "wall": "43462"} [2022-08-01 13:33:34,646][train_inner][INFO] - {"epoch": 3, "update": 2.592, "loss": "2.441", "ppl": "5.43", "wps": "364434", "ups": "3.08", "wpb": "118315", "bsz": "256", "num_updates": "133400", "lr": "0.000875354", "gnorm": "0.635", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20", "wall": "43527"} [2022-08-01 13:33:51,703][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5 [2022-08-01 13:34:39,940][train_inner][INFO] - {"epoch": 3, "update": 2.596, "loss": "2.441", "ppl": "5.43", "wps": "362998", "ups": "3.06", "wpb": "118508", "bsz": "256", "num_updates": "133600", "lr": "0.000875152", "gnorm": "0.635", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20", "wall": "43592"} [2022-08-01 13:35:44,463][train_inner][INFO] - {"epoch": 3, "update": 2.6, "loss": "2.443", "ppl": "5.44", "wps": "365987", "ups": "3.1", "wpb": "118071", "bsz": "256", "num_updates": "133800", "lr": "0.000874949", "gnorm": "0.633", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "21.4", "wall": "43657"} [2022-08-01 13:36:49,252][train_inner][INFO] - {"epoch": 3, "update": 2.604, "loss": "2.437", "ppl": "5.42", "wps": "366477", "ups": "3.09", "wpb": "118717", "bsz": "256", "num_updates": "134000", "lr": "0.000874747", "gnorm": "0.632", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "24.3", "wall": "43722"} [2022-08-01 13:37:54,147][train_inner][INFO] - {"epoch": 3, "update": 2.608, "loss": "2.443", "ppl": "5.44", "wps": "363709", "ups": "3.08", "wpb": "118013", "bsz": "256", "num_updates": "134200", "lr": "0.000874545", "gnorm": "0.636", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.2", "wall": "43786"} [2022-08-01 13:38:58,764][train_inner][INFO] - {"epoch": 3, "update": 2.611, "loss": "2.434", "ppl": "5.4", "wps": "368940", "ups": "3.1", "wpb": "119198", "bsz": "256", "num_updates": "134400", "lr": "0.000874343", "gnorm": "0.631", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "28.2", "wall": "43851"} [2022-08-01 13:40:03,355][train_inner][INFO] - {"epoch": 3, "update": 2.615, "loss": "2.442", "ppl": "5.43", "wps": "366190", "ups": "3.1", "wpb": "118261", "bsz": "256", "num_updates": "134600", "lr": "0.000874141", "gnorm": "0.635", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "24.8", "wall": "43916"} [2022-08-01 13:41:08,270][train_inner][INFO] - {"epoch": 3, "update": 2.619, "loss": "2.435", "ppl": "5.41", "wps": "365682", "ups": "3.08", "wpb": "118689", "bsz": "256", "num_updates": "134800", "lr": "0.000873939", "gnorm": "0.634", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.8", "wall": "43981"} [2022-08-01 13:42:13,404][train_inner][INFO] - {"epoch": 3, "update": 2.623, "loss": "2.445", "ppl": "5.45", "wps": "361176", "ups": "3.07", "wpb": "117623", "bsz": "256", "num_updates": "135000", "lr": "0.000873737", "gnorm": "0.642", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "23.5", "wall": "44046"} [2022-08-01 13:43:18,233][train_inner][INFO] - {"epoch": 3, "update": 2.627, "loss": "2.437", "ppl": "5.41", "wps": "367594", "ups": "3.09", "wpb": "119152", "bsz": "256", "num_updates": "135200", "lr": "0.000873535", "gnorm": "0.636", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "21.4", "wall": "44111"} [2022-08-01 13:44:23,464][train_inner][INFO] - {"epoch": 3, "update": 2.631, "loss": "2.446", "ppl": "5.45", "wps": "361460", "ups": "3.07", "wpb": "117889", "bsz": "256", "num_updates": "135400", "lr": "0.000873333", "gnorm": "0.639", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.9", "wall": "44176"} [2022-08-01 13:45:28,167][train_inner][INFO] - {"epoch": 3, "update": 2.635, "loss": "2.445", "ppl": "5.44", "wps": "367410", "ups": "3.09", "wpb": "118861", "bsz": "256", "num_updates": "135600", "lr": "0.000873131", "gnorm": "0.695", "clip": "0.5", "loss_scale": "1", "train_wall": "64", "gb_free": "20.4", "wall": "44240"} [2022-08-01 13:46:33,232][train_inner][INFO] - {"epoch": 3, "update": 2.639, "loss": "2.445", "ppl": "5.45", "wps": "363221", "ups": "3.07", "wpb": "118164", "bsz": "256", "num_updates": "135800", "lr": "0.000872929", "gnorm": "0.889", "clip": "1.5", "loss_scale": "1", "train_wall": "65", "gb_free": "19.7", "wall": "44306"} [2022-08-01 13:47:38,620][train_inner][INFO] - {"epoch": 3, "update": 2.642, "loss": "2.431", "ppl": "5.39", "wps": "364042", "ups": "3.06", "wpb": "119019", "bsz": "256", "num_updates": "136000", "lr": "0.000872727", "gnorm": "0.632", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.1", "wall": "44371"} [2022-08-01 13:48:43,765][train_inner][INFO] - {"epoch": 3, "update": 2.646, "loss": "2.438", "ppl": "5.42", "wps": "362984", "ups": "3.07", "wpb": "118232", "bsz": "256", "num_updates": "136200", "lr": "0.000872525", "gnorm": "0.636", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.7", "wall": "44436"} [2022-08-01 13:49:48,717][train_inner][INFO] - {"epoch": 3, "update": 2.65, "loss": "2.437", "ppl": "5.41", "wps": "363888", "ups": "3.08", "wpb": "118174", "bsz": "256", "num_updates": "136400", "lr": "0.000872323", "gnorm": "0.634", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.3", "wall": "44501"} [2022-08-01 13:50:54,991][train_inner][INFO] - {"epoch": 3, "update": 2.654, "loss": "2.436", "ppl": "5.41", "wps": "357269", "ups": "3.02", "wpb": "118386", "bsz": "256", "num_updates": "136600", "lr": "0.000872121", "gnorm": "0.637", "clip": "0", "loss_scale": "1", "train_wall": "66", "gb_free": "21", "wall": "44567"} [2022-08-01 13:52:00,078][train_inner][INFO] - {"epoch": 3, "update": 2.658, "loss": "2.435", "ppl": "5.41", "wps": "363296", "ups": "3.07", "wpb": "118228", "bsz": "256", "num_updates": "136800", "lr": "0.000871919", "gnorm": "0.639", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20", "wall": "44632"} [2022-08-01 13:53:04,827][train_inner][INFO] - {"epoch": 3, "update": 2.662, "loss": "2.437", "ppl": "5.42", "wps": "365341", "ups": "3.09", "wpb": "118277", "bsz": "256", "num_updates": "137000", "lr": "0.000871717", "gnorm": "0.651", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20", "wall": "44697"} [2022-08-01 13:54:09,953][train_inner][INFO] - {"epoch": 3, "update": 2.666, "loss": "2.437", "ppl": "5.42", "wps": "363681", "ups": "3.07", "wpb": "118422", "bsz": "256", "num_updates": "137200", "lr": "0.000871515", "gnorm": "0.637", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "22.3", "wall": "44762"} [2022-08-01 13:55:14,906][train_inner][INFO] - {"epoch": 3, "update": 2.67, "loss": "2.439", "ppl": "5.42", "wps": "362640", "ups": "3.08", "wpb": "117772", "bsz": "256", "num_updates": "137400", "lr": "0.000871313", "gnorm": "0.644", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.3", "wall": "44827"} [2022-08-01 13:56:19,048][train_inner][INFO] - {"epoch": 3, "update": 2.674, "loss": "2.433", "ppl": "5.4", "wps": "368882", "ups": "3.12", "wpb": "118303", "bsz": "256", "num_updates": "137600", "lr": "0.000871111", "gnorm": "0.639", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20", "wall": "44891"} [2022-08-01 13:57:12,055][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 [2022-08-01 13:57:23,834][train_inner][INFO] - {"epoch": 3, "update": 2.677, "loss": "2.496", "ppl": "5.64", "wps": "364370", "ups": "3.09", "wpb": "118029", "bsz": "256", "num_updates": "137800", "lr": "0.000870909", "gnorm": "0.917", "clip": "0.5", "loss_scale": "1", "train_wall": "64", "gb_free": "20.7", "wall": "44956"} [2022-08-01 13:58:05,766][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5 [2022-08-01 13:58:29,181][train_inner][INFO] - {"epoch": 3, "update": 2.681, "loss": "2.445", "ppl": "5.45", "wps": "360716", "ups": "3.06", "wpb": "117857", "bsz": "256", "num_updates": "138000", "lr": "0.000870707", "gnorm": "0.806", "clip": "1.5", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.1", "wall": "45022"} [2022-08-01 13:58:47,630][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.25 [2022-08-01 13:59:34,451][train_inner][INFO] - {"epoch": 3, "update": 2.685, "loss": "2.44", "ppl": "5.43", "wps": "361940", "ups": "3.06", "wpb": "118118", "bsz": "256", "num_updates": "138200", "lr": "0.000870505", "gnorm": "0.657", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "20.6", "wall": "45087"} [2022-08-01 14:00:39,014][train_inner][INFO] - {"epoch": 3, "update": 2.689, "loss": "2.442", "ppl": "5.43", "wps": "365534", "ups": "3.1", "wpb": "117999", "bsz": "256", "num_updates": "138400", "lr": "0.000870303", "gnorm": "0.703", "clip": "0.5", "loss_scale": "0.25", "train_wall": "64", "gb_free": "23.8", "wall": "45151"} [2022-08-01 14:01:44,155][train_inner][INFO] - {"epoch": 3, "update": 2.693, "loss": "2.444", "ppl": "5.44", "wps": "363284", "ups": "3.07", "wpb": "118323", "bsz": "256", "num_updates": "138600", "lr": "0.000870101", "gnorm": "0.734", "clip": "1", "loss_scale": "0.25", "train_wall": "65", "gb_free": "19.8", "wall": "45216"} [2022-08-01 14:02:49,333][train_inner][INFO] - {"epoch": 3, "update": 2.697, "loss": "2.435", "ppl": "5.41", "wps": "364171", "ups": "3.07", "wpb": "118677", "bsz": "256", "num_updates": "138800", "lr": "0.000869899", "gnorm": "0.636", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "20.5", "wall": "45282"} [2022-08-01 14:03:54,651][train_inner][INFO] - {"epoch": 3, "update": 2.701, "loss": "2.443", "ppl": "5.44", "wps": "363496", "ups": "3.06", "wpb": "118714", "bsz": "256", "num_updates": "139000", "lr": "0.000869697", "gnorm": "0.636", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "20.4", "wall": "45347"} [2022-08-01 14:04:59,706][train_inner][INFO] - {"epoch": 3, "update": 2.705, "loss": "2.438", "ppl": "5.42", "wps": "363894", "ups": "3.07", "wpb": "118364", "bsz": "256", "num_updates": "139200", "lr": "0.000869495", "gnorm": "0.64", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "20.4", "wall": "45412"} [2022-08-01 14:06:04,681][train_inner][INFO] - {"epoch": 3, "update": 2.709, "loss": "2.433", "ppl": "5.4", "wps": "364747", "ups": "3.08", "wpb": "118496", "bsz": "256", "num_updates": "139400", "lr": "0.000869293", "gnorm": "0.639", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "27.5", "wall": "45477"} [2022-08-01 14:07:09,916][train_inner][INFO] - {"epoch": 3, "update": 2.712, "loss": "2.433", "ppl": "5.4", "wps": "362887", "ups": "3.07", "wpb": "118362", "bsz": "256", "num_updates": "139600", "lr": "0.000869091", "gnorm": "0.639", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "19.8", "wall": "45542"} [2022-08-01 14:08:14,930][train_inner][INFO] - {"epoch": 3, "update": 2.716, "loss": "2.432", "ppl": "5.39", "wps": "362968", "ups": "3.08", "wpb": "117989", "bsz": "256", "num_updates": "139800", "lr": "0.000868889", "gnorm": "0.64", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "19.9", "wall": "45607"} [2022-08-01 14:09:19,466][train_inner][INFO] - {"epoch": 3, "update": 2.72, "loss": "2.426", "ppl": "5.37", "wps": "366413", "ups": "3.1", "wpb": "118231", "bsz": "256", "num_updates": "140000", "lr": "0.000868687", "gnorm": "0.64", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "20.9", "wall": "45672"} [2022-08-01 14:10:24,699][train_inner][INFO] - {"epoch": 3, "update": 2.724, "loss": "2.429", "ppl": "5.39", "wps": "362974", "ups": "3.07", "wpb": "118388", "bsz": "256", "num_updates": "140200", "lr": "0.000868485", "gnorm": "0.641", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "23.1", "wall": "45737"} [2022-08-01 14:11:29,716][train_inner][INFO] - {"epoch": 3, "update": 2.728, "loss": "2.43", "ppl": "5.39", "wps": "364825", "ups": "3.08", "wpb": "118598", "bsz": "256", "num_updates": "140400", "lr": "0.000868283", "gnorm": "0.641", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "19.8", "wall": "45802"} [2022-08-01 14:12:35,380][train_inner][INFO] - {"epoch": 3, "update": 2.732, "loss": "2.434", "ppl": "5.4", "wps": "359599", "ups": "3.05", "wpb": "118062", "bsz": "256", "num_updates": "140600", "lr": "0.000868081", "gnorm": "0.645", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "28.2", "wall": "45868"} [2022-08-01 14:13:41,516][train_inner][INFO] - {"epoch": 3, "update": 2.736, "loss": "2.433", "ppl": "5.4", "wps": "359012", "ups": "3.02", "wpb": "118715", "bsz": "256", "num_updates": "140800", "lr": "0.000867879", "gnorm": "0.641", "clip": "0", "loss_scale": "0.5", "train_wall": "66", "gb_free": "23.9", "wall": "45934"} [2022-08-01 14:14:46,334][train_inner][INFO] - {"epoch": 3, "update": 2.74, "loss": "2.431", "ppl": "5.39", "wps": "366010", "ups": "3.09", "wpb": "118618", "bsz": "256", "num_updates": "141000", "lr": "0.000867677", "gnorm": "0.638", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "19.8", "wall": "45999"} [2022-08-01 14:15:51,263][train_inner][INFO] - {"epoch": 3, "update": 2.744, "loss": "2.439", "ppl": "5.42", "wps": "364486", "ups": "3.08", "wpb": "118327", "bsz": "256", "num_updates": "141200", "lr": "0.000867475", "gnorm": "0.655", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "26.4", "wall": "46064"} [2022-08-01 14:16:57,066][train_inner][INFO] - {"epoch": 3, "update": 2.747, "loss": "2.435", "ppl": "5.41", "wps": "360111", "ups": "3.04", "wpb": "118480", "bsz": "256", "num_updates": "141400", "lr": "0.000867273", "gnorm": "0.642", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.1", "wall": "46129"} [2022-08-01 14:18:01,905][train_inner][INFO] - {"epoch": 3, "update": 2.751, "loss": "2.436", "ppl": "5.41", "wps": "363560", "ups": "3.08", "wpb": "117863", "bsz": "256", "num_updates": "141600", "lr": "0.000867071", "gnorm": "0.643", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "25.4", "wall": "46194"} [2022-08-01 14:19:06,754][train_inner][INFO] - {"epoch": 3, "update": 2.755, "loss": "2.427", "ppl": "5.38", "wps": "366215", "ups": "3.08", "wpb": "118742", "bsz": "256", "num_updates": "141800", "lr": "0.000866869", "gnorm": "0.645", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20.1", "wall": "46259"} [2022-08-01 14:20:11,826][train_inner][INFO] - {"epoch": 3, "update": 2.759, "loss": "2.428", "ppl": "5.38", "wps": "363447", "ups": "3.07", "wpb": "118249", "bsz": "256", "num_updates": "142000", "lr": "0.000866667", "gnorm": "0.642", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.6", "wall": "46324"} [2022-08-01 14:21:16,933][train_inner][INFO] - {"epoch": 3, "update": 2.763, "loss": "2.427", "ppl": "5.38", "wps": "364856", "ups": "3.07", "wpb": "118772", "bsz": "256", "num_updates": "142200", "lr": "0.000866465", "gnorm": "0.639", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.6", "wall": "46389"} [2022-08-01 14:22:21,886][train_inner][INFO] - {"epoch": 3, "update": 2.767, "loss": "2.43", "ppl": "5.39", "wps": "364821", "ups": "3.08", "wpb": "118478", "bsz": "256", "num_updates": "142400", "lr": "0.000866263", "gnorm": "0.638", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.5", "wall": "46454"} [2022-08-01 14:23:26,874][train_inner][INFO] - {"epoch": 3, "update": 2.771, "loss": "2.437", "ppl": "5.41", "wps": "364267", "ups": "3.08", "wpb": "118364", "bsz": "256", "num_updates": "142600", "lr": "0.000866061", "gnorm": "0.649", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20", "wall": "46519"} [2022-08-01 14:24:31,694][train_inner][INFO] - {"epoch": 3, "update": 2.775, "loss": "2.431", "ppl": "5.39", "wps": "364127", "ups": "3.09", "wpb": "118011", "bsz": "256", "num_updates": "142800", "lr": "0.000865859", "gnorm": "0.646", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "23.4", "wall": "46584"} [2022-08-01 14:25:36,673][train_inner][INFO] - {"epoch": 3, "update": 2.779, "loss": "2.438", "ppl": "5.42", "wps": "364030", "ups": "3.08", "wpb": "118271", "bsz": "256", "num_updates": "143000", "lr": "0.000865657", "gnorm": "0.642", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.7", "wall": "46649"} [2022-08-01 14:26:41,287][train_inner][INFO] - {"epoch": 3, "update": 2.782, "loss": "2.431", "ppl": "5.39", "wps": "366367", "ups": "3.1", "wpb": "118360", "bsz": "256", "num_updates": "143200", "lr": "0.000865455", "gnorm": "0.638", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "28.2", "wall": "46714"} [2022-08-01 14:27:46,221][train_inner][INFO] - {"epoch": 3, "update": 2.786, "loss": "2.434", "ppl": "5.4", "wps": "363073", "ups": "3.08", "wpb": "117877", "bsz": "256", "num_updates": "143400", "lr": "0.000865253", "gnorm": "0.643", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21", "wall": "46779"} [2022-08-01 14:28:50,913][train_inner][INFO] - {"epoch": 3, "update": 2.79, "loss": "2.423", "ppl": "5.36", "wps": "367796", "ups": "3.09", "wpb": "118966", "bsz": "256", "num_updates": "143600", "lr": "0.000865051", "gnorm": "0.643", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "25.3", "wall": "46843"} [2022-08-01 14:29:55,541][train_inner][INFO] - {"epoch": 3, "update": 2.794, "loss": "2.433", "ppl": "5.4", "wps": "365733", "ups": "3.09", "wpb": "118182", "bsz": "256", "num_updates": "143800", "lr": "0.000864848", "gnorm": "0.667", "clip": "0.5", "loss_scale": "1", "train_wall": "64", "gb_free": "20.3", "wall": "46908"} [2022-08-01 14:31:01,514][train_inner][INFO] - {"epoch": 3, "update": 2.798, "loss": "2.436", "ppl": "5.41", "wps": "358875", "ups": "3.03", "wpb": "118379", "bsz": "256", "num_updates": "144000", "lr": "0.000864646", "gnorm": "0.644", "clip": "0", "loss_scale": "1", "train_wall": "66", "gb_free": "20.8", "wall": "46974"} [2022-08-01 14:32:06,420][train_inner][INFO] - {"epoch": 3, "update": 2.802, "loss": "2.426", "ppl": "5.37", "wps": "364341", "ups": "3.08", "wpb": "118231", "bsz": "256", "num_updates": "144200", "lr": "0.000864444", "gnorm": "0.639", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.2", "wall": "47039"} [2022-08-01 14:33:11,345][train_inner][INFO] - {"epoch": 3, "update": 2.806, "loss": "2.431", "ppl": "5.39", "wps": "363626", "ups": "3.08", "wpb": "118040", "bsz": "256", "num_updates": "144400", "lr": "0.000864242", "gnorm": "0.645", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.9", "wall": "47104"} [2022-08-01 14:34:16,419][train_inner][INFO] - {"epoch": 3, "update": 2.81, "loss": "2.43", "ppl": "5.39", "wps": "363792", "ups": "3.07", "wpb": "118365", "bsz": "256", "num_updates": "144600", "lr": "0.00086404", "gnorm": "0.646", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.2", "wall": "47169"} [2022-08-01 14:35:21,258][train_inner][INFO] - {"epoch": 3, "update": 2.813, "loss": "2.428", "ppl": "5.38", "wps": "364466", "ups": "3.08", "wpb": "118156", "bsz": "256", "num_updates": "144800", "lr": "0.000863838", "gnorm": "0.643", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "22.7", "wall": "47234"} [2022-08-01 14:36:27,140][train_inner][INFO] - {"epoch": 3, "update": 2.817, "loss": "2.424", "ppl": "5.37", "wps": "357303", "ups": "3.04", "wpb": "117698", "bsz": "256", "num_updates": "145000", "lr": "0.000863636", "gnorm": "0.641", "clip": "0", "loss_scale": "2", "train_wall": "66", "gb_free": "21.8", "wall": "47299"} [2022-08-01 14:37:32,029][train_inner][INFO] - {"epoch": 3, "update": 2.821, "loss": "2.423", "ppl": "5.36", "wps": "363686", "ups": "3.08", "wpb": "117994", "bsz": "256", "num_updates": "145200", "lr": "0.000863434", "gnorm": "0.643", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "28.2", "wall": "47364"} [2022-08-01 14:38:37,198][train_inner][INFO] - {"epoch": 3, "update": 2.825, "loss": "2.419", "ppl": "5.35", "wps": "365148", "ups": "3.07", "wpb": "118980", "bsz": "256", "num_updates": "145400", "lr": "0.000863232", "gnorm": "0.638", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "27.6", "wall": "47430"} [2022-08-01 14:39:42,415][train_inner][INFO] - {"epoch": 3, "update": 2.829, "loss": "2.422", "ppl": "5.36", "wps": "363520", "ups": "3.07", "wpb": "118537", "bsz": "256", "num_updates": "145600", "lr": "0.00086303", "gnorm": "0.647", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "26.4", "wall": "47495"} [2022-08-01 14:40:47,619][train_inner][INFO] - {"epoch": 3, "update": 2.833, "loss": "2.429", "ppl": "5.38", "wps": "362146", "ups": "3.07", "wpb": "118065", "bsz": "256", "num_updates": "145800", "lr": "0.000862828", "gnorm": "0.645", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "22", "wall": "47560"} [2022-08-01 14:41:52,406][train_inner][INFO] - {"epoch": 3, "update": 2.837, "loss": "2.428", "ppl": "5.38", "wps": "365627", "ups": "3.09", "wpb": "118438", "bsz": "256", "num_updates": "146000", "lr": "0.000862626", "gnorm": "0.646", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.9", "wall": "47625"} [2022-08-01 14:42:57,456][train_inner][INFO] - {"epoch": 3, "update": 2.841, "loss": "2.427", "ppl": "5.38", "wps": "363498", "ups": "3.07", "wpb": "118226", "bsz": "256", "num_updates": "146200", "lr": "0.000862424", "gnorm": "0.641", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "22.3", "wall": "47690"} [2022-08-01 14:44:02,643][train_inner][INFO] - {"epoch": 3, "update": 2.845, "loss": "2.43", "ppl": "5.39", "wps": "363064", "ups": "3.07", "wpb": "118332", "bsz": "256", "num_updates": "146400", "lr": "0.000862222", "gnorm": "0.645", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.2", "wall": "47755"} [2022-08-01 14:45:07,605][train_inner][INFO] - {"epoch": 3, "update": 2.848, "loss": "2.43", "ppl": "5.39", "wps": "363107", "ups": "3.08", "wpb": "117940", "bsz": "256", "num_updates": "146600", "lr": "0.00086202", "gnorm": "0.648", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.8", "wall": "47820"} [2022-08-01 14:46:12,344][train_inner][INFO] - {"epoch": 3, "update": 2.852, "loss": "2.431", "ppl": "5.39", "wps": "366638", "ups": "3.09", "wpb": "118677", "bsz": "256", "num_updates": "146800", "lr": "0.000861818", "gnorm": "0.641", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "28.2", "wall": "47885"} [2022-08-01 14:47:17,142][train_inner][INFO] - {"epoch": 3, "update": 2.856, "loss": "2.42", "ppl": "5.35", "wps": "364993", "ups": "3.09", "wpb": "118253", "bsz": "256", "num_updates": "147000", "lr": "0.000861616", "gnorm": "0.645", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.5", "wall": "47949"} [2022-08-01 14:48:21,600][train_inner][INFO] - {"epoch": 3, "update": 2.86, "loss": "2.427", "ppl": "5.38", "wps": "366280", "ups": "3.1", "wpb": "118046", "bsz": "256", "num_updates": "147200", "lr": "0.000861414", "gnorm": "0.647", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.1", "wall": "48014"} [2022-08-01 14:49:26,550][train_inner][INFO] - {"epoch": 3, "update": 2.864, "loss": "2.432", "ppl": "5.39", "wps": "363529", "ups": "3.08", "wpb": "118055", "bsz": "256", "num_updates": "147400", "lr": "0.000861212", "gnorm": "0.645", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.2", "wall": "48079"} [2022-08-01 14:50:31,444][train_inner][INFO] - {"epoch": 3, "update": 2.868, "loss": "2.422", "ppl": "5.36", "wps": "365114", "ups": "3.08", "wpb": "118467", "bsz": "256", "num_updates": "147600", "lr": "0.00086101", "gnorm": "0.647", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "24.6", "wall": "48144"} [2022-08-01 14:51:36,246][train_inner][INFO] - {"epoch": 3, "update": 2.872, "loss": "2.424", "ppl": "5.37", "wps": "364788", "ups": "3.09", "wpb": "118194", "bsz": "256", "num_updates": "147800", "lr": "0.000860808", "gnorm": "0.645", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22.6", "wall": "48209"} [2022-08-01 14:52:41,022][train_inner][INFO] - {"epoch": 3, "update": 2.876, "loss": "2.426", "ppl": "5.37", "wps": "365044", "ups": "3.09", "wpb": "118228", "bsz": "256", "num_updates": "148000", "lr": "0.000860606", "gnorm": "0.699", "clip": "0.5", "loss_scale": "4", "train_wall": "64", "gb_free": "22.1", "wall": "48273"} [2022-08-01 14:53:45,935][train_inner][INFO] - {"epoch": 3, "update": 2.88, "loss": "2.414", "ppl": "5.33", "wps": "365277", "ups": "3.08", "wpb": "118555", "bsz": "256", "num_updates": "148200", "lr": "0.000860404", "gnorm": "0.643", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.3", "wall": "48338"} [2022-08-01 14:54:50,512][train_inner][INFO] - {"epoch": 3, "update": 2.883, "loss": "2.427", "ppl": "5.38", "wps": "366605", "ups": "3.1", "wpb": "118368", "bsz": "256", "num_updates": "148400", "lr": "0.000860202", "gnorm": "0.647", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "25", "wall": "48403"} [2022-08-01 14:55:55,537][train_inner][INFO] - {"epoch": 3, "update": 2.887, "loss": "2.422", "ppl": "5.36", "wps": "363144", "ups": "3.08", "wpb": "118066", "bsz": "256", "num_updates": "148600", "lr": "0.00086", "gnorm": "0.644", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "48468"} [2022-08-01 14:57:00,239][train_inner][INFO] - {"epoch": 3, "update": 2.891, "loss": "2.417", "ppl": "5.34", "wps": "366078", "ups": "3.09", "wpb": "118428", "bsz": "256", "num_updates": "148800", "lr": "0.000859798", "gnorm": "0.641", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.8", "wall": "48533"} [2022-08-01 14:58:05,242][train_inner][INFO] - {"epoch": 3, "update": 2.895, "loss": "2.42", "ppl": "5.35", "wps": "364926", "ups": "3.08", "wpb": "118604", "bsz": "256", "num_updates": "149000", "lr": "0.000859596", "gnorm": "0.646", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.9", "wall": "48598"} [2022-08-01 14:59:10,376][train_inner][INFO] - {"epoch": 3, "update": 2.899, "loss": "2.423", "ppl": "5.36", "wps": "364244", "ups": "3.07", "wpb": "118623", "bsz": "256", "num_updates": "149200", "lr": "0.000859394", "gnorm": "0.646", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.1", "wall": "48663"} [2022-08-01 15:00:15,116][train_inner][INFO] - {"epoch": 3, "update": 2.903, "loss": "2.424", "ppl": "5.37", "wps": "365498", "ups": "3.09", "wpb": "118310", "bsz": "256", "num_updates": "149400", "lr": "0.000859192", "gnorm": "0.654", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20", "wall": "48727"} [2022-08-01 15:01:19,907][train_inner][INFO] - {"epoch": 3, "update": 2.907, "loss": "2.424", "ppl": "5.37", "wps": "366358", "ups": "3.09", "wpb": "118682", "bsz": "256", "num_updates": "149600", "lr": "0.00085899", "gnorm": "0.645", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.8", "wall": "48792"} [2022-08-01 15:02:25,014][train_inner][INFO] - {"epoch": 3, "update": 2.911, "loss": "2.42", "ppl": "5.35", "wps": "363435", "ups": "3.07", "wpb": "118310", "bsz": "256", "num_updates": "149800", "lr": "0.000858788", "gnorm": "0.647", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.9", "wall": "48857"} [2022-08-01 15:03:29,865][train_inner][INFO] - {"epoch": 3, "update": 2.914, "loss": "2.425", "ppl": "5.37", "wps": "363904", "ups": "3.08", "wpb": "117997", "bsz": "256", "num_updates": "150000", "lr": "0.000858586", "gnorm": "0.649", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.9", "wall": "48922"} [2022-08-01 15:03:29,867][fairseq_cli.train][INFO] - begin validation on "valid" subset [2022-08-01 15:03:52,464][valid][INFO] - {"epoch": 3, "valid_loss": "2.314", "valid_ppl": "4.97", "valid_wps": "1.60236e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "150000", "valid_best_loss": "2.314"} [2022-08-01 15:03:52,467][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 3 @ 150000 updates [2022-08-01 15:03:52,468][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_3_150000.pt [2022-08-01 15:04:02,469][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_3_150000.pt [2022-08-01 15:04:33,690][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/adan2/checkpoint_3_150000.pt (epoch 3 @ 150000 updates, score 2.314) (writing took 41.22356229927391 seconds) [2022-08-01 15:05:38,294][train_inner][INFO] - {"epoch": 3, "update": 2.918, "loss": "2.418", "ppl": "5.34", "wps": "183907", "ups": "1.56", "wpb": "118093", "bsz": "256", "num_updates": "150200", "lr": "0.000858384", "gnorm": "0.644", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "28.2", "wall": "49051"} [2022-08-01 15:06:44,541][train_inner][INFO] - {"epoch": 3, "update": 2.922, "loss": "2.418", "ppl": "5.35", "wps": "357204", "ups": "3.02", "wpb": "118318", "bsz": "256", "num_updates": "150400", "lr": "0.000858182", "gnorm": "0.644", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "19.8", "wall": "49117"} [2022-08-01 15:07:49,968][train_inner][INFO] - {"epoch": 3, "update": 2.926, "loss": "2.427", "ppl": "5.38", "wps": "359429", "ups": "3.06", "wpb": "117580", "bsz": "256", "num_updates": "150600", "lr": "0.00085798", "gnorm": "0.645", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.9", "wall": "49182"} [2022-08-01 15:08:54,706][train_inner][INFO] - {"epoch": 3, "update": 2.93, "loss": "2.42", "ppl": "5.35", "wps": "366363", "ups": "3.09", "wpb": "118586", "bsz": "256", "num_updates": "150800", "lr": "0.000857778", "gnorm": "0.645", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "49247"} [2022-08-01 15:09:59,939][train_inner][INFO] - {"epoch": 3, "update": 2.934, "loss": "2.416", "ppl": "5.34", "wps": "363593", "ups": "3.07", "wpb": "118589", "bsz": "256", "num_updates": "151000", "lr": "0.000857576", "gnorm": "0.646", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.1", "wall": "49312"} [2022-08-01 15:11:04,732][train_inner][INFO] - {"epoch": 3, "update": 2.938, "loss": "2.421", "ppl": "5.36", "wps": "364449", "ups": "3.09", "wpb": "118068", "bsz": "256", "num_updates": "151200", "lr": "0.000857374", "gnorm": "0.646", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.9", "wall": "49377"} [2022-08-01 15:11:55,536][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-01 15:12:09,849][train_inner][INFO] - {"epoch": 3, "update": 2.942, "loss": "2.418", "ppl": "5.34", "wps": "362583", "ups": "3.07", "wpb": "118049", "bsz": "256", "num_updates": "151400", "lr": "0.000857172", "gnorm": "0.647", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "49442"} [2022-08-01 15:13:14,883][train_inner][INFO] - {"epoch": 3, "update": 2.946, "loss": "2.414", "ppl": "5.33", "wps": "365483", "ups": "3.08", "wpb": "118842", "bsz": "256", "num_updates": "151600", "lr": "0.00085697", "gnorm": "0.646", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.6", "wall": "49507"} [2022-08-01 15:14:19,784][train_inner][INFO] - {"epoch": 3, "update": 2.949, "loss": "2.419", "ppl": "5.35", "wps": "364321", "ups": "3.08", "wpb": "118223", "bsz": "256", "num_updates": "151800", "lr": "0.000856768", "gnorm": "0.647", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "49572"} [2022-08-01 15:15:24,559][train_inner][INFO] - {"epoch": 3, "update": 2.953, "loss": "2.419", "ppl": "5.35", "wps": "364797", "ups": "3.09", "wpb": "118147", "bsz": "256", "num_updates": "152000", "lr": "0.000856566", "gnorm": "0.647", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.5", "wall": "49637"} [2022-08-01 15:15:48,728][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 [2022-08-01 15:16:30,021][train_inner][INFO] - {"epoch": 3, "update": 2.957, "loss": "2.418", "ppl": "5.34", "wps": "363505", "ups": "3.06", "wpb": "118978", "bsz": "256", "num_updates": "152200", "lr": "0.000856364", "gnorm": "0.68", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.7", "wall": "49702"} [2022-08-01 15:17:34,977][train_inner][INFO] - {"epoch": 3, "update": 2.961, "loss": "2.415", "ppl": "5.33", "wps": "364485", "ups": "3.08", "wpb": "118375", "bsz": "256", "num_updates": "152400", "lr": "0.000856162", "gnorm": "0.645", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.1", "wall": "49767"} [2022-08-01 15:18:40,813][train_inner][INFO] - {"epoch": 3, "update": 2.965, "loss": "2.416", "ppl": "5.34", "wps": "360823", "ups": "3.04", "wpb": "118774", "bsz": "256", "num_updates": "152600", "lr": "0.00085596", "gnorm": "0.644", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20", "wall": "49833"} [2022-08-01 15:19:45,901][train_inner][INFO] - {"epoch": 3, "update": 2.969, "loss": "2.426", "ppl": "5.37", "wps": "362250", "ups": "3.07", "wpb": "117889", "bsz": "256", "num_updates": "152800", "lr": "0.000855758", "gnorm": "0.651", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.2", "wall": "49898"} [2022-08-01 15:20:50,585][train_inner][INFO] - {"epoch": 3, "update": 2.973, "loss": "2.422", "ppl": "5.36", "wps": "364682", "ups": "3.09", "wpb": "117943", "bsz": "256", "num_updates": "153000", "lr": "0.000855556", "gnorm": "0.646", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22.4", "wall": "49963"} [2022-08-01 15:21:55,769][train_inner][INFO] - {"epoch": 3, "update": 2.977, "loss": "2.415", "ppl": "5.33", "wps": "362934", "ups": "3.07", "wpb": "118287", "bsz": "256", "num_updates": "153200", "lr": "0.000855354", "gnorm": "0.645", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.6", "wall": "50028"} [2022-08-01 15:23:00,788][train_inner][INFO] - {"epoch": 3, "update": 2.981, "loss": "2.41", "ppl": "5.32", "wps": "364460", "ups": "3.08", "wpb": "118481", "bsz": "256", "num_updates": "153400", "lr": "0.000855152", "gnorm": "0.646", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.6", "wall": "50093"} [2022-08-01 15:24:05,680][train_inner][INFO] - {"epoch": 3, "update": 2.984, "loss": "2.416", "ppl": "5.34", "wps": "364175", "ups": "3.08", "wpb": "118158", "bsz": "256", "num_updates": "153600", "lr": "0.000854949", "gnorm": "0.648", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.9", "wall": "50158"} [2022-08-01 15:25:10,662][train_inner][INFO] - {"epoch": 3, "update": 2.988, "loss": "2.419", "ppl": "5.35", "wps": "363418", "ups": "3.08", "wpb": "118077", "bsz": "256", "num_updates": "153800", "lr": "0.000854747", "gnorm": "0.647", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.2", "wall": "50223"} [2022-08-01 15:26:15,319][train_inner][INFO] - {"epoch": 3, "update": 2.992, "loss": "2.424", "ppl": "5.37", "wps": "364511", "ups": "3.09", "wpb": "117840", "bsz": "256", "num_updates": "154000", "lr": "0.000854545", "gnorm": "0.651", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.2", "wall": "50288"} [2022-08-01 15:27:19,790][train_inner][INFO] - {"epoch": 3, "update": 2.996, "loss": "2.415", "ppl": "5.33", "wps": "366014", "ups": "3.1", "wpb": "117984", "bsz": "256", "num_updates": "154200", "lr": "0.000854343", "gnorm": "0.649", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.3", "wall": "50352"} [2022-08-01 15:28:24,182][train_inner][INFO] - {"epoch": 3, "update": 3.0, "loss": "2.409", "ppl": "5.31", "wps": "367256", "ups": "3.11", "wpb": "118241", "bsz": "256", "num_updates": "154400", "lr": "0.000854141", "gnorm": "0.648", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "27.7", "wall": "50417"} [2022-08-01 15:28:24,792][fairseq_cli.train][INFO] - end of epoch 3 (average epoch stats below) [2022-08-01 15:28:24,792][train][INFO] - {"epoch": 3, "train_loss": "2.454", "train_ppl": "5.48", "train_wps": "362564", "train_ups": "3.06", "train_wpb": "118298", "train_bsz": "256", "train_num_updates": "154402", "train_lr": "0.000854139", "train_gnorm": "0.641", "train_clip": "0", "train_loss_scale": "8", "train_train_wall": "16628", "train_gb_free": "19.7", "train_wall": "50417"} [2022-08-01 15:28:24,884][fairseq.data.iterators][INFO] - grouped total_num_itrs = 51488 [2022-08-01 15:28:24,887][fairseq.trainer][INFO] - begin training epoch 4 [2022-08-01 15:28:24,887][fairseq_cli.train][INFO] - Start iterating over samples [2022-08-01 15:29:40,152][train_inner][INFO] - {"epoch": 4, "update": 3.004, "loss": "2.413", "ppl": "5.33", "wps": "310648", "ups": "2.63", "wpb": "117997", "bsz": "255.4", "num_updates": "154600", "lr": "0.000853939", "gnorm": "0.651", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "20", "wall": "50492"} [2022-08-01 15:30:45,288][train_inner][INFO] - {"epoch": 4, "update": 3.008, "loss": "2.417", "ppl": "5.34", "wps": "362762", "ups": "3.07", "wpb": "118143", "bsz": "256", "num_updates": "154800", "lr": "0.000853737", "gnorm": "0.647", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.8", "wall": "50558"} [2022-08-01 15:31:50,074][train_inner][INFO] - {"epoch": 4, "update": 3.012, "loss": "2.408", "ppl": "5.31", "wps": "364749", "ups": "3.09", "wpb": "118150", "bsz": "256", "num_updates": "155000", "lr": "0.000853535", "gnorm": "0.65", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.7", "wall": "50622"} [2022-08-01 15:32:55,075][train_inner][INFO] - {"epoch": 4, "update": 3.015, "loss": "2.412", "ppl": "5.32", "wps": "364393", "ups": "3.08", "wpb": "118428", "bsz": "256", "num_updates": "155200", "lr": "0.000853333", "gnorm": "0.647", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.6", "wall": "50687"} [2022-08-01 15:34:00,305][train_inner][INFO] - {"epoch": 4, "update": 3.019, "loss": "2.407", "ppl": "5.31", "wps": "363522", "ups": "3.07", "wpb": "118562", "bsz": "256", "num_updates": "155400", "lr": "0.000853131", "gnorm": "0.647", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.3", "wall": "50753"} [2022-08-01 15:34:40,243][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 [2022-08-01 15:35:05,441][train_inner][INFO] - {"epoch": 4, "update": 3.023, "loss": "2.408", "ppl": "5.31", "wps": "363704", "ups": "3.07", "wpb": "118449", "bsz": "256", "num_updates": "155600", "lr": "0.000852929", "gnorm": "0.659", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "26", "wall": "50818"} [2022-08-01 15:36:10,346][train_inner][INFO] - {"epoch": 4, "update": 3.027, "loss": "2.414", "ppl": "5.33", "wps": "364192", "ups": "3.08", "wpb": "118188", "bsz": "256", "num_updates": "155800", "lr": "0.000852727", "gnorm": "0.649", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.8", "wall": "50883"} [2022-08-01 15:37:15,357][train_inner][INFO] - {"epoch": 4, "update": 3.031, "loss": "2.411", "ppl": "5.32", "wps": "365680", "ups": "3.08", "wpb": "118865", "bsz": "256", "num_updates": "156000", "lr": "0.000852525", "gnorm": "0.658", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21", "wall": "50948"} [2022-08-01 15:38:20,325][train_inner][INFO] - {"epoch": 4, "update": 3.035, "loss": "2.405", "ppl": "5.3", "wps": "365202", "ups": "3.08", "wpb": "118631", "bsz": "256", "num_updates": "156200", "lr": "0.000852323", "gnorm": "0.649", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.8", "wall": "51013"} [2022-08-01 15:39:25,394][train_inner][INFO] - {"epoch": 4, "update": 3.039, "loss": "2.411", "ppl": "5.32", "wps": "363022", "ups": "3.07", "wpb": "118105", "bsz": "256", "num_updates": "156400", "lr": "0.000852121", "gnorm": "0.649", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.8", "wall": "51078"} [2022-08-01 15:40:30,135][train_inner][INFO] - {"epoch": 4, "update": 3.043, "loss": "2.405", "ppl": "5.3", "wps": "368103", "ups": "3.09", "wpb": "119156", "bsz": "256", "num_updates": "156600", "lr": "0.000851919", "gnorm": "0.647", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "26", "wall": "51142"} [2022-08-01 15:41:35,173][train_inner][INFO] - {"epoch": 4, "update": 3.047, "loss": "2.411", "ppl": "5.32", "wps": "361267", "ups": "3.08", "wpb": "117479", "bsz": "256", "num_updates": "156800", "lr": "0.000851717", "gnorm": "0.651", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.1", "wall": "51207"} [2022-08-01 15:42:40,078][train_inner][INFO] - {"epoch": 4, "update": 3.05, "loss": "2.41", "ppl": "5.31", "wps": "364133", "ups": "3.08", "wpb": "118167", "bsz": "256", "num_updates": "157000", "lr": "0.000851515", "gnorm": "0.669", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "25.2", "wall": "51272"} [2022-08-01 15:43:45,307][train_inner][INFO] - {"epoch": 4, "update": 3.054, "loss": "2.412", "ppl": "5.32", "wps": "366186", "ups": "3.07", "wpb": "119429", "bsz": "256", "num_updates": "157200", "lr": "0.000851313", "gnorm": "0.645", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.8", "wall": "51338"} [2022-08-01 15:44:50,124][train_inner][INFO] - {"epoch": 4, "update": 3.058, "loss": "2.408", "ppl": "5.31", "wps": "365609", "ups": "3.09", "wpb": "118486", "bsz": "256", "num_updates": "157400", "lr": "0.000851111", "gnorm": "0.664", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "23.1", "wall": "51402"} [2022-08-01 15:45:54,813][train_inner][INFO] - {"epoch": 4, "update": 3.062, "loss": "2.403", "ppl": "5.29", "wps": "366880", "ups": "3.09", "wpb": "118664", "bsz": "256", "num_updates": "157600", "lr": "0.000850909", "gnorm": "0.648", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.7", "wall": "51467"} [2022-08-01 15:46:59,882][train_inner][INFO] - {"epoch": 4, "update": 3.066, "loss": "2.411", "ppl": "5.32", "wps": "364479", "ups": "3.07", "wpb": "118581", "bsz": "256", "num_updates": "157800", "lr": "0.000850707", "gnorm": "0.648", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.8", "wall": "51532"} [2022-08-01 15:48:05,212][train_inner][INFO] - {"epoch": 4, "update": 3.07, "loss": "2.408", "ppl": "5.31", "wps": "362087", "ups": "3.06", "wpb": "118274", "bsz": "256", "num_updates": "158000", "lr": "0.000850505", "gnorm": "0.656", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.9", "wall": "51598"} [2022-08-01 15:49:10,174][train_inner][INFO] - {"epoch": 4, "update": 3.074, "loss": "2.413", "ppl": "5.33", "wps": "361554", "ups": "3.08", "wpb": "117434", "bsz": "256", "num_updates": "158200", "lr": "0.000850303", "gnorm": "0.654", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.4", "wall": "51662"} [2022-08-01 15:50:15,000][train_inner][INFO] - {"epoch": 4, "update": 3.078, "loss": "2.415", "ppl": "5.33", "wps": "364932", "ups": "3.09", "wpb": "118285", "bsz": "256", "num_updates": "158400", "lr": "0.000850101", "gnorm": "0.65", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20", "wall": "51727"} [2022-08-01 15:51:19,966][train_inner][INFO] - {"epoch": 4, "update": 3.082, "loss": "2.414", "ppl": "5.33", "wps": "362867", "ups": "3.08", "wpb": "117868", "bsz": "256", "num_updates": "158600", "lr": "0.000849899", "gnorm": "0.648", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.7", "wall": "51792"} [2022-08-01 15:52:24,597][train_inner][INFO] - {"epoch": 4, "update": 3.085, "loss": "2.408", "ppl": "5.31", "wps": "366190", "ups": "3.09", "wpb": "118333", "bsz": "256", "num_updates": "158800", "lr": "0.000849697", "gnorm": "0.649", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22", "wall": "51857"} [2022-08-01 15:53:29,326][train_inner][INFO] - {"epoch": 4, "update": 3.089, "loss": "2.405", "ppl": "5.3", "wps": "366110", "ups": "3.09", "wpb": "118488", "bsz": "256", "num_updates": "159000", "lr": "0.000849495", "gnorm": "0.65", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.1", "wall": "51922"} [2022-08-01 15:54:34,215][train_inner][INFO] - {"epoch": 4, "update": 3.093, "loss": "2.406", "ppl": "5.3", "wps": "365678", "ups": "3.08", "wpb": "118641", "bsz": "256", "num_updates": "159200", "lr": "0.000849293", "gnorm": "0.65", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.2", "wall": "51987"} [2022-08-01 15:55:38,866][train_inner][INFO] - {"epoch": 4, "update": 3.097, "loss": "2.413", "ppl": "5.32", "wps": "364508", "ups": "3.09", "wpb": "117826", "bsz": "256", "num_updates": "159400", "lr": "0.000849091", "gnorm": "0.654", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.1", "wall": "52051"} [2022-08-01 15:56:43,566][train_inner][INFO] - {"epoch": 4, "update": 3.101, "loss": "2.41", "ppl": "5.32", "wps": "365181", "ups": "3.09", "wpb": "118136", "bsz": "256", "num_updates": "159600", "lr": "0.000848889", "gnorm": "0.65", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "25.7", "wall": "52116"} [2022-08-01 15:57:48,401][train_inner][INFO] - {"epoch": 4, "update": 3.105, "loss": "2.402", "ppl": "5.28", "wps": "365171", "ups": "3.08", "wpb": "118377", "bsz": "256", "num_updates": "159800", "lr": "0.000848687", "gnorm": "0.651", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.5", "wall": "52181"} [2022-08-01 15:58:53,048][train_inner][INFO] - {"epoch": 4, "update": 3.109, "loss": "2.404", "ppl": "5.29", "wps": "366782", "ups": "3.09", "wpb": "118556", "bsz": "256", "num_updates": "160000", "lr": "0.000848485", "gnorm": "0.649", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.4", "wall": "52245"} [2022-08-01 15:59:57,788][train_inner][INFO] - {"epoch": 4, "update": 3.113, "loss": "2.409", "ppl": "5.31", "wps": "364420", "ups": "3.09", "wpb": "117961", "bsz": "256", "num_updates": "160200", "lr": "0.000848283", "gnorm": "0.659", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "25", "wall": "52310"} [2022-08-01 16:01:02,478][train_inner][INFO] - {"epoch": 4, "update": 3.117, "loss": "2.402", "ppl": "5.28", "wps": "365482", "ups": "3.09", "wpb": "118214", "bsz": "256", "num_updates": "160400", "lr": "0.000848081", "gnorm": "0.654", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "52375"} [2022-08-01 16:02:06,777][train_inner][INFO] - {"epoch": 4, "update": 3.12, "loss": "2.41", "ppl": "5.31", "wps": "367467", "ups": "3.11", "wpb": "118136", "bsz": "256", "num_updates": "160600", "lr": "0.000847879", "gnorm": "0.654", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.5", "wall": "52439"} [2022-08-01 16:03:11,797][train_inner][INFO] - {"epoch": 4, "update": 3.124, "loss": "2.406", "ppl": "5.3", "wps": "363572", "ups": "3.08", "wpb": "118197", "bsz": "256", "num_updates": "160800", "lr": "0.000847677", "gnorm": "0.65", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "52504"} [2022-08-01 16:04:17,463][train_inner][INFO] - {"epoch": 4, "update": 3.128, "loss": "2.41", "ppl": "5.32", "wps": "359702", "ups": "3.05", "wpb": "118098", "bsz": "256", "num_updates": "161000", "lr": "0.000847475", "gnorm": "0.654", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "52570"} [2022-08-01 16:05:22,271][train_inner][INFO] - {"epoch": 4, "update": 3.132, "loss": "2.402", "ppl": "5.29", "wps": "365474", "ups": "3.09", "wpb": "118428", "bsz": "256", "num_updates": "161200", "lr": "0.000847273", "gnorm": "0.651", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.4", "wall": "52635"} [2022-08-01 16:06:27,239][train_inner][INFO] - {"epoch": 4, "update": 3.136, "loss": "2.404", "ppl": "5.29", "wps": "363539", "ups": "3.08", "wpb": "118090", "bsz": "256", "num_updates": "161400", "lr": "0.000847071", "gnorm": "0.652", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.9", "wall": "52700"} [2022-08-01 16:07:32,110][train_inner][INFO] - {"epoch": 4, "update": 3.14, "loss": "2.406", "ppl": "5.3", "wps": "365331", "ups": "3.08", "wpb": "118494", "bsz": "256", "num_updates": "161600", "lr": "0.000846869", "gnorm": "0.652", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.4", "wall": "52764"} [2022-08-01 16:08:37,340][train_inner][INFO] - {"epoch": 4, "update": 3.144, "loss": "2.406", "ppl": "5.3", "wps": "364837", "ups": "3.07", "wpb": "118990", "bsz": "256", "num_updates": "161800", "lr": "0.000846667", "gnorm": "0.651", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "22.7", "wall": "52830"} [2022-08-01 16:08:48,474][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-01 16:09:42,709][train_inner][INFO] - {"epoch": 4, "update": 3.148, "loss": "2.403", "ppl": "5.29", "wps": "362916", "ups": "3.06", "wpb": "118615", "bsz": "256", "num_updates": "162000", "lr": "0.000846465", "gnorm": "0.65", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.7", "wall": "52895"} [2022-08-01 16:10:47,175][train_inner][INFO] - {"epoch": 4, "update": 3.151, "loss": "2.413", "ppl": "5.32", "wps": "365875", "ups": "3.1", "wpb": "117932", "bsz": "256", "num_updates": "162200", "lr": "0.000846263", "gnorm": "0.655", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.2", "wall": "52959"} [2022-08-01 16:11:52,323][train_inner][INFO] - {"epoch": 4, "update": 3.155, "loss": "2.404", "ppl": "5.29", "wps": "364015", "ups": "3.07", "wpb": "118572", "bsz": "256", "num_updates": "162400", "lr": "0.000846061", "gnorm": "0.651", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "53025"} [2022-08-01 16:12:57,065][train_inner][INFO] - {"epoch": 4, "update": 3.159, "loss": "2.402", "ppl": "5.28", "wps": "366173", "ups": "3.09", "wpb": "118533", "bsz": "256", "num_updates": "162600", "lr": "0.000845859", "gnorm": "0.652", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.6", "wall": "53089"} [2022-08-01 16:13:53,874][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-01 16:14:02,354][train_inner][INFO] - {"epoch": 4, "update": 3.163, "loss": "2.4", "ppl": "5.28", "wps": "362538", "ups": "3.06", "wpb": "118346", "bsz": "256", "num_updates": "162800", "lr": "0.000845657", "gnorm": "0.654", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.8", "wall": "53155"} [2022-08-01 16:15:07,491][train_inner][INFO] - {"epoch": 4, "update": 3.167, "loss": "2.4", "ppl": "5.28", "wps": "362233", "ups": "3.07", "wpb": "117973", "bsz": "256", "num_updates": "163000", "lr": "0.000845455", "gnorm": "0.654", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.4", "wall": "53220"} [2022-08-01 16:16:12,582][train_inner][INFO] - {"epoch": 4, "update": 3.171, "loss": "2.4", "ppl": "5.28", "wps": "363003", "ups": "3.07", "wpb": "118140", "bsz": "256", "num_updates": "163200", "lr": "0.000845253", "gnorm": "0.653", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.5", "wall": "53285"} [2022-08-01 16:17:18,619][train_inner][INFO] - {"epoch": 4, "update": 3.175, "loss": "2.397", "ppl": "5.27", "wps": "357314", "ups": "3.03", "wpb": "117978", "bsz": "256", "num_updates": "163400", "lr": "0.000845051", "gnorm": "0.66", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "22.1", "wall": "53351"} [2022-08-01 16:18:23,676][train_inner][INFO] - {"epoch": 4, "update": 3.179, "loss": "2.401", "ppl": "5.28", "wps": "363920", "ups": "3.07", "wpb": "118376", "bsz": "256", "num_updates": "163600", "lr": "0.000844848", "gnorm": "0.654", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.1", "wall": "53416"} [2022-08-01 16:19:28,693][train_inner][INFO] - {"epoch": 4, "update": 3.183, "loss": "2.405", "ppl": "5.3", "wps": "362741", "ups": "3.08", "wpb": "117919", "bsz": "256", "num_updates": "163800", "lr": "0.000844646", "gnorm": "0.672", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22", "wall": "53481"} [2022-08-01 16:20:34,018][train_inner][INFO] - {"epoch": 4, "update": 3.186, "loss": "2.406", "ppl": "5.3", "wps": "361393", "ups": "3.06", "wpb": "118039", "bsz": "256", "num_updates": "164000", "lr": "0.000844444", "gnorm": "0.653", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "26.1", "wall": "53546"} [2022-08-01 16:21:39,280][train_inner][INFO] - {"epoch": 4, "update": 3.19, "loss": "2.399", "ppl": "5.27", "wps": "364190", "ups": "3.06", "wpb": "118837", "bsz": "256", "num_updates": "164200", "lr": "0.000844242", "gnorm": "0.652", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.5", "wall": "53612"} [2022-08-01 16:22:44,114][train_inner][INFO] - {"epoch": 4, "update": 3.194, "loss": "2.399", "ppl": "5.28", "wps": "365102", "ups": "3.08", "wpb": "118354", "bsz": "256", "num_updates": "164400", "lr": "0.00084404", "gnorm": "0.656", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.8", "wall": "53676"} [2022-08-01 16:23:49,389][train_inner][INFO] - {"epoch": 4, "update": 3.198, "loss": "2.403", "ppl": "5.29", "wps": "363121", "ups": "3.06", "wpb": "118513", "bsz": "256", "num_updates": "164600", "lr": "0.000843838", "gnorm": "0.656", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "53742"} [2022-08-01 16:24:53,813][train_inner][INFO] - {"epoch": 4, "update": 3.202, "loss": "2.397", "ppl": "5.27", "wps": "366766", "ups": "3.1", "wpb": "118141", "bsz": "256", "num_updates": "164800", "lr": "0.000843636", "gnorm": "0.655", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.1", "wall": "53806"} [2022-08-01 16:25:58,535][train_inner][INFO] - {"epoch": 4, "update": 3.206, "loss": "2.403", "ppl": "5.29", "wps": "364039", "ups": "3.09", "wpb": "117805", "bsz": "256", "num_updates": "165000", "lr": "0.000843434", "gnorm": "0.655", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "25", "wall": "53871"} [2022-08-01 16:27:03,608][train_inner][INFO] - {"epoch": 4, "update": 3.21, "loss": "2.407", "ppl": "5.3", "wps": "363678", "ups": "3.07", "wpb": "118326", "bsz": "256", "num_updates": "165200", "lr": "0.000843232", "gnorm": "0.657", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "53936"} [2022-08-01 16:28:08,585][train_inner][INFO] - {"epoch": 4, "update": 3.214, "loss": "2.393", "ppl": "5.25", "wps": "364389", "ups": "3.08", "wpb": "118384", "bsz": "256", "num_updates": "165400", "lr": "0.00084303", "gnorm": "0.653", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "25.1", "wall": "54001"} [2022-08-01 16:29:13,402][train_inner][INFO] - {"epoch": 4, "update": 3.218, "loss": "2.404", "ppl": "5.29", "wps": "365848", "ups": "3.09", "wpb": "118563", "bsz": "256", "num_updates": "165600", "lr": "0.000842828", "gnorm": "0.652", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.4", "wall": "54066"} [2022-08-01 16:30:17,889][train_inner][INFO] - {"epoch": 4, "update": 3.221, "loss": "2.398", "ppl": "5.27", "wps": "367013", "ups": "3.1", "wpb": "118337", "bsz": "256", "num_updates": "165800", "lr": "0.000842626", "gnorm": "0.656", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.5", "wall": "54130"} [2022-08-01 16:31:22,694][train_inner][INFO] - {"epoch": 4, "update": 3.225, "loss": "2.397", "ppl": "5.27", "wps": "366561", "ups": "3.09", "wpb": "118773", "bsz": "256", "num_updates": "166000", "lr": "0.000842424", "gnorm": "0.65", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.2", "wall": "54195"} [2022-08-01 16:32:27,375][train_inner][INFO] - {"epoch": 4, "update": 3.229, "loss": "2.405", "ppl": "5.3", "wps": "363429", "ups": "3.09", "wpb": "117533", "bsz": "256", "num_updates": "166200", "lr": "0.000842222", "gnorm": "0.663", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.3", "wall": "54260"} [2022-08-01 16:33:32,199][train_inner][INFO] - {"epoch": 4, "update": 3.233, "loss": "2.398", "ppl": "5.27", "wps": "366367", "ups": "3.09", "wpb": "118745", "bsz": "256", "num_updates": "166400", "lr": "0.00084202", "gnorm": "0.655", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.7", "wall": "54325"} [2022-08-01 16:34:36,999][train_inner][INFO] - {"epoch": 4, "update": 3.237, "loss": "2.401", "ppl": "5.28", "wps": "364960", "ups": "3.09", "wpb": "118244", "bsz": "256", "num_updates": "166600", "lr": "0.000841818", "gnorm": "0.655", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "54389"} [2022-08-01 16:34:41,819][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-01 16:35:42,096][train_inner][INFO] - {"epoch": 4, "update": 3.241, "loss": "2.403", "ppl": "5.29", "wps": "363070", "ups": "3.07", "wpb": "118173", "bsz": "256", "num_updates": "166800", "lr": "0.000841616", "gnorm": "0.658", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.4", "wall": "54454"} [2022-08-01 16:36:47,016][train_inner][INFO] - {"epoch": 4, "update": 3.245, "loss": "2.398", "ppl": "5.27", "wps": "365165", "ups": "3.08", "wpb": "118531", "bsz": "255.9", "num_updates": "167000", "lr": "0.000841414", "gnorm": "0.655", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "28.2", "wall": "54519"} [2022-08-01 16:37:51,754][train_inner][INFO] - {"epoch": 4, "update": 3.249, "loss": "2.396", "ppl": "5.26", "wps": "366560", "ups": "3.09", "wpb": "118650", "bsz": "256", "num_updates": "167200", "lr": "0.000841212", "gnorm": "0.653", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.3", "wall": "54584"} [2022-08-01 16:38:56,452][train_inner][INFO] - {"epoch": 4, "update": 3.253, "loss": "2.4", "ppl": "5.28", "wps": "365648", "ups": "3.09", "wpb": "118283", "bsz": "256", "num_updates": "167400", "lr": "0.00084101", "gnorm": "0.654", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.6", "wall": "54649"} [2022-08-01 16:40:01,515][train_inner][INFO] - {"epoch": 4, "update": 3.256, "loss": "2.397", "ppl": "5.27", "wps": "363700", "ups": "3.07", "wpb": "118314", "bsz": "256", "num_updates": "167600", "lr": "0.000840808", "gnorm": "0.657", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.8", "wall": "54714"} [2022-08-01 16:41:05,976][train_inner][INFO] - {"epoch": 4, "update": 3.26, "loss": "2.402", "ppl": "5.29", "wps": "366754", "ups": "3.1", "wpb": "118205", "bsz": "256", "num_updates": "167800", "lr": "0.000840606", "gnorm": "0.656", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "25.9", "wall": "54778"} [2022-08-01 16:42:10,775][train_inner][INFO] - {"epoch": 4, "update": 3.264, "loss": "2.395", "ppl": "5.26", "wps": "364703", "ups": "3.09", "wpb": "118160", "bsz": "256", "num_updates": "168000", "lr": "0.000840404", "gnorm": "0.654", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.6", "wall": "54843"} [2022-08-01 16:43:15,178][train_inner][INFO] - {"epoch": 4, "update": 3.268, "loss": "2.399", "ppl": "5.28", "wps": "367045", "ups": "3.11", "wpb": "118192", "bsz": "256", "num_updates": "168200", "lr": "0.000840202", "gnorm": "0.655", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.8", "wall": "54907"} [2022-08-01 16:44:20,115][train_inner][INFO] - {"epoch": 4, "update": 3.272, "loss": "2.404", "ppl": "5.29", "wps": "363168", "ups": "3.08", "wpb": "117915", "bsz": "256", "num_updates": "168400", "lr": "0.00084", "gnorm": "0.659", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.2", "wall": "54972"} [2022-08-01 16:45:24,918][train_inner][INFO] - {"epoch": 4, "update": 3.276, "loss": "2.402", "ppl": "5.29", "wps": "364855", "ups": "3.09", "wpb": "118216", "bsz": "256", "num_updates": "168600", "lr": "0.000839798", "gnorm": "0.658", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.5", "wall": "55037"} [2022-08-01 16:46:29,991][train_inner][INFO] - {"epoch": 4, "update": 3.28, "loss": "2.388", "ppl": "5.24", "wps": "363352", "ups": "3.07", "wpb": "118221", "bsz": "256", "num_updates": "168800", "lr": "0.000839596", "gnorm": "0.655", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "55102"} [2022-08-01 16:47:34,856][train_inner][INFO] - {"epoch": 4, "update": 3.284, "loss": "2.387", "ppl": "5.23", "wps": "365862", "ups": "3.08", "wpb": "118656", "bsz": "256", "num_updates": "169000", "lr": "0.000839394", "gnorm": "0.654", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.6", "wall": "55167"} [2022-08-01 16:48:39,502][train_inner][INFO] - {"epoch": 4, "update": 3.287, "loss": "2.394", "ppl": "5.25", "wps": "366136", "ups": "3.09", "wpb": "118345", "bsz": "256", "num_updates": "169200", "lr": "0.000839192", "gnorm": "0.656", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "55232"} [2022-08-01 16:49:44,021][train_inner][INFO] - {"epoch": 4, "update": 3.291, "loss": "2.398", "ppl": "5.27", "wps": "366969", "ups": "3.1", "wpb": "118379", "bsz": "256", "num_updates": "169400", "lr": "0.00083899", "gnorm": "0.658", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.9", "wall": "55296"} [2022-08-01 16:50:48,948][train_inner][INFO] - {"epoch": 4, "update": 3.295, "loss": "2.394", "ppl": "5.26", "wps": "363266", "ups": "3.08", "wpb": "117927", "bsz": "256", "num_updates": "169600", "lr": "0.000838788", "gnorm": "0.658", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "55361"} [2022-08-01 16:51:53,817][train_inner][INFO] - {"epoch": 4, "update": 3.299, "loss": "2.394", "ppl": "5.25", "wps": "364316", "ups": "3.08", "wpb": "118164", "bsz": "256", "num_updates": "169800", "lr": "0.000838586", "gnorm": "0.656", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.8", "wall": "55426"} [2022-08-01 16:52:58,589][train_inner][INFO] - {"epoch": 4, "update": 3.303, "loss": "2.397", "ppl": "5.27", "wps": "366416", "ups": "3.09", "wpb": "118665", "bsz": "256", "num_updates": "170000", "lr": "0.000838384", "gnorm": "0.66", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "25.6", "wall": "55491"} [2022-08-01 16:54:03,646][train_inner][INFO] - {"epoch": 4, "update": 3.307, "loss": "2.393", "ppl": "5.25", "wps": "364267", "ups": "3.07", "wpb": "118488", "bsz": "256", "num_updates": "170200", "lr": "0.000838182", "gnorm": "0.658", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "55556"} [2022-08-01 16:54:15,162][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-01 16:55:08,929][train_inner][INFO] - {"epoch": 4, "update": 3.311, "loss": "2.392", "ppl": "5.25", "wps": "363740", "ups": "3.06", "wpb": "118730", "bsz": "256", "num_updates": "170400", "lr": "0.00083798", "gnorm": "0.657", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.9", "wall": "55621"} [2022-08-01 16:56:13,967][train_inner][INFO] - {"epoch": 4, "update": 3.315, "loss": "2.397", "ppl": "5.27", "wps": "363417", "ups": "3.08", "wpb": "118178", "bsz": "256", "num_updates": "170600", "lr": "0.000837778", "gnorm": "0.66", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "55686"} [2022-08-01 16:57:18,858][train_inner][INFO] - {"epoch": 4, "update": 3.319, "loss": "2.393", "ppl": "5.25", "wps": "361463", "ups": "3.08", "wpb": "117277", "bsz": "256", "num_updates": "170800", "lr": "0.000837576", "gnorm": "0.659", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.2", "wall": "55751"} [2022-08-01 16:58:23,656][train_inner][INFO] - {"epoch": 4, "update": 3.322, "loss": "2.399", "ppl": "5.27", "wps": "364081", "ups": "3.09", "wpb": "117958", "bsz": "256", "num_updates": "171000", "lr": "0.000837374", "gnorm": "0.659", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.9", "wall": "55816"} [2022-08-01 16:59:28,403][train_inner][INFO] - {"epoch": 4, "update": 3.326, "loss": "2.399", "ppl": "5.27", "wps": "364829", "ups": "3.09", "wpb": "118106", "bsz": "256", "num_updates": "171200", "lr": "0.000837172", "gnorm": "0.665", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.3", "wall": "55881"} [2022-08-01 17:00:33,207][train_inner][INFO] - {"epoch": 4, "update": 3.33, "loss": "2.39", "ppl": "5.24", "wps": "366318", "ups": "3.09", "wpb": "118693", "bsz": "256", "num_updates": "171400", "lr": "0.00083697", "gnorm": "0.658", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22", "wall": "55946"} [2022-08-01 17:01:38,227][train_inner][INFO] - {"epoch": 4, "update": 3.334, "loss": "2.398", "ppl": "5.27", "wps": "362437", "ups": "3.08", "wpb": "117826", "bsz": "256", "num_updates": "171600", "lr": "0.000836768", "gnorm": "0.66", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.3", "wall": "56011"} [2022-08-01 17:02:43,115][train_inner][INFO] - {"epoch": 4, "update": 3.338, "loss": "2.391", "ppl": "5.24", "wps": "363963", "ups": "3.08", "wpb": "118083", "bsz": "256", "num_updates": "171800", "lr": "0.000836566", "gnorm": "0.661", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "56075"} [2022-08-01 17:03:48,311][train_inner][INFO] - {"epoch": 4, "update": 3.342, "loss": "2.395", "ppl": "5.26", "wps": "359581", "ups": "3.07", "wpb": "117216", "bsz": "256", "num_updates": "172000", "lr": "0.000836364", "gnorm": "0.661", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.4", "wall": "56141"} [2022-08-01 17:04:53,343][train_inner][INFO] - {"epoch": 4, "update": 3.346, "loss": "2.393", "ppl": "5.25", "wps": "363695", "ups": "3.08", "wpb": "118257", "bsz": "256", "num_updates": "172200", "lr": "0.000836162", "gnorm": "0.665", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "26.6", "wall": "56206"} [2022-08-01 17:05:58,431][train_inner][INFO] - {"epoch": 4, "update": 3.35, "loss": "2.394", "ppl": "5.26", "wps": "363104", "ups": "3.07", "wpb": "118166", "bsz": "256", "num_updates": "172400", "lr": "0.00083596", "gnorm": "0.659", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.5", "wall": "56271"} [2022-08-01 17:07:03,231][train_inner][INFO] - {"epoch": 4, "update": 3.354, "loss": "2.393", "ppl": "5.25", "wps": "366052", "ups": "3.09", "wpb": "118599", "bsz": "256", "num_updates": "172600", "lr": "0.000835758", "gnorm": "0.661", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "56336"} [2022-08-01 17:07:54,180][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-01 17:08:08,217][train_inner][INFO] - {"epoch": 4, "update": 3.357, "loss": "2.391", "ppl": "5.25", "wps": "364488", "ups": "3.08", "wpb": "118433", "bsz": "256", "num_updates": "172800", "lr": "0.000835556", "gnorm": "0.658", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.3", "wall": "56401"} [2022-08-01 17:09:13,156][train_inner][INFO] - {"epoch": 4, "update": 3.361, "loss": "2.399", "ppl": "5.27", "wps": "364823", "ups": "3.08", "wpb": "118454", "bsz": "256", "num_updates": "173000", "lr": "0.000835354", "gnorm": "0.658", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.1", "wall": "56465"} [2022-08-01 17:10:18,641][train_inner][INFO] - {"epoch": 4, "update": 3.365, "loss": "2.389", "ppl": "5.24", "wps": "361810", "ups": "3.05", "wpb": "118465", "bsz": "256", "num_updates": "173200", "lr": "0.000835152", "gnorm": "0.658", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "56531"} [2022-08-01 17:11:23,339][train_inner][INFO] - {"epoch": 4, "update": 3.369, "loss": "2.388", "ppl": "5.23", "wps": "365094", "ups": "3.09", "wpb": "118103", "bsz": "256", "num_updates": "173400", "lr": "0.000834949", "gnorm": "0.658", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "24.8", "wall": "56596"} [2022-08-01 17:12:28,028][train_inner][INFO] - {"epoch": 4, "update": 3.373, "loss": "2.393", "ppl": "5.25", "wps": "366419", "ups": "3.09", "wpb": "118515", "bsz": "256", "num_updates": "173600", "lr": "0.000834747", "gnorm": "0.659", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.3", "wall": "56660"} [2022-08-01 17:13:32,802][train_inner][INFO] - {"epoch": 4, "update": 3.377, "loss": "2.386", "ppl": "5.23", "wps": "367155", "ups": "3.09", "wpb": "118908", "bsz": "256", "num_updates": "173800", "lr": "0.000834545", "gnorm": "0.657", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21", "wall": "56725"} [2022-08-01 17:14:37,596][train_inner][INFO] - {"epoch": 4, "update": 3.381, "loss": "2.387", "ppl": "5.23", "wps": "366171", "ups": "3.09", "wpb": "118627", "bsz": "256", "num_updates": "174000", "lr": "0.000834343", "gnorm": "0.661", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.8", "wall": "56790"} [2022-08-01 17:15:42,700][train_inner][INFO] - {"epoch": 4, "update": 3.385, "loss": "2.395", "ppl": "5.26", "wps": "362916", "ups": "3.07", "wpb": "118134", "bsz": "256", "num_updates": "174200", "lr": "0.000834141", "gnorm": "0.66", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "56855"} [2022-08-01 17:16:47,199][train_inner][INFO] - {"epoch": 4, "update": 3.389, "loss": "2.386", "ppl": "5.23", "wps": "364762", "ups": "3.1", "wpb": "117633", "bsz": "256", "num_updates": "174400", "lr": "0.000833939", "gnorm": "0.662", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.4", "wall": "56920"} [2022-08-01 17:17:51,828][train_inner][INFO] - {"epoch": 4, "update": 3.392, "loss": "2.396", "ppl": "5.26", "wps": "363746", "ups": "3.09", "wpb": "117540", "bsz": "256", "num_updates": "174600", "lr": "0.000833737", "gnorm": "0.662", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "28.2", "wall": "56984"} [2022-08-01 17:18:56,253][train_inner][INFO] - {"epoch": 4, "update": 3.396, "loss": "2.392", "ppl": "5.25", "wps": "367028", "ups": "3.1", "wpb": "118229", "bsz": "256", "num_updates": "174800", "lr": "0.000833535", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.8", "wall": "57049"} [2022-08-01 17:20:01,173][train_inner][INFO] - {"epoch": 4, "update": 3.4, "loss": "2.393", "ppl": "5.25", "wps": "365820", "ups": "3.08", "wpb": "118742", "bsz": "256", "num_updates": "175000", "lr": "0.000833333", "gnorm": "0.657", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "57113"} [2022-08-01 17:21:06,043][train_inner][INFO] - {"epoch": 4, "update": 3.404, "loss": "2.39", "ppl": "5.24", "wps": "364063", "ups": "3.08", "wpb": "118082", "bsz": "256", "num_updates": "175200", "lr": "0.000833131", "gnorm": "0.663", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "57178"} [2022-08-01 17:21:31,021][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-01 17:21:51,929][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 [2022-08-01 17:22:11,419][train_inner][INFO] - {"epoch": 4, "update": 3.408, "loss": "2.391", "ppl": "5.24", "wps": "361346", "ups": "3.06", "wpb": "118116", "bsz": "256", "num_updates": "175400", "lr": "0.000832929", "gnorm": "0.666", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "25.7", "wall": "57244"} [2022-08-01 17:23:16,368][train_inner][INFO] - {"epoch": 4, "update": 3.412, "loss": "2.393", "ppl": "5.25", "wps": "362175", "ups": "3.08", "wpb": "117614", "bsz": "256", "num_updates": "175600", "lr": "0.000832727", "gnorm": "0.661", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.1", "wall": "57309"} [2022-08-01 17:24:21,262][train_inner][INFO] - {"epoch": 4, "update": 3.416, "loss": "2.388", "ppl": "5.23", "wps": "364573", "ups": "3.08", "wpb": "118290", "bsz": "256", "num_updates": "175800", "lr": "0.000832525", "gnorm": "0.66", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.3", "wall": "57374"} [2022-08-01 17:24:56,562][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 [2022-08-01 17:25:26,549][train_inner][INFO] - {"epoch": 4, "update": 3.42, "loss": "2.379", "ppl": "5.2", "wps": "362879", "ups": "3.06", "wpb": "118455", "bsz": "256", "num_updates": "176000", "lr": "0.000832323", "gnorm": "0.659", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "23.1", "wall": "57439"} [2022-08-01 17:26:31,343][train_inner][INFO] - {"epoch": 4, "update": 3.424, "loss": "2.39", "ppl": "5.24", "wps": "365928", "ups": "3.09", "wpb": "118548", "bsz": "256", "num_updates": "176200", "lr": "0.000832121", "gnorm": "0.677", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "24.5", "wall": "57504"} [2022-08-01 17:27:08,751][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 [2022-08-01 17:27:36,466][train_inner][INFO] - {"epoch": 4, "update": 3.427, "loss": "2.393", "ppl": "5.25", "wps": "362488", "ups": "3.07", "wpb": "118031", "bsz": "256", "num_updates": "176400", "lr": "0.000831919", "gnorm": "0.663", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20", "wall": "57569"} [2022-08-01 17:28:41,275][train_inner][INFO] - {"epoch": 4, "update": 3.431, "loss": "2.388", "ppl": "5.23", "wps": "365801", "ups": "3.09", "wpb": "118534", "bsz": "256", "num_updates": "176600", "lr": "0.000831717", "gnorm": "0.661", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20.9", "wall": "57634"} [2022-08-01 17:29:46,158][train_inner][INFO] - {"epoch": 4, "update": 3.435, "loss": "2.384", "ppl": "5.22", "wps": "365286", "ups": "3.08", "wpb": "118503", "bsz": "256", "num_updates": "176800", "lr": "0.000831515", "gnorm": "0.66", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.4", "wall": "57698"} [2022-08-01 17:30:50,933][train_inner][INFO] - {"epoch": 4, "update": 3.439, "loss": "2.386", "ppl": "5.23", "wps": "366134", "ups": "3.09", "wpb": "118581", "bsz": "256", "num_updates": "177000", "lr": "0.000831313", "gnorm": "0.657", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "28.2", "wall": "57763"} [2022-08-01 17:31:55,499][train_inner][INFO] - {"epoch": 4, "update": 3.443, "loss": "2.389", "ppl": "5.24", "wps": "365103", "ups": "3.1", "wpb": "117864", "bsz": "256", "num_updates": "177200", "lr": "0.000831111", "gnorm": "0.666", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "19.8", "wall": "57828"} [2022-08-01 17:33:01,550][train_inner][INFO] - {"epoch": 4, "update": 3.447, "loss": "2.39", "ppl": "5.24", "wps": "358267", "ups": "3.03", "wpb": "118317", "bsz": "256", "num_updates": "177400", "lr": "0.000830909", "gnorm": "0.662", "clip": "0", "loss_scale": "1", "train_wall": "66", "gb_free": "23.4", "wall": "57894"} [2022-08-01 17:34:07,583][train_inner][INFO] - {"epoch": 4, "update": 3.451, "loss": "2.39", "ppl": "5.24", "wps": "359552", "ups": "3.03", "wpb": "118711", "bsz": "256", "num_updates": "177600", "lr": "0.000830707", "gnorm": "0.657", "clip": "0", "loss_scale": "1", "train_wall": "66", "gb_free": "20.3", "wall": "57960"} [2022-08-01 17:35:12,534][train_inner][INFO] - {"epoch": 4, "update": 3.455, "loss": "2.382", "ppl": "5.21", "wps": "363968", "ups": "3.08", "wpb": "118199", "bsz": "256", "num_updates": "177800", "lr": "0.000830505", "gnorm": "0.66", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "19.9", "wall": "58025"} [2022-08-01 17:36:17,227][train_inner][INFO] - {"epoch": 4, "update": 3.459, "loss": "2.386", "ppl": "5.23", "wps": "365684", "ups": "3.09", "wpb": "118283", "bsz": "256", "num_updates": "178000", "lr": "0.000830303", "gnorm": "0.66", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "22.2", "wall": "58090"} [2022-08-01 17:37:22,538][train_inner][INFO] - {"epoch": 4, "update": 3.462, "loss": "2.383", "ppl": "5.22", "wps": "360574", "ups": "3.06", "wpb": "117747", "bsz": "256", "num_updates": "178200", "lr": "0.000830101", "gnorm": "0.663", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "19.9", "wall": "58155"} [2022-08-01 17:38:27,322][train_inner][INFO] - {"epoch": 4, "update": 3.466, "loss": "2.383", "ppl": "5.22", "wps": "365577", "ups": "3.09", "wpb": "118415", "bsz": "256", "num_updates": "178400", "lr": "0.000829899", "gnorm": "0.663", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "22.2", "wall": "58220"} [2022-08-01 17:39:32,108][train_inner][INFO] - {"epoch": 4, "update": 3.47, "loss": "2.383", "ppl": "5.22", "wps": "366019", "ups": "3.09", "wpb": "118562", "bsz": "256", "num_updates": "178600", "lr": "0.000829697", "gnorm": "0.664", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.1", "wall": "58284"} [2022-08-01 17:40:36,932][train_inner][INFO] - {"epoch": 4, "update": 3.474, "loss": "2.381", "ppl": "5.21", "wps": "366968", "ups": "3.09", "wpb": "118942", "bsz": "256", "num_updates": "178800", "lr": "0.000829495", "gnorm": "0.66", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "22.7", "wall": "58349"} [2022-08-01 17:41:41,779][train_inner][INFO] - {"epoch": 4, "update": 3.478, "loss": "2.381", "ppl": "5.21", "wps": "365196", "ups": "3.08", "wpb": "118407", "bsz": "256", "num_updates": "179000", "lr": "0.000829293", "gnorm": "0.662", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20", "wall": "58414"} [2022-08-01 17:42:46,834][train_inner][INFO] - {"epoch": 4, "update": 3.482, "loss": "2.387", "ppl": "5.23", "wps": "364891", "ups": "3.07", "wpb": "118688", "bsz": "256", "num_updates": "179200", "lr": "0.000829091", "gnorm": "0.66", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.9", "wall": "58479"} [2022-08-01 17:43:51,655][train_inner][INFO] - {"epoch": 4, "update": 3.486, "loss": "2.392", "ppl": "5.25", "wps": "364839", "ups": "3.09", "wpb": "118244", "bsz": "256", "num_updates": "179400", "lr": "0.000828889", "gnorm": "0.677", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.9", "wall": "58544"} [2022-08-01 17:44:56,511][train_inner][INFO] - {"epoch": 4, "update": 3.49, "loss": "2.378", "ppl": "5.2", "wps": "365803", "ups": "3.08", "wpb": "118622", "bsz": "256", "num_updates": "179600", "lr": "0.000828687", "gnorm": "0.66", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20", "wall": "58609"} [2022-08-01 17:46:01,527][train_inner][INFO] - {"epoch": 4, "update": 3.493, "loss": "2.381", "ppl": "5.21", "wps": "364480", "ups": "3.08", "wpb": "118483", "bsz": "256", "num_updates": "179800", "lr": "0.000828485", "gnorm": "0.664", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.5", "wall": "58674"} [2022-08-01 17:47:06,266][train_inner][INFO] - {"epoch": 4, "update": 3.497, "loss": "2.384", "ppl": "5.22", "wps": "365315", "ups": "3.09", "wpb": "118250", "bsz": "256", "num_updates": "180000", "lr": "0.000828283", "gnorm": "0.662", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.5", "wall": "58739"} [2022-08-01 17:47:30,165][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 [2022-08-01 17:48:11,230][train_inner][INFO] - {"epoch": 4, "update": 3.501, "loss": "2.41", "ppl": "5.31", "wps": "364302", "ups": "3.08", "wpb": "118329", "bsz": "256", "num_updates": "180200", "lr": "0.000828081", "gnorm": "0.839", "clip": "0.5", "loss_scale": "1", "train_wall": "65", "gb_free": "21.2", "wall": "58804"} [2022-08-01 17:49:15,791][train_inner][INFO] - {"epoch": 4, "update": 3.505, "loss": "2.383", "ppl": "5.22", "wps": "367613", "ups": "3.1", "wpb": "118668", "bsz": "256", "num_updates": "180400", "lr": "0.000827879", "gnorm": "0.66", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20", "wall": "58868"} [2022-08-01 17:50:20,551][train_inner][INFO] - {"epoch": 4, "update": 3.509, "loss": "2.389", "ppl": "5.24", "wps": "365582", "ups": "3.09", "wpb": "118373", "bsz": "256", "num_updates": "180600", "lr": "0.000827677", "gnorm": "0.668", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "26.8", "wall": "58933"} [2022-08-01 17:51:25,541][train_inner][INFO] - {"epoch": 4, "update": 3.513, "loss": "2.386", "ppl": "5.23", "wps": "363391", "ups": "3.08", "wpb": "118082", "bsz": "256", "num_updates": "180800", "lr": "0.000827475", "gnorm": "0.665", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.9", "wall": "58998"} [2022-08-01 17:52:30,968][train_inner][INFO] - {"epoch": 4, "update": 3.517, "loss": "2.382", "ppl": "5.21", "wps": "362889", "ups": "3.06", "wpb": "118712", "bsz": "256", "num_updates": "181000", "lr": "0.000827273", "gnorm": "0.662", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "23.7", "wall": "59063"} [2022-08-01 17:53:35,795][train_inner][INFO] - {"epoch": 4, "update": 3.521, "loss": "2.38", "ppl": "5.21", "wps": "365438", "ups": "3.09", "wpb": "118449", "bsz": "256", "num_updates": "181200", "lr": "0.000827071", "gnorm": "0.662", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "22.7", "wall": "59128"} [2022-08-01 17:54:40,977][train_inner][INFO] - {"epoch": 4, "update": 3.525, "loss": "2.383", "ppl": "5.22", "wps": "361843", "ups": "3.07", "wpb": "117925", "bsz": "256", "num_updates": "181400", "lr": "0.000826869", "gnorm": "0.659", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20", "wall": "59193"} [2022-08-01 17:55:45,766][train_inner][INFO] - {"epoch": 4, "update": 3.528, "loss": "2.385", "ppl": "5.23", "wps": "366282", "ups": "3.09", "wpb": "118653", "bsz": "256", "num_updates": "181600", "lr": "0.000826667", "gnorm": "0.662", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20.1", "wall": "59258"} [2022-08-01 17:56:50,877][train_inner][INFO] - {"epoch": 4, "update": 3.532, "loss": "2.385", "ppl": "5.22", "wps": "360740", "ups": "3.07", "wpb": "117438", "bsz": "256", "num_updates": "181800", "lr": "0.000826465", "gnorm": "0.666", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.3", "wall": "59323"} [2022-08-01 17:57:55,820][train_inner][INFO] - {"epoch": 4, "update": 3.536, "loss": "2.385", "ppl": "5.22", "wps": "365284", "ups": "3.08", "wpb": "118611", "bsz": "256", "num_updates": "182000", "lr": "0.000826263", "gnorm": "0.661", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.4", "wall": "59388"} [2022-08-01 17:59:00,897][train_inner][INFO] - {"epoch": 4, "update": 3.54, "loss": "2.381", "ppl": "5.21", "wps": "363492", "ups": "3.07", "wpb": "118272", "bsz": "256", "num_updates": "182200", "lr": "0.000826061", "gnorm": "0.665", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.4", "wall": "59453"} [2022-08-01 18:00:05,577][train_inner][INFO] - {"epoch": 4, "update": 3.544, "loss": "2.377", "ppl": "5.19", "wps": "366398", "ups": "3.09", "wpb": "118492", "bsz": "256", "num_updates": "182400", "lr": "0.000825859", "gnorm": "0.664", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "19.7", "wall": "59518"} [2022-08-01 18:01:10,464][train_inner][INFO] - {"epoch": 4, "update": 3.548, "loss": "2.385", "ppl": "5.22", "wps": "364513", "ups": "3.08", "wpb": "118259", "bsz": "256", "num_updates": "182600", "lr": "0.000825657", "gnorm": "0.663", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.8", "wall": "59583"} [2022-08-01 18:02:15,590][train_inner][INFO] - {"epoch": 4, "update": 3.552, "loss": "2.382", "ppl": "5.21", "wps": "363318", "ups": "3.07", "wpb": "118306", "bsz": "256", "num_updates": "182800", "lr": "0.000825455", "gnorm": "0.664", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.8", "wall": "59648"} [2022-08-01 18:03:20,693][train_inner][INFO] - {"epoch": 4, "update": 3.556, "loss": "2.378", "ppl": "5.2", "wps": "365050", "ups": "3.07", "wpb": "118828", "bsz": "256", "num_updates": "183000", "lr": "0.000825253", "gnorm": "0.67", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.5", "wall": "59713"} [2022-08-01 18:04:25,643][train_inner][INFO] - {"epoch": 4, "update": 3.56, "loss": "2.383", "ppl": "5.22", "wps": "365296", "ups": "3.08", "wpb": "118627", "bsz": "256", "num_updates": "183200", "lr": "0.000825051", "gnorm": "0.664", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "28.2", "wall": "59778"} [2022-08-01 18:05:30,435][train_inner][INFO] - {"epoch": 4, "update": 3.563, "loss": "2.391", "ppl": "5.24", "wps": "365274", "ups": "3.09", "wpb": "118332", "bsz": "256", "num_updates": "183400", "lr": "0.000824848", "gnorm": "0.665", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.3", "wall": "59843"} [2022-08-01 18:06:35,883][train_inner][INFO] - {"epoch": 4, "update": 3.567, "loss": "2.376", "ppl": "5.19", "wps": "363549", "ups": "3.06", "wpb": "118966", "bsz": "256", "num_updates": "183600", "lr": "0.000824646", "gnorm": "0.665", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "22.3", "wall": "59908"} [2022-08-01 18:07:40,846][train_inner][INFO] - {"epoch": 4, "update": 3.571, "loss": "2.384", "ppl": "5.22", "wps": "365557", "ups": "3.08", "wpb": "118732", "bsz": "256", "num_updates": "183800", "lr": "0.000824444", "gnorm": "0.666", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.1", "wall": "59973"} [2022-08-01 18:08:45,592][train_inner][INFO] - {"epoch": 4, "update": 3.575, "loss": "2.378", "ppl": "5.2", "wps": "366412", "ups": "3.09", "wpb": "118616", "bsz": "256", "num_updates": "184000", "lr": "0.000824242", "gnorm": "0.666", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "23.8", "wall": "60038"} [2022-08-01 18:09:50,097][train_inner][INFO] - {"epoch": 4, "update": 3.579, "loss": "2.384", "ppl": "5.22", "wps": "365839", "ups": "3.1", "wpb": "117990", "bsz": "256", "num_updates": "184200", "lr": "0.00082404", "gnorm": "0.68", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22.4", "wall": "60102"} [2022-08-01 18:10:55,015][train_inner][INFO] - {"epoch": 4, "update": 3.583, "loss": "2.386", "ppl": "5.23", "wps": "364076", "ups": "3.08", "wpb": "118175", "bsz": "256", "num_updates": "184400", "lr": "0.000823838", "gnorm": "0.669", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "24.1", "wall": "60167"} [2022-08-01 18:12:00,224][train_inner][INFO] - {"epoch": 4, "update": 3.587, "loss": "2.385", "ppl": "5.23", "wps": "362817", "ups": "3.07", "wpb": "118292", "bsz": "256", "num_updates": "184600", "lr": "0.000823636", "gnorm": "0.663", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "28.2", "wall": "60233"} [2022-08-01 18:13:05,229][train_inner][INFO] - {"epoch": 4, "update": 3.591, "loss": "2.379", "ppl": "5.2", "wps": "365028", "ups": "3.08", "wpb": "118641", "bsz": "256", "num_updates": "184800", "lr": "0.000823434", "gnorm": "0.664", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.7", "wall": "60298"} [2022-08-01 18:14:09,942][train_inner][INFO] - {"epoch": 4, "update": 3.594, "loss": "2.384", "ppl": "5.22", "wps": "364749", "ups": "3.09", "wpb": "118018", "bsz": "256", "num_updates": "185000", "lr": "0.000823232", "gnorm": "0.668", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.7", "wall": "60362"} [2022-08-01 18:15:14,866][train_inner][INFO] - {"epoch": 4, "update": 3.598, "loss": "2.375", "ppl": "5.19", "wps": "364396", "ups": "3.08", "wpb": "118289", "bsz": "256", "num_updates": "185200", "lr": "0.00082303", "gnorm": "0.684", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "28.2", "wall": "60427"} [2022-08-01 18:16:19,641][train_inner][INFO] - {"epoch": 4, "update": 3.602, "loss": "2.381", "ppl": "5.21", "wps": "365362", "ups": "3.09", "wpb": "118329", "bsz": "256", "num_updates": "185400", "lr": "0.000822828", "gnorm": "0.665", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22.6", "wall": "60492"} [2022-08-01 18:17:24,310][train_inner][INFO] - {"epoch": 4, "update": 3.606, "loss": "2.386", "ppl": "5.23", "wps": "364248", "ups": "3.09", "wpb": "117777", "bsz": "256", "num_updates": "185600", "lr": "0.000822626", "gnorm": "0.672", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "24.1", "wall": "60557"} [2022-08-01 18:18:28,921][train_inner][INFO] - {"epoch": 4, "update": 3.61, "loss": "2.387", "ppl": "5.23", "wps": "365360", "ups": "3.1", "wpb": "118030", "bsz": "256", "num_updates": "185800", "lr": "0.000822424", "gnorm": "0.67", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.3", "wall": "60621"} [2022-08-01 18:19:35,060][train_inner][INFO] - {"epoch": 4, "update": 3.614, "loss": "2.379", "ppl": "5.2", "wps": "356832", "ups": "3.02", "wpb": "118000", "bsz": "256", "num_updates": "186000", "lr": "0.000822222", "gnorm": "0.666", "clip": "0", "loss_scale": "4", "train_wall": "66", "gb_free": "19.8", "wall": "60687"} [2022-08-01 18:20:40,045][train_inner][INFO] - {"epoch": 4, "update": 3.618, "loss": "2.373", "ppl": "5.18", "wps": "365374", "ups": "3.08", "wpb": "118716", "bsz": "256", "num_updates": "186200", "lr": "0.00082202", "gnorm": "0.666", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.8", "wall": "60752"} [2022-08-01 18:21:45,559][train_inner][INFO] - {"epoch": 4, "update": 3.622, "loss": "2.378", "ppl": "5.2", "wps": "358447", "ups": "3.05", "wpb": "117414", "bsz": "256", "num_updates": "186400", "lr": "0.000821818", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.7", "wall": "60818"} [2022-08-01 18:22:51,478][train_inner][INFO] - {"epoch": 4, "update": 3.626, "loss": "2.375", "ppl": "5.19", "wps": "359188", "ups": "3.03", "wpb": "118386", "bsz": "256", "num_updates": "186600", "lr": "0.000821616", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "21.1", "wall": "60884"} [2022-08-01 18:23:56,230][train_inner][INFO] - {"epoch": 4, "update": 3.629, "loss": "2.374", "ppl": "5.18", "wps": "367359", "ups": "3.09", "wpb": "118934", "bsz": "256", "num_updates": "186800", "lr": "0.000821414", "gnorm": "0.665", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20", "wall": "60949"} [2022-08-01 18:25:02,519][train_inner][INFO] - {"epoch": 4, "update": 3.633, "loss": "2.374", "ppl": "5.18", "wps": "357850", "ups": "3.02", "wpb": "118605", "bsz": "256", "num_updates": "187000", "lr": "0.000821212", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "20.9", "wall": "61015"} [2022-08-01 18:26:07,554][train_inner][INFO] - {"epoch": 4, "update": 3.637, "loss": "2.377", "ppl": "5.19", "wps": "364972", "ups": "3.08", "wpb": "118678", "bsz": "256", "num_updates": "187200", "lr": "0.00082101", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.9", "wall": "61080"} [2022-08-01 18:27:12,782][train_inner][INFO] - {"epoch": 4, "update": 3.641, "loss": "2.379", "ppl": "5.2", "wps": "363651", "ups": "3.07", "wpb": "118599", "bsz": "256", "num_updates": "187400", "lr": "0.000820808", "gnorm": "0.664", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.7", "wall": "61145"} [2022-08-01 18:28:18,965][train_inner][INFO] - {"epoch": 4, "update": 3.645, "loss": "2.378", "ppl": "5.2", "wps": "356468", "ups": "3.02", "wpb": "117958", "bsz": "256", "num_updates": "187600", "lr": "0.000820606", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "26.4", "wall": "61211"} [2022-08-01 18:29:24,049][train_inner][INFO] - {"epoch": 4, "update": 3.649, "loss": "2.377", "ppl": "5.19", "wps": "364576", "ups": "3.07", "wpb": "118638", "bsz": "256", "num_updates": "187800", "lr": "0.000820404", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.8", "wall": "61276"} [2022-08-01 18:30:29,200][train_inner][INFO] - {"epoch": 4, "update": 3.653, "loss": "2.372", "ppl": "5.18", "wps": "364062", "ups": "3.07", "wpb": "118594", "bsz": "256", "num_updates": "188000", "lr": "0.000820202", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "61342"} [2022-08-01 18:31:34,011][train_inner][INFO] - {"epoch": 4, "update": 3.657, "loss": "2.378", "ppl": "5.2", "wps": "366088", "ups": "3.09", "wpb": "118630", "bsz": "256", "num_updates": "188200", "lr": "0.00082", "gnorm": "0.665", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.2", "wall": "61406"} [2022-08-01 18:32:38,882][train_inner][INFO] - {"epoch": 4, "update": 3.661, "loss": "2.38", "ppl": "5.2", "wps": "365505", "ups": "3.08", "wpb": "118553", "bsz": "256", "num_updates": "188400", "lr": "0.000819798", "gnorm": "0.671", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20", "wall": "61471"} [2022-08-01 18:33:43,878][train_inner][INFO] - {"epoch": 4, "update": 3.664, "loss": "2.371", "ppl": "5.17", "wps": "363879", "ups": "3.08", "wpb": "118251", "bsz": "256", "num_updates": "188600", "lr": "0.000819596", "gnorm": "0.668", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.9", "wall": "61536"} [2022-08-01 18:34:48,593][train_inner][INFO] - {"epoch": 4, "update": 3.668, "loss": "2.377", "ppl": "5.2", "wps": "365530", "ups": "3.09", "wpb": "118276", "bsz": "256", "num_updates": "188800", "lr": "0.000819394", "gnorm": "0.667", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20", "wall": "61601"} [2022-08-01 18:35:53,802][train_inner][INFO] - {"epoch": 4, "update": 3.672, "loss": "2.375", "ppl": "5.19", "wps": "363943", "ups": "3.07", "wpb": "118659", "bsz": "256", "num_updates": "189000", "lr": "0.000819192", "gnorm": "0.666", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.3", "wall": "61666"} [2022-08-01 18:36:58,641][train_inner][INFO] - {"epoch": 4, "update": 3.676, "loss": "2.375", "ppl": "5.19", "wps": "364008", "ups": "3.08", "wpb": "118009", "bsz": "256", "num_updates": "189200", "lr": "0.00081899", "gnorm": "0.671", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.8", "wall": "61731"} [2022-08-01 18:38:00,458][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-01 18:38:03,940][train_inner][INFO] - {"epoch": 4, "update": 3.68, "loss": "2.377", "ppl": "5.2", "wps": "364214", "ups": "3.06", "wpb": "118911", "bsz": "256", "num_updates": "189400", "lr": "0.000818788", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.6", "wall": "61796"} [2022-08-01 18:39:08,663][train_inner][INFO] - {"epoch": 4, "update": 3.684, "loss": "2.366", "ppl": "5.15", "wps": "365910", "ups": "3.09", "wpb": "118415", "bsz": "256", "num_updates": "189600", "lr": "0.000818586", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.2", "wall": "61861"} [2022-08-01 18:40:13,317][train_inner][INFO] - {"epoch": 4, "update": 3.688, "loss": "2.366", "ppl": "5.16", "wps": "367761", "ups": "3.09", "wpb": "118884", "bsz": "256", "num_updates": "189800", "lr": "0.000818384", "gnorm": "0.665", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.6", "wall": "61926"} [2022-08-01 18:41:18,139][train_inner][INFO] - {"epoch": 4, "update": 3.692, "loss": "2.369", "ppl": "5.17", "wps": "365622", "ups": "3.09", "wpb": "118500", "bsz": "256", "num_updates": "190000", "lr": "0.000818182", "gnorm": "0.67", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.5", "wall": "61990"} [2022-08-01 18:42:23,333][train_inner][INFO] - {"epoch": 4, "update": 3.696, "loss": "2.37", "ppl": "5.17", "wps": "363025", "ups": "3.07", "wpb": "118332", "bsz": "256", "num_updates": "190200", "lr": "0.00081798", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "62056"} [2022-08-01 18:43:27,972][train_inner][INFO] - {"epoch": 4, "update": 3.699, "loss": "2.378", "ppl": "5.2", "wps": "365993", "ups": "3.09", "wpb": "118287", "bsz": "256", "num_updates": "190400", "lr": "0.000817778", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23.2", "wall": "62120"} [2022-08-01 18:44:32,356][train_inner][INFO] - {"epoch": 4, "update": 3.703, "loss": "2.373", "ppl": "5.18", "wps": "367234", "ups": "3.11", "wpb": "118217", "bsz": "256", "num_updates": "190600", "lr": "0.000817576", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20", "wall": "62185"} [2022-08-01 18:45:37,437][train_inner][INFO] - {"epoch": 4, "update": 3.707, "loss": "2.375", "ppl": "5.19", "wps": "363238", "ups": "3.07", "wpb": "118198", "bsz": "256", "num_updates": "190800", "lr": "0.000817374", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.3", "wall": "62250"} [2022-08-01 18:45:39,661][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 [2022-08-01 18:45:40,602][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 [2022-08-01 18:45:41,202][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 [2022-08-01 18:45:41,496][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5 [2022-08-01 18:45:41,795][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.25 [2022-08-01 18:46:44,145][train_inner][INFO] - {"epoch": 4, "update": 3.711, "loss": "2.387", "ppl": "5.23", "wps": "356060", "ups": "3", "wpb": "118759", "bsz": "256", "num_updates": "191000", "lr": "0.000817172", "gnorm": "0.973", "clip": "0.5", "loss_scale": "0.25", "train_wall": "66", "gb_free": "20.5", "wall": "62316"} [2022-08-01 18:47:49,019][train_inner][INFO] - {"epoch": 4, "update": 3.715, "loss": "2.375", "ppl": "5.19", "wps": "364366", "ups": "3.08", "wpb": "118188", "bsz": "256", "num_updates": "191200", "lr": "0.00081697", "gnorm": "0.669", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "19.9", "wall": "62381"} [2022-08-01 18:48:53,703][train_inner][INFO] - {"epoch": 4, "update": 3.719, "loss": "2.374", "ppl": "5.19", "wps": "366708", "ups": "3.09", "wpb": "118598", "bsz": "256", "num_updates": "191400", "lr": "0.000816768", "gnorm": "0.669", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "19.7", "wall": "62446"} [2022-08-01 18:49:58,310][train_inner][INFO] - {"epoch": 4, "update": 3.723, "loss": "2.369", "ppl": "5.17", "wps": "366659", "ups": "3.1", "wpb": "118442", "bsz": "256", "num_updates": "191600", "lr": "0.000816566", "gnorm": "0.669", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "27.5", "wall": "62511"} [2022-08-01 18:51:03,550][train_inner][INFO] - {"epoch": 4, "update": 3.727, "loss": "2.378", "ppl": "5.2", "wps": "362286", "ups": "3.07", "wpb": "118174", "bsz": "256", "num_updates": "191800", "lr": "0.000816364", "gnorm": "0.669", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "21.5", "wall": "62576"} [2022-08-01 18:52:09,235][train_inner][INFO] - {"epoch": 4, "update": 3.731, "loss": "2.378", "ppl": "5.2", "wps": "359861", "ups": "3.04", "wpb": "118187", "bsz": "256", "num_updates": "192000", "lr": "0.000816162", "gnorm": "0.67", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "27.5", "wall": "62642"} [2022-08-01 18:53:14,083][train_inner][INFO] - {"epoch": 4, "update": 3.734, "loss": "2.382", "ppl": "5.21", "wps": "362126", "ups": "3.08", "wpb": "117413", "bsz": "256", "num_updates": "192200", "lr": "0.00081596", "gnorm": "0.673", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "22.1", "wall": "62706"} [2022-08-01 18:54:19,237][train_inner][INFO] - {"epoch": 4, "update": 3.738, "loss": "2.372", "ppl": "5.18", "wps": "363480", "ups": "3.07", "wpb": "118408", "bsz": "256", "num_updates": "192400", "lr": "0.000815758", "gnorm": "0.669", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "20.9", "wall": "62772"} [2022-08-01 18:55:24,052][train_inner][INFO] - {"epoch": 4, "update": 3.742, "loss": "2.37", "ppl": "5.17", "wps": "365631", "ups": "3.09", "wpb": "118492", "bsz": "256", "num_updates": "192600", "lr": "0.000815556", "gnorm": "0.669", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "22.4", "wall": "62836"} [2022-08-01 18:56:29,864][train_inner][INFO] - {"epoch": 4, "update": 3.746, "loss": "2.369", "ppl": "5.17", "wps": "359840", "ups": "3.04", "wpb": "118406", "bsz": "256", "num_updates": "192800", "lr": "0.000815354", "gnorm": "0.67", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "20.5", "wall": "62902"} [2022-08-01 18:57:34,477][train_inner][INFO] - {"epoch": 4, "update": 3.75, "loss": "2.372", "ppl": "5.18", "wps": "364922", "ups": "3.1", "wpb": "117893", "bsz": "256", "num_updates": "193000", "lr": "0.000815152", "gnorm": "0.671", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20.2", "wall": "62967"} [2022-08-01 18:58:39,224][train_inner][INFO] - {"epoch": 4, "update": 3.754, "loss": "2.365", "ppl": "5.15", "wps": "365732", "ups": "3.09", "wpb": "118398", "bsz": "256", "num_updates": "193200", "lr": "0.000814949", "gnorm": "0.673", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20", "wall": "63032"} [2022-08-01 18:59:43,691][train_inner][INFO] - {"epoch": 4, "update": 3.758, "loss": "2.377", "ppl": "5.2", "wps": "366553", "ups": "3.1", "wpb": "118152", "bsz": "256", "num_updates": "193400", "lr": "0.000814747", "gnorm": "0.677", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "19.7", "wall": "63096"} [2022-08-01 19:00:48,443][train_inner][INFO] - {"epoch": 4, "update": 3.762, "loss": "2.373", "ppl": "5.18", "wps": "366528", "ups": "3.09", "wpb": "118664", "bsz": "256", "num_updates": "193600", "lr": "0.000814545", "gnorm": "0.671", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20.4", "wall": "63161"} [2022-08-01 19:01:54,163][train_inner][INFO] - {"epoch": 4, "update": 3.766, "loss": "2.369", "ppl": "5.17", "wps": "359318", "ups": "3.04", "wpb": "118071", "bsz": "256", "num_updates": "193800", "lr": "0.000814343", "gnorm": "0.668", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.1", "wall": "63226"} [2022-08-01 19:02:58,655][train_inner][INFO] - {"epoch": 4, "update": 3.769, "loss": "2.377", "ppl": "5.19", "wps": "365323", "ups": "3.1", "wpb": "117801", "bsz": "256", "num_updates": "194000", "lr": "0.000814141", "gnorm": "0.674", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20.5", "wall": "63291"} [2022-08-01 19:04:03,796][train_inner][INFO] - {"epoch": 4, "update": 3.773, "loss": "2.37", "ppl": "5.17", "wps": "362484", "ups": "3.07", "wpb": "118060", "bsz": "256", "num_updates": "194200", "lr": "0.000813939", "gnorm": "0.67", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "22.1", "wall": "63356"} [2022-08-01 19:05:08,819][train_inner][INFO] - {"epoch": 4, "update": 3.777, "loss": "2.37", "ppl": "5.17", "wps": "363182", "ups": "3.08", "wpb": "118075", "bsz": "256", "num_updates": "194400", "lr": "0.000813737", "gnorm": "0.67", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.5", "wall": "63421"} [2022-08-01 19:06:13,588][train_inner][INFO] - {"epoch": 4, "update": 3.781, "loss": "2.371", "ppl": "5.17", "wps": "364293", "ups": "3.09", "wpb": "117972", "bsz": "256", "num_updates": "194600", "lr": "0.000813535", "gnorm": "0.696", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "22.5", "wall": "63486"} [2022-08-01 19:07:17,994][train_inner][INFO] - {"epoch": 4, "update": 3.785, "loss": "2.371", "ppl": "5.17", "wps": "368137", "ups": "3.11", "wpb": "118549", "bsz": "256", "num_updates": "194800", "lr": "0.000813333", "gnorm": "0.676", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "21.3", "wall": "63550"} [2022-08-01 19:08:22,669][train_inner][INFO] - {"epoch": 4, "update": 3.789, "loss": "2.365", "ppl": "5.15", "wps": "365994", "ups": "3.09", "wpb": "118352", "bsz": "256", "num_updates": "195000", "lr": "0.000813131", "gnorm": "0.671", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20.1", "wall": "63615"} [2022-08-01 19:09:27,809][train_inner][INFO] - {"epoch": 4, "update": 3.793, "loss": "2.374", "ppl": "5.19", "wps": "362489", "ups": "3.07", "wpb": "118060", "bsz": "256", "num_updates": "195200", "lr": "0.000812929", "gnorm": "0.677", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.1", "wall": "63680"} [2022-08-01 19:10:32,542][train_inner][INFO] - {"epoch": 4, "update": 3.797, "loss": "2.364", "ppl": "5.15", "wps": "366742", "ups": "3.09", "wpb": "118700", "bsz": "256", "num_updates": "195400", "lr": "0.000812727", "gnorm": "0.67", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20.5", "wall": "63745"} [2022-08-01 19:11:37,570][train_inner][INFO] - {"epoch": 4, "update": 3.8, "loss": "2.371", "ppl": "5.17", "wps": "363933", "ups": "3.08", "wpb": "118327", "bsz": "256", "num_updates": "195600", "lr": "0.000812525", "gnorm": "0.672", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20", "wall": "63810"} [2022-08-01 19:12:42,341][train_inner][INFO] - {"epoch": 4, "update": 3.804, "loss": "2.365", "ppl": "5.15", "wps": "364030", "ups": "3.09", "wpb": "117892", "bsz": "256", "num_updates": "195800", "lr": "0.000812323", "gnorm": "0.674", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "21.3", "wall": "63875"} [2022-08-01 19:13:47,019][train_inner][INFO] - {"epoch": 4, "update": 3.808, "loss": "2.366", "ppl": "5.16", "wps": "367765", "ups": "3.09", "wpb": "118930", "bsz": "256", "num_updates": "196000", "lr": "0.000812121", "gnorm": "0.678", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "19.9", "wall": "63939"} [2022-08-01 19:14:52,385][train_inner][INFO] - {"epoch": 4, "update": 3.812, "loss": "2.372", "ppl": "5.18", "wps": "361210", "ups": "3.06", "wpb": "118053", "bsz": "256", "num_updates": "196200", "lr": "0.000811919", "gnorm": "0.674", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "19.9", "wall": "64005"} [2022-08-01 19:15:58,329][train_inner][INFO] - {"epoch": 4, "update": 3.816, "loss": "2.37", "ppl": "5.17", "wps": "359463", "ups": "3.03", "wpb": "118520", "bsz": "256", "num_updates": "196400", "lr": "0.000811717", "gnorm": "0.674", "clip": "0", "loss_scale": "1", "train_wall": "66", "gb_free": "22.8", "wall": "64071"} [2022-08-01 19:17:03,021][train_inner][INFO] - {"epoch": 4, "update": 3.82, "loss": "2.372", "ppl": "5.18", "wps": "364479", "ups": "3.09", "wpb": "117893", "bsz": "256", "num_updates": "196600", "lr": "0.000811515", "gnorm": "0.679", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "22", "wall": "64135"} [2022-08-01 19:18:07,873][train_inner][INFO] - {"epoch": 4, "update": 3.824, "loss": "2.366", "ppl": "5.15", "wps": "363811", "ups": "3.08", "wpb": "117968", "bsz": "256", "num_updates": "196800", "lr": "0.000811313", "gnorm": "0.682", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "23", "wall": "64200"} [2022-08-01 19:19:12,461][train_inner][INFO] - {"epoch": 4, "update": 3.828, "loss": "2.367", "ppl": "5.16", "wps": "365961", "ups": "3.1", "wpb": "118181", "bsz": "256", "num_updates": "197000", "lr": "0.000811111", "gnorm": "0.673", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "22", "wall": "64265"} [2022-08-01 19:20:17,420][train_inner][INFO] - {"epoch": 4, "update": 3.832, "loss": "2.364", "ppl": "5.15", "wps": "364709", "ups": "3.08", "wpb": "118454", "bsz": "256", "num_updates": "197200", "lr": "0.000810909", "gnorm": "0.671", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.4", "wall": "64330"} [2022-08-01 19:20:31,394][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 [2022-08-01 19:20:31,692][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5 [2022-08-01 19:21:23,044][train_inner][INFO] - {"epoch": 4, "update": 3.835, "loss": "2.371", "ppl": "5.17", "wps": "360997", "ups": "3.05", "wpb": "118449", "bsz": "256", "num_updates": "197400", "lr": "0.000810707", "gnorm": "0.79", "clip": "0.5", "loss_scale": "0.5", "train_wall": "65", "gb_free": "21.5", "wall": "64395"} [2022-08-01 19:22:28,151][train_inner][INFO] - {"epoch": 4, "update": 3.839, "loss": "2.373", "ppl": "5.18", "wps": "364201", "ups": "3.07", "wpb": "118559", "bsz": "256", "num_updates": "197600", "lr": "0.000810505", "gnorm": "0.679", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "22.3", "wall": "64460"} [2022-08-01 19:23:32,871][train_inner][INFO] - {"epoch": 4, "update": 3.843, "loss": "2.368", "ppl": "5.16", "wps": "365460", "ups": "3.09", "wpb": "118260", "bsz": "256", "num_updates": "197800", "lr": "0.000810303", "gnorm": "0.671", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20", "wall": "64525"} [2022-08-01 19:24:37,596][train_inner][INFO] - {"epoch": 4, "update": 3.847, "loss": "2.366", "ppl": "5.16", "wps": "366162", "ups": "3.09", "wpb": "118498", "bsz": "256", "num_updates": "198000", "lr": "0.000810101", "gnorm": "0.674", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "24", "wall": "64590"} [2022-08-01 19:25:42,503][train_inner][INFO] - {"epoch": 4, "update": 3.851, "loss": "2.367", "ppl": "5.16", "wps": "365250", "ups": "3.08", "wpb": "118535", "bsz": "256", "num_updates": "198200", "lr": "0.000809899", "gnorm": "0.718", "clip": "0.5", "loss_scale": "0.5", "train_wall": "65", "gb_free": "21.6", "wall": "64655"} [2022-08-01 19:26:47,354][train_inner][INFO] - {"epoch": 4, "update": 3.855, "loss": "2.374", "ppl": "5.18", "wps": "363855", "ups": "3.08", "wpb": "117981", "bsz": "256", "num_updates": "198400", "lr": "0.000809697", "gnorm": "0.678", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "21.2", "wall": "64720"} [2022-08-01 19:27:52,472][train_inner][INFO] - {"epoch": 4, "update": 3.859, "loss": "2.367", "ppl": "5.16", "wps": "362735", "ups": "3.07", "wpb": "118101", "bsz": "256", "num_updates": "198600", "lr": "0.000809495", "gnorm": "0.673", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "24.5", "wall": "64785"} [2022-08-01 19:28:57,518][train_inner][INFO] - {"epoch": 4, "update": 3.863, "loss": "2.365", "ppl": "5.15", "wps": "363968", "ups": "3.07", "wpb": "118371", "bsz": "256", "num_updates": "198800", "lr": "0.000809293", "gnorm": "0.671", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "19.8", "wall": "64850"} [2022-08-01 19:30:02,000][train_inner][INFO] - {"epoch": 4, "update": 3.867, "loss": "2.362", "ppl": "5.14", "wps": "366510", "ups": "3.1", "wpb": "118166", "bsz": "256", "num_updates": "199000", "lr": "0.000809091", "gnorm": "0.676", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "19.9", "wall": "64914"} [2022-08-01 19:31:06,465][train_inner][INFO] - {"epoch": 4, "update": 3.87, "loss": "2.364", "ppl": "5.15", "wps": "365672", "ups": "3.1", "wpb": "117863", "bsz": "256", "num_updates": "199200", "lr": "0.000808889", "gnorm": "0.674", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "19.8", "wall": "64979"} [2022-08-01 19:32:11,304][train_inner][INFO] - {"epoch": 4, "update": 3.874, "loss": "2.371", "ppl": "5.17", "wps": "362642", "ups": "3.08", "wpb": "117566", "bsz": "256", "num_updates": "199400", "lr": "0.000808687", "gnorm": "0.677", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "26.9", "wall": "65044"} [2022-08-01 19:33:16,035][train_inner][INFO] - {"epoch": 4, "update": 3.878, "loss": "2.368", "ppl": "5.16", "wps": "363892", "ups": "3.09", "wpb": "117772", "bsz": "256", "num_updates": "199600", "lr": "0.000808485", "gnorm": "0.677", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20.5", "wall": "65108"} [2022-08-01 19:34:20,804][train_inner][INFO] - {"epoch": 4, "update": 3.882, "loss": "2.363", "ppl": "5.14", "wps": "364194", "ups": "3.09", "wpb": "117941", "bsz": "256", "num_updates": "199800", "lr": "0.000808283", "gnorm": "0.677", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "21.6", "wall": "65173"} [2022-08-01 19:35:25,668][train_inner][INFO] - {"epoch": 4, "update": 3.886, "loss": "2.368", "ppl": "5.16", "wps": "365748", "ups": "3.08", "wpb": "118619", "bsz": "256", "num_updates": "200000", "lr": "0.000808081", "gnorm": "0.686", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.1", "wall": "65238"} [2022-08-01 19:35:25,670][fairseq_cli.train][INFO] - begin validation on "valid" subset [2022-08-01 19:35:48,423][valid][INFO] - {"epoch": 4, "valid_loss": "2.265", "valid_ppl": "4.81", "valid_wps": "1.61012e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "200000", "valid_best_loss": "2.265"} [2022-08-01 19:35:48,427][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 4 @ 200000 updates [2022-08-01 19:35:48,427][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_4_200000.pt [2022-08-01 19:35:58,298][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_4_200000.pt [2022-08-01 19:36:27,074][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/adan2/checkpoint_4_200000.pt (epoch 4 @ 200000 updates, score 2.265) (writing took 38.64764930214733 seconds) [2022-08-01 19:37:31,949][train_inner][INFO] - {"epoch": 4, "update": 3.89, "loss": "2.364", "ppl": "5.15", "wps": "187928", "ups": "1.58", "wpb": "118658", "bsz": "256", "num_updates": "200200", "lr": "0.000807879", "gnorm": "0.671", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.3", "wall": "65364"} [2022-08-01 19:38:37,026][train_inner][INFO] - {"epoch": 4, "update": 3.894, "loss": "2.362", "ppl": "5.14", "wps": "363952", "ups": "3.07", "wpb": "118422", "bsz": "256", "num_updates": "200400", "lr": "0.000807677", "gnorm": "0.671", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.8", "wall": "65429"} [2022-08-01 19:39:43,481][train_inner][INFO] - {"epoch": 4, "update": 3.898, "loss": "2.365", "ppl": "5.15", "wps": "356713", "ups": "3.01", "wpb": "118524", "bsz": "256", "num_updates": "200600", "lr": "0.000807475", "gnorm": "0.673", "clip": "0", "loss_scale": "1", "train_wall": "66", "gb_free": "20.3", "wall": "65496"} [2022-08-01 19:40:48,837][train_inner][INFO] - {"epoch": 4, "update": 3.902, "loss": "2.358", "ppl": "5.13", "wps": "362642", "ups": "3.06", "wpb": "118502", "bsz": "256", "num_updates": "200800", "lr": "0.000807273", "gnorm": "0.678", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.3", "wall": "65561"} [2022-08-01 19:41:53,564][train_inner][INFO] - {"epoch": 4, "update": 3.905, "loss": "2.363", "ppl": "5.14", "wps": "367279", "ups": "3.09", "wpb": "118863", "bsz": "256", "num_updates": "201000", "lr": "0.000807071", "gnorm": "0.677", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20.2", "wall": "65626"} [2022-08-01 19:42:58,610][train_inner][INFO] - {"epoch": 4, "update": 3.909, "loss": "2.369", "ppl": "5.16", "wps": "363921", "ups": "3.07", "wpb": "118358", "bsz": "256", "num_updates": "201200", "lr": "0.000806869", "gnorm": "0.675", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "24.9", "wall": "65691"} [2022-08-01 19:44:03,296][train_inner][INFO] - {"epoch": 4, "update": 3.913, "loss": "2.367", "ppl": "5.16", "wps": "364948", "ups": "3.09", "wpb": "118032", "bsz": "256", "num_updates": "201400", "lr": "0.000806667", "gnorm": "0.678", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "19.7", "wall": "65756"} [2022-08-01 19:45:08,221][train_inner][INFO] - {"epoch": 4, "update": 3.917, "loss": "2.354", "ppl": "5.11", "wps": "365148", "ups": "3.08", "wpb": "118535", "bsz": "256", "num_updates": "201600", "lr": "0.000806465", "gnorm": "0.674", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.3", "wall": "65821"} [2022-08-01 19:46:13,063][train_inner][INFO] - {"epoch": 4, "update": 3.921, "loss": "2.369", "ppl": "5.16", "wps": "364444", "ups": "3.08", "wpb": "118154", "bsz": "255.9", "num_updates": "201800", "lr": "0.000806263", "gnorm": "0.715", "clip": "0.5", "loss_scale": "2", "train_wall": "64", "gb_free": "20", "wall": "65885"} [2022-08-01 19:47:17,817][train_inner][INFO] - {"epoch": 4, "update": 3.925, "loss": "2.364", "ppl": "5.15", "wps": "365316", "ups": "3.09", "wpb": "118277", "bsz": "256", "num_updates": "202000", "lr": "0.000806061", "gnorm": "0.675", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20", "wall": "65950"} [2022-08-01 19:48:22,541][train_inner][INFO] - {"epoch": 4, "update": 3.929, "loss": "2.358", "ppl": "5.13", "wps": "365423", "ups": "3.09", "wpb": "118256", "bsz": "256", "num_updates": "202200", "lr": "0.000805859", "gnorm": "0.674", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "19.8", "wall": "66015"} [2022-08-01 19:49:27,350][train_inner][INFO] - {"epoch": 4, "update": 3.933, "loss": "2.365", "ppl": "5.15", "wps": "365453", "ups": "3.09", "wpb": "118422", "bsz": "256", "num_updates": "202400", "lr": "0.000805657", "gnorm": "0.677", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.9", "wall": "66080"} [2022-08-01 19:50:31,730][train_inner][INFO] - {"epoch": 4, "update": 3.936, "loss": "2.365", "ppl": "5.15", "wps": "366934", "ups": "3.11", "wpb": "118114", "bsz": "256", "num_updates": "202600", "lr": "0.000805455", "gnorm": "0.677", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.9", "wall": "66144"} [2022-08-01 19:51:36,430][train_inner][INFO] - {"epoch": 4, "update": 3.94, "loss": "2.364", "ppl": "5.15", "wps": "364818", "ups": "3.09", "wpb": "118016", "bsz": "256", "num_updates": "202800", "lr": "0.000805253", "gnorm": "0.677", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.5", "wall": "66209"} [2022-08-01 19:52:17,076][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 [2022-08-01 19:52:17,699][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5 [2022-08-01 19:52:23,505][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.25 [2022-08-01 19:52:42,387][train_inner][INFO] - {"epoch": 4, "update": 3.944, "loss": "2.431", "ppl": "5.39", "wps": "358262", "ups": "3.03", "wpb": "118148", "bsz": "256", "num_updates": "203000", "lr": "0.000805051", "gnorm": "1.64", "clip": "1", "loss_scale": "0.25", "train_wall": "66", "gb_free": "20.2", "wall": "66275"} [2022-08-01 19:53:47,361][train_inner][INFO] - {"epoch": 4, "update": 3.948, "loss": "2.355", "ppl": "5.12", "wps": "364176", "ups": "3.08", "wpb": "118310", "bsz": "256", "num_updates": "203200", "lr": "0.000804848", "gnorm": "0.671", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "20.4", "wall": "66340"} [2022-08-01 19:54:52,063][train_inner][INFO] - {"epoch": 4, "update": 3.952, "loss": "2.367", "ppl": "5.16", "wps": "364254", "ups": "3.09", "wpb": "117837", "bsz": "256", "num_updates": "203400", "lr": "0.000804646", "gnorm": "0.674", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "21.2", "wall": "66404"} [2022-08-01 19:55:56,763][train_inner][INFO] - {"epoch": 4, "update": 3.956, "loss": "2.364", "ppl": "5.15", "wps": "364266", "ups": "3.09", "wpb": "117839", "bsz": "256", "num_updates": "203600", "lr": "0.000804444", "gnorm": "0.674", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "20.6", "wall": "66469"} [2022-08-01 19:57:01,722][train_inner][INFO] - {"epoch": 4, "update": 3.96, "loss": "2.364", "ppl": "5.15", "wps": "362563", "ups": "3.08", "wpb": "117757", "bsz": "256", "num_updates": "203800", "lr": "0.000804242", "gnorm": "0.678", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "21.7", "wall": "66534"} [2022-08-01 19:58:06,304][train_inner][INFO] - {"epoch": 4, "update": 3.964, "loss": "2.362", "ppl": "5.14", "wps": "365646", "ups": "3.1", "wpb": "118069", "bsz": "256", "num_updates": "204000", "lr": "0.00080404", "gnorm": "0.676", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "19.7", "wall": "66599"} [2022-08-01 19:59:11,283][train_inner][INFO] - {"epoch": 4, "update": 3.968, "loss": "2.364", "ppl": "5.15", "wps": "363847", "ups": "3.08", "wpb": "118210", "bsz": "256", "num_updates": "204200", "lr": "0.000803838", "gnorm": "0.677", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "21.2", "wall": "66664"} [2022-08-01 20:00:16,552][train_inner][INFO] - {"epoch": 4, "update": 3.971, "loss": "2.358", "ppl": "5.12", "wps": "364114", "ups": "3.06", "wpb": "118824", "bsz": "256", "num_updates": "204400", "lr": "0.000803636", "gnorm": "0.673", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "24", "wall": "66729"} [2022-08-01 20:01:21,476][train_inner][INFO] - {"epoch": 4, "update": 3.975, "loss": "2.362", "ppl": "5.14", "wps": "363275", "ups": "3.08", "wpb": "117924", "bsz": "256", "num_updates": "204600", "lr": "0.000803434", "gnorm": "0.684", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "19.7", "wall": "66794"} [2022-08-01 20:02:26,221][train_inner][INFO] - {"epoch": 4, "update": 3.979, "loss": "2.364", "ppl": "5.15", "wps": "363988", "ups": "3.09", "wpb": "117830", "bsz": "256", "num_updates": "204800", "lr": "0.000803232", "gnorm": "0.677", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "20.4", "wall": "66859"} [2022-08-01 20:03:30,880][train_inner][INFO] - {"epoch": 4, "update": 3.983, "loss": "2.37", "ppl": "5.17", "wps": "364321", "ups": "3.09", "wpb": "117781", "bsz": "256", "num_updates": "205000", "lr": "0.00080303", "gnorm": "0.683", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20.6", "wall": "66923"} [2022-08-01 20:04:35,759][train_inner][INFO] - {"epoch": 4, "update": 3.987, "loss": "2.358", "ppl": "5.13", "wps": "366074", "ups": "3.08", "wpb": "118751", "bsz": "256", "num_updates": "205200", "lr": "0.000802828", "gnorm": "0.67", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "23.3", "wall": "66988"} [2022-08-01 20:05:40,751][train_inner][INFO] - {"epoch": 4, "update": 3.991, "loss": "2.363", "ppl": "5.14", "wps": "365216", "ups": "3.08", "wpb": "118680", "bsz": "256", "num_updates": "205400", "lr": "0.000802626", "gnorm": "0.684", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "21.9", "wall": "67053"} [2022-08-01 20:06:45,504][train_inner][INFO] - {"epoch": 4, "update": 3.995, "loss": "2.362", "ppl": "5.14", "wps": "364780", "ups": "3.09", "wpb": "118101", "bsz": "256", "num_updates": "205600", "lr": "0.000802424", "gnorm": "0.675", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20.1", "wall": "67118"} [2022-08-01 20:07:50,695][train_inner][INFO] - {"epoch": 4, "update": 3.999, "loss": "2.358", "ppl": "5.13", "wps": "362212", "ups": "3.07", "wpb": "118063", "bsz": "256", "num_updates": "205800", "lr": "0.000802222", "gnorm": "0.677", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20", "wall": "67183"} [2022-08-01 20:08:12,550][fairseq_cli.train][INFO] - end of epoch 4 (average epoch stats below) [2022-08-01 20:08:12,551][train][INFO] - {"epoch": 4, "train_loss": "2.386", "train_ppl": "5.23", "train_wps": "362664", "train_ups": "3.07", "train_wpb": "118298", "train_bsz": "256", "train_num_updates": "205868", "train_lr": "0.000802154", "train_gnorm": "0.67", "train_clip": "0", "train_loss_scale": "0.5", "train_train_wall": "16624", "train_gb_free": "22.2", "train_wall": "67205"} [2022-08-01 20:08:12,663][fairseq.data.iterators][INFO] - grouped total_num_itrs = 51488 [2022-08-01 20:08:12,666][fairseq.trainer][INFO] - begin training epoch 5 [2022-08-01 20:08:12,667][fairseq_cli.train][INFO] - Start iterating over samples [2022-08-01 20:09:05,828][train_inner][INFO] - {"epoch": 5, "update": 4.003, "loss": "2.364", "ppl": "5.15", "wps": "312709", "ups": "2.66", "wpb": "117472", "bsz": "255.4", "num_updates": "206000", "lr": "0.00080202", "gnorm": "0.675", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "19.8", "wall": "67258"} [2022-08-01 20:10:11,949][train_inner][INFO] - {"epoch": 5, "update": 4.006, "loss": "2.363", "ppl": "5.15", "wps": "356929", "ups": "3.02", "wpb": "118001", "bsz": "256", "num_updates": "206200", "lr": "0.000801818", "gnorm": "0.677", "clip": "0", "loss_scale": "0.5", "train_wall": "66", "gb_free": "21.3", "wall": "67324"} [2022-08-01 20:11:16,975][train_inner][INFO] - {"epoch": 5, "update": 4.01, "loss": "2.348", "ppl": "5.09", "wps": "363790", "ups": "3.08", "wpb": "118276", "bsz": "256", "num_updates": "206400", "lr": "0.000801616", "gnorm": "0.676", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "19.7", "wall": "67389"} [2022-08-01 20:12:21,859][train_inner][INFO] - {"epoch": 5, "update": 4.014, "loss": "2.358", "ppl": "5.13", "wps": "364282", "ups": "3.08", "wpb": "118180", "bsz": "256", "num_updates": "206600", "lr": "0.000801414", "gnorm": "0.677", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "21.9", "wall": "67454"} [2022-08-01 20:13:26,880][train_inner][INFO] - {"epoch": 5, "update": 4.018, "loss": "2.358", "ppl": "5.13", "wps": "364525", "ups": "3.08", "wpb": "118507", "bsz": "256", "num_updates": "206800", "lr": "0.000801212", "gnorm": "0.672", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "19.9", "wall": "67519"} [2022-08-01 20:14:31,749][train_inner][INFO] - {"epoch": 5, "update": 4.022, "loss": "2.357", "ppl": "5.12", "wps": "365286", "ups": "3.08", "wpb": "118477", "bsz": "256", "num_updates": "207000", "lr": "0.00080101", "gnorm": "0.679", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.3", "wall": "67584"} [2022-08-01 20:15:36,467][train_inner][INFO] - {"epoch": 5, "update": 4.026, "loss": "2.357", "ppl": "5.12", "wps": "366058", "ups": "3.09", "wpb": "118451", "bsz": "256", "num_updates": "207200", "lr": "0.000800808", "gnorm": "0.674", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20.2", "wall": "67649"} [2022-08-01 20:16:41,675][train_inner][INFO] - {"epoch": 5, "update": 4.03, "loss": "2.349", "ppl": "5.1", "wps": "364819", "ups": "3.07", "wpb": "118944", "bsz": "256", "num_updates": "207400", "lr": "0.000800606", "gnorm": "0.677", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "22.1", "wall": "67714"} [2022-08-01 20:17:46,576][train_inner][INFO] - {"epoch": 5, "update": 4.034, "loss": "2.353", "ppl": "5.11", "wps": "365061", "ups": "3.08", "wpb": "118462", "bsz": "256", "num_updates": "207600", "lr": "0.000800404", "gnorm": "0.677", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "27", "wall": "67779"} [2022-08-01 20:18:52,440][train_inner][INFO] - {"epoch": 5, "update": 4.038, "loss": "2.354", "ppl": "5.11", "wps": "360777", "ups": "3.04", "wpb": "118808", "bsz": "256", "num_updates": "207800", "lr": "0.000800202", "gnorm": "0.68", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.7", "wall": "67845"} [2022-08-01 20:19:57,426][train_inner][INFO] - {"epoch": 5, "update": 4.041, "loss": "2.355", "ppl": "5.12", "wps": "364688", "ups": "3.08", "wpb": "118497", "bsz": "256", "num_updates": "208000", "lr": "0.0008", "gnorm": "0.678", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.1", "wall": "67910"} [2022-08-01 20:21:02,564][train_inner][INFO] - {"epoch": 5, "update": 4.045, "loss": "2.36", "ppl": "5.13", "wps": "363056", "ups": "3.07", "wpb": "118244", "bsz": "256", "num_updates": "208200", "lr": "0.000799798", "gnorm": "0.68", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.6", "wall": "67975"} [2022-08-01 20:22:07,767][train_inner][INFO] - {"epoch": 5, "update": 4.049, "loss": "2.353", "ppl": "5.11", "wps": "362853", "ups": "3.07", "wpb": "118293", "bsz": "256", "num_updates": "208400", "lr": "0.000799596", "gnorm": "0.679", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.4", "wall": "68040"} [2022-08-01 20:23:12,284][train_inner][INFO] - {"epoch": 5, "update": 4.053, "loss": "2.361", "ppl": "5.14", "wps": "365884", "ups": "3.1", "wpb": "118027", "bsz": "256", "num_updates": "208600", "lr": "0.000799394", "gnorm": "0.686", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "28.2", "wall": "68105"} [2022-08-01 20:24:17,222][train_inner][INFO] - {"epoch": 5, "update": 4.057, "loss": "2.357", "ppl": "5.12", "wps": "363501", "ups": "3.08", "wpb": "118023", "bsz": "256", "num_updates": "208800", "lr": "0.000799192", "gnorm": "0.685", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.2", "wall": "68170"} [2022-08-01 20:25:22,537][train_inner][INFO] - {"epoch": 5, "update": 4.061, "loss": "2.354", "ppl": "5.11", "wps": "362500", "ups": "3.06", "wpb": "118382", "bsz": "256", "num_updates": "209000", "lr": "0.00079899", "gnorm": "0.678", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.9", "wall": "68235"} [2022-08-01 20:26:27,886][train_inner][INFO] - {"epoch": 5, "update": 4.065, "loss": "2.357", "ppl": "5.12", "wps": "361368", "ups": "3.06", "wpb": "118074", "bsz": "256", "num_updates": "209200", "lr": "0.000798788", "gnorm": "0.681", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.8", "wall": "68300"} [2022-08-01 20:27:32,657][train_inner][INFO] - {"epoch": 5, "update": 4.069, "loss": "2.355", "ppl": "5.12", "wps": "363904", "ups": "3.09", "wpb": "117851", "bsz": "256", "num_updates": "209400", "lr": "0.000798586", "gnorm": "0.678", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.4", "wall": "68365"} [2022-08-01 20:28:37,820][train_inner][INFO] - {"epoch": 5, "update": 4.072, "loss": "2.354", "ppl": "5.11", "wps": "364569", "ups": "3.07", "wpb": "118780", "bsz": "256", "num_updates": "209600", "lr": "0.000798384", "gnorm": "0.68", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20", "wall": "68430"} [2022-08-01 20:29:42,515][train_inner][INFO] - {"epoch": 5, "update": 4.076, "loss": "2.355", "ppl": "5.12", "wps": "366124", "ups": "3.09", "wpb": "118430", "bsz": "256", "num_updates": "209800", "lr": "0.000798182", "gnorm": "0.683", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.6", "wall": "68495"} [2022-08-01 20:30:47,779][train_inner][INFO] - {"epoch": 5, "update": 4.08, "loss": "2.353", "ppl": "5.11", "wps": "361648", "ups": "3.06", "wpb": "118011", "bsz": "256", "num_updates": "210000", "lr": "0.00079798", "gnorm": "0.701", "clip": "0.5", "loss_scale": "2", "train_wall": "65", "gb_free": "20.9", "wall": "68560"} [2022-08-01 20:31:52,487][train_inner][INFO] - {"epoch": 5, "update": 4.084, "loss": "2.353", "ppl": "5.11", "wps": "365766", "ups": "3.09", "wpb": "118338", "bsz": "255.9", "num_updates": "210200", "lr": "0.000797778", "gnorm": "0.681", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.9", "wall": "68625"} [2022-08-01 20:32:57,541][train_inner][INFO] - {"epoch": 5, "update": 4.088, "loss": "2.354", "ppl": "5.11", "wps": "365862", "ups": "3.07", "wpb": "119002", "bsz": "256", "num_updates": "210400", "lr": "0.000797576", "gnorm": "0.685", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.4", "wall": "68690"} [2022-08-01 20:34:02,502][train_inner][INFO] - {"epoch": 5, "update": 4.092, "loss": "2.352", "ppl": "5.1", "wps": "363970", "ups": "3.08", "wpb": "118217", "bsz": "256", "num_updates": "210600", "lr": "0.000797374", "gnorm": "0.685", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.8", "wall": "68755"} [2022-08-01 20:35:07,184][train_inner][INFO] - {"epoch": 5, "update": 4.096, "loss": "2.356", "ppl": "5.12", "wps": "365226", "ups": "3.09", "wpb": "118116", "bsz": "256", "num_updates": "210800", "lr": "0.000797172", "gnorm": "0.68", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.3", "wall": "68820"} [2022-08-01 20:36:12,216][train_inner][INFO] - {"epoch": 5, "update": 4.1, "loss": "2.352", "ppl": "5.1", "wps": "363609", "ups": "3.08", "wpb": "118230", "bsz": "256", "num_updates": "211000", "lr": "0.00079697", "gnorm": "0.677", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.9", "wall": "68885"} [2022-08-01 20:37:17,186][train_inner][INFO] - {"epoch": 5, "update": 4.104, "loss": "2.359", "ppl": "5.13", "wps": "363387", "ups": "3.08", "wpb": "118044", "bsz": "256", "num_updates": "211200", "lr": "0.000796768", "gnorm": "0.679", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.2", "wall": "68950"} [2022-08-01 20:38:22,412][train_inner][INFO] - {"epoch": 5, "update": 4.107, "loss": "2.358", "ppl": "5.13", "wps": "363377", "ups": "3.07", "wpb": "118507", "bsz": "256", "num_updates": "211400", "lr": "0.000796566", "gnorm": "0.683", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.9", "wall": "69015"} [2022-08-01 20:39:27,313][train_inner][INFO] - {"epoch": 5, "update": 4.111, "loss": "2.355", "ppl": "5.12", "wps": "365473", "ups": "3.08", "wpb": "118596", "bsz": "256", "num_updates": "211600", "lr": "0.000796364", "gnorm": "0.68", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20", "wall": "69080"} [2022-08-01 20:40:32,620][train_inner][INFO] - {"epoch": 5, "update": 4.115, "loss": "2.352", "ppl": "5.11", "wps": "361888", "ups": "3.06", "wpb": "118167", "bsz": "256", "num_updates": "211800", "lr": "0.000796162", "gnorm": "0.681", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.4", "wall": "69145"} [2022-08-01 20:41:37,412][train_inner][INFO] - {"epoch": 5, "update": 4.119, "loss": "2.353", "ppl": "5.11", "wps": "364862", "ups": "3.09", "wpb": "118200", "bsz": "256", "num_updates": "212000", "lr": "0.00079596", "gnorm": "0.678", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.9", "wall": "69210"} [2022-08-01 20:42:42,479][train_inner][INFO] - {"epoch": 5, "update": 4.123, "loss": "2.345", "ppl": "5.08", "wps": "364478", "ups": "3.07", "wpb": "118576", "bsz": "256", "num_updates": "212200", "lr": "0.000795758", "gnorm": "0.674", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.5", "wall": "69275"} [2022-08-01 20:43:47,313][train_inner][INFO] - {"epoch": 5, "update": 4.127, "loss": "2.355", "ppl": "5.12", "wps": "366131", "ups": "3.08", "wpb": "118687", "bsz": "256", "num_updates": "212400", "lr": "0.000795556", "gnorm": "0.673", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22.1", "wall": "69340"} [2022-08-01 20:44:52,219][train_inner][INFO] - {"epoch": 5, "update": 4.131, "loss": "2.349", "ppl": "5.1", "wps": "363878", "ups": "3.08", "wpb": "118088", "bsz": "256", "num_updates": "212600", "lr": "0.000795354", "gnorm": "0.68", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.7", "wall": "69405"} [2022-08-01 20:45:57,552][train_inner][INFO] - {"epoch": 5, "update": 4.135, "loss": "2.348", "ppl": "5.09", "wps": "362324", "ups": "3.06", "wpb": "118357", "bsz": "256", "num_updates": "212800", "lr": "0.000795152", "gnorm": "0.68", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.7", "wall": "69470"} [2022-08-01 20:47:02,418][train_inner][INFO] - {"epoch": 5, "update": 4.139, "loss": "2.353", "ppl": "5.11", "wps": "363305", "ups": "3.08", "wpb": "117828", "bsz": "256", "num_updates": "213000", "lr": "0.000794949", "gnorm": "0.679", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.7", "wall": "69535"} [2022-08-01 20:48:08,342][train_inner][INFO] - {"epoch": 5, "update": 4.142, "loss": "2.347", "ppl": "5.09", "wps": "358966", "ups": "3.03", "wpb": "118321", "bsz": "256", "num_updates": "213200", "lr": "0.000794747", "gnorm": "0.676", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "20.1", "wall": "69601"} [2022-08-01 20:49:13,491][train_inner][INFO] - {"epoch": 5, "update": 4.146, "loss": "2.349", "ppl": "5.09", "wps": "362430", "ups": "3.07", "wpb": "118057", "bsz": "256", "num_updates": "213400", "lr": "0.000794545", "gnorm": "0.687", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.5", "wall": "69666"} [2022-08-01 20:50:18,552][train_inner][INFO] - {"epoch": 5, "update": 4.15, "loss": "2.351", "ppl": "5.1", "wps": "364586", "ups": "3.07", "wpb": "118601", "bsz": "256", "num_updates": "213600", "lr": "0.000794343", "gnorm": "0.68", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "69731"} [2022-08-01 20:51:24,242][train_inner][INFO] - {"epoch": 5, "update": 4.154, "loss": "2.352", "ppl": "5.1", "wps": "358643", "ups": "3.04", "wpb": "117795", "bsz": "256", "num_updates": "213800", "lr": "0.000794141", "gnorm": "0.678", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.5", "wall": "69797"} [2022-08-01 20:52:29,346][train_inner][INFO] - {"epoch": 5, "update": 4.158, "loss": "2.356", "ppl": "5.12", "wps": "363151", "ups": "3.07", "wpb": "118210", "bsz": "256", "num_updates": "214000", "lr": "0.000793939", "gnorm": "0.681", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "69862"} [2022-08-01 20:53:34,216][train_inner][INFO] - {"epoch": 5, "update": 4.162, "loss": "2.351", "ppl": "5.1", "wps": "362279", "ups": "3.08", "wpb": "117502", "bsz": "256", "num_updates": "214200", "lr": "0.000793737", "gnorm": "0.679", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.2", "wall": "69927"} [2022-08-01 20:54:39,031][train_inner][INFO] - {"epoch": 5, "update": 4.166, "loss": "2.351", "ppl": "5.1", "wps": "363963", "ups": "3.09", "wpb": "117951", "bsz": "256", "num_updates": "214400", "lr": "0.000793535", "gnorm": "0.683", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23.7", "wall": "69991"} [2022-08-01 20:55:07,857][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 [2022-08-01 20:55:44,689][train_inner][INFO] - {"epoch": 5, "update": 4.17, "loss": "2.353", "ppl": "5.11", "wps": "358570", "ups": "3.05", "wpb": "117714", "bsz": "256", "num_updates": "214600", "lr": "0.000793333", "gnorm": "0.696", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "25.2", "wall": "70057"} [2022-08-01 20:56:49,974][train_inner][INFO] - {"epoch": 5, "update": 4.173, "loss": "2.351", "ppl": "5.1", "wps": "363830", "ups": "3.06", "wpb": "118761", "bsz": "256", "num_updates": "214800", "lr": "0.000793131", "gnorm": "0.688", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "28.2", "wall": "70122"} [2022-08-01 20:57:55,159][train_inner][INFO] - {"epoch": 5, "update": 4.177, "loss": "2.354", "ppl": "5.11", "wps": "362479", "ups": "3.07", "wpb": "118140", "bsz": "256", "num_updates": "215000", "lr": "0.000792929", "gnorm": "0.678", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.7", "wall": "70187"} [2022-08-01 20:58:59,824][train_inner][INFO] - {"epoch": 5, "update": 4.181, "loss": "2.353", "ppl": "5.11", "wps": "366408", "ups": "3.09", "wpb": "118466", "bsz": "256", "num_updates": "215200", "lr": "0.000792727", "gnorm": "0.676", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22.9", "wall": "70252"} [2022-08-01 21:00:04,635][train_inner][INFO] - {"epoch": 5, "update": 4.185, "loss": "2.356", "ppl": "5.12", "wps": "364282", "ups": "3.09", "wpb": "118046", "bsz": "256", "num_updates": "215400", "lr": "0.000792525", "gnorm": "0.684", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "28.2", "wall": "70317"} [2022-08-01 21:01:09,819][train_inner][INFO] - {"epoch": 5, "update": 4.189, "loss": "2.354", "ppl": "5.11", "wps": "362815", "ups": "3.07", "wpb": "118246", "bsz": "256", "num_updates": "215600", "lr": "0.000792323", "gnorm": "0.69", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.9", "wall": "70382"} [2022-08-01 21:02:14,966][train_inner][INFO] - {"epoch": 5, "update": 4.193, "loss": "2.347", "ppl": "5.09", "wps": "363189", "ups": "3.07", "wpb": "118302", "bsz": "256", "num_updates": "215800", "lr": "0.000792121", "gnorm": "0.677", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.8", "wall": "70447"} [2022-08-01 21:03:19,818][train_inner][INFO] - {"epoch": 5, "update": 4.197, "loss": "2.345", "ppl": "5.08", "wps": "366314", "ups": "3.08", "wpb": "118779", "bsz": "256", "num_updates": "216000", "lr": "0.000791919", "gnorm": "0.677", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.4", "wall": "70512"} [2022-08-01 21:04:25,075][train_inner][INFO] - {"epoch": 5, "update": 4.201, "loss": "2.347", "ppl": "5.09", "wps": "363815", "ups": "3.06", "wpb": "118706", "bsz": "256", "num_updates": "216200", "lr": "0.000791717", "gnorm": "0.678", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.9", "wall": "70577"} [2022-08-01 21:05:29,725][train_inner][INFO] - {"epoch": 5, "update": 4.205, "loss": "2.348", "ppl": "5.09", "wps": "365564", "ups": "3.09", "wpb": "118167", "bsz": "256", "num_updates": "216400", "lr": "0.000791515", "gnorm": "0.694", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.8", "wall": "70642"} [2022-08-01 21:06:34,736][train_inner][INFO] - {"epoch": 5, "update": 4.208, "loss": "2.353", "ppl": "5.11", "wps": "362704", "ups": "3.08", "wpb": "117897", "bsz": "256", "num_updates": "216600", "lr": "0.000791313", "gnorm": "0.682", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.3", "wall": "70707"} [2022-08-01 21:07:39,925][train_inner][INFO] - {"epoch": 5, "update": 4.212, "loss": "2.346", "ppl": "5.08", "wps": "363733", "ups": "3.07", "wpb": "118556", "bsz": "256", "num_updates": "216800", "lr": "0.000791111", "gnorm": "0.68", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.6", "wall": "70772"} [2022-08-01 21:08:44,950][train_inner][INFO] - {"epoch": 5, "update": 4.216, "loss": "2.353", "ppl": "5.11", "wps": "364276", "ups": "3.08", "wpb": "118434", "bsz": "256", "num_updates": "217000", "lr": "0.000790909", "gnorm": "0.678", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.1", "wall": "70837"} [2022-08-01 21:09:50,006][train_inner][INFO] - {"epoch": 5, "update": 4.22, "loss": "2.348", "ppl": "5.09", "wps": "363826", "ups": "3.07", "wpb": "118344", "bsz": "256", "num_updates": "217200", "lr": "0.000790707", "gnorm": "0.677", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.7", "wall": "70902"} [2022-08-01 21:10:56,067][train_inner][INFO] - {"epoch": 5, "update": 4.224, "loss": "2.349", "ppl": "5.09", "wps": "357307", "ups": "3.03", "wpb": "118017", "bsz": "256", "num_updates": "217400", "lr": "0.000790505", "gnorm": "0.687", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "21.3", "wall": "70968"} [2022-08-01 21:12:00,748][train_inner][INFO] - {"epoch": 5, "update": 4.228, "loss": "2.356", "ppl": "5.12", "wps": "365595", "ups": "3.09", "wpb": "118234", "bsz": "256", "num_updates": "217600", "lr": "0.000790303", "gnorm": "0.685", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.7", "wall": "71033"} [2022-08-01 21:13:05,731][train_inner][INFO] - {"epoch": 5, "update": 4.232, "loss": "2.352", "ppl": "5.11", "wps": "364375", "ups": "3.08", "wpb": "118389", "bsz": "256", "num_updates": "217800", "lr": "0.000790101", "gnorm": "0.681", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.8", "wall": "71098"} [2022-08-01 21:14:10,860][train_inner][INFO] - {"epoch": 5, "update": 4.236, "loss": "2.349", "ppl": "5.1", "wps": "364160", "ups": "3.07", "wpb": "118585", "bsz": "256", "num_updates": "218000", "lr": "0.000789899", "gnorm": "0.683", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.7", "wall": "71163"} [2022-08-01 21:15:15,311][train_inner][INFO] - {"epoch": 5, "update": 4.24, "loss": "2.347", "ppl": "5.09", "wps": "367651", "ups": "3.1", "wpb": "118475", "bsz": "256", "num_updates": "218200", "lr": "0.000789697", "gnorm": "0.696", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.4", "wall": "71228"} [2022-08-01 21:16:20,267][train_inner][INFO] - {"epoch": 5, "update": 4.243, "loss": "2.343", "ppl": "5.07", "wps": "365263", "ups": "3.08", "wpb": "118628", "bsz": "256", "num_updates": "218400", "lr": "0.000789495", "gnorm": "0.68", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.4", "wall": "71293"} [2022-08-01 21:17:24,788][train_inner][INFO] - {"epoch": 5, "update": 4.247, "loss": "2.354", "ppl": "5.11", "wps": "365044", "ups": "3.1", "wpb": "117763", "bsz": "256", "num_updates": "218600", "lr": "0.000789293", "gnorm": "0.688", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.3", "wall": "71357"} [2022-08-01 21:18:29,736][train_inner][INFO] - {"epoch": 5, "update": 4.251, "loss": "2.351", "ppl": "5.1", "wps": "365902", "ups": "3.08", "wpb": "118820", "bsz": "256", "num_updates": "218800", "lr": "0.000789091", "gnorm": "0.678", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.9", "wall": "71422"} [2022-08-01 21:19:34,388][train_inner][INFO] - {"epoch": 5, "update": 4.255, "loss": "2.344", "ppl": "5.08", "wps": "366495", "ups": "3.09", "wpb": "118471", "bsz": "256", "num_updates": "219000", "lr": "0.000788889", "gnorm": "0.682", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "71487"} [2022-08-01 21:20:40,252][train_inner][INFO] - {"epoch": 5, "update": 4.259, "loss": "2.35", "ppl": "5.1", "wps": "359108", "ups": "3.04", "wpb": "118261", "bsz": "256", "num_updates": "219200", "lr": "0.000788687", "gnorm": "0.681", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.4", "wall": "71553"} [2022-08-01 21:21:45,218][train_inner][INFO] - {"epoch": 5, "update": 4.263, "loss": "2.344", "ppl": "5.08", "wps": "364858", "ups": "3.08", "wpb": "118514", "bsz": "256", "num_updates": "219400", "lr": "0.000788485", "gnorm": "0.674", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "71618"} [2022-08-01 21:22:50,796][train_inner][INFO] - {"epoch": 5, "update": 4.267, "loss": "2.345", "ppl": "5.08", "wps": "362250", "ups": "3.05", "wpb": "118776", "bsz": "256", "num_updates": "219600", "lr": "0.000788283", "gnorm": "0.675", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "71683"} [2022-08-01 21:23:55,898][train_inner][INFO] - {"epoch": 5, "update": 4.271, "loss": "2.353", "ppl": "5.11", "wps": "364194", "ups": "3.07", "wpb": "118548", "bsz": "256", "num_updates": "219800", "lr": "0.000788081", "gnorm": "0.683", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "71748"} [2022-08-01 21:25:00,492][train_inner][INFO] - {"epoch": 5, "update": 4.274, "loss": "2.344", "ppl": "5.08", "wps": "368116", "ups": "3.1", "wpb": "118888", "bsz": "256", "num_updates": "220000", "lr": "0.000787879", "gnorm": "0.681", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.3", "wall": "71813"} [2022-08-01 21:26:05,406][train_inner][INFO] - {"epoch": 5, "update": 4.278, "loss": "2.345", "ppl": "5.08", "wps": "364811", "ups": "3.08", "wpb": "118405", "bsz": "256", "num_updates": "220200", "lr": "0.000787677", "gnorm": "0.678", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.7", "wall": "71878"} [2022-08-01 21:27:10,368][train_inner][INFO] - {"epoch": 5, "update": 4.282, "loss": "2.344", "ppl": "5.08", "wps": "363277", "ups": "3.08", "wpb": "117994", "bsz": "256", "num_updates": "220400", "lr": "0.000787475", "gnorm": "0.678", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "71943"} [2022-08-01 21:28:14,902][train_inner][INFO] - {"epoch": 5, "update": 4.286, "loss": "2.352", "ppl": "5.11", "wps": "364882", "ups": "3.1", "wpb": "117735", "bsz": "256", "num_updates": "220600", "lr": "0.000787273", "gnorm": "0.681", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "72007"} [2022-08-01 21:28:27,136][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-01 21:29:04,928][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-01 21:29:20,609][train_inner][INFO] - {"epoch": 5, "update": 4.29, "loss": "2.343", "ppl": "5.07", "wps": "359915", "ups": "3.04", "wpb": "118243", "bsz": "256", "num_updates": "220800", "lr": "0.000787071", "gnorm": "0.68", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.1", "wall": "72073"} [2022-08-01 21:30:25,238][train_inner][INFO] - {"epoch": 5, "update": 4.294, "loss": "2.344", "ppl": "5.08", "wps": "363420", "ups": "3.09", "wpb": "117436", "bsz": "256", "num_updates": "221000", "lr": "0.000786869", "gnorm": "0.684", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.5", "wall": "72138"} [2022-08-01 21:31:30,171][train_inner][INFO] - {"epoch": 5, "update": 4.298, "loss": "2.341", "ppl": "5.07", "wps": "364132", "ups": "3.08", "wpb": "118220", "bsz": "256", "num_updates": "221200", "lr": "0.000786667", "gnorm": "0.681", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.2", "wall": "72202"} [2022-08-01 21:32:34,945][train_inner][INFO] - {"epoch": 5, "update": 4.302, "loss": "2.339", "ppl": "5.06", "wps": "365476", "ups": "3.09", "wpb": "118365", "bsz": "256", "num_updates": "221400", "lr": "0.000786465", "gnorm": "0.674", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23", "wall": "72267"} [2022-08-01 21:33:40,163][train_inner][INFO] - {"epoch": 5, "update": 4.306, "loss": "2.343", "ppl": "5.07", "wps": "362907", "ups": "3.07", "wpb": "118338", "bsz": "256", "num_updates": "221600", "lr": "0.000786263", "gnorm": "0.677", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.2", "wall": "72332"} [2022-08-01 21:34:44,976][train_inner][INFO] - {"epoch": 5, "update": 4.309, "loss": "2.351", "ppl": "5.1", "wps": "364781", "ups": "3.09", "wpb": "118212", "bsz": "256", "num_updates": "221800", "lr": "0.000786061", "gnorm": "0.684", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.9", "wall": "72397"} [2022-08-01 21:35:50,091][train_inner][INFO] - {"epoch": 5, "update": 4.313, "loss": "2.343", "ppl": "5.07", "wps": "362468", "ups": "3.07", "wpb": "118008", "bsz": "256", "num_updates": "222000", "lr": "0.000785859", "gnorm": "0.708", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.3", "wall": "72462"} [2022-08-01 21:36:16,433][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 [2022-08-01 21:36:17,389][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 [2022-08-01 21:36:17,681][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 [2022-08-01 21:36:17,979][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5 [2022-08-01 21:36:56,343][train_inner][INFO] - {"epoch": 5, "update": 4.317, "loss": "2.368", "ppl": "5.16", "wps": "355985", "ups": "3.02", "wpb": "117922", "bsz": "256", "num_updates": "222200", "lr": "0.000785657", "gnorm": "1.024", "clip": "2.5", "loss_scale": "0.5", "train_wall": "66", "gb_free": "19.8", "wall": "72529"} [2022-08-01 21:38:01,326][train_inner][INFO] - {"epoch": 5, "update": 4.321, "loss": "2.347", "ppl": "5.09", "wps": "365468", "ups": "3.08", "wpb": "118745", "bsz": "256", "num_updates": "222400", "lr": "0.000785455", "gnorm": "0.687", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "21.2", "wall": "72594"} [2022-08-01 21:39:06,367][train_inner][INFO] - {"epoch": 5, "update": 4.325, "loss": "2.344", "ppl": "5.08", "wps": "365798", "ups": "3.08", "wpb": "118958", "bsz": "256", "num_updates": "222600", "lr": "0.000785253", "gnorm": "0.694", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "22.8", "wall": "72659"} [2022-08-01 21:40:11,609][train_inner][INFO] - {"epoch": 5, "update": 4.329, "loss": "2.348", "ppl": "5.09", "wps": "362464", "ups": "3.07", "wpb": "118237", "bsz": "256", "num_updates": "222800", "lr": "0.000785051", "gnorm": "0.682", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.2", "wall": "72724"} [2022-08-01 21:41:16,314][train_inner][INFO] - {"epoch": 5, "update": 4.333, "loss": "2.339", "ppl": "5.06", "wps": "367767", "ups": "3.09", "wpb": "118980", "bsz": "256", "num_updates": "223000", "lr": "0.000784848", "gnorm": "0.678", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20.3", "wall": "72789"} [2022-08-01 21:42:20,664][train_inner][INFO] - {"epoch": 5, "update": 4.337, "loss": "2.349", "ppl": "5.09", "wps": "367275", "ups": "3.11", "wpb": "118169", "bsz": "256", "num_updates": "223200", "lr": "0.000784646", "gnorm": "0.682", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20.8", "wall": "72853"} [2022-08-01 21:43:25,275][train_inner][INFO] - {"epoch": 5, "update": 4.341, "loss": "2.348", "ppl": "5.09", "wps": "365962", "ups": "3.1", "wpb": "118224", "bsz": "256", "num_updates": "223400", "lr": "0.000784444", "gnorm": "0.68", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "22", "wall": "72918"} [2022-08-01 21:44:29,874][train_inner][INFO] - {"epoch": 5, "update": 4.345, "loss": "2.345", "ppl": "5.08", "wps": "365506", "ups": "3.1", "wpb": "118054", "bsz": "256", "num_updates": "223600", "lr": "0.000784242", "gnorm": "0.681", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20", "wall": "72982"} [2022-08-01 21:45:34,936][train_inner][INFO] - {"epoch": 5, "update": 4.348, "loss": "2.344", "ppl": "5.08", "wps": "365469", "ups": "3.07", "wpb": "118889", "bsz": "256", "num_updates": "223800", "lr": "0.00078404", "gnorm": "0.678", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "21.7", "wall": "73047"} [2022-08-01 21:46:39,656][train_inner][INFO] - {"epoch": 5, "update": 4.352, "loss": "2.334", "ppl": "5.04", "wps": "365814", "ups": "3.09", "wpb": "118376", "bsz": "256", "num_updates": "224000", "lr": "0.000783838", "gnorm": "0.698", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "23.7", "wall": "73112"} [2022-08-01 21:47:44,626][train_inner][INFO] - {"epoch": 5, "update": 4.356, "loss": "2.347", "ppl": "5.09", "wps": "363535", "ups": "3.08", "wpb": "118093", "bsz": "256", "num_updates": "224200", "lr": "0.000783636", "gnorm": "0.682", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.2", "wall": "73177"} [2022-08-01 21:48:49,789][train_inner][INFO] - {"epoch": 5, "update": 4.36, "loss": "2.336", "ppl": "5.05", "wps": "364238", "ups": "3.07", "wpb": "118673", "bsz": "256", "num_updates": "224400", "lr": "0.000783434", "gnorm": "0.677", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.5", "wall": "73242"} [2022-08-01 21:49:54,779][train_inner][INFO] - {"epoch": 5, "update": 4.364, "loss": "2.344", "ppl": "5.08", "wps": "363280", "ups": "3.08", "wpb": "118046", "bsz": "256", "num_updates": "224600", "lr": "0.000783232", "gnorm": "0.687", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "22.5", "wall": "73307"} [2022-08-01 21:50:59,978][train_inner][INFO] - {"epoch": 5, "update": 4.368, "loss": "2.349", "ppl": "5.09", "wps": "359573", "ups": "3.07", "wpb": "117217", "bsz": "256", "num_updates": "224800", "lr": "0.00078303", "gnorm": "0.684", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "25.9", "wall": "73372"} [2022-08-01 21:52:04,824][train_inner][INFO] - {"epoch": 5, "update": 4.372, "loss": "2.341", "ppl": "5.07", "wps": "365967", "ups": "3.08", "wpb": "118655", "bsz": "256", "num_updates": "225000", "lr": "0.000782828", "gnorm": "0.68", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "19.9", "wall": "73437"} [2022-08-01 21:53:10,011][train_inner][INFO] - {"epoch": 5, "update": 4.376, "loss": "2.337", "ppl": "5.05", "wps": "362874", "ups": "3.07", "wpb": "118273", "bsz": "256", "num_updates": "225200", "lr": "0.000782626", "gnorm": "0.696", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "19.7", "wall": "73502"} [2022-08-01 21:54:14,998][train_inner][INFO] - {"epoch": 5, "update": 4.379, "loss": "2.343", "ppl": "5.07", "wps": "364660", "ups": "3.08", "wpb": "118488", "bsz": "256", "num_updates": "225400", "lr": "0.000782424", "gnorm": "0.685", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "19.7", "wall": "73567"} [2022-08-01 21:55:19,899][train_inner][INFO] - {"epoch": 5, "update": 4.383, "loss": "2.336", "ppl": "5.05", "wps": "364368", "ups": "3.08", "wpb": "118238", "bsz": "256", "num_updates": "225600", "lr": "0.000782222", "gnorm": "0.686", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "27.7", "wall": "73632"} [2022-08-01 21:56:24,920][train_inner][INFO] - {"epoch": 5, "update": 4.387, "loss": "2.338", "ppl": "5.06", "wps": "364260", "ups": "3.08", "wpb": "118422", "bsz": "256", "num_updates": "225800", "lr": "0.00078202", "gnorm": "0.683", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "22.8", "wall": "73697"} [2022-08-01 21:57:30,154][train_inner][INFO] - {"epoch": 5, "update": 4.391, "loss": "2.345", "ppl": "5.08", "wps": "362382", "ups": "3.07", "wpb": "118196", "bsz": "256", "num_updates": "226000", "lr": "0.000781818", "gnorm": "0.678", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.5", "wall": "73762"} [2022-08-01 21:58:35,157][train_inner][INFO] - {"epoch": 5, "update": 4.395, "loss": "2.34", "ppl": "5.06", "wps": "363055", "ups": "3.08", "wpb": "117996", "bsz": "256", "num_updates": "226200", "lr": "0.000781616", "gnorm": "0.685", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.9", "wall": "73827"} [2022-08-01 21:59:39,819][train_inner][INFO] - {"epoch": 5, "update": 4.399, "loss": "2.342", "ppl": "5.07", "wps": "365797", "ups": "3.09", "wpb": "118265", "bsz": "256", "num_updates": "226400", "lr": "0.000781414", "gnorm": "0.682", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "19.8", "wall": "73892"} [2022-08-01 22:00:44,631][train_inner][INFO] - {"epoch": 5, "update": 4.403, "loss": "2.342", "ppl": "5.07", "wps": "365950", "ups": "3.09", "wpb": "118587", "bsz": "256", "num_updates": "226600", "lr": "0.000781212", "gnorm": "0.678", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20", "wall": "73957"} [2022-08-01 22:01:36,794][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 [2022-08-01 22:01:39,056][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5 [2022-08-01 22:01:50,533][train_inner][INFO] - {"epoch": 5, "update": 4.407, "loss": "2.589", "ppl": "6.02", "wps": "359474", "ups": "3.03", "wpb": "118448", "bsz": "256", "num_updates": "226800", "lr": "0.00078101", "gnorm": "1.246", "clip": "4.5", "loss_scale": "0.5", "train_wall": "66", "gb_free": "22.2", "wall": "74023"} [2022-08-01 22:02:55,444][train_inner][INFO] - {"epoch": 5, "update": 4.411, "loss": "2.345", "ppl": "5.08", "wps": "365307", "ups": "3.08", "wpb": "118562", "bsz": "256", "num_updates": "227000", "lr": "0.000780808", "gnorm": "0.675", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.2", "wall": "74088"} [2022-08-01 22:04:00,084][train_inner][INFO] - {"epoch": 5, "update": 4.414, "loss": "2.341", "ppl": "5.07", "wps": "367782", "ups": "3.09", "wpb": "118865", "bsz": "256", "num_updates": "227200", "lr": "0.000780606", "gnorm": "0.68", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20.7", "wall": "74152"} [2022-08-01 22:05:05,104][train_inner][INFO] - {"epoch": 5, "update": 4.418, "loss": "2.343", "ppl": "5.07", "wps": "364309", "ups": "3.08", "wpb": "118435", "bsz": "256", "num_updates": "227400", "lr": "0.000780404", "gnorm": "0.692", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "23.6", "wall": "74217"} [2022-08-01 22:06:09,708][train_inner][INFO] - {"epoch": 5, "update": 4.422, "loss": "2.34", "ppl": "5.06", "wps": "365639", "ups": "3.1", "wpb": "118108", "bsz": "256", "num_updates": "227600", "lr": "0.000780202", "gnorm": "0.682", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20.3", "wall": "74282"} [2022-08-01 22:07:14,581][train_inner][INFO] - {"epoch": 5, "update": 4.426, "loss": "2.345", "ppl": "5.08", "wps": "365896", "ups": "3.08", "wpb": "118682", "bsz": "256", "num_updates": "227800", "lr": "0.00078", "gnorm": "0.678", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "21.4", "wall": "74347"} [2022-08-01 22:08:19,485][train_inner][INFO] - {"epoch": 5, "update": 4.43, "loss": "2.341", "ppl": "5.07", "wps": "366091", "ups": "3.08", "wpb": "118802", "bsz": "256", "num_updates": "228000", "lr": "0.000779798", "gnorm": "0.678", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "21", "wall": "74412"} [2022-08-01 22:09:23,882][train_inner][INFO] - {"epoch": 5, "update": 4.434, "loss": "2.336", "ppl": "5.05", "wps": "367060", "ups": "3.11", "wpb": "118187", "bsz": "256", "num_updates": "228200", "lr": "0.000779596", "gnorm": "0.678", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "25.8", "wall": "74476"} [2022-08-01 22:10:28,752][train_inner][INFO] - {"epoch": 5, "update": 4.438, "loss": "2.339", "ppl": "5.06", "wps": "363760", "ups": "3.08", "wpb": "117983", "bsz": "256", "num_updates": "228400", "lr": "0.000779394", "gnorm": "0.694", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "21.4", "wall": "74541"} [2022-08-01 22:11:33,286][train_inner][INFO] - {"epoch": 5, "update": 4.442, "loss": "2.336", "ppl": "5.05", "wps": "366003", "ups": "3.1", "wpb": "118096", "bsz": "256", "num_updates": "228600", "lr": "0.000779192", "gnorm": "0.679", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "19.8", "wall": "74606"} [2022-08-01 22:12:38,134][train_inner][INFO] - {"epoch": 5, "update": 4.446, "loss": "2.342", "ppl": "5.07", "wps": "365085", "ups": "3.08", "wpb": "118375", "bsz": "256", "num_updates": "228800", "lr": "0.00077899", "gnorm": "0.68", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "21", "wall": "74670"} [2022-08-01 22:13:43,149][train_inner][INFO] - {"epoch": 5, "update": 4.449, "loss": "2.339", "ppl": "5.06", "wps": "363608", "ups": "3.08", "wpb": "118197", "bsz": "256", "num_updates": "229000", "lr": "0.000778788", "gnorm": "0.68", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.8", "wall": "74735"} [2022-08-01 22:14:48,150][train_inner][INFO] - {"epoch": 5, "update": 4.453, "loss": "2.34", "ppl": "5.06", "wps": "364814", "ups": "3.08", "wpb": "118565", "bsz": "256", "num_updates": "229200", "lr": "0.000778586", "gnorm": "0.674", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.3", "wall": "74800"} [2022-08-01 22:15:52,943][train_inner][INFO] - {"epoch": 5, "update": 4.457, "loss": "2.343", "ppl": "5.07", "wps": "364234", "ups": "3.09", "wpb": "117998", "bsz": "256", "num_updates": "229400", "lr": "0.000778384", "gnorm": "0.682", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "28.2", "wall": "74865"} [2022-08-01 22:16:57,979][train_inner][INFO] - {"epoch": 5, "update": 4.461, "loss": "2.337", "ppl": "5.05", "wps": "364736", "ups": "3.08", "wpb": "118604", "bsz": "256", "num_updates": "229600", "lr": "0.000778182", "gnorm": "0.691", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "22.4", "wall": "74930"} [2022-08-01 22:18:02,485][train_inner][INFO] - {"epoch": 5, "update": 4.465, "loss": "2.338", "ppl": "5.06", "wps": "365758", "ups": "3.1", "wpb": "117965", "bsz": "256", "num_updates": "229800", "lr": "0.00077798", "gnorm": "0.679", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "22.5", "wall": "74995"} [2022-08-01 22:19:07,792][train_inner][INFO] - {"epoch": 5, "update": 4.469, "loss": "2.333", "ppl": "5.04", "wps": "363802", "ups": "3.06", "wpb": "118793", "bsz": "256", "num_updates": "230000", "lr": "0.000777778", "gnorm": "0.68", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.9", "wall": "75060"} [2022-08-01 22:20:12,487][train_inner][INFO] - {"epoch": 5, "update": 4.473, "loss": "2.342", "ppl": "5.07", "wps": "364140", "ups": "3.09", "wpb": "117789", "bsz": "256", "num_updates": "230200", "lr": "0.000777576", "gnorm": "0.678", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "22.4", "wall": "75125"} [2022-08-01 22:21:17,394][train_inner][INFO] - {"epoch": 5, "update": 4.477, "loss": "2.341", "ppl": "5.07", "wps": "365448", "ups": "3.08", "wpb": "118598", "bsz": "256", "num_updates": "230400", "lr": "0.000777374", "gnorm": "0.68", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.7", "wall": "75190"} [2022-08-01 22:22:22,459][train_inner][INFO] - {"epoch": 5, "update": 4.481, "loss": "2.335", "ppl": "5.05", "wps": "362539", "ups": "3.07", "wpb": "117941", "bsz": "256", "num_updates": "230600", "lr": "0.000777172", "gnorm": "0.684", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.2", "wall": "75255"} [2022-08-01 22:23:27,158][train_inner][INFO] - {"epoch": 5, "update": 4.484, "loss": "2.338", "ppl": "5.05", "wps": "365032", "ups": "3.09", "wpb": "118086", "bsz": "256", "num_updates": "230800", "lr": "0.00077697", "gnorm": "0.68", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "23.4", "wall": "75319"} [2022-08-01 22:24:31,963][train_inner][INFO] - {"epoch": 5, "update": 4.488, "loss": "2.335", "ppl": "5.05", "wps": "364028", "ups": "3.09", "wpb": "117951", "bsz": "256", "num_updates": "231000", "lr": "0.000776768", "gnorm": "0.683", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.3", "wall": "75384"} [2022-08-01 22:25:36,585][train_inner][INFO] - {"epoch": 5, "update": 4.492, "loss": "2.334", "ppl": "5.04", "wps": "365710", "ups": "3.09", "wpb": "118162", "bsz": "256", "num_updates": "231200", "lr": "0.000776566", "gnorm": "0.677", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.2", "wall": "75449"} [2022-08-01 22:26:41,572][train_inner][INFO] - {"epoch": 5, "update": 4.496, "loss": "2.34", "ppl": "5.06", "wps": "363858", "ups": "3.08", "wpb": "118229", "bsz": "256", "num_updates": "231400", "lr": "0.000776364", "gnorm": "0.682", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.5", "wall": "75514"} [2022-08-01 22:27:46,688][train_inner][INFO] - {"epoch": 5, "update": 4.5, "loss": "2.337", "ppl": "5.05", "wps": "363442", "ups": "3.07", "wpb": "118328", "bsz": "256", "num_updates": "231600", "lr": "0.000776162", "gnorm": "0.681", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "22.4", "wall": "75579"} [2022-08-01 22:28:51,640][train_inner][INFO] - {"epoch": 5, "update": 4.504, "loss": "2.33", "ppl": "5.03", "wps": "365171", "ups": "3.08", "wpb": "118590", "bsz": "256", "num_updates": "231800", "lr": "0.00077596", "gnorm": "0.673", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.5", "wall": "75644"} [2022-08-01 22:29:55,691][train_inner][INFO] - {"epoch": 5, "update": 4.508, "loss": "2.335", "ppl": "5.04", "wps": "369209", "ups": "3.12", "wpb": "118241", "bsz": "256", "num_updates": "232000", "lr": "0.000775758", "gnorm": "0.68", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "22.6", "wall": "75708"} [2022-08-01 22:31:00,827][train_inner][INFO] - {"epoch": 5, "update": 4.512, "loss": "2.335", "ppl": "5.05", "wps": "363754", "ups": "3.07", "wpb": "118465", "bsz": "256", "num_updates": "232200", "lr": "0.000775556", "gnorm": "0.679", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.1", "wall": "75773"} [2022-08-01 22:32:05,807][train_inner][INFO] - {"epoch": 5, "update": 4.515, "loss": "2.33", "ppl": "5.03", "wps": "365640", "ups": "3.08", "wpb": "118795", "bsz": "256", "num_updates": "232400", "lr": "0.000775354", "gnorm": "0.681", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.2", "wall": "75838"} [2022-08-01 22:33:10,526][train_inner][INFO] - {"epoch": 5, "update": 4.519, "loss": "2.328", "ppl": "5.02", "wps": "369114", "ups": "3.09", "wpb": "119442", "bsz": "256", "num_updates": "232600", "lr": "0.000775152", "gnorm": "0.677", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "22", "wall": "75903"} [2022-08-01 22:34:00,236][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 [2022-08-01 22:34:00,849][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5 [2022-08-01 22:34:15,894][train_inner][INFO] - {"epoch": 5, "update": 4.523, "loss": "2.35", "ppl": "5.1", "wps": "362770", "ups": "3.06", "wpb": "118565", "bsz": "256", "num_updates": "232800", "lr": "0.000774949", "gnorm": "0.883", "clip": "0.5", "loss_scale": "0.5", "train_wall": "65", "gb_free": "21", "wall": "75968"} [2022-08-01 22:35:20,793][train_inner][INFO] - {"epoch": 5, "update": 4.527, "loss": "2.343", "ppl": "5.07", "wps": "366480", "ups": "3.08", "wpb": "118921", "bsz": "256", "num_updates": "233000", "lr": "0.000774747", "gnorm": "0.676", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.6", "wall": "76033"} [2022-08-01 22:36:26,079][train_inner][INFO] - {"epoch": 5, "update": 4.531, "loss": "2.337", "ppl": "5.05", "wps": "361842", "ups": "3.06", "wpb": "118115", "bsz": "256", "num_updates": "233200", "lr": "0.000774545", "gnorm": "0.682", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "21", "wall": "76098"} [2022-08-01 22:37:31,178][train_inner][INFO] - {"epoch": 5, "update": 4.535, "loss": "2.336", "ppl": "5.05", "wps": "364352", "ups": "3.07", "wpb": "118592", "bsz": "256", "num_updates": "233400", "lr": "0.000774343", "gnorm": "0.683", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "27.2", "wall": "76163"} [2022-08-01 22:38:36,309][train_inner][INFO] - {"epoch": 5, "update": 4.539, "loss": "2.329", "ppl": "5.02", "wps": "364574", "ups": "3.07", "wpb": "118724", "bsz": "256", "num_updates": "233600", "lr": "0.000774141", "gnorm": "0.678", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.9", "wall": "76229"} [2022-08-01 22:39:41,247][train_inner][INFO] - {"epoch": 5, "update": 4.543, "loss": "2.336", "ppl": "5.05", "wps": "364342", "ups": "3.08", "wpb": "118295", "bsz": "256", "num_updates": "233800", "lr": "0.000773939", "gnorm": "0.679", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "28.2", "wall": "76294"} [2022-08-01 22:40:45,906][train_inner][INFO] - {"epoch": 5, "update": 4.547, "loss": "2.336", "ppl": "5.05", "wps": "366804", "ups": "3.09", "wpb": "118585", "bsz": "256", "num_updates": "234000", "lr": "0.000773737", "gnorm": "0.68", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "22.5", "wall": "76358"} [2022-08-01 22:41:50,601][train_inner][INFO] - {"epoch": 5, "update": 4.55, "loss": "2.335", "ppl": "5.05", "wps": "366380", "ups": "3.09", "wpb": "118513", "bsz": "256", "num_updates": "234200", "lr": "0.000773535", "gnorm": "0.682", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20.5", "wall": "76423"} [2022-08-01 22:42:55,529][train_inner][INFO] - {"epoch": 5, "update": 4.554, "loss": "2.34", "ppl": "5.06", "wps": "364509", "ups": "3.08", "wpb": "118333", "bsz": "256", "num_updates": "234400", "lr": "0.000773333", "gnorm": "0.68", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.5", "wall": "76488"} [2022-08-01 22:44:00,580][train_inner][INFO] - {"epoch": 5, "update": 4.558, "loss": "2.335", "ppl": "5.05", "wps": "364634", "ups": "3.07", "wpb": "118598", "bsz": "256", "num_updates": "234600", "lr": "0.000773131", "gnorm": "0.678", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "19.9", "wall": "76553"} [2022-08-01 22:45:05,495][train_inner][INFO] - {"epoch": 5, "update": 4.562, "loss": "2.332", "ppl": "5.04", "wps": "364676", "ups": "3.08", "wpb": "118364", "bsz": "256", "num_updates": "234800", "lr": "0.000772929", "gnorm": "0.699", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "21.4", "wall": "76618"} [2022-08-01 22:46:10,382][train_inner][INFO] - {"epoch": 5, "update": 4.566, "loss": "2.329", "ppl": "5.03", "wps": "365144", "ups": "3.08", "wpb": "118464", "bsz": "256", "num_updates": "235000", "lr": "0.000772727", "gnorm": "0.678", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20", "wall": "76683"} [2022-08-01 22:47:15,343][train_inner][INFO] - {"epoch": 5, "update": 4.57, "loss": "2.333", "ppl": "5.04", "wps": "363018", "ups": "3.08", "wpb": "117908", "bsz": "256", "num_updates": "235200", "lr": "0.000772525", "gnorm": "0.68", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "19.9", "wall": "76748"} [2022-08-01 22:48:20,079][train_inner][INFO] - {"epoch": 5, "update": 4.574, "loss": "2.343", "ppl": "5.07", "wps": "364627", "ups": "3.09", "wpb": "118021", "bsz": "256", "num_updates": "235400", "lr": "0.000772323", "gnorm": "0.68", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20.4", "wall": "76812"} [2022-08-01 22:49:25,036][train_inner][INFO] - {"epoch": 5, "update": 4.578, "loss": "2.339", "ppl": "5.06", "wps": "362495", "ups": "3.08", "wpb": "117730", "bsz": "256", "num_updates": "235600", "lr": "0.000772121", "gnorm": "0.685", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.2", "wall": "76877"} [2022-08-01 22:50:29,629][train_inner][INFO] - {"epoch": 5, "update": 4.582, "loss": "2.333", "ppl": "5.04", "wps": "365651", "ups": "3.1", "wpb": "118090", "bsz": "256", "num_updates": "235800", "lr": "0.000771919", "gnorm": "0.694", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "26.1", "wall": "76942"} [2022-08-01 22:51:34,277][train_inner][INFO] - {"epoch": 5, "update": 4.585, "loss": "2.334", "ppl": "5.04", "wps": "364133", "ups": "3.09", "wpb": "117702", "bsz": "255.9", "num_updates": "236000", "lr": "0.000771717", "gnorm": "0.675", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "21.5", "wall": "77007"} [2022-08-01 22:52:39,198][train_inner][INFO] - {"epoch": 5, "update": 4.589, "loss": "2.334", "ppl": "5.04", "wps": "362024", "ups": "3.08", "wpb": "117513", "bsz": "256", "num_updates": "236200", "lr": "0.000771515", "gnorm": "0.681", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.7", "wall": "77072"} [2022-08-01 22:53:44,329][train_inner][INFO] - {"epoch": 5, "update": 4.593, "loss": "2.333", "ppl": "5.04", "wps": "362917", "ups": "3.07", "wpb": "118184", "bsz": "256", "num_updates": "236400", "lr": "0.000771313", "gnorm": "0.681", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.8", "wall": "77137"} [2022-08-01 22:54:49,519][train_inner][INFO] - {"epoch": 5, "update": 4.597, "loss": "2.327", "ppl": "5.02", "wps": "363628", "ups": "3.07", "wpb": "118522", "bsz": "256", "num_updates": "236600", "lr": "0.000771111", "gnorm": "0.678", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "24", "wall": "77202"} [2022-08-01 22:55:54,566][train_inner][INFO] - {"epoch": 5, "update": 4.601, "loss": "2.334", "ppl": "5.04", "wps": "363199", "ups": "3.07", "wpb": "118124", "bsz": "256", "num_updates": "236800", "lr": "0.000770909", "gnorm": "0.68", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "19.8", "wall": "77267"} [2022-08-01 22:56:59,395][train_inner][INFO] - {"epoch": 5, "update": 4.605, "loss": "2.33", "ppl": "5.03", "wps": "365398", "ups": "3.09", "wpb": "118440", "bsz": "256", "num_updates": "237000", "lr": "0.000770707", "gnorm": "0.683", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "27.9", "wall": "77332"} [2022-08-01 22:58:04,228][train_inner][INFO] - {"epoch": 5, "update": 4.609, "loss": "2.337", "ppl": "5.05", "wps": "364048", "ups": "3.08", "wpb": "118009", "bsz": "256", "num_updates": "237200", "lr": "0.000770505", "gnorm": "0.683", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "22.3", "wall": "77397"} [2022-08-01 22:59:08,930][train_inner][INFO] - {"epoch": 5, "update": 4.613, "loss": "2.33", "ppl": "5.03", "wps": "366884", "ups": "3.09", "wpb": "118690", "bsz": "256", "num_updates": "237400", "lr": "0.000770303", "gnorm": "0.678", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "19.9", "wall": "77461"} [2022-08-01 23:00:13,730][train_inner][INFO] - {"epoch": 5, "update": 4.617, "loss": "2.333", "ppl": "5.04", "wps": "364280", "ups": "3.09", "wpb": "118024", "bsz": "256", "num_updates": "237600", "lr": "0.000770101", "gnorm": "0.679", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.3", "wall": "77526"} [2022-08-01 23:01:18,216][train_inner][INFO] - {"epoch": 5, "update": 4.62, "loss": "2.325", "ppl": "5.01", "wps": "367333", "ups": "3.1", "wpb": "118438", "bsz": "256", "num_updates": "237800", "lr": "0.000769899", "gnorm": "0.675", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "24.8", "wall": "77591"} [2022-08-01 23:02:22,710][train_inner][INFO] - {"epoch": 5, "update": 4.624, "loss": "2.328", "ppl": "5.02", "wps": "366051", "ups": "3.1", "wpb": "118038", "bsz": "256", "num_updates": "238000", "lr": "0.000769697", "gnorm": "0.681", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "25.9", "wall": "77655"} [2022-08-01 23:03:27,302][train_inner][INFO] - {"epoch": 5, "update": 4.628, "loss": "2.332", "ppl": "5.04", "wps": "366700", "ups": "3.1", "wpb": "118428", "bsz": "256", "num_updates": "238200", "lr": "0.000769495", "gnorm": "0.676", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "19.9", "wall": "77720"} [2022-08-01 23:04:31,729][train_inner][INFO] - {"epoch": 5, "update": 4.632, "loss": "2.334", "ppl": "5.04", "wps": "363827", "ups": "3.1", "wpb": "117201", "bsz": "256", "num_updates": "238400", "lr": "0.000769293", "gnorm": "0.69", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "27.9", "wall": "77784"} [2022-08-01 23:05:36,827][train_inner][INFO] - {"epoch": 5, "update": 4.636, "loss": "2.335", "ppl": "5.05", "wps": "362792", "ups": "3.07", "wpb": "118083", "bsz": "256", "num_updates": "238600", "lr": "0.000769091", "gnorm": "0.681", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "22.2", "wall": "77849"} [2022-08-01 23:06:41,244][train_inner][INFO] - {"epoch": 5, "update": 4.64, "loss": "2.334", "ppl": "5.04", "wps": "368798", "ups": "3.1", "wpb": "118782", "bsz": "256", "num_updates": "238800", "lr": "0.000768889", "gnorm": "0.677", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.1", "wall": "77914"} [2022-08-01 23:07:45,967][train_inner][INFO] - {"epoch": 5, "update": 4.644, "loss": "2.326", "ppl": "5.01", "wps": "366624", "ups": "3.09", "wpb": "118644", "bsz": "256", "num_updates": "239000", "lr": "0.000768687", "gnorm": "0.679", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.8", "wall": "77978"} [2022-08-01 23:08:51,066][train_inner][INFO] - {"epoch": 5, "update": 4.648, "loss": "2.331", "ppl": "5.03", "wps": "364426", "ups": "3.07", "wpb": "118616", "bsz": "256", "num_updates": "239200", "lr": "0.000768485", "gnorm": "0.679", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "25.1", "wall": "78043"} [2022-08-01 23:09:55,865][train_inner][INFO] - {"epoch": 5, "update": 4.651, "loss": "2.332", "ppl": "5.03", "wps": "366014", "ups": "3.09", "wpb": "118584", "bsz": "256", "num_updates": "239400", "lr": "0.000768283", "gnorm": "0.681", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22.5", "wall": "78108"} [2022-08-01 23:11:00,765][train_inner][INFO] - {"epoch": 5, "update": 4.655, "loss": "2.328", "ppl": "5.02", "wps": "365329", "ups": "3.08", "wpb": "118548", "bsz": "256", "num_updates": "239600", "lr": "0.000768081", "gnorm": "0.686", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.8", "wall": "78173"} [2022-08-01 23:12:05,420][train_inner][INFO] - {"epoch": 5, "update": 4.659, "loss": "2.331", "ppl": "5.03", "wps": "364183", "ups": "3.09", "wpb": "117731", "bsz": "256", "num_updates": "239800", "lr": "0.000767879", "gnorm": "0.69", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22.6", "wall": "78238"} [2022-08-01 23:13:10,341][train_inner][INFO] - {"epoch": 5, "update": 4.663, "loss": "2.332", "ppl": "5.04", "wps": "364242", "ups": "3.08", "wpb": "118233", "bsz": "256", "num_updates": "240000", "lr": "0.000767677", "gnorm": "0.676", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.6", "wall": "78303"} [2022-08-01 23:14:15,290][train_inner][INFO] - {"epoch": 5, "update": 4.667, "loss": "2.335", "ppl": "5.05", "wps": "362772", "ups": "3.08", "wpb": "117805", "bsz": "256", "num_updates": "240200", "lr": "0.000767475", "gnorm": "0.68", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.8", "wall": "78368"} [2022-08-01 23:15:20,381][train_inner][INFO] - {"epoch": 5, "update": 4.671, "loss": "2.328", "ppl": "5.02", "wps": "365480", "ups": "3.07", "wpb": "118946", "bsz": "256", "num_updates": "240400", "lr": "0.000767273", "gnorm": "0.684", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21", "wall": "78433"} [2022-08-01 23:16:24,868][train_inner][INFO] - {"epoch": 5, "update": 4.675, "loss": "2.327", "ppl": "5.02", "wps": "366039", "ups": "3.1", "wpb": "118023", "bsz": "256", "num_updates": "240600", "lr": "0.000767071", "gnorm": "0.684", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.1", "wall": "78497"} [2022-08-01 23:17:29,493][train_inner][INFO] - {"epoch": 5, "update": 4.679, "loss": "2.324", "ppl": "5.01", "wps": "364466", "ups": "3.09", "wpb": "117766", "bsz": "256", "num_updates": "240800", "lr": "0.000766869", "gnorm": "0.684", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "23.1", "wall": "78562"} [2022-08-01 23:18:34,477][train_inner][INFO] - {"epoch": 5, "update": 4.683, "loss": "2.33", "ppl": "5.03", "wps": "363308", "ups": "3.08", "wpb": "118044", "bsz": "256", "num_updates": "241000", "lr": "0.000766667", "gnorm": "0.678", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23", "wall": "78627"} [2022-08-01 23:19:39,590][train_inner][INFO] - {"epoch": 5, "update": 4.686, "loss": "2.33", "ppl": "5.03", "wps": "361820", "ups": "3.07", "wpb": "117795", "bsz": "256", "num_updates": "241200", "lr": "0.000766465", "gnorm": "0.681", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.7", "wall": "78692"} [2022-08-01 23:20:43,866][train_inner][INFO] - {"epoch": 5, "update": 4.69, "loss": "2.323", "ppl": "5.01", "wps": "368331", "ups": "3.11", "wpb": "118373", "bsz": "256", "num_updates": "241400", "lr": "0.000766263", "gnorm": "0.68", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.2", "wall": "78756"} [2022-08-01 23:21:48,927][train_inner][INFO] - {"epoch": 5, "update": 4.694, "loss": "2.323", "ppl": "5", "wps": "366382", "ups": "3.07", "wpb": "119183", "bsz": "256", "num_updates": "241600", "lr": "0.000766061", "gnorm": "0.68", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.5", "wall": "78821"} [2022-08-01 23:22:54,843][train_inner][INFO] - {"epoch": 5, "update": 4.698, "loss": "2.322", "ppl": "5", "wps": "360946", "ups": "3.03", "wpb": "118959", "bsz": "256", "num_updates": "241800", "lr": "0.000765859", "gnorm": "0.678", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "24.6", "wall": "78887"} [2022-08-01 23:23:59,620][train_inner][INFO] - {"epoch": 5, "update": 4.702, "loss": "2.328", "ppl": "5.02", "wps": "365960", "ups": "3.09", "wpb": "118527", "bsz": "256", "num_updates": "242000", "lr": "0.000765657", "gnorm": "0.682", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20", "wall": "78952"} [2022-08-01 23:25:04,750][train_inner][INFO] - {"epoch": 5, "update": 4.706, "loss": "2.326", "ppl": "5.01", "wps": "364192", "ups": "3.07", "wpb": "118598", "bsz": "256", "num_updates": "242200", "lr": "0.000765455", "gnorm": "0.674", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.5", "wall": "79017"} [2022-08-01 23:25:06,302][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 [2022-08-01 23:25:53,783][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 [2022-08-01 23:26:09,995][train_inner][INFO] - {"epoch": 5, "update": 4.71, "loss": "2.326", "ppl": "5.01", "wps": "362236", "ups": "3.07", "wpb": "118168", "bsz": "256", "num_updates": "242400", "lr": "0.000765253", "gnorm": "0.677", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.7", "wall": "79082"} [2022-08-01 23:27:14,707][train_inner][INFO] - {"epoch": 5, "update": 4.714, "loss": "2.328", "ppl": "5.02", "wps": "366599", "ups": "3.09", "wpb": "118617", "bsz": "256", "num_updates": "242600", "lr": "0.000765051", "gnorm": "0.674", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "23.6", "wall": "79147"} [2022-08-01 23:28:19,630][train_inner][INFO] - {"epoch": 5, "update": 4.718, "loss": "2.333", "ppl": "5.04", "wps": "364556", "ups": "3.08", "wpb": "118338", "bsz": "256", "num_updates": "242800", "lr": "0.000764848", "gnorm": "0.69", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.1", "wall": "79212"} [2022-08-01 23:29:24,728][train_inner][INFO] - {"epoch": 5, "update": 4.721, "loss": "2.334", "ppl": "5.04", "wps": "362516", "ups": "3.07", "wpb": "117994", "bsz": "256", "num_updates": "243000", "lr": "0.000764646", "gnorm": "0.682", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.1", "wall": "79277"} [2022-08-01 23:30:29,558][train_inner][INFO] - {"epoch": 5, "update": 4.725, "loss": "2.328", "ppl": "5.02", "wps": "364876", "ups": "3.09", "wpb": "118272", "bsz": "256", "num_updates": "243200", "lr": "0.000764444", "gnorm": "0.693", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21", "wall": "79342"} [2022-08-01 23:31:34,414][train_inner][INFO] - {"epoch": 5, "update": 4.729, "loss": "2.322", "ppl": "5", "wps": "365770", "ups": "3.08", "wpb": "118610", "bsz": "256", "num_updates": "243400", "lr": "0.000764242", "gnorm": "0.673", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.1", "wall": "79407"} [2022-08-01 23:32:39,213][train_inner][INFO] - {"epoch": 5, "update": 4.733, "loss": "2.33", "ppl": "5.03", "wps": "366254", "ups": "3.09", "wpb": "118664", "bsz": "256", "num_updates": "243600", "lr": "0.00076404", "gnorm": "0.678", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "24.9", "wall": "79472"} [2022-08-01 23:33:43,846][train_inner][INFO] - {"epoch": 5, "update": 4.737, "loss": "2.334", "ppl": "5.04", "wps": "362649", "ups": "3.09", "wpb": "117193", "bsz": "256", "num_updates": "243800", "lr": "0.000763838", "gnorm": "0.684", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.1", "wall": "79536"} [2022-08-01 23:34:48,654][train_inner][INFO] - {"epoch": 5, "update": 4.741, "loss": "2.324", "ppl": "5.01", "wps": "364802", "ups": "3.09", "wpb": "118207", "bsz": "256", "num_updates": "244000", "lr": "0.000763636", "gnorm": "0.679", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21", "wall": "79601"} [2022-08-01 23:35:53,959][train_inner][INFO] - {"epoch": 5, "update": 4.745, "loss": "2.331", "ppl": "5.03", "wps": "361623", "ups": "3.06", "wpb": "118078", "bsz": "256", "num_updates": "244200", "lr": "0.000763434", "gnorm": "0.679", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "23.2", "wall": "79666"} [2022-08-01 23:36:59,191][train_inner][INFO] - {"epoch": 5, "update": 4.749, "loss": "2.321", "ppl": "5", "wps": "363649", "ups": "3.07", "wpb": "118606", "bsz": "256", "num_updates": "244400", "lr": "0.000763232", "gnorm": "0.678", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.8", "wall": "79732"} [2022-08-01 23:38:04,250][train_inner][INFO] - {"epoch": 5, "update": 4.753, "loss": "2.319", "ppl": "4.99", "wps": "363740", "ups": "3.07", "wpb": "118320", "bsz": "256", "num_updates": "244600", "lr": "0.00076303", "gnorm": "0.675", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.5", "wall": "79797"} [2022-08-01 23:39:09,101][train_inner][INFO] - {"epoch": 5, "update": 4.756, "loss": "2.326", "ppl": "5.02", "wps": "362599", "ups": "3.08", "wpb": "117573", "bsz": "256", "num_updates": "244800", "lr": "0.000762828", "gnorm": "0.696", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.1", "wall": "79861"} [2022-08-01 23:40:14,049][train_inner][INFO] - {"epoch": 5, "update": 4.76, "loss": "2.325", "ppl": "5.01", "wps": "363820", "ups": "3.08", "wpb": "118146", "bsz": "256", "num_updates": "245000", "lr": "0.000762626", "gnorm": "0.68", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.2", "wall": "79926"} [2022-08-01 23:41:19,077][train_inner][INFO] - {"epoch": 5, "update": 4.764, "loss": "2.331", "ppl": "5.03", "wps": "363110", "ups": "3.08", "wpb": "118058", "bsz": "256", "num_updates": "245200", "lr": "0.000762424", "gnorm": "0.679", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "24.9", "wall": "79991"} [2022-08-01 23:42:24,074][train_inner][INFO] - {"epoch": 5, "update": 4.768, "loss": "2.329", "ppl": "5.03", "wps": "363338", "ups": "3.08", "wpb": "118078", "bsz": "256", "num_updates": "245400", "lr": "0.000762222", "gnorm": "0.681", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.8", "wall": "80056"} [2022-08-01 23:43:28,852][train_inner][INFO] - {"epoch": 5, "update": 4.772, "loss": "2.329", "ppl": "5.03", "wps": "364681", "ups": "3.09", "wpb": "118115", "bsz": "256", "num_updates": "245600", "lr": "0.00076202", "gnorm": "0.678", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22.3", "wall": "80121"} [2022-08-01 23:44:33,918][train_inner][INFO] - {"epoch": 5, "update": 4.776, "loss": "2.327", "ppl": "5.02", "wps": "363604", "ups": "3.07", "wpb": "118290", "bsz": "256", "num_updates": "245800", "lr": "0.000761818", "gnorm": "0.68", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.8", "wall": "80186"} [2022-08-01 23:45:00,349][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 [2022-08-01 23:45:39,544][train_inner][INFO] - {"epoch": 5, "update": 4.78, "loss": "2.318", "ppl": "4.99", "wps": "360854", "ups": "3.05", "wpb": "118403", "bsz": "256", "num_updates": "246000", "lr": "0.000761616", "gnorm": "0.679", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.9", "wall": "80252"} [2022-08-01 23:46:44,757][train_inner][INFO] - {"epoch": 5, "update": 4.784, "loss": "2.326", "ppl": "5.01", "wps": "363224", "ups": "3.07", "wpb": "118434", "bsz": "256", "num_updates": "246200", "lr": "0.000761414", "gnorm": "0.679", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20", "wall": "80317"} [2022-08-01 23:47:49,513][train_inner][INFO] - {"epoch": 5, "update": 4.787, "loss": "2.326", "ppl": "5.01", "wps": "366038", "ups": "3.09", "wpb": "118514", "bsz": "256", "num_updates": "246400", "lr": "0.000761212", "gnorm": "0.68", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20", "wall": "80382"} [2022-08-01 23:48:54,477][train_inner][INFO] - {"epoch": 5, "update": 4.791, "loss": "2.317", "ppl": "4.98", "wps": "366148", "ups": "3.08", "wpb": "118930", "bsz": "256", "num_updates": "246600", "lr": "0.00076101", "gnorm": "0.676", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.7", "wall": "80447"} [2022-08-01 23:49:59,289][train_inner][INFO] - {"epoch": 5, "update": 4.795, "loss": "2.328", "ppl": "5.02", "wps": "365284", "ups": "3.09", "wpb": "118372", "bsz": "256", "num_updates": "246800", "lr": "0.000760808", "gnorm": "0.68", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "25.3", "wall": "80512"} [2022-08-01 23:51:03,878][train_inner][INFO] - {"epoch": 5, "update": 4.799, "loss": "2.324", "ppl": "5.01", "wps": "366032", "ups": "3.1", "wpb": "118207", "bsz": "256", "num_updates": "247000", "lr": "0.000760606", "gnorm": "0.676", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "25.6", "wall": "80576"} [2022-08-01 23:52:08,782][train_inner][INFO] - {"epoch": 5, "update": 4.803, "loss": "2.321", "ppl": "5", "wps": "363846", "ups": "3.08", "wpb": "118073", "bsz": "256", "num_updates": "247200", "lr": "0.000760404", "gnorm": "0.677", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.7", "wall": "80641"} [2022-08-01 23:53:14,229][train_inner][INFO] - {"epoch": 5, "update": 4.807, "loss": "2.32", "ppl": "4.99", "wps": "362256", "ups": "3.06", "wpb": "118541", "bsz": "256", "num_updates": "247400", "lr": "0.000760202", "gnorm": "0.675", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.7", "wall": "80707"} [2022-08-01 23:54:20,091][train_inner][INFO] - {"epoch": 5, "update": 4.811, "loss": "2.321", "ppl": "5", "wps": "360176", "ups": "3.04", "wpb": "118608", "bsz": "256", "num_updates": "247600", "lr": "0.00076", "gnorm": "0.678", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "22.5", "wall": "80772"} [2022-08-01 23:55:25,023][train_inner][INFO] - {"epoch": 5, "update": 4.815, "loss": "2.33", "ppl": "5.03", "wps": "362323", "ups": "3.08", "wpb": "117629", "bsz": "256", "num_updates": "247800", "lr": "0.000759798", "gnorm": "0.677", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.8", "wall": "80837"} [2022-08-01 23:56:30,054][train_inner][INFO] - {"epoch": 5, "update": 4.819, "loss": "2.326", "ppl": "5.01", "wps": "363716", "ups": "3.08", "wpb": "118262", "bsz": "256", "num_updates": "248000", "lr": "0.000759596", "gnorm": "0.68", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.1", "wall": "80902"} [2022-08-01 23:57:34,943][train_inner][INFO] - {"epoch": 5, "update": 4.822, "loss": "2.321", "ppl": "5", "wps": "364976", "ups": "3.08", "wpb": "118412", "bsz": "256", "num_updates": "248200", "lr": "0.000759394", "gnorm": "0.675", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20", "wall": "80967"} [2022-08-01 23:58:39,849][train_inner][INFO] - {"epoch": 5, "update": 4.826, "loss": "2.319", "ppl": "4.99", "wps": "364718", "ups": "3.08", "wpb": "118361", "bsz": "256", "num_updates": "248400", "lr": "0.000759192", "gnorm": "0.677", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.5", "wall": "81032"} [2022-08-01 23:59:44,992][train_inner][INFO] - {"epoch": 5, "update": 4.83, "loss": "2.327", "ppl": "5.02", "wps": "361627", "ups": "3.07", "wpb": "117785", "bsz": "256", "num_updates": "248600", "lr": "0.00075899", "gnorm": "0.681", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22", "wall": "81097"} [2022-08-02 00:00:49,225][train_inner][INFO] - {"epoch": 5, "update": 4.834, "loss": "2.321", "ppl": "5", "wps": "369719", "ups": "3.11", "wpb": "118739", "bsz": "256", "num_updates": "248800", "lr": "0.000758788", "gnorm": "0.673", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.9", "wall": "81162"} [2022-08-02 00:01:54,144][train_inner][INFO] - {"epoch": 5, "update": 4.838, "loss": "2.32", "ppl": "4.99", "wps": "364595", "ups": "3.08", "wpb": "118344", "bsz": "256", "num_updates": "249000", "lr": "0.000758586", "gnorm": "0.675", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.7", "wall": "81226"} [2022-08-02 00:02:58,979][train_inner][INFO] - {"epoch": 5, "update": 4.842, "loss": "2.323", "ppl": "5", "wps": "365298", "ups": "3.08", "wpb": "118418", "bsz": "256", "num_updates": "249200", "lr": "0.000758384", "gnorm": "0.675", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.8", "wall": "81291"} [2022-08-02 00:04:03,760][train_inner][INFO] - {"epoch": 5, "update": 4.846, "loss": "2.331", "ppl": "5.03", "wps": "364536", "ups": "3.09", "wpb": "118074", "bsz": "256", "num_updates": "249400", "lr": "0.000758182", "gnorm": "0.678", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.6", "wall": "81356"} [2022-08-02 00:05:08,912][train_inner][INFO] - {"epoch": 5, "update": 4.85, "loss": "2.315", "ppl": "4.98", "wps": "365097", "ups": "3.07", "wpb": "118932", "bsz": "256", "num_updates": "249600", "lr": "0.00075798", "gnorm": "0.675", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.9", "wall": "81421"} [2022-08-02 00:06:13,732][train_inner][INFO] - {"epoch": 5, "update": 4.854, "loss": "2.322", "ppl": "5", "wps": "364946", "ups": "3.09", "wpb": "118277", "bsz": "256", "num_updates": "249800", "lr": "0.000757778", "gnorm": "0.679", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "23", "wall": "81486"} [2022-08-02 00:07:18,412][train_inner][INFO] - {"epoch": 5, "update": 4.857, "loss": "2.321", "ppl": "5", "wps": "365201", "ups": "3.09", "wpb": "118103", "bsz": "256", "num_updates": "250000", "lr": "0.000757576", "gnorm": "0.676", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.5", "wall": "81551"} [2022-08-02 00:07:18,413][fairseq_cli.train][INFO] - begin validation on "valid" subset [2022-08-02 00:07:40,991][valid][INFO] - {"epoch": 5, "valid_loss": "2.223", "valid_ppl": "4.67", "valid_wps": "1.61597e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "250000", "valid_best_loss": "2.223"} [2022-08-02 00:07:40,994][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 5 @ 250000 updates [2022-08-02 00:07:40,994][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_5_250000.pt [2022-08-02 00:07:55,779][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_5_250000.pt [2022-08-02 00:08:23,482][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/adan2/checkpoint_5_250000.pt (epoch 5 @ 250000 updates, score 2.223) (writing took 42.48845534957945 seconds) [2022-08-02 00:09:28,051][train_inner][INFO] - {"epoch": 5, "update": 4.861, "loss": "2.319", "ppl": "4.99", "wps": "182658", "ups": "1.54", "wpb": "118398", "bsz": "256", "num_updates": "250200", "lr": "0.000757374", "gnorm": "0.682", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.5", "wall": "81680"} [2022-08-02 00:10:32,855][train_inner][INFO] - {"epoch": 5, "update": 4.865, "loss": "2.323", "ppl": "5", "wps": "365916", "ups": "3.09", "wpb": "118561", "bsz": "256", "num_updates": "250400", "lr": "0.000757172", "gnorm": "0.679", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.1", "wall": "81745"} [2022-08-02 00:11:38,990][train_inner][INFO] - {"epoch": 5, "update": 4.869, "loss": "2.327", "ppl": "5.02", "wps": "356789", "ups": "3.02", "wpb": "117979", "bsz": "256", "num_updates": "250600", "lr": "0.00075697", "gnorm": "0.678", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "19.7", "wall": "81811"} [2022-08-02 00:12:43,747][train_inner][INFO] - {"epoch": 5, "update": 4.873, "loss": "2.317", "ppl": "4.98", "wps": "366654", "ups": "3.09", "wpb": "118716", "bsz": "256", "num_updates": "250800", "lr": "0.000756768", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.8", "wall": "81876"} [2022-08-02 00:13:48,661][train_inner][INFO] - {"epoch": 5, "update": 4.877, "loss": "2.32", "ppl": "4.99", "wps": "364284", "ups": "3.08", "wpb": "118232", "bsz": "256", "num_updates": "251000", "lr": "0.000756566", "gnorm": "0.676", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.9", "wall": "81941"} [2022-08-02 00:14:53,447][train_inner][INFO] - {"epoch": 5, "update": 4.881, "loss": "2.323", "ppl": "5", "wps": "364950", "ups": "3.09", "wpb": "118214", "bsz": "256", "num_updates": "251200", "lr": "0.000756364", "gnorm": "0.681", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.8", "wall": "82006"} [2022-08-02 00:15:58,160][train_inner][INFO] - {"epoch": 5, "update": 4.885, "loss": "2.326", "ppl": "5.01", "wps": "366101", "ups": "3.09", "wpb": "118445", "bsz": "256", "num_updates": "251400", "lr": "0.000756162", "gnorm": "0.677", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "24.5", "wall": "82070"} [2022-08-02 00:17:02,887][train_inner][INFO] - {"epoch": 5, "update": 4.888, "loss": "2.323", "ppl": "5", "wps": "364190", "ups": "3.09", "wpb": "117864", "bsz": "256", "num_updates": "251600", "lr": "0.00075596", "gnorm": "0.685", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.9", "wall": "82135"} [2022-08-02 00:18:07,998][train_inner][INFO] - {"epoch": 5, "update": 4.892, "loss": "2.315", "ppl": "4.98", "wps": "363739", "ups": "3.07", "wpb": "118414", "bsz": "256", "num_updates": "251800", "lr": "0.000755758", "gnorm": "0.676", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "82200"} [2022-08-02 00:19:12,898][train_inner][INFO] - {"epoch": 5, "update": 4.896, "loss": "2.321", "ppl": "5", "wps": "363160", "ups": "3.08", "wpb": "117844", "bsz": "256", "num_updates": "252000", "lr": "0.000755556", "gnorm": "0.681", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "82265"} [2022-08-02 00:20:17,457][train_inner][INFO] - {"epoch": 5, "update": 4.9, "loss": "2.318", "ppl": "4.98", "wps": "363360", "ups": "3.1", "wpb": "117289", "bsz": "256", "num_updates": "252200", "lr": "0.000755354", "gnorm": "0.678", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.7", "wall": "82330"} [2022-08-02 00:21:23,085][train_inner][INFO] - {"epoch": 5, "update": 4.904, "loss": "2.32", "ppl": "4.99", "wps": "359546", "ups": "3.05", "wpb": "117979", "bsz": "256", "num_updates": "252400", "lr": "0.000755152", "gnorm": "0.677", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23", "wall": "82395"} [2022-08-02 00:22:27,837][train_inner][INFO] - {"epoch": 5, "update": 4.908, "loss": "2.33", "ppl": "5.03", "wps": "365497", "ups": "3.09", "wpb": "118325", "bsz": "256", "num_updates": "252600", "lr": "0.000754949", "gnorm": "0.679", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.1", "wall": "82460"} [2022-08-02 00:23:32,951][train_inner][INFO] - {"epoch": 5, "update": 4.912, "loss": "2.316", "ppl": "4.98", "wps": "363266", "ups": "3.07", "wpb": "118266", "bsz": "256", "num_updates": "252800", "lr": "0.000754747", "gnorm": "0.675", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24", "wall": "82525"} [2022-08-02 00:24:37,652][train_inner][INFO] - {"epoch": 5, "update": 4.916, "loss": "2.32", "ppl": "4.99", "wps": "366103", "ups": "3.09", "wpb": "118434", "bsz": "256", "num_updates": "253000", "lr": "0.000754545", "gnorm": "0.677", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.8", "wall": "82590"} [2022-08-02 00:25:42,067][train_inner][INFO] - {"epoch": 5, "update": 4.92, "loss": "2.319", "ppl": "4.99", "wps": "366091", "ups": "3.1", "wpb": "117908", "bsz": "256", "num_updates": "253200", "lr": "0.000754343", "gnorm": "0.68", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.2", "wall": "82654"} [2022-08-02 00:26:47,027][train_inner][INFO] - {"epoch": 5, "update": 4.923, "loss": "2.319", "ppl": "4.99", "wps": "364898", "ups": "3.08", "wpb": "118517", "bsz": "256", "num_updates": "253400", "lr": "0.000754141", "gnorm": "0.676", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.8", "wall": "82719"} [2022-08-02 00:27:52,213][train_inner][INFO] - {"epoch": 5, "update": 4.927, "loss": "2.319", "ppl": "4.99", "wps": "362722", "ups": "3.07", "wpb": "118220", "bsz": "256", "num_updates": "253600", "lr": "0.000753939", "gnorm": "0.679", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.1", "wall": "82785"} [2022-08-02 00:28:57,485][train_inner][INFO] - {"epoch": 5, "update": 4.931, "loss": "2.312", "ppl": "4.97", "wps": "362342", "ups": "3.06", "wpb": "118251", "bsz": "256", "num_updates": "253800", "lr": "0.000753737", "gnorm": "0.674", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "82850"} [2022-08-02 00:30:02,279][train_inner][INFO] - {"epoch": 5, "update": 4.935, "loss": "2.321", "ppl": "5", "wps": "364979", "ups": "3.09", "wpb": "118241", "bsz": "256", "num_updates": "254000", "lr": "0.000753535", "gnorm": "0.68", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.9", "wall": "82915"} [2022-08-02 00:30:39,976][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-02 00:31:07,674][train_inner][INFO] - {"epoch": 5, "update": 4.939, "loss": "2.316", "ppl": "4.98", "wps": "363690", "ups": "3.06", "wpb": "118916", "bsz": "256", "num_updates": "254200", "lr": "0.000753333", "gnorm": "0.675", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.9", "wall": "82980"} [2022-08-02 00:32:13,264][train_inner][INFO] - {"epoch": 5, "update": 4.943, "loss": "2.318", "ppl": "4.99", "wps": "360756", "ups": "3.05", "wpb": "118308", "bsz": "256", "num_updates": "254400", "lr": "0.000753131", "gnorm": "0.68", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "83046"} [2022-08-02 00:32:22,313][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-02 00:33:18,270][train_inner][INFO] - {"epoch": 5, "update": 4.947, "loss": "2.319", "ppl": "4.99", "wps": "364283", "ups": "3.08", "wpb": "118401", "bsz": "256", "num_updates": "254600", "lr": "0.000752929", "gnorm": "0.68", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.6", "wall": "83111"} [2022-08-02 00:34:23,547][train_inner][INFO] - {"epoch": 5, "update": 4.951, "loss": "2.32", "ppl": "4.99", "wps": "361784", "ups": "3.06", "wpb": "118081", "bsz": "256", "num_updates": "254800", "lr": "0.000752727", "gnorm": "0.676", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.3", "wall": "83176"} [2022-08-02 00:35:28,252][train_inner][INFO] - {"epoch": 5, "update": 4.955, "loss": "2.323", "ppl": "5", "wps": "365148", "ups": "3.09", "wpb": "118131", "bsz": "256", "num_updates": "255000", "lr": "0.000752525", "gnorm": "0.676", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.9", "wall": "83241"} [2022-08-02 00:36:32,681][train_inner][INFO] - {"epoch": 5, "update": 4.958, "loss": "2.324", "ppl": "5.01", "wps": "366312", "ups": "3.1", "wpb": "118004", "bsz": "256", "num_updates": "255200", "lr": "0.000752323", "gnorm": "0.685", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23.6", "wall": "83305"} [2022-08-02 00:37:37,452][train_inner][INFO] - {"epoch": 5, "update": 4.962, "loss": "2.321", "ppl": "5", "wps": "366263", "ups": "3.09", "wpb": "118614", "bsz": "256", "num_updates": "255400", "lr": "0.000752121", "gnorm": "0.677", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.9", "wall": "83370"} [2022-08-02 00:38:42,438][train_inner][INFO] - {"epoch": 5, "update": 4.966, "loss": "2.311", "ppl": "4.96", "wps": "364052", "ups": "3.08", "wpb": "118290", "bsz": "256", "num_updates": "255600", "lr": "0.000751919", "gnorm": "0.68", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "83435"} [2022-08-02 00:39:47,170][train_inner][INFO] - {"epoch": 5, "update": 4.97, "loss": "2.318", "ppl": "4.99", "wps": "362062", "ups": "3.09", "wpb": "117183", "bsz": "256", "num_updates": "255800", "lr": "0.000751717", "gnorm": "0.68", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "24.1", "wall": "83499"} [2022-08-02 00:40:52,265][train_inner][INFO] - {"epoch": 5, "update": 4.974, "loss": "2.314", "ppl": "4.97", "wps": "365103", "ups": "3.07", "wpb": "118831", "bsz": "256", "num_updates": "256000", "lr": "0.000751515", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.9", "wall": "83565"} [2022-08-02 00:41:57,257][train_inner][INFO] - {"epoch": 5, "update": 4.978, "loss": "2.319", "ppl": "4.99", "wps": "364242", "ups": "3.08", "wpb": "118363", "bsz": "256", "num_updates": "256200", "lr": "0.000751313", "gnorm": "0.682", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.4", "wall": "83630"} [2022-08-02 00:43:02,267][train_inner][INFO] - {"epoch": 5, "update": 4.982, "loss": "2.318", "ppl": "4.99", "wps": "364856", "ups": "3.08", "wpb": "118594", "bsz": "256", "num_updates": "256400", "lr": "0.000751111", "gnorm": "0.674", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "83695"} [2022-08-02 00:44:06,826][train_inner][INFO] - {"epoch": 5, "update": 4.986, "loss": "2.323", "ppl": "5", "wps": "365493", "ups": "3.1", "wpb": "117978", "bsz": "256", "num_updates": "256600", "lr": "0.000750909", "gnorm": "0.677", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "83759"} [2022-08-02 00:44:55,969][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-02 00:45:11,980][train_inner][INFO] - {"epoch": 5, "update": 4.99, "loss": "2.324", "ppl": "5.01", "wps": "363439", "ups": "3.07", "wpb": "118394", "bsz": "256", "num_updates": "256800", "lr": "0.000750707", "gnorm": "0.684", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.7", "wall": "83824"} [2022-08-02 00:46:16,814][train_inner][INFO] - {"epoch": 5, "update": 4.993, "loss": "2.319", "ppl": "4.99", "wps": "364520", "ups": "3.08", "wpb": "118166", "bsz": "256", "num_updates": "257000", "lr": "0.000750505", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20", "wall": "83889"} [2022-08-02 00:47:21,868][train_inner][INFO] - {"epoch": 5, "update": 4.997, "loss": "2.317", "ppl": "4.98", "wps": "364231", "ups": "3.07", "wpb": "118470", "bsz": "256", "num_updates": "257200", "lr": "0.000750303", "gnorm": "0.684", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.5", "wall": "83954"} [2022-08-02 00:48:07,828][fairseq_cli.train][INFO] - begin validation on "valid" subset [2022-08-02 00:48:30,655][valid][INFO] - {"epoch": 5, "valid_loss": "2.218", "valid_ppl": "4.65", "valid_wps": "1.61793e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "257339", "valid_best_loss": "2.218"} [2022-08-02 00:48:30,657][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 5 @ 257339 updates [2022-08-02 00:48:30,658][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_best.pt [2022-08-02 00:48:42,410][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_best.pt [2022-08-02 00:49:01,671][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/adan2/checkpoint_best.pt (epoch 5 @ 257339 updates, score 2.218) (writing took 31.013541775755584 seconds) [2022-08-02 00:49:01,671][fairseq_cli.train][INFO] - end of epoch 5 (average epoch stats below) [2022-08-02 00:49:01,672][train][INFO] - {"epoch": 5, "train_loss": "2.338", "train_ppl": "5.06", "train_wps": "361380", "train_ups": "3.05", "train_wpb": "118298", "train_bsz": "256", "train_num_updates": "257339", "train_lr": "0.000750163", "train_gnorm": "0.685", "train_clip": "0", "train_loss_scale": "8", "train_train_wall": "16627", "train_gb_free": "20.7", "train_wall": "84054"} [2022-08-02 00:49:01,783][fairseq.data.iterators][INFO] - grouped total_num_itrs = 51488 [2022-08-02 00:49:01,786][fairseq.trainer][INFO] - begin training epoch 6 [2022-08-02 00:49:01,787][fairseq_cli.train][INFO] - Start iterating over samples [2022-08-02 00:49:31,142][train_inner][INFO] - {"epoch": 6, "update": 5.001, "loss": "2.309", "ppl": "4.96", "wps": "182371", "ups": "1.55", "wpb": "117879", "bsz": "255.4", "num_updates": "257400", "lr": "0.000750101", "gnorm": "0.677", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "20.8", "wall": "84083"} [2022-08-02 00:50:36,165][train_inner][INFO] - {"epoch": 6, "update": 5.005, "loss": "2.315", "ppl": "4.98", "wps": "363794", "ups": "3.08", "wpb": "118274", "bsz": "256", "num_updates": "257600", "lr": "0.000749899", "gnorm": "0.676", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "84148"} [2022-08-02 00:51:40,769][train_inner][INFO] - {"epoch": 6, "update": 5.009, "loss": "2.314", "ppl": "4.97", "wps": "365924", "ups": "3.1", "wpb": "118198", "bsz": "256", "num_updates": "257800", "lr": "0.000749697", "gnorm": "0.681", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.3", "wall": "84213"} [2022-08-02 00:52:45,668][train_inner][INFO] - {"epoch": 6, "update": 5.013, "loss": "2.311", "ppl": "4.96", "wps": "365611", "ups": "3.08", "wpb": "118637", "bsz": "256", "num_updates": "258000", "lr": "0.000749495", "gnorm": "0.677", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.2", "wall": "84278"} [2022-08-02 00:53:51,603][train_inner][INFO] - {"epoch": 6, "update": 5.017, "loss": "2.31", "ppl": "4.96", "wps": "359883", "ups": "3.03", "wpb": "118643", "bsz": "256", "num_updates": "258200", "lr": "0.000749293", "gnorm": "0.677", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "20.3", "wall": "84344"} [2022-08-02 00:54:57,350][train_inner][INFO] - {"epoch": 6, "update": 5.021, "loss": "2.32", "ppl": "4.99", "wps": "359358", "ups": "3.04", "wpb": "118132", "bsz": "256", "num_updates": "258400", "lr": "0.000749091", "gnorm": "0.677", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.6", "wall": "84410"} [2022-08-02 00:56:02,098][train_inner][INFO] - {"epoch": 6, "update": 5.024, "loss": "2.316", "ppl": "4.98", "wps": "364207", "ups": "3.09", "wpb": "117906", "bsz": "256", "num_updates": "258600", "lr": "0.000748889", "gnorm": "0.676", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21", "wall": "84474"} [2022-08-02 00:57:06,864][train_inner][INFO] - {"epoch": 6, "update": 5.028, "loss": "2.318", "ppl": "4.99", "wps": "362007", "ups": "3.09", "wpb": "117228", "bsz": "256", "num_updates": "258800", "lr": "0.000748687", "gnorm": "0.68", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.3", "wall": "84539"} [2022-08-02 00:58:11,886][train_inner][INFO] - {"epoch": 6, "update": 5.032, "loss": "2.31", "ppl": "4.96", "wps": "365670", "ups": "3.08", "wpb": "118881", "bsz": "256", "num_updates": "259000", "lr": "0.000748485", "gnorm": "0.675", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "84604"} [2022-08-02 00:59:16,227][train_inner][INFO] - {"epoch": 6, "update": 5.036, "loss": "2.307", "ppl": "4.95", "wps": "369115", "ups": "3.11", "wpb": "118744", "bsz": "256", "num_updates": "259200", "lr": "0.000748283", "gnorm": "0.675", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24", "wall": "84669"} [2022-08-02 01:00:21,097][train_inner][INFO] - {"epoch": 6, "update": 5.04, "loss": "2.31", "ppl": "4.96", "wps": "365846", "ups": "3.08", "wpb": "118660", "bsz": "256", "num_updates": "259400", "lr": "0.000748081", "gnorm": "0.677", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.1", "wall": "84733"} [2022-08-02 01:01:25,587][train_inner][INFO] - {"epoch": 6, "update": 5.044, "loss": "2.318", "ppl": "4.99", "wps": "366266", "ups": "3.1", "wpb": "118101", "bsz": "256", "num_updates": "259600", "lr": "0.000747879", "gnorm": "0.678", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20", "wall": "84798"} [2022-08-02 01:02:30,325][train_inner][INFO] - {"epoch": 6, "update": 5.048, "loss": "2.32", "ppl": "4.99", "wps": "364778", "ups": "3.09", "wpb": "118074", "bsz": "256", "num_updates": "259800", "lr": "0.000747677", "gnorm": "0.677", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.1", "wall": "84863"} [2022-08-02 01:03:35,278][train_inner][INFO] - {"epoch": 6, "update": 5.052, "loss": "2.309", "ppl": "4.96", "wps": "364883", "ups": "3.08", "wpb": "118499", "bsz": "256", "num_updates": "260000", "lr": "0.000747475", "gnorm": "0.68", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.2", "wall": "84928"} [2022-08-02 01:04:39,982][train_inner][INFO] - {"epoch": 6, "update": 5.056, "loss": "2.307", "ppl": "4.95", "wps": "367270", "ups": "3.09", "wpb": "118816", "bsz": "256", "num_updates": "260200", "lr": "0.000747273", "gnorm": "0.671", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "84992"} [2022-08-02 01:05:44,585][train_inner][INFO] - {"epoch": 6, "update": 5.059, "loss": "2.305", "ppl": "4.94", "wps": "368106", "ups": "3.1", "wpb": "118902", "bsz": "256", "num_updates": "260400", "lr": "0.000747071", "gnorm": "0.673", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.1", "wall": "85057"} [2022-08-02 01:06:37,358][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-02 01:06:49,756][train_inner][INFO] - {"epoch": 6, "update": 5.063, "loss": "2.32", "ppl": "4.99", "wps": "362312", "ups": "3.07", "wpb": "118060", "bsz": "256", "num_updates": "260600", "lr": "0.000746869", "gnorm": "0.676", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "28.2", "wall": "85122"} [2022-08-02 01:07:54,231][train_inner][INFO] - {"epoch": 6, "update": 5.067, "loss": "2.309", "ppl": "4.95", "wps": "366348", "ups": "3.1", "wpb": "118100", "bsz": "256", "num_updates": "260800", "lr": "0.000746667", "gnorm": "0.682", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.5", "wall": "85187"} [2022-08-02 01:08:59,226][train_inner][INFO] - {"epoch": 6, "update": 5.071, "loss": "2.305", "ppl": "4.94", "wps": "366091", "ups": "3.08", "wpb": "118969", "bsz": "256", "num_updates": "261000", "lr": "0.000746465", "gnorm": "0.674", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.8", "wall": "85252"} [2022-08-02 01:10:04,214][train_inner][INFO] - {"epoch": 6, "update": 5.075, "loss": "2.308", "ppl": "4.95", "wps": "363850", "ups": "3.08", "wpb": "118227", "bsz": "256", "num_updates": "261200", "lr": "0.000746263", "gnorm": "0.676", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "27.2", "wall": "85317"} [2022-08-02 01:11:09,000][train_inner][INFO] - {"epoch": 6, "update": 5.079, "loss": "2.312", "ppl": "4.97", "wps": "366292", "ups": "3.09", "wpb": "118651", "bsz": "256", "num_updates": "261400", "lr": "0.000746061", "gnorm": "0.678", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "24.1", "wall": "85381"} [2022-08-02 01:12:13,663][train_inner][INFO] - {"epoch": 6, "update": 5.083, "loss": "2.309", "ppl": "4.95", "wps": "365662", "ups": "3.09", "wpb": "118222", "bsz": "256", "num_updates": "261600", "lr": "0.000745859", "gnorm": "0.677", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "24.3", "wall": "85446"} [2022-08-02 01:13:18,093][train_inner][INFO] - {"epoch": 6, "update": 5.087, "loss": "2.313", "ppl": "4.97", "wps": "366371", "ups": "3.1", "wpb": "118024", "bsz": "256", "num_updates": "261800", "lr": "0.000745657", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.9", "wall": "85510"} [2022-08-02 01:14:11,128][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 [2022-08-02 01:14:22,992][train_inner][INFO] - {"epoch": 6, "update": 5.091, "loss": "2.31", "ppl": "4.96", "wps": "364857", "ups": "3.08", "wpb": "118392", "bsz": "256", "num_updates": "262000", "lr": "0.000745455", "gnorm": "0.677", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "26.9", "wall": "85575"} [2022-08-02 01:14:32,642][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 [2022-08-02 01:14:32,941][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 [2022-08-02 01:14:33,236][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5 [2022-08-02 01:15:28,690][train_inner][INFO] - {"epoch": 6, "update": 5.095, "loss": "2.312", "ppl": "4.97", "wps": "360879", "ups": "3.04", "wpb": "118544", "bsz": "256", "num_updates": "262200", "lr": "0.000745253", "gnorm": "0.734", "clip": "0.5", "loss_scale": "0.5", "train_wall": "65", "gb_free": "28.2", "wall": "85641"} [2022-08-02 01:16:33,707][train_inner][INFO] - {"epoch": 6, "update": 5.098, "loss": "2.314", "ppl": "4.97", "wps": "364336", "ups": "3.08", "wpb": "118441", "bsz": "256", "num_updates": "262400", "lr": "0.000745051", "gnorm": "0.675", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "25.8", "wall": "85706"} [2022-08-02 01:17:38,722][train_inner][INFO] - {"epoch": 6, "update": 5.102, "loss": "2.314", "ppl": "4.97", "wps": "362931", "ups": "3.08", "wpb": "117978", "bsz": "256", "num_updates": "262600", "lr": "0.000744848", "gnorm": "0.742", "clip": "0.5", "loss_scale": "0.5", "train_wall": "65", "gb_free": "22.7", "wall": "85771"} [2022-08-02 01:18:43,825][train_inner][INFO] - {"epoch": 6, "update": 5.106, "loss": "2.309", "ppl": "4.96", "wps": "362281", "ups": "3.07", "wpb": "117925", "bsz": "256", "num_updates": "262800", "lr": "0.000744646", "gnorm": "0.683", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "24.2", "wall": "85836"} [2022-08-02 01:19:48,658][train_inner][INFO] - {"epoch": 6, "update": 5.11, "loss": "2.309", "ppl": "4.96", "wps": "364046", "ups": "3.08", "wpb": "118011", "bsz": "256", "num_updates": "263000", "lr": "0.000744444", "gnorm": "0.681", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "22.1", "wall": "85901"} [2022-08-02 01:20:53,420][train_inner][INFO] - {"epoch": 6, "update": 5.114, "loss": "2.309", "ppl": "4.96", "wps": "364665", "ups": "3.09", "wpb": "118080", "bsz": "256", "num_updates": "263200", "lr": "0.000744242", "gnorm": "0.677", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "22.3", "wall": "85966"} [2022-08-02 01:21:51,923][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.25 [2022-08-02 01:21:58,184][train_inner][INFO] - {"epoch": 6, "update": 5.118, "loss": "2.355", "ppl": "5.12", "wps": "363249", "ups": "3.09", "wpb": "117625", "bsz": "256", "num_updates": "263400", "lr": "0.00074404", "gnorm": "1.675", "clip": "2.5", "loss_scale": "0.25", "train_wall": "64", "gb_free": "20.5", "wall": "86031"} [2022-08-02 01:23:03,061][train_inner][INFO] - {"epoch": 6, "update": 5.122, "loss": "2.315", "ppl": "4.98", "wps": "362232", "ups": "3.08", "wpb": "117502", "bsz": "256", "num_updates": "263600", "lr": "0.000743838", "gnorm": "0.724", "clip": "0.5", "loss_scale": "0.25", "train_wall": "65", "gb_free": "22.8", "wall": "86095"} [2022-08-02 01:23:58,653][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.125 [2022-08-02 01:24:08,000][train_inner][INFO] - {"epoch": 6, "update": 5.126, "loss": "2.351", "ppl": "5.1", "wps": "364698", "ups": "3.08", "wpb": "118413", "bsz": "256", "num_updates": "263800", "lr": "0.000743636", "gnorm": "1.227", "clip": "3", "loss_scale": "0.125", "train_wall": "65", "gb_free": "20.9", "wall": "86160"} [2022-08-02 01:25:12,828][train_inner][INFO] - {"epoch": 6, "update": 5.13, "loss": "2.314", "ppl": "4.97", "wps": "364541", "ups": "3.09", "wpb": "118163", "bsz": "256", "num_updates": "264000", "lr": "0.000743434", "gnorm": "0.675", "clip": "0", "loss_scale": "0.125", "train_wall": "64", "gb_free": "22.2", "wall": "86225"} [2022-08-02 01:26:17,624][train_inner][INFO] - {"epoch": 6, "update": 5.133, "loss": "2.314", "ppl": "4.97", "wps": "364036", "ups": "3.09", "wpb": "117938", "bsz": "256", "num_updates": "264200", "lr": "0.000743232", "gnorm": "0.701", "clip": "0", "loss_scale": "0.125", "train_wall": "64", "gb_free": "19.9", "wall": "86290"} [2022-08-02 01:27:22,314][train_inner][INFO] - {"epoch": 6, "update": 5.137, "loss": "2.314", "ppl": "4.97", "wps": "365451", "ups": "3.09", "wpb": "118203", "bsz": "256", "num_updates": "264400", "lr": "0.00074303", "gnorm": "0.675", "clip": "0", "loss_scale": "0.125", "train_wall": "64", "gb_free": "21.6", "wall": "86355"} [2022-08-02 01:28:27,198][train_inner][INFO] - {"epoch": 6, "update": 5.141, "loss": "2.32", "ppl": "4.99", "wps": "362560", "ups": "3.08", "wpb": "117619", "bsz": "256", "num_updates": "264600", "lr": "0.000742828", "gnorm": "0.702", "clip": "0", "loss_scale": "0.125", "train_wall": "65", "gb_free": "19.7", "wall": "86420"} [2022-08-02 01:29:32,435][train_inner][INFO] - {"epoch": 6, "update": 5.145, "loss": "2.309", "ppl": "4.96", "wps": "363076", "ups": "3.07", "wpb": "118428", "bsz": "256", "num_updates": "264800", "lr": "0.000742626", "gnorm": "0.675", "clip": "0", "loss_scale": "0.125", "train_wall": "65", "gb_free": "20.5", "wall": "86485"} [2022-08-02 01:30:18,151][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.0625 [2022-08-02 01:30:37,552][train_inner][INFO] - {"epoch": 6, "update": 5.149, "loss": "2.323", "ppl": "5", "wps": "361908", "ups": "3.07", "wpb": "117831", "bsz": "256", "num_updates": "265000", "lr": "0.000742424", "gnorm": "1.374", "clip": "0.5", "loss_scale": "0.0625", "train_wall": "65", "gb_free": "28", "wall": "86550"} [2022-08-02 01:31:42,323][train_inner][INFO] - {"epoch": 6, "update": 5.153, "loss": "2.314", "ppl": "4.97", "wps": "365684", "ups": "3.09", "wpb": "118427", "bsz": "256", "num_updates": "265200", "lr": "0.000742222", "gnorm": "0.675", "clip": "0", "loss_scale": "0.0625", "train_wall": "64", "gb_free": "27.2", "wall": "86615"} [2022-08-02 01:32:47,406][train_inner][INFO] - {"epoch": 6, "update": 5.157, "loss": "2.309", "ppl": "4.96", "wps": "363142", "ups": "3.07", "wpb": "118171", "bsz": "256", "num_updates": "265400", "lr": "0.00074202", "gnorm": "0.676", "clip": "0", "loss_scale": "0.0625", "train_wall": "65", "gb_free": "19.8", "wall": "86680"} [2022-08-02 01:33:52,220][train_inner][INFO] - {"epoch": 6, "update": 5.161, "loss": "2.314", "ppl": "4.97", "wps": "366393", "ups": "3.09", "wpb": "118735", "bsz": "256", "num_updates": "265600", "lr": "0.000741818", "gnorm": "0.688", "clip": "0", "loss_scale": "0.0625", "train_wall": "64", "gb_free": "21.3", "wall": "86745"} [2022-08-02 01:34:57,123][train_inner][INFO] - {"epoch": 6, "update": 5.164, "loss": "2.313", "ppl": "4.97", "wps": "364790", "ups": "3.08", "wpb": "118377", "bsz": "256", "num_updates": "265800", "lr": "0.000741616", "gnorm": "0.679", "clip": "0", "loss_scale": "0.0625", "train_wall": "65", "gb_free": "22.6", "wall": "86809"} [2022-08-02 01:36:01,940][train_inner][INFO] - {"epoch": 6, "update": 5.168, "loss": "2.313", "ppl": "4.97", "wps": "366026", "ups": "3.09", "wpb": "118623", "bsz": "256", "num_updates": "266000", "lr": "0.000741414", "gnorm": "0.701", "clip": "0", "loss_scale": "0.0625", "train_wall": "64", "gb_free": "20.1", "wall": "86874"} [2022-08-02 01:37:06,533][train_inner][INFO] - {"epoch": 6, "update": 5.172, "loss": "2.314", "ppl": "4.97", "wps": "366484", "ups": "3.1", "wpb": "118358", "bsz": "256", "num_updates": "266200", "lr": "0.000741212", "gnorm": "0.678", "clip": "0", "loss_scale": "0.0625", "train_wall": "64", "gb_free": "21.2", "wall": "86939"} [2022-08-02 01:38:11,197][train_inner][INFO] - {"epoch": 6, "update": 5.176, "loss": "2.308", "ppl": "4.95", "wps": "366350", "ups": "3.09", "wpb": "118447", "bsz": "256", "num_updates": "266400", "lr": "0.00074101", "gnorm": "0.677", "clip": "0", "loss_scale": "0.0625", "train_wall": "64", "gb_free": "20.4", "wall": "87004"} [2022-08-02 01:39:16,347][train_inner][INFO] - {"epoch": 6, "update": 5.18, "loss": "2.314", "ppl": "4.97", "wps": "362160", "ups": "3.07", "wpb": "117972", "bsz": "256", "num_updates": "266600", "lr": "0.000740808", "gnorm": "0.681", "clip": "0", "loss_scale": "0.0625", "train_wall": "65", "gb_free": "20.3", "wall": "87069"} [2022-08-02 01:40:21,066][train_inner][INFO] - {"epoch": 6, "update": 5.184, "loss": "2.315", "ppl": "4.98", "wps": "366006", "ups": "3.09", "wpb": "118436", "bsz": "256", "num_updates": "266800", "lr": "0.000740606", "gnorm": "0.743", "clip": "0.5", "loss_scale": "0.0625", "train_wall": "64", "gb_free": "23.4", "wall": "87133"} [2022-08-02 01:41:26,121][train_inner][INFO] - {"epoch": 6, "update": 5.188, "loss": "2.317", "ppl": "4.98", "wps": "362739", "ups": "3.07", "wpb": "117987", "bsz": "256", "num_updates": "267000", "lr": "0.000740404", "gnorm": "0.682", "clip": "0", "loss_scale": "0.125", "train_wall": "65", "gb_free": "20.9", "wall": "87198"} [2022-08-02 01:42:30,806][train_inner][INFO] - {"epoch": 6, "update": 5.192, "loss": "2.313", "ppl": "4.97", "wps": "365350", "ups": "3.09", "wpb": "118162", "bsz": "256", "num_updates": "267200", "lr": "0.000740202", "gnorm": "0.679", "clip": "0", "loss_scale": "0.125", "train_wall": "64", "gb_free": "21.5", "wall": "87263"} [2022-08-02 01:43:35,789][train_inner][INFO] - {"epoch": 6, "update": 5.196, "loss": "2.314", "ppl": "4.97", "wps": "362681", "ups": "3.08", "wpb": "117840", "bsz": "256", "num_updates": "267400", "lr": "0.00074", "gnorm": "0.689", "clip": "0", "loss_scale": "0.125", "train_wall": "65", "gb_free": "20.2", "wall": "87328"} [2022-08-02 01:44:40,364][train_inner][INFO] - {"epoch": 6, "update": 5.199, "loss": "2.315", "ppl": "4.98", "wps": "367093", "ups": "3.1", "wpb": "118524", "bsz": "256", "num_updates": "267600", "lr": "0.000739798", "gnorm": "0.692", "clip": "0", "loss_scale": "0.125", "train_wall": "64", "gb_free": "23.1", "wall": "87393"} [2022-08-02 01:45:45,430][train_inner][INFO] - {"epoch": 6, "update": 5.203, "loss": "2.31", "ppl": "4.96", "wps": "363237", "ups": "3.07", "wpb": "118169", "bsz": "256", "num_updates": "267800", "lr": "0.000739596", "gnorm": "0.678", "clip": "0", "loss_scale": "0.125", "train_wall": "65", "gb_free": "19.8", "wall": "87458"} [2022-08-02 01:46:51,500][train_inner][INFO] - {"epoch": 6, "update": 5.207, "loss": "2.309", "ppl": "4.96", "wps": "357714", "ups": "3.03", "wpb": "118168", "bsz": "256", "num_updates": "268000", "lr": "0.000739394", "gnorm": "0.68", "clip": "0", "loss_scale": "0.125", "train_wall": "66", "gb_free": "22.5", "wall": "87524"} [2022-08-02 01:47:56,273][train_inner][INFO] - {"epoch": 6, "update": 5.211, "loss": "2.311", "ppl": "4.96", "wps": "363573", "ups": "3.09", "wpb": "117748", "bsz": "256", "num_updates": "268200", "lr": "0.000739192", "gnorm": "0.697", "clip": "0", "loss_scale": "0.125", "train_wall": "64", "gb_free": "22.9", "wall": "87589"} [2022-08-02 01:49:01,014][train_inner][INFO] - {"epoch": 6, "update": 5.215, "loss": "2.309", "ppl": "4.95", "wps": "365702", "ups": "3.09", "wpb": "118377", "bsz": "256", "num_updates": "268400", "lr": "0.00073899", "gnorm": "0.68", "clip": "0", "loss_scale": "0.125", "train_wall": "64", "gb_free": "20.2", "wall": "87653"} [2022-08-02 01:50:05,994][train_inner][INFO] - {"epoch": 6, "update": 5.219, "loss": "2.307", "ppl": "4.95", "wps": "365307", "ups": "3.08", "wpb": "118688", "bsz": "255.9", "num_updates": "268600", "lr": "0.000738788", "gnorm": "0.677", "clip": "0", "loss_scale": "0.125", "train_wall": "65", "gb_free": "20.3", "wall": "87718"} [2022-08-02 01:51:11,504][train_inner][INFO] - {"epoch": 6, "update": 5.223, "loss": "2.314", "ppl": "4.97", "wps": "360591", "ups": "3.05", "wpb": "118109", "bsz": "256", "num_updates": "268800", "lr": "0.000738586", "gnorm": "0.694", "clip": "0", "loss_scale": "0.125", "train_wall": "65", "gb_free": "20.9", "wall": "87784"} [2022-08-02 01:52:16,393][train_inner][INFO] - {"epoch": 6, "update": 5.227, "loss": "2.308", "ppl": "4.95", "wps": "365634", "ups": "3.08", "wpb": "118626", "bsz": "256", "num_updates": "269000", "lr": "0.000738384", "gnorm": "0.674", "clip": "0", "loss_scale": "0.125", "train_wall": "65", "gb_free": "23.6", "wall": "87849"} [2022-08-02 01:53:20,995][train_inner][INFO] - {"epoch": 6, "update": 5.231, "loss": "2.313", "ppl": "4.97", "wps": "367442", "ups": "3.1", "wpb": "118686", "bsz": "256", "num_updates": "269200", "lr": "0.000738182", "gnorm": "0.676", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "19.9", "wall": "87913"} [2022-08-02 01:54:26,971][train_inner][INFO] - {"epoch": 6, "update": 5.234, "loss": "2.313", "ppl": "4.97", "wps": "357884", "ups": "3.03", "wpb": "118056", "bsz": "255.9", "num_updates": "269400", "lr": "0.00073798", "gnorm": "0.68", "clip": "0", "loss_scale": "0.25", "train_wall": "66", "gb_free": "20.2", "wall": "87979"} [2022-08-02 01:55:31,367][train_inner][INFO] - {"epoch": 6, "update": 5.238, "loss": "2.31", "ppl": "4.96", "wps": "366921", "ups": "3.11", "wpb": "118140", "bsz": "256", "num_updates": "269600", "lr": "0.000737778", "gnorm": "0.678", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "20.4", "wall": "88044"} [2022-08-02 01:56:36,329][train_inner][INFO] - {"epoch": 6, "update": 5.242, "loss": "2.311", "ppl": "4.96", "wps": "361509", "ups": "3.08", "wpb": "117419", "bsz": "256", "num_updates": "269800", "lr": "0.000737576", "gnorm": "0.682", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "21.5", "wall": "88109"} [2022-08-02 01:57:40,920][train_inner][INFO] - {"epoch": 6, "update": 5.246, "loss": "2.311", "ppl": "4.96", "wps": "366516", "ups": "3.1", "wpb": "118367", "bsz": "256", "num_updates": "270000", "lr": "0.000737374", "gnorm": "0.679", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "20.1", "wall": "88173"} [2022-08-02 01:58:46,064][train_inner][INFO] - {"epoch": 6, "update": 5.25, "loss": "2.312", "ppl": "4.97", "wps": "364549", "ups": "3.07", "wpb": "118738", "bsz": "256", "num_updates": "270200", "lr": "0.000737172", "gnorm": "0.799", "clip": "1", "loss_scale": "0.25", "train_wall": "65", "gb_free": "25.1", "wall": "88238"} [2022-08-02 01:59:50,486][train_inner][INFO] - {"epoch": 6, "update": 5.254, "loss": "2.3", "ppl": "4.93", "wps": "367425", "ups": "3.1", "wpb": "118350", "bsz": "256", "num_updates": "270400", "lr": "0.00073697", "gnorm": "0.674", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "19.9", "wall": "88303"} [2022-08-02 02:00:55,287][train_inner][INFO] - {"epoch": 6, "update": 5.258, "loss": "2.307", "ppl": "4.95", "wps": "364927", "ups": "3.09", "wpb": "118236", "bsz": "256", "num_updates": "270600", "lr": "0.000736768", "gnorm": "0.676", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "25.3", "wall": "88368"} [2022-08-02 02:02:00,920][train_inner][INFO] - {"epoch": 6, "update": 5.262, "loss": "2.31", "ppl": "4.96", "wps": "359351", "ups": "3.05", "wpb": "117926", "bsz": "256", "num_updates": "270800", "lr": "0.000736566", "gnorm": "0.696", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "21.5", "wall": "88433"} [2022-08-02 02:03:07,083][train_inner][INFO] - {"epoch": 6, "update": 5.265, "loss": "2.306", "ppl": "4.95", "wps": "359921", "ups": "3.02", "wpb": "119065", "bsz": "256", "num_updates": "271000", "lr": "0.000736364", "gnorm": "0.677", "clip": "0", "loss_scale": "0.25", "train_wall": "66", "gb_free": "25", "wall": "88499"} [2022-08-02 02:04:12,339][train_inner][INFO] - {"epoch": 6, "update": 5.269, "loss": "2.311", "ppl": "4.96", "wps": "362485", "ups": "3.06", "wpb": "118270", "bsz": "256", "num_updates": "271200", "lr": "0.000736162", "gnorm": "0.679", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "19.8", "wall": "88565"} [2022-08-02 02:05:18,505][train_inner][INFO] - {"epoch": 6, "update": 5.273, "loss": "2.312", "ppl": "4.96", "wps": "357074", "ups": "3.02", "wpb": "118128", "bsz": "256", "num_updates": "271400", "lr": "0.00073596", "gnorm": "0.682", "clip": "0", "loss_scale": "0.5", "train_wall": "66", "gb_free": "20.2", "wall": "88631"} [2022-08-02 02:06:23,376][train_inner][INFO] - {"epoch": 6, "update": 5.277, "loss": "2.305", "ppl": "4.94", "wps": "365093", "ups": "3.08", "wpb": "118419", "bsz": "256", "num_updates": "271600", "lr": "0.000735758", "gnorm": "0.678", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "21.3", "wall": "88696"} [2022-08-02 02:07:28,295][train_inner][INFO] - {"epoch": 6, "update": 5.281, "loss": "2.304", "ppl": "4.94", "wps": "363466", "ups": "3.08", "wpb": "117977", "bsz": "256", "num_updates": "271800", "lr": "0.000735556", "gnorm": "0.678", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "19.7", "wall": "88761"} [2022-08-02 02:08:33,218][train_inner][INFO] - {"epoch": 6, "update": 5.285, "loss": "2.306", "ppl": "4.94", "wps": "363108", "ups": "3.08", "wpb": "117868", "bsz": "256", "num_updates": "272000", "lr": "0.000735354", "gnorm": "0.678", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "21.1", "wall": "88826"} [2022-08-02 02:09:37,992][train_inner][INFO] - {"epoch": 6, "update": 5.289, "loss": "2.304", "ppl": "4.94", "wps": "364269", "ups": "3.09", "wpb": "117973", "bsz": "256", "num_updates": "272200", "lr": "0.000735152", "gnorm": "0.676", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "23.4", "wall": "88890"} [2022-08-02 02:10:42,825][train_inner][INFO] - {"epoch": 6, "update": 5.293, "loss": "2.306", "ppl": "4.94", "wps": "365619", "ups": "3.08", "wpb": "118519", "bsz": "256", "num_updates": "272400", "lr": "0.000734949", "gnorm": "0.675", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "19.8", "wall": "88955"} [2022-08-02 02:11:47,470][train_inner][INFO] - {"epoch": 6, "update": 5.297, "loss": "2.307", "ppl": "4.95", "wps": "367063", "ups": "3.09", "wpb": "118643", "bsz": "256", "num_updates": "272600", "lr": "0.000734747", "gnorm": "0.677", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "26.2", "wall": "89020"} [2022-08-02 02:12:52,197][train_inner][INFO] - {"epoch": 6, "update": 5.3, "loss": "2.306", "ppl": "4.95", "wps": "366503", "ups": "3.09", "wpb": "118610", "bsz": "256", "num_updates": "272800", "lr": "0.000734545", "gnorm": "0.689", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20.6", "wall": "89085"} [2022-08-02 02:13:56,759][train_inner][INFO] - {"epoch": 6, "update": 5.304, "loss": "2.296", "ppl": "4.91", "wps": "365965", "ups": "3.1", "wpb": "118137", "bsz": "256", "num_updates": "273000", "lr": "0.000734343", "gnorm": "0.68", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "24", "wall": "89149"} [2022-08-02 02:15:01,520][train_inner][INFO] - {"epoch": 6, "update": 5.308, "loss": "2.305", "ppl": "4.94", "wps": "364355", "ups": "3.09", "wpb": "117978", "bsz": "256", "num_updates": "273200", "lr": "0.000734141", "gnorm": "0.681", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20.4", "wall": "89214"} [2022-08-02 02:16:05,963][train_inner][INFO] - {"epoch": 6, "update": 5.312, "loss": "2.305", "ppl": "4.94", "wps": "366827", "ups": "3.1", "wpb": "118195", "bsz": "256", "num_updates": "273400", "lr": "0.000733939", "gnorm": "0.678", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20.7", "wall": "89278"} [2022-08-02 02:17:10,920][train_inner][INFO] - {"epoch": 6, "update": 5.316, "loss": "2.309", "ppl": "4.95", "wps": "364106", "ups": "3.08", "wpb": "118255", "bsz": "256", "num_updates": "273600", "lr": "0.000733737", "gnorm": "0.681", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "22.6", "wall": "89343"} [2022-08-02 02:18:16,088][train_inner][INFO] - {"epoch": 6, "update": 5.32, "loss": "2.307", "ppl": "4.95", "wps": "362300", "ups": "3.07", "wpb": "118049", "bsz": "256", "num_updates": "273800", "lr": "0.000733535", "gnorm": "0.68", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20", "wall": "89408"} [2022-08-02 02:19:20,773][train_inner][INFO] - {"epoch": 6, "update": 5.324, "loss": "2.305", "ppl": "4.94", "wps": "366668", "ups": "3.09", "wpb": "118588", "bsz": "256", "num_updates": "274000", "lr": "0.000733333", "gnorm": "0.676", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "27.6", "wall": "89473"} [2022-08-02 02:20:25,410][train_inner][INFO] - {"epoch": 6, "update": 5.328, "loss": "2.298", "ppl": "4.92", "wps": "366832", "ups": "3.09", "wpb": "118553", "bsz": "256", "num_updates": "274200", "lr": "0.000733131", "gnorm": "0.677", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "24.9", "wall": "89538"} [2022-08-02 02:21:30,331][train_inner][INFO] - {"epoch": 6, "update": 5.332, "loss": "2.302", "ppl": "4.93", "wps": "365336", "ups": "3.08", "wpb": "118588", "bsz": "256", "num_updates": "274400", "lr": "0.000732929", "gnorm": "0.68", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.2", "wall": "89603"} [2022-08-02 02:22:35,105][train_inner][INFO] - {"epoch": 6, "update": 5.335, "loss": "2.302", "ppl": "4.93", "wps": "365739", "ups": "3.09", "wpb": "118449", "bsz": "256", "num_updates": "274600", "lr": "0.000732727", "gnorm": "0.676", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20.2", "wall": "89667"} [2022-08-02 02:23:40,961][train_inner][INFO] - {"epoch": 6, "update": 5.339, "loss": "2.304", "ppl": "4.94", "wps": "358217", "ups": "3.04", "wpb": "117953", "bsz": "256", "num_updates": "274800", "lr": "0.000732525", "gnorm": "0.687", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.2", "wall": "89733"} [2022-08-02 02:24:45,559][train_inner][INFO] - {"epoch": 6, "update": 5.343, "loss": "2.303", "ppl": "4.94", "wps": "366597", "ups": "3.1", "wpb": "118405", "bsz": "256", "num_updates": "275000", "lr": "0.000732323", "gnorm": "0.677", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "19.8", "wall": "89798"} [2022-08-02 02:25:50,633][train_inner][INFO] - {"epoch": 6, "update": 5.347, "loss": "2.302", "ppl": "4.93", "wps": "363847", "ups": "3.07", "wpb": "118383", "bsz": "256", "num_updates": "275200", "lr": "0.000732121", "gnorm": "0.678", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.8", "wall": "89863"} [2022-08-02 02:26:55,337][train_inner][INFO] - {"epoch": 6, "update": 5.351, "loss": "2.298", "ppl": "4.92", "wps": "365833", "ups": "3.09", "wpb": "118352", "bsz": "256", "num_updates": "275400", "lr": "0.000731919", "gnorm": "0.678", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "19.8", "wall": "89928"} [2022-08-02 02:28:00,305][train_inner][INFO] - {"epoch": 6, "update": 5.355, "loss": "2.304", "ppl": "4.94", "wps": "362486", "ups": "3.08", "wpb": "117749", "bsz": "256", "num_updates": "275600", "lr": "0.000731717", "gnorm": "0.677", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "22.9", "wall": "89993"} [2022-08-02 02:29:05,272][train_inner][INFO] - {"epoch": 6, "update": 5.359, "loss": "2.3", "ppl": "4.93", "wps": "365140", "ups": "3.08", "wpb": "118609", "bsz": "256", "num_updates": "275800", "lr": "0.000731515", "gnorm": "0.677", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "22.1", "wall": "90058"} [2022-08-02 02:29:08,831][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 [2022-08-02 02:29:09,084][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5 [2022-08-02 02:29:21,422][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.25 [2022-08-02 02:30:10,667][train_inner][INFO] - {"epoch": 6, "update": 5.363, "loss": "2.321", "ppl": "5", "wps": "362487", "ups": "3.06", "wpb": "118521", "bsz": "256", "num_updates": "276000", "lr": "0.000731313", "gnorm": "0.916", "clip": "2", "loss_scale": "0.25", "train_wall": "65", "gb_free": "19.8", "wall": "90123"} [2022-08-02 02:31:15,528][train_inner][INFO] - {"epoch": 6, "update": 5.367, "loss": "2.306", "ppl": "4.94", "wps": "362753", "ups": "3.08", "wpb": "117642", "bsz": "256", "num_updates": "276200", "lr": "0.000731111", "gnorm": "0.68", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "22.4", "wall": "90188"} [2022-08-02 02:32:20,427][train_inner][INFO] - {"epoch": 6, "update": 5.37, "loss": "2.304", "ppl": "4.94", "wps": "364043", "ups": "3.08", "wpb": "118128", "bsz": "256", "num_updates": "276400", "lr": "0.000730909", "gnorm": "0.676", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "23.5", "wall": "90253"} [2022-08-02 02:33:25,469][train_inner][INFO] - {"epoch": 6, "update": 5.374, "loss": "2.302", "ppl": "4.93", "wps": "365447", "ups": "3.07", "wpb": "118846", "bsz": "256", "num_updates": "276600", "lr": "0.000730707", "gnorm": "0.673", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "22.1", "wall": "90318"} [2022-08-02 02:34:30,282][train_inner][INFO] - {"epoch": 6, "update": 5.378, "loss": "2.298", "ppl": "4.92", "wps": "365848", "ups": "3.09", "wpb": "118557", "bsz": "256", "num_updates": "276800", "lr": "0.000730505", "gnorm": "0.675", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "19.7", "wall": "90383"} [2022-08-02 02:35:34,924][train_inner][INFO] - {"epoch": 6, "update": 5.382, "loss": "2.304", "ppl": "4.94", "wps": "365526", "ups": "3.09", "wpb": "118140", "bsz": "256", "num_updates": "277000", "lr": "0.000730303", "gnorm": "0.678", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "28", "wall": "90447"} [2022-08-02 02:36:41,029][train_inner][INFO] - {"epoch": 6, "update": 5.386, "loss": "2.292", "ppl": "4.9", "wps": "358967", "ups": "3.03", "wpb": "118646", "bsz": "256", "num_updates": "277200", "lr": "0.000730101", "gnorm": "0.673", "clip": "0", "loss_scale": "0.25", "train_wall": "66", "gb_free": "20.9", "wall": "90513"} [2022-08-02 02:37:46,017][train_inner][INFO] - {"epoch": 6, "update": 5.39, "loss": "2.304", "ppl": "4.94", "wps": "362906", "ups": "3.08", "wpb": "117920", "bsz": "256", "num_updates": "277400", "lr": "0.000729899", "gnorm": "0.675", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "24.8", "wall": "90578"} [2022-08-02 02:38:50,991][train_inner][INFO] - {"epoch": 6, "update": 5.394, "loss": "2.292", "ppl": "4.9", "wps": "365662", "ups": "3.08", "wpb": "118791", "bsz": "256", "num_updates": "277600", "lr": "0.000729697", "gnorm": "0.676", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "21.9", "wall": "90643"} [2022-08-02 02:39:56,089][train_inner][INFO] - {"epoch": 6, "update": 5.398, "loss": "2.299", "ppl": "4.92", "wps": "363877", "ups": "3.07", "wpb": "118435", "bsz": "256", "num_updates": "277800", "lr": "0.000729495", "gnorm": "0.678", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "25.7", "wall": "90708"} [2022-08-02 02:41:00,662][train_inner][INFO] - {"epoch": 6, "update": 5.401, "loss": "2.302", "ppl": "4.93", "wps": "365872", "ups": "3.1", "wpb": "118127", "bsz": "256", "num_updates": "278000", "lr": "0.000729293", "gnorm": "0.676", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "22.1", "wall": "90773"} [2022-08-02 02:42:05,348][train_inner][INFO] - {"epoch": 6, "update": 5.405, "loss": "2.302", "ppl": "4.93", "wps": "365804", "ups": "3.09", "wpb": "118310", "bsz": "256", "num_updates": "278200", "lr": "0.000729091", "gnorm": "0.677", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20.1", "wall": "90838"} [2022-08-02 02:43:11,384][train_inner][INFO] - {"epoch": 6, "update": 5.409, "loss": "2.299", "ppl": "4.92", "wps": "358367", "ups": "3.03", "wpb": "118323", "bsz": "256", "num_updates": "278400", "lr": "0.000728889", "gnorm": "0.676", "clip": "0", "loss_scale": "0.5", "train_wall": "66", "gb_free": "20.2", "wall": "90904"} [2022-08-02 02:44:16,220][train_inner][INFO] - {"epoch": 6, "update": 5.413, "loss": "2.303", "ppl": "4.93", "wps": "366392", "ups": "3.08", "wpb": "118777", "bsz": "256", "num_updates": "278600", "lr": "0.000728687", "gnorm": "0.683", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "23.4", "wall": "90969"} [2022-08-02 02:45:20,339][train_inner][INFO] - {"epoch": 6, "update": 5.417, "loss": "2.302", "ppl": "4.93", "wps": "366615", "ups": "3.12", "wpb": "117532", "bsz": "256", "num_updates": "278800", "lr": "0.000728485", "gnorm": "0.679", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20.6", "wall": "91033"} [2022-08-02 02:46:25,123][train_inner][INFO] - {"epoch": 6, "update": 5.421, "loss": "2.304", "ppl": "4.94", "wps": "365676", "ups": "3.09", "wpb": "118448", "bsz": "256", "num_updates": "279000", "lr": "0.000728283", "gnorm": "0.676", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "19.9", "wall": "91097"} [2022-08-02 02:47:30,098][train_inner][INFO] - {"epoch": 6, "update": 5.425, "loss": "2.306", "ppl": "4.94", "wps": "363234", "ups": "3.08", "wpb": "118005", "bsz": "256", "num_updates": "279200", "lr": "0.000728081", "gnorm": "0.678", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.8", "wall": "91162"} [2022-08-02 02:48:35,196][train_inner][INFO] - {"epoch": 6, "update": 5.429, "loss": "2.292", "ppl": "4.9", "wps": "365588", "ups": "3.07", "wpb": "118993", "bsz": "256", "num_updates": "279400", "lr": "0.000727879", "gnorm": "0.675", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.2", "wall": "91228"} [2022-08-02 02:49:40,030][train_inner][INFO] - {"epoch": 6, "update": 5.433, "loss": "2.3", "ppl": "4.93", "wps": "366615", "ups": "3.08", "wpb": "118843", "bsz": "256", "num_updates": "279600", "lr": "0.000727677", "gnorm": "0.681", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "24", "wall": "91292"} [2022-08-02 02:50:45,066][train_inner][INFO] - {"epoch": 6, "update": 5.436, "loss": "2.301", "ppl": "4.93", "wps": "363565", "ups": "3.08", "wpb": "118222", "bsz": "256", "num_updates": "279800", "lr": "0.000727475", "gnorm": "0.678", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "19.9", "wall": "91357"} [2022-08-02 02:51:49,867][train_inner][INFO] - {"epoch": 6, "update": 5.44, "loss": "2.297", "ppl": "4.91", "wps": "365257", "ups": "3.09", "wpb": "118344", "bsz": "256", "num_updates": "280000", "lr": "0.000727273", "gnorm": "0.674", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "22.3", "wall": "91422"} [2022-08-02 02:52:55,016][train_inner][INFO] - {"epoch": 6, "update": 5.444, "loss": "2.301", "ppl": "4.93", "wps": "363492", "ups": "3.07", "wpb": "118405", "bsz": "256", "num_updates": "280200", "lr": "0.000727071", "gnorm": "0.672", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "25.8", "wall": "91487"} [2022-08-02 02:54:00,212][train_inner][INFO] - {"epoch": 6, "update": 5.448, "loss": "2.293", "ppl": "4.9", "wps": "365716", "ups": "3.07", "wpb": "119214", "bsz": "256", "num_updates": "280400", "lr": "0.000726869", "gnorm": "0.67", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "22.4", "wall": "91553"} [2022-08-02 02:55:05,196][train_inner][INFO] - {"epoch": 6, "update": 5.452, "loss": "2.292", "ppl": "4.9", "wps": "365013", "ups": "3.08", "wpb": "118598", "bsz": "256", "num_updates": "280600", "lr": "0.000726667", "gnorm": "0.674", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.5", "wall": "91618"} [2022-08-02 02:56:10,241][train_inner][INFO] - {"epoch": 6, "update": 5.456, "loss": "2.299", "ppl": "4.92", "wps": "365128", "ups": "3.07", "wpb": "118747", "bsz": "256", "num_updates": "280800", "lr": "0.000726465", "gnorm": "0.678", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.9", "wall": "91683"} [2022-08-02 02:57:15,034][train_inner][INFO] - {"epoch": 6, "update": 5.46, "loss": "2.297", "ppl": "4.92", "wps": "365702", "ups": "3.09", "wpb": "118472", "bsz": "256", "num_updates": "281000", "lr": "0.000726263", "gnorm": "0.672", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20.1", "wall": "91747"} [2022-08-02 02:58:20,044][train_inner][INFO] - {"epoch": 6, "update": 5.464, "loss": "2.3", "ppl": "4.93", "wps": "361969", "ups": "3.08", "wpb": "117658", "bsz": "256", "num_updates": "281200", "lr": "0.000726061", "gnorm": "0.675", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.8", "wall": "91812"} [2022-08-02 02:59:25,238][train_inner][INFO] - {"epoch": 6, "update": 5.468, "loss": "2.296", "ppl": "4.91", "wps": "363710", "ups": "3.07", "wpb": "118556", "bsz": "256", "num_updates": "281400", "lr": "0.000725859", "gnorm": "0.674", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.7", "wall": "91878"} [2022-08-02 03:00:30,020][train_inner][INFO] - {"epoch": 6, "update": 5.471, "loss": "2.302", "ppl": "4.93", "wps": "364670", "ups": "3.09", "wpb": "118118", "bsz": "256", "num_updates": "281600", "lr": "0.000725657", "gnorm": "0.696", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20.2", "wall": "91942"} [2022-08-02 03:01:34,638][train_inner][INFO] - {"epoch": 6, "update": 5.475, "loss": "2.301", "ppl": "4.93", "wps": "365386", "ups": "3.1", "wpb": "118051", "bsz": "256", "num_updates": "281800", "lr": "0.000725455", "gnorm": "0.678", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "19.8", "wall": "92007"} [2022-08-02 03:02:39,706][train_inner][INFO] - {"epoch": 6, "update": 5.479, "loss": "2.296", "ppl": "4.91", "wps": "363723", "ups": "3.07", "wpb": "118332", "bsz": "256", "num_updates": "282000", "lr": "0.000725253", "gnorm": "0.676", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "23.6", "wall": "92072"} [2022-08-02 03:03:44,676][train_inner][INFO] - {"epoch": 6, "update": 5.483, "loss": "2.294", "ppl": "4.9", "wps": "365128", "ups": "3.08", "wpb": "118610", "bsz": "256", "num_updates": "282200", "lr": "0.000725051", "gnorm": "0.673", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "24.1", "wall": "92137"} [2022-08-02 03:04:49,657][train_inner][INFO] - {"epoch": 6, "update": 5.487, "loss": "2.296", "ppl": "4.91", "wps": "364437", "ups": "3.08", "wpb": "118405", "bsz": "256", "num_updates": "282400", "lr": "0.000724848", "gnorm": "0.675", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "22.9", "wall": "92202"} [2022-08-02 03:05:13,516][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 [2022-08-02 03:05:54,606][train_inner][INFO] - {"epoch": 6, "update": 5.491, "loss": "2.306", "ppl": "4.95", "wps": "365092", "ups": "3.08", "wpb": "118561", "bsz": "256", "num_updates": "282600", "lr": "0.000724646", "gnorm": "0.818", "clip": "0.5", "loss_scale": "1", "train_wall": "65", "gb_free": "22.4", "wall": "92267"} [2022-08-02 03:06:58,856][train_inner][INFO] - {"epoch": 6, "update": 5.495, "loss": "2.305", "ppl": "4.94", "wps": "368294", "ups": "3.11", "wpb": "118312", "bsz": "256", "num_updates": "282800", "lr": "0.000724444", "gnorm": "0.675", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "23.4", "wall": "92331"} [2022-08-02 03:08:03,439][train_inner][INFO] - {"epoch": 6, "update": 5.499, "loss": "2.297", "ppl": "4.91", "wps": "363715", "ups": "3.1", "wpb": "117447", "bsz": "256", "num_updates": "283000", "lr": "0.000724242", "gnorm": "0.68", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "19.7", "wall": "92396"} [2022-08-02 03:09:08,096][train_inner][INFO] - {"epoch": 6, "update": 5.503, "loss": "2.294", "ppl": "4.91", "wps": "366280", "ups": "3.09", "wpb": "118410", "bsz": "256", "num_updates": "283200", "lr": "0.00072404", "gnorm": "0.675", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "21.5", "wall": "92460"} [2022-08-02 03:10:12,992][train_inner][INFO] - {"epoch": 6, "update": 5.506, "loss": "2.297", "ppl": "4.91", "wps": "362378", "ups": "3.08", "wpb": "117583", "bsz": "256", "num_updates": "283400", "lr": "0.000723838", "gnorm": "0.68", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "26", "wall": "92525"} [2022-08-02 03:11:17,721][train_inner][INFO] - {"epoch": 6, "update": 5.51, "loss": "2.304", "ppl": "4.94", "wps": "365850", "ups": "3.09", "wpb": "118403", "bsz": "256", "num_updates": "283600", "lr": "0.000723636", "gnorm": "0.674", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "28.2", "wall": "92590"} [2022-08-02 03:12:21,979][train_inner][INFO] - {"epoch": 6, "update": 5.514, "loss": "2.299", "ppl": "4.92", "wps": "367906", "ups": "3.11", "wpb": "118203", "bsz": "256", "num_updates": "283800", "lr": "0.000723434", "gnorm": "0.684", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "19.8", "wall": "92654"} [2022-08-02 03:13:26,779][train_inner][INFO] - {"epoch": 6, "update": 5.518, "loss": "2.297", "ppl": "4.91", "wps": "364385", "ups": "3.09", "wpb": "118059", "bsz": "256", "num_updates": "284000", "lr": "0.000723232", "gnorm": "0.679", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "21.4", "wall": "92719"} [2022-08-02 03:14:31,517][train_inner][INFO] - {"epoch": 6, "update": 5.522, "loss": "2.295", "ppl": "4.91", "wps": "365669", "ups": "3.09", "wpb": "118363", "bsz": "256", "num_updates": "284200", "lr": "0.00072303", "gnorm": "0.677", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20.3", "wall": "92784"} [2022-08-02 03:15:36,505][train_inner][INFO] - {"epoch": 6, "update": 5.526, "loss": "2.303", "ppl": "4.93", "wps": "364438", "ups": "3.08", "wpb": "118418", "bsz": "256", "num_updates": "284400", "lr": "0.000722828", "gnorm": "0.677", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.1", "wall": "92849"} [2022-08-02 03:16:41,146][train_inner][INFO] - {"epoch": 6, "update": 5.53, "loss": "2.298", "ppl": "4.92", "wps": "365924", "ups": "3.09", "wpb": "118266", "bsz": "256", "num_updates": "284600", "lr": "0.000722626", "gnorm": "0.674", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.2", "wall": "92913"} [2022-08-02 03:17:45,722][train_inner][INFO] - {"epoch": 6, "update": 5.534, "loss": "2.296", "ppl": "4.91", "wps": "367179", "ups": "3.1", "wpb": "118553", "bsz": "256", "num_updates": "284800", "lr": "0.000722424", "gnorm": "0.677", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "19.8", "wall": "92978"} [2022-08-02 03:18:50,670][train_inner][INFO] - {"epoch": 6, "update": 5.537, "loss": "2.295", "ppl": "4.91", "wps": "363140", "ups": "3.08", "wpb": "117924", "bsz": "256", "num_updates": "285000", "lr": "0.000722222", "gnorm": "0.676", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.5", "wall": "93043"} [2022-08-02 03:19:55,478][train_inner][INFO] - {"epoch": 6, "update": 5.541, "loss": "2.296", "ppl": "4.91", "wps": "365520", "ups": "3.09", "wpb": "118442", "bsz": "256", "num_updates": "285200", "lr": "0.00072202", "gnorm": "0.69", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.1", "wall": "93108"} [2022-08-02 03:21:00,038][train_inner][INFO] - {"epoch": 6, "update": 5.545, "loss": "2.289", "ppl": "4.89", "wps": "368128", "ups": "3.1", "wpb": "118829", "bsz": "256", "num_updates": "285400", "lr": "0.000721818", "gnorm": "0.673", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.1", "wall": "93172"} [2022-08-02 03:22:05,105][train_inner][INFO] - {"epoch": 6, "update": 5.549, "loss": "2.29", "ppl": "4.89", "wps": "362469", "ups": "3.07", "wpb": "117923", "bsz": "256", "num_updates": "285600", "lr": "0.000721616", "gnorm": "0.674", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.3", "wall": "93237"} [2022-08-02 03:23:10,526][train_inner][INFO] - {"epoch": 6, "update": 5.553, "loss": "2.296", "ppl": "4.91", "wps": "362538", "ups": "3.06", "wpb": "118585", "bsz": "256", "num_updates": "285800", "lr": "0.000721414", "gnorm": "0.673", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.7", "wall": "93303"} [2022-08-02 03:24:15,735][train_inner][INFO] - {"epoch": 6, "update": 5.557, "loss": "2.298", "ppl": "4.92", "wps": "362976", "ups": "3.07", "wpb": "118345", "bsz": "256", "num_updates": "286000", "lr": "0.000721212", "gnorm": "0.74", "clip": "0.5", "loss_scale": "2", "train_wall": "65", "gb_free": "20.2", "wall": "93368"} [2022-08-02 03:25:20,537][train_inner][INFO] - {"epoch": 6, "update": 5.561, "loss": "2.299", "ppl": "4.92", "wps": "364418", "ups": "3.09", "wpb": "118074", "bsz": "256", "num_updates": "286200", "lr": "0.00072101", "gnorm": "0.676", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.9", "wall": "93433"} [2022-08-02 03:26:24,943][train_inner][INFO] - {"epoch": 6, "update": 5.565, "loss": "2.293", "ppl": "4.9", "wps": "368688", "ups": "3.11", "wpb": "118727", "bsz": "256", "num_updates": "286400", "lr": "0.000720808", "gnorm": "0.673", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.5", "wall": "93497"} [2022-08-02 03:27:29,519][train_inner][INFO] - {"epoch": 6, "update": 5.569, "loss": "2.294", "ppl": "4.9", "wps": "363503", "ups": "3.1", "wpb": "117366", "bsz": "256", "num_updates": "286600", "lr": "0.000720606", "gnorm": "0.678", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.8", "wall": "93562"} [2022-08-02 03:28:34,520][train_inner][INFO] - {"epoch": 6, "update": 5.572, "loss": "2.295", "ppl": "4.91", "wps": "365030", "ups": "3.08", "wpb": "118634", "bsz": "256", "num_updates": "286800", "lr": "0.000720404", "gnorm": "0.674", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.8", "wall": "93627"} [2022-08-02 03:29:39,467][train_inner][INFO] - {"epoch": 6, "update": 5.576, "loss": "2.295", "ppl": "4.91", "wps": "365176", "ups": "3.08", "wpb": "118583", "bsz": "256", "num_updates": "287000", "lr": "0.000720202", "gnorm": "0.67", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "24", "wall": "93692"} [2022-08-02 03:30:44,517][train_inner][INFO] - {"epoch": 6, "update": 5.58, "loss": "2.3", "ppl": "4.92", "wps": "363733", "ups": "3.07", "wpb": "118302", "bsz": "256", "num_updates": "287200", "lr": "0.00072", "gnorm": "0.677", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.1", "wall": "93757"} [2022-08-02 03:31:49,577][train_inner][INFO] - {"epoch": 6, "update": 5.584, "loss": "2.295", "ppl": "4.91", "wps": "362961", "ups": "3.07", "wpb": "118069", "bsz": "256", "num_updates": "287400", "lr": "0.000719798", "gnorm": "0.681", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.8", "wall": "93822"} [2022-08-02 03:32:54,148][train_inner][INFO] - {"epoch": 6, "update": 5.588, "loss": "2.289", "ppl": "4.89", "wps": "366909", "ups": "3.1", "wpb": "118458", "bsz": "256", "num_updates": "287600", "lr": "0.000719596", "gnorm": "0.673", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.5", "wall": "93886"} [2022-08-02 03:34:00,388][train_inner][INFO] - {"epoch": 6, "update": 5.592, "loss": "2.293", "ppl": "4.9", "wps": "356126", "ups": "3.02", "wpb": "117947", "bsz": "256", "num_updates": "287800", "lr": "0.000719394", "gnorm": "0.676", "clip": "0", "loss_scale": "4", "train_wall": "66", "gb_free": "22.7", "wall": "93953"} [2022-08-02 03:35:05,382][train_inner][INFO] - {"epoch": 6, "update": 5.596, "loss": "2.288", "ppl": "4.88", "wps": "364117", "ups": "3.08", "wpb": "118324", "bsz": "256", "num_updates": "288000", "lr": "0.000719192", "gnorm": "0.675", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.8", "wall": "94018"} [2022-08-02 03:36:09,965][train_inner][INFO] - {"epoch": 6, "update": 5.6, "loss": "2.294", "ppl": "4.9", "wps": "365440", "ups": "3.1", "wpb": "118004", "bsz": "256", "num_updates": "288200", "lr": "0.00071899", "gnorm": "0.676", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22.3", "wall": "94082"} [2022-08-02 03:37:15,224][train_inner][INFO] - {"epoch": 6, "update": 5.603, "loss": "2.289", "ppl": "4.89", "wps": "363012", "ups": "3.06", "wpb": "118447", "bsz": "256", "num_updates": "288400", "lr": "0.000718788", "gnorm": "0.674", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21", "wall": "94148"} [2022-08-02 03:38:19,896][train_inner][INFO] - {"epoch": 6, "update": 5.607, "loss": "2.298", "ppl": "4.92", "wps": "365280", "ups": "3.09", "wpb": "118116", "bsz": "256", "num_updates": "288600", "lr": "0.000718586", "gnorm": "0.676", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22.9", "wall": "94212"} [2022-08-02 03:39:24,898][train_inner][INFO] - {"epoch": 6, "update": 5.611, "loss": "2.295", "ppl": "4.91", "wps": "364664", "ups": "3.08", "wpb": "118517", "bsz": "256", "num_updates": "288800", "lr": "0.000718384", "gnorm": "0.674", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22", "wall": "94277"} [2022-08-02 03:40:29,827][train_inner][INFO] - {"epoch": 6, "update": 5.615, "loss": "2.297", "ppl": "4.92", "wps": "365408", "ups": "3.08", "wpb": "118626", "bsz": "256", "num_updates": "289000", "lr": "0.000718182", "gnorm": "0.688", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.1", "wall": "94342"} [2022-08-02 03:41:34,381][train_inner][INFO] - {"epoch": 6, "update": 5.619, "loss": "2.289", "ppl": "4.89", "wps": "367721", "ups": "3.1", "wpb": "118689", "bsz": "256", "num_updates": "289200", "lr": "0.00071798", "gnorm": "0.674", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.4", "wall": "94407"} [2022-08-02 03:42:39,336][train_inner][INFO] - {"epoch": 6, "update": 5.623, "loss": "2.293", "ppl": "4.9", "wps": "363873", "ups": "3.08", "wpb": "118174", "bsz": "256", "num_updates": "289400", "lr": "0.000717778", "gnorm": "0.68", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "94472"} [2022-08-02 03:43:44,397][train_inner][INFO] - {"epoch": 6, "update": 5.627, "loss": "2.298", "ppl": "4.92", "wps": "364060", "ups": "3.07", "wpb": "118429", "bsz": "256", "num_updates": "289600", "lr": "0.000717576", "gnorm": "0.677", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.7", "wall": "94537"} [2022-08-02 03:44:49,003][train_inner][INFO] - {"epoch": 6, "update": 5.631, "loss": "2.297", "ppl": "4.91", "wps": "365225", "ups": "3.1", "wpb": "117977", "bsz": "256", "num_updates": "289800", "lr": "0.000717374", "gnorm": "0.674", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "25.8", "wall": "94601"} [2022-08-02 03:45:53,899][train_inner][INFO] - {"epoch": 6, "update": 5.635, "loss": "2.293", "ppl": "4.9", "wps": "363909", "ups": "3.08", "wpb": "118079", "bsz": "256", "num_updates": "290000", "lr": "0.000717172", "gnorm": "0.68", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21", "wall": "94666"} [2022-08-02 03:46:59,171][train_inner][INFO] - {"epoch": 6, "update": 5.638, "loss": "2.292", "ppl": "4.9", "wps": "363496", "ups": "3.06", "wpb": "118629", "bsz": "256", "num_updates": "290200", "lr": "0.00071697", "gnorm": "0.674", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.3", "wall": "94731"} [2022-08-02 03:48:04,216][train_inner][INFO] - {"epoch": 6, "update": 5.642, "loss": "2.292", "ppl": "4.9", "wps": "364063", "ups": "3.07", "wpb": "118400", "bsz": "256", "num_updates": "290400", "lr": "0.000716768", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.4", "wall": "94797"} [2022-08-02 03:49:09,069][train_inner][INFO] - {"epoch": 6, "update": 5.646, "loss": "2.29", "ppl": "4.89", "wps": "365349", "ups": "3.08", "wpb": "118469", "bsz": "256", "num_updates": "290600", "lr": "0.000716566", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.8", "wall": "94861"} [2022-08-02 03:50:14,077][train_inner][INFO] - {"epoch": 6, "update": 5.65, "loss": "2.291", "ppl": "4.89", "wps": "363353", "ups": "3.08", "wpb": "118102", "bsz": "256", "num_updates": "290800", "lr": "0.000716364", "gnorm": "0.673", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "94926"} [2022-08-02 03:51:18,572][train_inner][INFO] - {"epoch": 6, "update": 5.654, "loss": "2.299", "ppl": "4.92", "wps": "365146", "ups": "3.1", "wpb": "117749", "bsz": "256", "num_updates": "291000", "lr": "0.000716162", "gnorm": "0.68", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.8", "wall": "94991"} [2022-08-02 03:52:23,480][train_inner][INFO] - {"epoch": 6, "update": 5.658, "loss": "2.291", "ppl": "4.89", "wps": "365084", "ups": "3.08", "wpb": "118483", "bsz": "256", "num_updates": "291200", "lr": "0.00071596", "gnorm": "0.674", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.6", "wall": "95056"} [2022-08-02 03:53:28,784][train_inner][INFO] - {"epoch": 6, "update": 5.662, "loss": "2.288", "ppl": "4.88", "wps": "362560", "ups": "3.06", "wpb": "118380", "bsz": "256", "num_updates": "291400", "lr": "0.000715758", "gnorm": "0.673", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "95121"} [2022-08-02 03:53:53,280][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-02 03:54:34,055][train_inner][INFO] - {"epoch": 6, "update": 5.666, "loss": "2.287", "ppl": "4.88", "wps": "361660", "ups": "3.06", "wpb": "118028", "bsz": "256", "num_updates": "291600", "lr": "0.000715556", "gnorm": "0.673", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.3", "wall": "95186"} [2022-08-02 03:55:38,955][train_inner][INFO] - {"epoch": 6, "update": 5.67, "loss": "2.299", "ppl": "4.92", "wps": "362994", "ups": "3.08", "wpb": "117792", "bsz": "256", "num_updates": "291800", "lr": "0.000715354", "gnorm": "0.679", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25", "wall": "95251"} [2022-08-02 03:56:44,914][train_inner][INFO] - {"epoch": 6, "update": 5.673, "loss": "2.29", "ppl": "4.89", "wps": "359589", "ups": "3.03", "wpb": "118588", "bsz": "256", "num_updates": "292000", "lr": "0.000715152", "gnorm": "0.673", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "23.7", "wall": "95317"} [2022-08-02 03:57:49,779][train_inner][INFO] - {"epoch": 6, "update": 5.677, "loss": "2.29", "ppl": "4.89", "wps": "364269", "ups": "3.08", "wpb": "118140", "bsz": "256", "num_updates": "292200", "lr": "0.000714949", "gnorm": "0.677", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20", "wall": "95382"} [2022-08-02 03:58:54,675][train_inner][INFO] - {"epoch": 6, "update": 5.681, "loss": "2.289", "ppl": "4.89", "wps": "365565", "ups": "3.08", "wpb": "118616", "bsz": "256", "num_updates": "292400", "lr": "0.000714747", "gnorm": "0.672", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "95447"} [2022-08-02 03:59:40,880][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 [2022-08-02 03:59:41,825][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 [2022-08-02 03:59:42,448][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 [2022-08-02 04:00:01,309][train_inner][INFO] - {"epoch": 6, "update": 5.685, "loss": "2.291", "ppl": "4.9", "wps": "356187", "ups": "3", "wpb": "118669", "bsz": "256", "num_updates": "292600", "lr": "0.000714545", "gnorm": "0.758", "clip": "0.5", "loss_scale": "1", "train_wall": "66", "gb_free": "20", "wall": "95514"} [2022-08-02 04:01:06,034][train_inner][INFO] - {"epoch": 6, "update": 5.689, "loss": "2.293", "ppl": "4.9", "wps": "365645", "ups": "3.09", "wpb": "118331", "bsz": "256", "num_updates": "292800", "lr": "0.000714343", "gnorm": "0.671", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "19.9", "wall": "95578"} [2022-08-02 04:02:12,041][train_inner][INFO] - {"epoch": 6, "update": 5.693, "loss": "2.29", "ppl": "4.89", "wps": "358062", "ups": "3.03", "wpb": "118171", "bsz": "256", "num_updates": "293000", "lr": "0.000714141", "gnorm": "0.674", "clip": "0", "loss_scale": "1", "train_wall": "66", "gb_free": "21.5", "wall": "95644"} [2022-08-02 04:03:17,233][train_inner][INFO] - {"epoch": 6, "update": 5.697, "loss": "2.289", "ppl": "4.89", "wps": "361786", "ups": "3.07", "wpb": "117927", "bsz": "256", "num_updates": "293200", "lr": "0.000713939", "gnorm": "0.676", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.2", "wall": "95710"} [2022-08-02 04:04:22,248][train_inner][INFO] - {"epoch": 6, "update": 5.701, "loss": "2.289", "ppl": "4.89", "wps": "364552", "ups": "3.08", "wpb": "118504", "bsz": "256", "num_updates": "293400", "lr": "0.000713737", "gnorm": "0.676", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "26.2", "wall": "95775"} [2022-08-02 04:04:37,036][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5 [2022-08-02 04:04:37,317][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.25 [2022-08-02 04:05:28,040][train_inner][INFO] - {"epoch": 6, "update": 5.705, "loss": "2.311", "ppl": "4.96", "wps": "358679", "ups": "3.04", "wpb": "117989", "bsz": "256", "num_updates": "293600", "lr": "0.000713535", "gnorm": "1.006", "clip": "0.5", "loss_scale": "0.25", "train_wall": "65", "gb_free": "22.7", "wall": "95840"} [2022-08-02 04:06:32,973][train_inner][INFO] - {"epoch": 6, "update": 5.708, "loss": "2.287", "ppl": "4.88", "wps": "363848", "ups": "3.08", "wpb": "118129", "bsz": "256", "num_updates": "293800", "lr": "0.000713333", "gnorm": "0.675", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "22.1", "wall": "95905"} [2022-08-02 04:07:37,723][train_inner][INFO] - {"epoch": 6, "update": 5.712, "loss": "2.286", "ppl": "4.88", "wps": "366194", "ups": "3.09", "wpb": "118553", "bsz": "256", "num_updates": "294000", "lr": "0.000713131", "gnorm": "0.676", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "20.2", "wall": "95970"} [2022-08-02 04:08:43,869][train_inner][INFO] - {"epoch": 6, "update": 5.716, "loss": "2.287", "ppl": "4.88", "wps": "358531", "ups": "3.02", "wpb": "118576", "bsz": "256", "num_updates": "294200", "lr": "0.000712929", "gnorm": "0.675", "clip": "0", "loss_scale": "0.25", "train_wall": "66", "gb_free": "19.7", "wall": "96036"} [2022-08-02 04:09:49,238][train_inner][INFO] - {"epoch": 6, "update": 5.72, "loss": "2.294", "ppl": "4.9", "wps": "362462", "ups": "3.06", "wpb": "118466", "bsz": "256", "num_updates": "294400", "lr": "0.000712727", "gnorm": "0.671", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "23.1", "wall": "96102"} [2022-08-02 04:10:54,029][train_inner][INFO] - {"epoch": 6, "update": 5.724, "loss": "2.293", "ppl": "4.9", "wps": "362650", "ups": "3.09", "wpb": "117481", "bsz": "256", "num_updates": "294600", "lr": "0.000712525", "gnorm": "0.675", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "23.4", "wall": "96166"} [2022-08-02 04:11:59,191][train_inner][INFO] - {"epoch": 6, "update": 5.728, "loss": "2.289", "ppl": "4.89", "wps": "363438", "ups": "3.07", "wpb": "118409", "bsz": "256", "num_updates": "294800", "lr": "0.000712323", "gnorm": "0.675", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "23.7", "wall": "96232"} [2022-08-02 04:13:04,287][train_inner][INFO] - {"epoch": 6, "update": 5.732, "loss": "2.291", "ppl": "4.89", "wps": "364647", "ups": "3.07", "wpb": "118684", "bsz": "256", "num_updates": "295000", "lr": "0.000712121", "gnorm": "0.681", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "20.4", "wall": "96297"} [2022-08-02 04:14:10,309][train_inner][INFO] - {"epoch": 6, "update": 5.736, "loss": "2.285", "ppl": "4.87", "wps": "360146", "ups": "3.03", "wpb": "118885", "bsz": "256", "num_updates": "295200", "lr": "0.000711919", "gnorm": "0.671", "clip": "0", "loss_scale": "0.25", "train_wall": "66", "gb_free": "21.7", "wall": "96363"} [2022-08-02 04:15:15,300][train_inner][INFO] - {"epoch": 6, "update": 5.74, "loss": "2.29", "ppl": "4.89", "wps": "364150", "ups": "3.08", "wpb": "118331", "bsz": "256", "num_updates": "295400", "lr": "0.000711717", "gnorm": "0.676", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "20", "wall": "96428"} [2022-08-02 04:16:20,080][train_inner][INFO] - {"epoch": 6, "update": 5.743, "loss": "2.29", "ppl": "4.89", "wps": "362796", "ups": "3.09", "wpb": "117510", "bsz": "256", "num_updates": "295600", "lr": "0.000711515", "gnorm": "0.677", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20", "wall": "96492"} [2022-08-02 04:17:24,461][train_inner][INFO] - {"epoch": 6, "update": 5.747, "loss": "2.286", "ppl": "4.88", "wps": "367996", "ups": "3.11", "wpb": "118458", "bsz": "256", "num_updates": "295800", "lr": "0.000711313", "gnorm": "0.675", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20.3", "wall": "96557"} [2022-08-02 04:18:29,334][train_inner][INFO] - {"epoch": 6, "update": 5.751, "loss": "2.285", "ppl": "4.87", "wps": "364090", "ups": "3.08", "wpb": "118096", "bsz": "256", "num_updates": "296000", "lr": "0.000711111", "gnorm": "0.675", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "23", "wall": "96622"} [2022-08-02 04:19:34,128][train_inner][INFO] - {"epoch": 6, "update": 5.755, "loss": "2.293", "ppl": "4.9", "wps": "365290", "ups": "3.09", "wpb": "118341", "bsz": "256", "num_updates": "296200", "lr": "0.000710909", "gnorm": "0.683", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20.2", "wall": "96686"} [2022-08-02 04:20:40,117][train_inner][INFO] - {"epoch": 6, "update": 5.759, "loss": "2.289", "ppl": "4.89", "wps": "358294", "ups": "3.03", "wpb": "118217", "bsz": "256", "num_updates": "296400", "lr": "0.000710707", "gnorm": "0.679", "clip": "0", "loss_scale": "0.5", "train_wall": "66", "gb_free": "26.8", "wall": "96752"} [2022-08-02 04:21:45,080][train_inner][INFO] - {"epoch": 6, "update": 5.763, "loss": "2.281", "ppl": "4.86", "wps": "366023", "ups": "3.08", "wpb": "118887", "bsz": "256", "num_updates": "296600", "lr": "0.000710505", "gnorm": "0.672", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "19.9", "wall": "96817"} [2022-08-02 04:22:50,231][train_inner][INFO] - {"epoch": 6, "update": 5.767, "loss": "2.287", "ppl": "4.88", "wps": "364320", "ups": "3.07", "wpb": "118677", "bsz": "256", "num_updates": "296800", "lr": "0.000710303", "gnorm": "0.671", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "19.8", "wall": "96883"} [2022-08-02 04:23:54,827][train_inner][INFO] - {"epoch": 6, "update": 5.771, "loss": "2.292", "ppl": "4.9", "wps": "366270", "ups": "3.1", "wpb": "118296", "bsz": "256", "num_updates": "297000", "lr": "0.000710101", "gnorm": "0.671", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20.1", "wall": "96947"} [2022-08-02 04:24:59,635][train_inner][INFO] - {"epoch": 6, "update": 5.775, "loss": "2.286", "ppl": "4.88", "wps": "364227", "ups": "3.09", "wpb": "118022", "bsz": "256", "num_updates": "297200", "lr": "0.000709899", "gnorm": "0.675", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20.1", "wall": "97012"} [2022-08-02 04:26:04,727][train_inner][INFO] - {"epoch": 6, "update": 5.778, "loss": "2.282", "ppl": "4.86", "wps": "365210", "ups": "3.07", "wpb": "118861", "bsz": "256", "num_updates": "297400", "lr": "0.000709697", "gnorm": "0.672", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "25.4", "wall": "97077"} [2022-08-02 04:26:54,233][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5 [2022-08-02 04:26:54,513][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.25 [2022-08-02 04:26:55,093][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.125 [2022-08-02 04:27:10,720][train_inner][INFO] - {"epoch": 6, "update": 5.782, "loss": "2.339", "ppl": "5.06", "wps": "359490", "ups": "3.03", "wpb": "118617", "bsz": "256", "num_updates": "297600", "lr": "0.000709495", "gnorm": "1.014", "clip": "1.5", "loss_scale": "0.125", "train_wall": "66", "gb_free": "19.9", "wall": "97143"} [2022-08-02 04:28:15,140][train_inner][INFO] - {"epoch": 6, "update": 5.786, "loss": "2.295", "ppl": "4.91", "wps": "366438", "ups": "3.1", "wpb": "118028", "bsz": "256", "num_updates": "297800", "lr": "0.000709293", "gnorm": "0.925", "clip": "1", "loss_scale": "0.125", "train_wall": "64", "gb_free": "21.7", "wall": "97207"} [2022-08-02 04:29:20,099][train_inner][INFO] - {"epoch": 6, "update": 5.79, "loss": "2.281", "ppl": "4.86", "wps": "364980", "ups": "3.08", "wpb": "118542", "bsz": "256", "num_updates": "298000", "lr": "0.000709091", "gnorm": "0.671", "clip": "0", "loss_scale": "0.125", "train_wall": "65", "gb_free": "22.4", "wall": "97272"} [2022-08-02 04:30:24,765][train_inner][INFO] - {"epoch": 6, "update": 5.794, "loss": "2.283", "ppl": "4.87", "wps": "366722", "ups": "3.09", "wpb": "118570", "bsz": "256", "num_updates": "298200", "lr": "0.000708889", "gnorm": "0.674", "clip": "0", "loss_scale": "0.125", "train_wall": "64", "gb_free": "20", "wall": "97337"} [2022-08-02 04:31:29,532][train_inner][INFO] - {"epoch": 6, "update": 5.798, "loss": "2.306", "ppl": "4.94", "wps": "365363", "ups": "3.09", "wpb": "118316", "bsz": "256", "num_updates": "298400", "lr": "0.000708687", "gnorm": "0.954", "clip": "1.5", "loss_scale": "0.125", "train_wall": "64", "gb_free": "19.9", "wall": "97402"} [2022-08-02 04:32:34,138][train_inner][INFO] - {"epoch": 6, "update": 5.802, "loss": "2.297", "ppl": "4.91", "wps": "364073", "ups": "3.1", "wpb": "117605", "bsz": "256", "num_updates": "298600", "lr": "0.000708485", "gnorm": "0.734", "clip": "0.5", "loss_scale": "0.125", "train_wall": "64", "gb_free": "19.9", "wall": "97466"} [2022-08-02 04:33:39,014][train_inner][INFO] - {"epoch": 6, "update": 5.806, "loss": "2.297", "ppl": "4.92", "wps": "363690", "ups": "3.08", "wpb": "117971", "bsz": "256", "num_updates": "298800", "lr": "0.000708283", "gnorm": "0.72", "clip": "0", "loss_scale": "0.125", "train_wall": "64", "gb_free": "21.9", "wall": "97531"} [2022-08-02 04:34:43,739][train_inner][INFO] - {"epoch": 6, "update": 5.81, "loss": "2.29", "ppl": "4.89", "wps": "366264", "ups": "3.09", "wpb": "118531", "bsz": "256", "num_updates": "299000", "lr": "0.000708081", "gnorm": "0.749", "clip": "0.5", "loss_scale": "0.125", "train_wall": "64", "gb_free": "21.5", "wall": "97596"} [2022-08-02 04:35:48,656][train_inner][INFO] - {"epoch": 6, "update": 5.813, "loss": "2.293", "ppl": "4.9", "wps": "363353", "ups": "3.08", "wpb": "117937", "bsz": "256", "num_updates": "299200", "lr": "0.000707879", "gnorm": "0.75", "clip": "0.5", "loss_scale": "0.125", "train_wall": "65", "gb_free": "24.1", "wall": "97661"} [2022-08-02 04:36:53,514][train_inner][INFO] - {"epoch": 6, "update": 5.817, "loss": "2.296", "ppl": "4.91", "wps": "363572", "ups": "3.08", "wpb": "117902", "bsz": "256", "num_updates": "299400", "lr": "0.000707677", "gnorm": "0.685", "clip": "0", "loss_scale": "0.125", "train_wall": "64", "gb_free": "26.8", "wall": "97726"} [2022-08-02 04:37:58,207][train_inner][INFO] - {"epoch": 6, "update": 5.821, "loss": "2.287", "ppl": "4.88", "wps": "366247", "ups": "3.09", "wpb": "118467", "bsz": "256", "num_updates": "299600", "lr": "0.000707475", "gnorm": "0.681", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "19.9", "wall": "97791"} [2022-08-02 04:39:02,986][train_inner][INFO] - {"epoch": 6, "update": 5.825, "loss": "2.294", "ppl": "4.9", "wps": "365145", "ups": "3.09", "wpb": "118265", "bsz": "256", "num_updates": "299800", "lr": "0.000707273", "gnorm": "0.731", "clip": "0.5", "loss_scale": "0.25", "train_wall": "64", "gb_free": "19.9", "wall": "97855"} [2022-08-02 04:40:07,792][train_inner][INFO] - {"epoch": 6, "update": 5.829, "loss": "2.283", "ppl": "4.87", "wps": "366536", "ups": "3.09", "wpb": "118767", "bsz": "256", "num_updates": "300000", "lr": "0.000707071", "gnorm": "0.672", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "27.3", "wall": "97920"} [2022-08-02 04:40:07,793][fairseq_cli.train][INFO] - begin validation on "valid" subset [2022-08-02 04:40:30,472][valid][INFO] - {"epoch": 6, "valid_loss": "2.185", "valid_ppl": "4.55", "valid_wps": "1.61614e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "300000", "valid_best_loss": "2.185"} [2022-08-02 04:40:30,475][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 6 @ 300000 updates [2022-08-02 04:40:30,476][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_6_300000.pt [2022-08-02 04:40:41,615][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_6_300000.pt [2022-08-02 04:41:13,516][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/adan2/checkpoint_6_300000.pt (epoch 6 @ 300000 updates, score 2.185) (writing took 43.04120281152427 seconds) [2022-08-02 04:42:18,295][train_inner][INFO] - {"epoch": 6, "update": 5.833, "loss": "2.287", "ppl": "4.88", "wps": "181080", "ups": "1.53", "wpb": "118157", "bsz": "256", "num_updates": "300200", "lr": "0.000706869", "gnorm": "0.675", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "25.8", "wall": "98051"} [2022-08-02 04:43:23,552][train_inner][INFO] - {"epoch": 6, "update": 5.837, "loss": "2.29", "ppl": "4.89", "wps": "362730", "ups": "3.06", "wpb": "118352", "bsz": "256", "num_updates": "300400", "lr": "0.000706667", "gnorm": "0.672", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "20.7", "wall": "98116"} [2022-08-02 04:44:28,268][train_inner][INFO] - {"epoch": 6, "update": 5.841, "loss": "2.286", "ppl": "4.88", "wps": "366224", "ups": "3.09", "wpb": "118501", "bsz": "256", "num_updates": "300600", "lr": "0.000706465", "gnorm": "0.672", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "26.3", "wall": "98181"} [2022-08-02 04:45:34,130][train_inner][INFO] - {"epoch": 6, "update": 5.845, "loss": "2.287", "ppl": "4.88", "wps": "359623", "ups": "3.04", "wpb": "118424", "bsz": "256", "num_updates": "300800", "lr": "0.000706263", "gnorm": "0.674", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "19.8", "wall": "98246"} [2022-08-02 04:46:38,651][train_inner][INFO] - {"epoch": 6, "update": 5.848, "loss": "2.288", "ppl": "4.88", "wps": "368329", "ups": "3.1", "wpb": "118823", "bsz": "256", "num_updates": "301000", "lr": "0.000706061", "gnorm": "0.673", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "22.7", "wall": "98311"} [2022-08-02 04:47:44,547][train_inner][INFO] - {"epoch": 6, "update": 5.852, "loss": "2.29", "ppl": "4.89", "wps": "359052", "ups": "3.04", "wpb": "118300", "bsz": "256", "num_updates": "301200", "lr": "0.000705859", "gnorm": "0.678", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "21.8", "wall": "98377"} [2022-08-02 04:48:49,205][train_inner][INFO] - {"epoch": 6, "update": 5.856, "loss": "2.302", "ppl": "4.93", "wps": "362865", "ups": "3.09", "wpb": "117308", "bsz": "256", "num_updates": "301400", "lr": "0.000705657", "gnorm": "0.679", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "20.1", "wall": "98442"} [2022-08-02 04:49:54,162][train_inner][INFO] - {"epoch": 6, "update": 5.86, "loss": "2.29", "ppl": "4.89", "wps": "365419", "ups": "3.08", "wpb": "118682", "bsz": "256", "num_updates": "301600", "lr": "0.000705455", "gnorm": "0.674", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "20.9", "wall": "98506"} [2022-08-02 04:50:59,075][train_inner][INFO] - {"epoch": 6, "update": 5.864, "loss": "2.284", "ppl": "4.87", "wps": "366510", "ups": "3.08", "wpb": "118955", "bsz": "256", "num_updates": "301800", "lr": "0.000705253", "gnorm": "0.671", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "21.3", "wall": "98571"} [2022-08-02 04:52:04,162][train_inner][INFO] - {"epoch": 6, "update": 5.868, "loss": "2.286", "ppl": "4.88", "wps": "362494", "ups": "3.07", "wpb": "117965", "bsz": "256", "num_updates": "302000", "lr": "0.000705051", "gnorm": "0.675", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "22.9", "wall": "98636"} [2022-08-02 04:53:08,858][train_inner][INFO] - {"epoch": 6, "update": 5.872, "loss": "2.285", "ppl": "4.88", "wps": "366696", "ups": "3.09", "wpb": "118617", "bsz": "256", "num_updates": "302200", "lr": "0.000704848", "gnorm": "0.675", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "23.8", "wall": "98701"} [2022-08-02 04:54:13,658][train_inner][INFO] - {"epoch": 6, "update": 5.876, "loss": "2.291", "ppl": "4.9", "wps": "364712", "ups": "3.09", "wpb": "118165", "bsz": "256", "num_updates": "302400", "lr": "0.000704646", "gnorm": "0.676", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20.6", "wall": "98766"} [2022-08-02 04:55:18,333][train_inner][INFO] - {"epoch": 6, "update": 5.879, "loss": "2.29", "ppl": "4.89", "wps": "365901", "ups": "3.09", "wpb": "118313", "bsz": "256", "num_updates": "302600", "lr": "0.000704444", "gnorm": "0.679", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "23.1", "wall": "98831"} [2022-08-02 04:56:22,921][train_inner][INFO] - {"epoch": 6, "update": 5.883, "loss": "2.288", "ppl": "4.89", "wps": "366070", "ups": "3.1", "wpb": "118218", "bsz": "256", "num_updates": "302800", "lr": "0.000704242", "gnorm": "0.68", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "21.5", "wall": "98895"} [2022-08-02 04:57:27,809][train_inner][INFO] - {"epoch": 6, "update": 5.887, "loss": "2.286", "ppl": "4.88", "wps": "364240", "ups": "3.08", "wpb": "118172", "bsz": "256", "num_updates": "303000", "lr": "0.00070404", "gnorm": "0.676", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.6", "wall": "98960"} [2022-08-02 04:58:33,095][train_inner][INFO] - {"epoch": 6, "update": 5.891, "loss": "2.288", "ppl": "4.88", "wps": "362741", "ups": "3.06", "wpb": "118407", "bsz": "256", "num_updates": "303200", "lr": "0.000703838", "gnorm": "0.673", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "22", "wall": "99025"} [2022-08-02 04:59:37,787][train_inner][INFO] - {"epoch": 6, "update": 5.895, "loss": "2.294", "ppl": "4.9", "wps": "364957", "ups": "3.09", "wpb": "118048", "bsz": "256", "num_updates": "303400", "lr": "0.000703636", "gnorm": "0.676", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "23.4", "wall": "99090"} [2022-08-02 05:00:42,684][train_inner][INFO] - {"epoch": 6, "update": 5.899, "loss": "2.289", "ppl": "4.89", "wps": "365223", "ups": "3.08", "wpb": "118508", "bsz": "256", "num_updates": "303600", "lr": "0.000703434", "gnorm": "0.683", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.5", "wall": "99155"} [2022-08-02 05:01:47,024][train_inner][INFO] - {"epoch": 6, "update": 5.903, "loss": "2.291", "ppl": "4.89", "wps": "365180", "ups": "3.11", "wpb": "117476", "bsz": "256", "num_updates": "303800", "lr": "0.000703232", "gnorm": "0.678", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "24", "wall": "99219"} [2022-08-02 05:02:52,141][train_inner][INFO] - {"epoch": 6, "update": 5.907, "loss": "2.284", "ppl": "4.87", "wps": "363973", "ups": "3.07", "wpb": "118503", "bsz": "256", "num_updates": "304000", "lr": "0.00070303", "gnorm": "0.672", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.2", "wall": "99284"} [2022-08-02 05:03:57,065][train_inner][INFO] - {"epoch": 6, "update": 5.911, "loss": "2.282", "ppl": "4.86", "wps": "365939", "ups": "3.08", "wpb": "118789", "bsz": "256", "num_updates": "304200", "lr": "0.000702828", "gnorm": "0.671", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "22.4", "wall": "99349"} [2022-08-02 05:05:03,209][train_inner][INFO] - {"epoch": 6, "update": 5.914, "loss": "2.285", "ppl": "4.87", "wps": "358382", "ups": "3.02", "wpb": "118521", "bsz": "256", "num_updates": "304400", "lr": "0.000702626", "gnorm": "0.674", "clip": "0", "loss_scale": "1", "train_wall": "66", "gb_free": "19.7", "wall": "99416"} [2022-08-02 05:06:08,039][train_inner][INFO] - {"epoch": 6, "update": 5.918, "loss": "2.284", "ppl": "4.87", "wps": "365280", "ups": "3.09", "wpb": "118404", "bsz": "256", "num_updates": "304600", "lr": "0.000702424", "gnorm": "0.673", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20.3", "wall": "99480"} [2022-08-02 05:07:12,780][train_inner][INFO] - {"epoch": 6, "update": 5.922, "loss": "2.283", "ppl": "4.87", "wps": "365972", "ups": "3.09", "wpb": "118465", "bsz": "256", "num_updates": "304800", "lr": "0.000702222", "gnorm": "0.674", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "22.3", "wall": "99545"} [2022-08-02 05:08:17,526][train_inner][INFO] - {"epoch": 6, "update": 5.926, "loss": "2.282", "ppl": "4.86", "wps": "367125", "ups": "3.09", "wpb": "118848", "bsz": "256", "num_updates": "305000", "lr": "0.00070202", "gnorm": "0.669", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "21.3", "wall": "99610"} [2022-08-02 05:09:22,543][train_inner][INFO] - {"epoch": 6, "update": 5.93, "loss": "2.287", "ppl": "4.88", "wps": "364185", "ups": "3.08", "wpb": "118390", "bsz": "256", "num_updates": "305200", "lr": "0.000701818", "gnorm": "0.675", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "26.6", "wall": "99675"} [2022-08-02 05:10:27,316][train_inner][INFO] - {"epoch": 6, "update": 5.934, "loss": "2.284", "ppl": "4.87", "wps": "364926", "ups": "3.09", "wpb": "118184", "bsz": "256", "num_updates": "305400", "lr": "0.000701616", "gnorm": "0.672", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "25.3", "wall": "99740"} [2022-08-02 05:11:32,367][train_inner][INFO] - {"epoch": 6, "update": 5.938, "loss": "2.281", "ppl": "4.86", "wps": "364360", "ups": "3.07", "wpb": "118509", "bsz": "256", "num_updates": "305600", "lr": "0.000701414", "gnorm": "0.673", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "19.8", "wall": "99805"} [2022-08-02 05:12:37,328][train_inner][INFO] - {"epoch": 6, "update": 5.942, "loss": "2.288", "ppl": "4.89", "wps": "363779", "ups": "3.08", "wpb": "118154", "bsz": "256", "num_updates": "305800", "lr": "0.000701212", "gnorm": "0.675", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "24.9", "wall": "99870"} [2022-08-02 05:13:41,893][train_inner][INFO] - {"epoch": 6, "update": 5.946, "loss": "2.285", "ppl": "4.87", "wps": "365108", "ups": "3.1", "wpb": "117864", "bsz": "256", "num_updates": "306000", "lr": "0.00070101", "gnorm": "0.674", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.2", "wall": "99934"} [2022-08-02 05:14:46,662][train_inner][INFO] - {"epoch": 6, "update": 5.949, "loss": "2.286", "ppl": "4.88", "wps": "366574", "ups": "3.09", "wpb": "118712", "bsz": "256", "num_updates": "306200", "lr": "0.000700808", "gnorm": "0.692", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "24.5", "wall": "99999"} [2022-08-02 05:15:51,180][train_inner][INFO] - {"epoch": 6, "update": 5.953, "loss": "2.295", "ppl": "4.91", "wps": "365365", "ups": "3.1", "wpb": "117861", "bsz": "256", "num_updates": "306400", "lr": "0.000700606", "gnorm": "0.701", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "26", "wall": "100064"} [2022-08-02 05:16:05,805][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 [2022-08-02 05:16:06,070][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5 [2022-08-02 05:16:56,399][train_inner][INFO] - {"epoch": 6, "update": 5.957, "loss": "2.287", "ppl": "4.88", "wps": "362660", "ups": "3.07", "wpb": "118260", "bsz": "256", "num_updates": "306600", "lr": "0.000700404", "gnorm": "0.703", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "22.7", "wall": "100129"} [2022-08-02 05:18:01,044][train_inner][INFO] - {"epoch": 6, "update": 5.961, "loss": "2.286", "ppl": "4.88", "wps": "364919", "ups": "3.09", "wpb": "117949", "bsz": "256", "num_updates": "306800", "lr": "0.000700202", "gnorm": "0.682", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "19.9", "wall": "100193"} [2022-08-02 05:19:05,851][train_inner][INFO] - {"epoch": 6, "update": 5.965, "loss": "2.282", "ppl": "4.86", "wps": "366750", "ups": "3.09", "wpb": "118838", "bsz": "256", "num_updates": "307000", "lr": "0.0007", "gnorm": "0.672", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "19.7", "wall": "100258"} [2022-08-02 05:20:10,983][train_inner][INFO] - {"epoch": 6, "update": 5.969, "loss": "2.281", "ppl": "4.86", "wps": "364613", "ups": "3.07", "wpb": "118738", "bsz": "256", "num_updates": "307200", "lr": "0.000699798", "gnorm": "0.67", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "21.3", "wall": "100323"} [2022-08-02 05:21:15,881][train_inner][INFO] - {"epoch": 6, "update": 5.973, "loss": "2.287", "ppl": "4.88", "wps": "365020", "ups": "3.08", "wpb": "118443", "bsz": "256", "num_updates": "307400", "lr": "0.000699596", "gnorm": "0.673", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "21.9", "wall": "100388"} [2022-08-02 05:22:20,789][train_inner][INFO] - {"epoch": 6, "update": 5.977, "loss": "2.285", "ppl": "4.87", "wps": "364993", "ups": "3.08", "wpb": "118453", "bsz": "256", "num_updates": "307600", "lr": "0.000699394", "gnorm": "0.674", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "22", "wall": "100453"} [2022-08-02 05:23:25,874][train_inner][INFO] - {"epoch": 6, "update": 5.981, "loss": "2.285", "ppl": "4.87", "wps": "363231", "ups": "3.07", "wpb": "118204", "bsz": "256", "num_updates": "307800", "lr": "0.000699192", "gnorm": "0.673", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "21", "wall": "100518"} [2022-08-02 05:24:30,623][train_inner][INFO] - {"epoch": 6, "update": 5.984, "loss": "2.283", "ppl": "4.87", "wps": "365226", "ups": "3.09", "wpb": "118237", "bsz": "256", "num_updates": "308000", "lr": "0.00069899", "gnorm": "0.686", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "21.3", "wall": "100583"} [2022-08-02 05:25:35,733][train_inner][INFO] - {"epoch": 6, "update": 5.988, "loss": "2.277", "ppl": "4.85", "wps": "362683", "ups": "3.07", "wpb": "118070", "bsz": "256", "num_updates": "308200", "lr": "0.000698788", "gnorm": "0.671", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20", "wall": "100648"} [2022-08-02 05:26:40,822][train_inner][INFO] - {"epoch": 6, "update": 5.992, "loss": "2.28", "ppl": "4.86", "wps": "362825", "ups": "3.07", "wpb": "118079", "bsz": "256", "num_updates": "308400", "lr": "0.000698586", "gnorm": "0.674", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.6", "wall": "100713"} [2022-08-02 05:27:45,879][train_inner][INFO] - {"epoch": 6, "update": 5.996, "loss": "2.283", "ppl": "4.87", "wps": "363742", "ups": "3.07", "wpb": "118317", "bsz": "256", "num_updates": "308600", "lr": "0.000698384", "gnorm": "0.671", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "22.9", "wall": "100778"} [2022-08-02 05:28:50,938][train_inner][INFO] - {"epoch": 6, "update": 6.0, "loss": "2.28", "ppl": "4.86", "wps": "363290", "ups": "3.07", "wpb": "118174", "bsz": "256", "num_updates": "308800", "lr": "0.000698182", "gnorm": "0.674", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "24.4", "wall": "100843"} [2022-08-02 05:28:52,181][fairseq_cli.train][INFO] - end of epoch 6 (average epoch stats below) [2022-08-02 05:28:52,182][train][INFO] - {"epoch": 6, "train_loss": "2.299", "train_ppl": "4.92", "train_wps": "362602", "train_ups": "3.07", "train_wpb": "118299", "train_bsz": "256", "train_num_updates": "308804", "train_lr": "0.000698178", "train_gnorm": "0.695", "train_clip": "0.1", "train_loss_scale": "1", "train_train_wall": "16621", "train_gb_free": "27.4", "train_wall": "100845"} [2022-08-02 05:28:52,271][fairseq.data.iterators][INFO] - grouped total_num_itrs = 51488 [2022-08-02 05:28:52,274][fairseq.trainer][INFO] - begin training epoch 7 [2022-08-02 05:28:52,275][fairseq_cli.train][INFO] - Start iterating over samples [2022-08-02 05:30:06,429][train_inner][INFO] - {"epoch": 7, "update": 6.004, "loss": "2.274", "ppl": "4.84", "wps": "312561", "ups": "2.65", "wpb": "117976", "bsz": "255.4", "num_updates": "309000", "lr": "0.00069798", "gnorm": "0.673", "clip": "0", "loss_scale": "1", "train_wall": "66", "gb_free": "22.3", "wall": "100919"} [2022-08-02 05:31:11,126][train_inner][INFO] - {"epoch": 7, "update": 6.008, "loss": "2.28", "ppl": "4.86", "wps": "364552", "ups": "3.09", "wpb": "117926", "bsz": "256", "num_updates": "309200", "lr": "0.000697778", "gnorm": "0.679", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "21.1", "wall": "100983"} [2022-08-02 05:32:16,853][train_inner][INFO] - {"epoch": 7, "update": 6.012, "loss": "2.274", "ppl": "4.84", "wps": "359457", "ups": "3.04", "wpb": "118129", "bsz": "256", "num_updates": "309400", "lr": "0.000697576", "gnorm": "0.672", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.3", "wall": "101049"} [2022-08-02 05:33:21,334][train_inner][INFO] - {"epoch": 7, "update": 6.015, "loss": "2.282", "ppl": "4.86", "wps": "368073", "ups": "3.1", "wpb": "118666", "bsz": "256", "num_updates": "309600", "lr": "0.000697374", "gnorm": "0.672", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "23", "wall": "101114"} [2022-08-02 05:34:26,114][train_inner][INFO] - {"epoch": 7, "update": 6.019, "loss": "2.274", "ppl": "4.84", "wps": "364828", "ups": "3.09", "wpb": "118166", "bsz": "256", "num_updates": "309800", "lr": "0.000697172", "gnorm": "0.674", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "22.7", "wall": "101178"} [2022-08-02 05:35:30,957][train_inner][INFO] - {"epoch": 7, "update": 6.023, "loss": "2.281", "ppl": "4.86", "wps": "365229", "ups": "3.08", "wpb": "118411", "bsz": "256", "num_updates": "310000", "lr": "0.00069697", "gnorm": "0.674", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20.5", "wall": "101243"} [2022-08-02 05:36:36,768][train_inner][INFO] - {"epoch": 7, "update": 6.027, "loss": "2.285", "ppl": "4.87", "wps": "356909", "ups": "3.04", "wpb": "117441", "bsz": "256", "num_updates": "310200", "lr": "0.000696768", "gnorm": "0.682", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.8", "wall": "101309"} [2022-08-02 05:37:42,121][train_inner][INFO] - {"epoch": 7, "update": 6.031, "loss": "2.27", "ppl": "4.82", "wps": "362745", "ups": "3.06", "wpb": "118530", "bsz": "256", "num_updates": "310400", "lr": "0.000696566", "gnorm": "0.671", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "26", "wall": "101374"} [2022-08-02 05:38:47,238][train_inner][INFO] - {"epoch": 7, "update": 6.035, "loss": "2.277", "ppl": "4.85", "wps": "361975", "ups": "3.07", "wpb": "117853", "bsz": "256", "num_updates": "310600", "lr": "0.000696364", "gnorm": "0.672", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "23.1", "wall": "101440"} [2022-08-02 05:39:52,031][train_inner][INFO] - {"epoch": 7, "update": 6.039, "loss": "2.284", "ppl": "4.87", "wps": "365298", "ups": "3.09", "wpb": "118342", "bsz": "256", "num_updates": "310800", "lr": "0.000696162", "gnorm": "0.678", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.9", "wall": "101504"} [2022-08-02 05:40:56,849][train_inner][INFO] - {"epoch": 7, "update": 6.043, "loss": "2.274", "ppl": "4.84", "wps": "364731", "ups": "3.09", "wpb": "118204", "bsz": "256", "num_updates": "311000", "lr": "0.00069596", "gnorm": "0.675", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20", "wall": "101569"} [2022-08-02 05:42:03,178][train_inner][INFO] - {"epoch": 7, "update": 6.047, "loss": "2.267", "ppl": "4.81", "wps": "359552", "ups": "3.02", "wpb": "119241", "bsz": "256", "num_updates": "311200", "lr": "0.000695758", "gnorm": "0.665", "clip": "0", "loss_scale": "2", "train_wall": "66", "gb_free": "22.4", "wall": "101635"} [2022-08-02 05:43:08,072][train_inner][INFO] - {"epoch": 7, "update": 6.05, "loss": "2.276", "ppl": "4.84", "wps": "365658", "ups": "3.08", "wpb": "118644", "bsz": "256", "num_updates": "311400", "lr": "0.000695556", "gnorm": "0.672", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "23", "wall": "101700"} [2022-08-02 05:44:12,837][train_inner][INFO] - {"epoch": 7, "update": 6.054, "loss": "2.282", "ppl": "4.86", "wps": "365910", "ups": "3.09", "wpb": "118488", "bsz": "256", "num_updates": "311600", "lr": "0.000695354", "gnorm": "0.674", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "22.6", "wall": "101765"} [2022-08-02 05:45:17,619][train_inner][INFO] - {"epoch": 7, "update": 6.058, "loss": "2.274", "ppl": "4.84", "wps": "366114", "ups": "3.09", "wpb": "118587", "bsz": "256", "num_updates": "311800", "lr": "0.000695152", "gnorm": "0.669", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.9", "wall": "101830"} [2022-08-02 05:46:22,554][train_inner][INFO] - {"epoch": 7, "update": 6.062, "loss": "2.278", "ppl": "4.85", "wps": "363852", "ups": "3.08", "wpb": "118131", "bsz": "256", "num_updates": "312000", "lr": "0.000694949", "gnorm": "0.676", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "22", "wall": "101895"} [2022-08-02 05:47:27,530][train_inner][INFO] - {"epoch": 7, "update": 6.066, "loss": "2.273", "ppl": "4.83", "wps": "364354", "ups": "3.08", "wpb": "118370", "bsz": "256", "num_updates": "312200", "lr": "0.000694747", "gnorm": "0.673", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.9", "wall": "101960"} [2022-08-02 05:48:32,664][train_inner][INFO] - {"epoch": 7, "update": 6.07, "loss": "2.278", "ppl": "4.85", "wps": "362822", "ups": "3.07", "wpb": "118158", "bsz": "256", "num_updates": "312400", "lr": "0.000694545", "gnorm": "0.673", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.3", "wall": "102025"} [2022-08-02 05:49:39,742][train_inner][INFO] - {"epoch": 7, "update": 6.074, "loss": "2.274", "ppl": "4.84", "wps": "354332", "ups": "2.98", "wpb": "118837", "bsz": "256", "num_updates": "312600", "lr": "0.000694343", "gnorm": "0.671", "clip": "0", "loss_scale": "4", "train_wall": "67", "gb_free": "20", "wall": "102092"} [2022-08-02 05:50:45,581][train_inner][INFO] - {"epoch": 7, "update": 6.078, "loss": "2.274", "ppl": "4.84", "wps": "359641", "ups": "3.04", "wpb": "118391", "bsz": "256", "num_updates": "312800", "lr": "0.000694141", "gnorm": "0.672", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.5", "wall": "102158"} [2022-08-02 05:51:50,687][train_inner][INFO] - {"epoch": 7, "update": 6.081, "loss": "2.273", "ppl": "4.83", "wps": "362359", "ups": "3.07", "wpb": "117956", "bsz": "256", "num_updates": "313000", "lr": "0.000693939", "gnorm": "0.675", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.5", "wall": "102223"} [2022-08-02 05:52:55,845][train_inner][INFO] - {"epoch": 7, "update": 6.085, "loss": "2.281", "ppl": "4.86", "wps": "364042", "ups": "3.07", "wpb": "118600", "bsz": "256", "num_updates": "313200", "lr": "0.000693737", "gnorm": "0.673", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.9", "wall": "102288"} [2022-08-02 05:54:00,856][train_inner][INFO] - {"epoch": 7, "update": 6.089, "loss": "2.281", "ppl": "4.86", "wps": "363807", "ups": "3.08", "wpb": "118256", "bsz": "256", "num_updates": "313400", "lr": "0.000693535", "gnorm": "0.674", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.8", "wall": "102353"} [2022-08-02 05:55:05,520][train_inner][INFO] - {"epoch": 7, "update": 6.093, "loss": "2.274", "ppl": "4.84", "wps": "366444", "ups": "3.09", "wpb": "118477", "bsz": "256", "num_updates": "313600", "lr": "0.000693333", "gnorm": "0.672", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.1", "wall": "102418"} [2022-08-02 05:56:10,627][train_inner][INFO] - {"epoch": 7, "update": 6.097, "loss": "2.274", "ppl": "4.84", "wps": "365248", "ups": "3.07", "wpb": "118901", "bsz": "256", "num_updates": "313800", "lr": "0.000693131", "gnorm": "0.672", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.1", "wall": "102483"} [2022-08-02 05:57:15,876][train_inner][INFO] - {"epoch": 7, "update": 6.101, "loss": "2.275", "ppl": "4.84", "wps": "363588", "ups": "3.07", "wpb": "118616", "bsz": "256", "num_updates": "314000", "lr": "0.000692929", "gnorm": "0.67", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.2", "wall": "102548"} [2022-08-02 05:58:21,715][train_inner][INFO] - {"epoch": 7, "update": 6.105, "loss": "2.276", "ppl": "4.84", "wps": "357848", "ups": "3.04", "wpb": "117800", "bsz": "256", "num_updates": "314200", "lr": "0.000692727", "gnorm": "0.68", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "25.3", "wall": "102614"} [2022-08-02 05:59:26,583][train_inner][INFO] - {"epoch": 7, "update": 6.109, "loss": "2.274", "ppl": "4.84", "wps": "363412", "ups": "3.08", "wpb": "117868", "bsz": "256", "num_updates": "314400", "lr": "0.000692525", "gnorm": "0.673", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23", "wall": "102679"} [2022-08-02 06:00:31,800][train_inner][INFO] - {"epoch": 7, "update": 6.113, "loss": "2.285", "ppl": "4.87", "wps": "363714", "ups": "3.07", "wpb": "118600", "bsz": "256", "num_updates": "314600", "lr": "0.000692323", "gnorm": "0.677", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.7", "wall": "102744"} [2022-08-02 06:01:36,193][train_inner][INFO] - {"epoch": 7, "update": 6.116, "loss": "2.271", "ppl": "4.83", "wps": "367267", "ups": "3.11", "wpb": "118247", "bsz": "256", "num_updates": "314800", "lr": "0.000692121", "gnorm": "0.673", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.8", "wall": "102809"} [2022-08-02 06:02:41,162][train_inner][INFO] - {"epoch": 7, "update": 6.12, "loss": "2.269", "ppl": "4.82", "wps": "364002", "ups": "3.08", "wpb": "118242", "bsz": "256", "num_updates": "315000", "lr": "0.000691919", "gnorm": "0.673", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "102873"} [2022-08-02 06:03:46,087][train_inner][INFO] - {"epoch": 7, "update": 6.124, "loss": "2.271", "ppl": "4.83", "wps": "363781", "ups": "3.08", "wpb": "118090", "bsz": "256", "num_updates": "315200", "lr": "0.000691717", "gnorm": "0.67", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "102938"} [2022-08-02 06:04:51,107][train_inner][INFO] - {"epoch": 7, "update": 6.128, "loss": "2.274", "ppl": "4.84", "wps": "364100", "ups": "3.08", "wpb": "118368", "bsz": "256", "num_updates": "315400", "lr": "0.000691515", "gnorm": "0.67", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "103003"} [2022-08-02 06:05:55,899][train_inner][INFO] - {"epoch": 7, "update": 6.132, "loss": "2.28", "ppl": "4.86", "wps": "364541", "ups": "3.09", "wpb": "118096", "bsz": "256", "num_updates": "315600", "lr": "0.000691313", "gnorm": "0.67", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22", "wall": "103068"} [2022-08-02 06:07:00,579][train_inner][INFO] - {"epoch": 7, "update": 6.136, "loss": "2.273", "ppl": "4.83", "wps": "365241", "ups": "3.09", "wpb": "118117", "bsz": "256", "num_updates": "315800", "lr": "0.000691111", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.1", "wall": "103133"} [2022-08-02 06:08:05,199][train_inner][INFO] - {"epoch": 7, "update": 6.14, "loss": "2.279", "ppl": "4.85", "wps": "364409", "ups": "3.1", "wpb": "117738", "bsz": "256", "num_updates": "316000", "lr": "0.000690909", "gnorm": "0.674", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.7", "wall": "103198"} [2022-08-02 06:09:10,033][train_inner][INFO] - {"epoch": 7, "update": 6.144, "loss": "2.277", "ppl": "4.85", "wps": "365317", "ups": "3.08", "wpb": "118424", "bsz": "256", "num_updates": "316200", "lr": "0.000690707", "gnorm": "0.672", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "27.1", "wall": "103262"} [2022-08-02 06:10:14,960][train_inner][INFO] - {"epoch": 7, "update": 6.148, "loss": "2.27", "ppl": "4.82", "wps": "366180", "ups": "3.08", "wpb": "118873", "bsz": "256", "num_updates": "316400", "lr": "0.000690505", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "103327"} [2022-08-02 06:11:19,764][train_inner][INFO] - {"epoch": 7, "update": 6.151, "loss": "2.274", "ppl": "4.84", "wps": "364767", "ups": "3.09", "wpb": "118190", "bsz": "256", "num_updates": "316600", "lr": "0.000690303", "gnorm": "0.672", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "25.2", "wall": "103392"} [2022-08-02 06:12:24,240][train_inner][INFO] - {"epoch": 7, "update": 6.155, "loss": "2.28", "ppl": "4.86", "wps": "366890", "ups": "3.1", "wpb": "118276", "bsz": "256", "num_updates": "316800", "lr": "0.000690101", "gnorm": "0.673", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.9", "wall": "103457"} [2022-08-02 06:13:29,250][train_inner][INFO] - {"epoch": 7, "update": 6.159, "loss": "2.273", "ppl": "4.83", "wps": "363815", "ups": "3.08", "wpb": "118256", "bsz": "256", "num_updates": "317000", "lr": "0.000689899", "gnorm": "0.673", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "103522"} [2022-08-02 06:14:30,451][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-02 06:14:33,884][train_inner][INFO] - {"epoch": 7, "update": 6.163, "loss": "2.271", "ppl": "4.83", "wps": "367002", "ups": "3.09", "wpb": "118602", "bsz": "256", "num_updates": "317200", "lr": "0.000689697", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.2", "wall": "103586"} [2022-08-02 06:15:38,440][train_inner][INFO] - {"epoch": 7, "update": 6.167, "loss": "2.27", "ppl": "4.82", "wps": "366873", "ups": "3.1", "wpb": "118418", "bsz": "256", "num_updates": "317400", "lr": "0.000689495", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "25.3", "wall": "103651"} [2022-08-02 06:16:42,954][train_inner][INFO] - {"epoch": 7, "update": 6.171, "loss": "2.273", "ppl": "4.83", "wps": "367706", "ups": "3.1", "wpb": "118610", "bsz": "256", "num_updates": "317600", "lr": "0.000689293", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.5", "wall": "103715"} [2022-08-02 06:17:47,479][train_inner][INFO] - {"epoch": 7, "update": 6.175, "loss": "2.276", "ppl": "4.84", "wps": "366048", "ups": "3.1", "wpb": "118095", "bsz": "256", "num_updates": "317800", "lr": "0.000689091", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.3", "wall": "103780"} [2022-08-02 06:18:52,487][train_inner][INFO] - {"epoch": 7, "update": 6.179, "loss": "2.271", "ppl": "4.83", "wps": "363318", "ups": "3.08", "wpb": "118090", "bsz": "256", "num_updates": "318000", "lr": "0.000688889", "gnorm": "0.672", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.5", "wall": "103845"} [2022-08-02 06:19:57,375][train_inner][INFO] - {"epoch": 7, "update": 6.183, "loss": "2.272", "ppl": "4.83", "wps": "364604", "ups": "3.08", "wpb": "118290", "bsz": "256", "num_updates": "318200", "lr": "0.000688687", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "103910"} [2022-08-02 06:21:02,423][train_inner][INFO] - {"epoch": 7, "update": 6.186, "loss": "2.276", "ppl": "4.84", "wps": "362410", "ups": "3.07", "wpb": "117868", "bsz": "256", "num_updates": "318400", "lr": "0.000688485", "gnorm": "0.678", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.9", "wall": "103975"} [2022-08-02 06:22:07,106][train_inner][INFO] - {"epoch": 7, "update": 6.19, "loss": "2.271", "ppl": "4.83", "wps": "366344", "ups": "3.09", "wpb": "118479", "bsz": "256", "num_updates": "318600", "lr": "0.000688283", "gnorm": "0.672", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.7", "wall": "104039"} [2022-08-02 06:23:11,879][train_inner][INFO] - {"epoch": 7, "update": 6.194, "loss": "2.274", "ppl": "4.84", "wps": "361876", "ups": "3.09", "wpb": "117199", "bsz": "256", "num_updates": "318800", "lr": "0.000688081", "gnorm": "0.697", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.4", "wall": "104104"} [2022-08-02 06:24:17,730][train_inner][INFO] - {"epoch": 7, "update": 6.198, "loss": "2.266", "ppl": "4.81", "wps": "359589", "ups": "3.04", "wpb": "118395", "bsz": "256", "num_updates": "319000", "lr": "0.000687879", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "19.9", "wall": "104170"} [2022-08-02 06:25:22,904][train_inner][INFO] - {"epoch": 7, "update": 6.202, "loss": "2.276", "ppl": "4.84", "wps": "364193", "ups": "3.07", "wpb": "118678", "bsz": "256", "num_updates": "319200", "lr": "0.000687677", "gnorm": "0.673", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.7", "wall": "104235"} [2022-08-02 06:26:27,740][train_inner][INFO] - {"epoch": 7, "update": 6.206, "loss": "2.273", "ppl": "4.83", "wps": "363970", "ups": "3.08", "wpb": "117990", "bsz": "256", "num_updates": "319400", "lr": "0.000687475", "gnorm": "0.672", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "104300"} [2022-08-02 06:27:32,785][train_inner][INFO] - {"epoch": 7, "update": 6.21, "loss": "2.267", "ppl": "4.81", "wps": "364700", "ups": "3.07", "wpb": "118608", "bsz": "256", "num_updates": "319600", "lr": "0.000687273", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.3", "wall": "104365"} [2022-08-02 06:28:37,804][train_inner][INFO] - {"epoch": 7, "update": 6.214, "loss": "2.269", "ppl": "4.82", "wps": "365759", "ups": "3.08", "wpb": "118904", "bsz": "256", "num_updates": "319800", "lr": "0.000687071", "gnorm": "0.666", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.5", "wall": "104430"} [2022-08-02 06:29:42,547][train_inner][INFO] - {"epoch": 7, "update": 6.217, "loss": "2.269", "ppl": "4.82", "wps": "364257", "ups": "3.09", "wpb": "117915", "bsz": "256", "num_updates": "320000", "lr": "0.000686869", "gnorm": "0.672", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.5", "wall": "104495"} [2022-08-02 06:30:47,353][train_inner][INFO] - {"epoch": 7, "update": 6.221, "loss": "2.272", "ppl": "4.83", "wps": "365211", "ups": "3.09", "wpb": "118338", "bsz": "256", "num_updates": "320200", "lr": "0.000686667", "gnorm": "0.674", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.2", "wall": "104560"} [2022-08-02 06:31:52,288][train_inner][INFO] - {"epoch": 7, "update": 6.225, "loss": "2.278", "ppl": "4.85", "wps": "361896", "ups": "3.08", "wpb": "117496", "bsz": "256", "num_updates": "320400", "lr": "0.000686465", "gnorm": "0.673", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.1", "wall": "104625"} [2022-08-02 06:32:57,564][train_inner][INFO] - {"epoch": 7, "update": 6.229, "loss": "2.264", "ppl": "4.8", "wps": "363494", "ups": "3.06", "wpb": "118636", "bsz": "256", "num_updates": "320600", "lr": "0.000686263", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "104690"} [2022-08-02 06:34:02,782][train_inner][INFO] - {"epoch": 7, "update": 6.233, "loss": "2.271", "ppl": "4.83", "wps": "363123", "ups": "3.07", "wpb": "118409", "bsz": "256", "num_updates": "320800", "lr": "0.000686061", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "104755"} [2022-08-02 06:35:07,658][train_inner][INFO] - {"epoch": 7, "update": 6.237, "loss": "2.272", "ppl": "4.83", "wps": "363481", "ups": "3.08", "wpb": "117903", "bsz": "256", "num_updates": "321000", "lr": "0.000685859", "gnorm": "0.672", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "104820"} [2022-08-02 06:36:12,988][train_inner][INFO] - {"epoch": 7, "update": 6.241, "loss": "2.273", "ppl": "4.83", "wps": "360650", "ups": "3.06", "wpb": "117805", "bsz": "256", "num_updates": "321200", "lr": "0.000685657", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "26", "wall": "104885"} [2022-08-02 06:37:17,853][train_inner][INFO] - {"epoch": 7, "update": 6.245, "loss": "2.274", "ppl": "4.84", "wps": "363125", "ups": "3.08", "wpb": "117770", "bsz": "256", "num_updates": "321400", "lr": "0.000685455", "gnorm": "0.672", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "22.6", "wall": "104950"} [2022-08-02 06:38:22,483][train_inner][INFO] - {"epoch": 7, "update": 6.249, "loss": "2.268", "ppl": "4.82", "wps": "366382", "ups": "3.09", "wpb": "118395", "bsz": "256", "num_updates": "321600", "lr": "0.000685253", "gnorm": "0.668", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "23.7", "wall": "105015"} [2022-08-02 06:38:43,596][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-02 06:39:27,630][train_inner][INFO] - {"epoch": 7, "update": 6.252, "loss": "2.269", "ppl": "4.82", "wps": "362991", "ups": "3.07", "wpb": "118237", "bsz": "256", "num_updates": "321800", "lr": "0.000685051", "gnorm": "0.673", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "105080"} [2022-08-02 06:40:32,664][train_inner][INFO] - {"epoch": 7, "update": 6.256, "loss": "2.27", "ppl": "4.82", "wps": "362392", "ups": "3.08", "wpb": "117837", "bsz": "256", "num_updates": "322000", "lr": "0.000684848", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.9", "wall": "105145"} [2022-08-02 06:41:38,009][train_inner][INFO] - {"epoch": 7, "update": 6.26, "loss": "2.267", "ppl": "4.81", "wps": "362419", "ups": "3.06", "wpb": "118410", "bsz": "256", "num_updates": "322200", "lr": "0.000684646", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23", "wall": "105210"} [2022-08-02 06:42:43,002][train_inner][INFO] - {"epoch": 7, "update": 6.264, "loss": "2.27", "ppl": "4.82", "wps": "361948", "ups": "3.08", "wpb": "117619", "bsz": "256", "num_updates": "322400", "lr": "0.000684444", "gnorm": "0.672", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.8", "wall": "105275"} [2022-08-02 06:43:47,886][train_inner][INFO] - {"epoch": 7, "update": 6.268, "loss": "2.272", "ppl": "4.83", "wps": "364944", "ups": "3.08", "wpb": "118392", "bsz": "256", "num_updates": "322600", "lr": "0.000684242", "gnorm": "0.672", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "25.6", "wall": "105340"} [2022-08-02 06:44:52,746][train_inner][INFO] - {"epoch": 7, "update": 6.272, "loss": "2.265", "ppl": "4.81", "wps": "366778", "ups": "3.08", "wpb": "118945", "bsz": "256", "num_updates": "322800", "lr": "0.00068404", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.5", "wall": "105405"} [2022-08-02 06:45:57,607][train_inner][INFO] - {"epoch": 7, "update": 6.276, "loss": "2.271", "ppl": "4.83", "wps": "364118", "ups": "3.08", "wpb": "118084", "bsz": "256", "num_updates": "323000", "lr": "0.000683838", "gnorm": "0.672", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "26.3", "wall": "105470"} [2022-08-02 06:47:02,361][train_inner][INFO] - {"epoch": 7, "update": 6.28, "loss": "2.269", "ppl": "4.82", "wps": "364393", "ups": "3.09", "wpb": "117977", "bsz": "256", "num_updates": "323200", "lr": "0.000683636", "gnorm": "0.671", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.7", "wall": "105535"} [2022-08-02 06:48:07,644][train_inner][INFO] - {"epoch": 7, "update": 6.284, "loss": "2.267", "ppl": "4.81", "wps": "362269", "ups": "3.06", "wpb": "118248", "bsz": "256", "num_updates": "323400", "lr": "0.000683434", "gnorm": "0.668", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "105600"} [2022-08-02 06:49:12,888][train_inner][INFO] - {"epoch": 7, "update": 6.287, "loss": "2.271", "ppl": "4.83", "wps": "363426", "ups": "3.07", "wpb": "118557", "bsz": "256", "num_updates": "323600", "lr": "0.000683232", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "26.3", "wall": "105665"} [2022-08-02 06:49:35,663][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-02 06:49:35,955][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 [2022-08-02 06:50:18,425][train_inner][INFO] - {"epoch": 7, "update": 6.291, "loss": "2.267", "ppl": "4.81", "wps": "361089", "ups": "3.05", "wpb": "118321", "bsz": "256", "num_updates": "323800", "lr": "0.00068303", "gnorm": "0.733", "clip": "0.5", "loss_scale": "4", "train_wall": "65", "gb_free": "19.8", "wall": "105731"} [2022-08-02 06:51:22,814][train_inner][INFO] - {"epoch": 7, "update": 6.295, "loss": "2.27", "ppl": "4.82", "wps": "366212", "ups": "3.11", "wpb": "117899", "bsz": "256", "num_updates": "324000", "lr": "0.000682828", "gnorm": "0.684", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20", "wall": "105795"} [2022-08-02 06:52:27,684][train_inner][INFO] - {"epoch": 7, "update": 6.299, "loss": "2.267", "ppl": "4.81", "wps": "365032", "ups": "3.08", "wpb": "118396", "bsz": "256", "num_updates": "324200", "lr": "0.000682626", "gnorm": "0.668", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.4", "wall": "105860"} [2022-08-02 06:53:32,842][train_inner][INFO] - {"epoch": 7, "update": 6.303, "loss": "2.268", "ppl": "4.82", "wps": "362264", "ups": "3.07", "wpb": "118021", "bsz": "256", "num_updates": "324400", "lr": "0.000682424", "gnorm": "0.678", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.7", "wall": "105925"} [2022-08-02 06:54:38,011][train_inner][INFO] - {"epoch": 7, "update": 6.307, "loss": "2.269", "ppl": "4.82", "wps": "363269", "ups": "3.07", "wpb": "118369", "bsz": "256", "num_updates": "324600", "lr": "0.000682222", "gnorm": "0.669", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.7", "wall": "105990"} [2022-08-02 06:55:42,512][train_inner][INFO] - {"epoch": 7, "update": 6.311, "loss": "2.274", "ppl": "4.84", "wps": "368728", "ups": "3.1", "wpb": "118914", "bsz": "256", "num_updates": "324800", "lr": "0.00068202", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "26.4", "wall": "106055"} [2022-08-02 06:56:47,163][train_inner][INFO] - {"epoch": 7, "update": 6.315, "loss": "2.266", "ppl": "4.81", "wps": "364630", "ups": "3.09", "wpb": "117867", "bsz": "256", "num_updates": "325000", "lr": "0.000681818", "gnorm": "0.673", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "24", "wall": "106119"} [2022-08-02 06:57:52,163][train_inner][INFO] - {"epoch": 7, "update": 6.319, "loss": "2.266", "ppl": "4.81", "wps": "363781", "ups": "3.08", "wpb": "118228", "bsz": "256", "num_updates": "325200", "lr": "0.000681616", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.2", "wall": "106184"} [2022-08-02 06:58:57,206][train_inner][INFO] - {"epoch": 7, "update": 6.322, "loss": "2.268", "ppl": "4.82", "wps": "363883", "ups": "3.07", "wpb": "118338", "bsz": "256", "num_updates": "325400", "lr": "0.000681414", "gnorm": "0.672", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22", "wall": "106250"} [2022-08-02 07:00:01,764][train_inner][INFO] - {"epoch": 7, "update": 6.326, "loss": "2.27", "ppl": "4.82", "wps": "366503", "ups": "3.1", "wpb": "118301", "bsz": "256", "num_updates": "325600", "lr": "0.000681212", "gnorm": "0.671", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.9", "wall": "106314"} [2022-08-02 07:01:06,567][train_inner][INFO] - {"epoch": 7, "update": 6.33, "loss": "2.268", "ppl": "4.82", "wps": "365138", "ups": "3.09", "wpb": "118308", "bsz": "256", "num_updates": "325800", "lr": "0.00068101", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "27.2", "wall": "106379"} [2022-08-02 07:02:11,508][train_inner][INFO] - {"epoch": 7, "update": 6.334, "loss": "2.263", "ppl": "4.8", "wps": "365750", "ups": "3.08", "wpb": "118760", "bsz": "256", "num_updates": "326000", "lr": "0.000680808", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "26.3", "wall": "106444"} [2022-08-02 07:03:16,230][train_inner][INFO] - {"epoch": 7, "update": 6.338, "loss": "2.264", "ppl": "4.8", "wps": "365760", "ups": "3.09", "wpb": "118362", "bsz": "256", "num_updates": "326200", "lr": "0.000680606", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.2", "wall": "106509"} [2022-08-02 07:04:21,198][train_inner][INFO] - {"epoch": 7, "update": 6.342, "loss": "2.272", "ppl": "4.83", "wps": "363304", "ups": "3.08", "wpb": "118014", "bsz": "256", "num_updates": "326400", "lr": "0.000680404", "gnorm": "0.673", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "106574"} [2022-08-02 07:05:26,062][train_inner][INFO] - {"epoch": 7, "update": 6.346, "loss": "2.26", "ppl": "4.79", "wps": "366064", "ups": "3.08", "wpb": "118720", "bsz": "256", "num_updates": "326600", "lr": "0.000680202", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "106638"} [2022-08-02 07:06:31,372][train_inner][INFO] - {"epoch": 7, "update": 6.35, "loss": "2.26", "ppl": "4.79", "wps": "361721", "ups": "3.06", "wpb": "118118", "bsz": "256", "num_updates": "326800", "lr": "0.00068", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "106704"} [2022-08-02 07:07:36,153][train_inner][INFO] - {"epoch": 7, "update": 6.353, "loss": "2.263", "ppl": "4.8", "wps": "366104", "ups": "3.09", "wpb": "118581", "bsz": "256", "num_updates": "327000", "lr": "0.000679798", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22", "wall": "106768"} [2022-08-02 07:08:42,057][train_inner][INFO] - {"epoch": 7, "update": 6.357, "loss": "2.267", "ppl": "4.81", "wps": "359374", "ups": "3.03", "wpb": "118421", "bsz": "256", "num_updates": "327200", "lr": "0.000679596", "gnorm": "0.67", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "24.4", "wall": "106834"} [2022-08-02 07:09:46,816][train_inner][INFO] - {"epoch": 7, "update": 6.361, "loss": "2.265", "ppl": "4.81", "wps": "366204", "ups": "3.09", "wpb": "118573", "bsz": "256", "num_updates": "327400", "lr": "0.000679394", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.6", "wall": "106899"} [2022-08-02 07:10:51,556][train_inner][INFO] - {"epoch": 7, "update": 6.365, "loss": "2.271", "ppl": "4.83", "wps": "362704", "ups": "3.09", "wpb": "117406", "bsz": "256", "num_updates": "327600", "lr": "0.000679192", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "28.2", "wall": "106964"} [2022-08-02 07:11:56,170][train_inner][INFO] - {"epoch": 7, "update": 6.369, "loss": "2.264", "ppl": "4.8", "wps": "364765", "ups": "3.1", "wpb": "117844", "bsz": "256", "num_updates": "327800", "lr": "0.00067899", "gnorm": "0.671", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.7", "wall": "107028"} [2022-08-02 07:13:01,204][train_inner][INFO] - {"epoch": 7, "update": 6.373, "loss": "2.266", "ppl": "4.81", "wps": "364445", "ups": "3.08", "wpb": "118505", "bsz": "256", "num_updates": "328000", "lr": "0.000678788", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "107094"} [2022-08-02 07:13:24,544][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-02 07:14:06,381][train_inner][INFO] - {"epoch": 7, "update": 6.377, "loss": "2.268", "ppl": "4.82", "wps": "363296", "ups": "3.07", "wpb": "118390", "bsz": "256", "num_updates": "328200", "lr": "0.000678586", "gnorm": "0.676", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.5", "wall": "107159"} [2022-08-02 07:15:11,118][train_inner][INFO] - {"epoch": 7, "update": 6.381, "loss": "2.264", "ppl": "4.8", "wps": "365310", "ups": "3.09", "wpb": "118245", "bsz": "256", "num_updates": "328400", "lr": "0.000678384", "gnorm": "0.673", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23.2", "wall": "107223"} [2022-08-02 07:16:15,465][train_inner][INFO] - {"epoch": 7, "update": 6.385, "loss": "2.27", "ppl": "4.82", "wps": "367258", "ups": "3.11", "wpb": "118158", "bsz": "256", "num_updates": "328600", "lr": "0.000678182", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.1", "wall": "107288"} [2022-08-02 07:17:20,098][train_inner][INFO] - {"epoch": 7, "update": 6.388, "loss": "2.267", "ppl": "4.81", "wps": "363754", "ups": "3.09", "wpb": "117550", "bsz": "256", "num_updates": "328800", "lr": "0.00067798", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20", "wall": "107352"} [2022-08-02 07:18:25,104][train_inner][INFO] - {"epoch": 7, "update": 6.392, "loss": "2.268", "ppl": "4.82", "wps": "362250", "ups": "3.08", "wpb": "117741", "bsz": "256", "num_updates": "329000", "lr": "0.000677778", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "107417"} [2022-08-02 07:19:29,870][train_inner][INFO] - {"epoch": 7, "update": 6.396, "loss": "2.268", "ppl": "4.82", "wps": "364312", "ups": "3.09", "wpb": "117975", "bsz": "256", "num_updates": "329200", "lr": "0.000677576", "gnorm": "0.672", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.8", "wall": "107482"} [2022-08-02 07:20:34,818][train_inner][INFO] - {"epoch": 7, "update": 6.4, "loss": "2.265", "ppl": "4.81", "wps": "362858", "ups": "3.08", "wpb": "117832", "bsz": "256", "num_updates": "329400", "lr": "0.000677374", "gnorm": "0.672", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.7", "wall": "107547"} [2022-08-02 07:21:39,904][train_inner][INFO] - {"epoch": 7, "update": 6.404, "loss": "2.261", "ppl": "4.79", "wps": "363823", "ups": "3.07", "wpb": "118397", "bsz": "256", "num_updates": "329600", "lr": "0.000677172", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.1", "wall": "107612"} [2022-08-02 07:22:44,610][train_inner][INFO] - {"epoch": 7, "update": 6.408, "loss": "2.259", "ppl": "4.79", "wps": "364927", "ups": "3.09", "wpb": "118064", "bsz": "256", "num_updates": "329800", "lr": "0.00067697", "gnorm": "0.67", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.2", "wall": "107677"} [2022-08-02 07:23:49,658][train_inner][INFO] - {"epoch": 7, "update": 6.412, "loss": "2.258", "ppl": "4.78", "wps": "365190", "ups": "3.07", "wpb": "118771", "bsz": "256", "num_updates": "330000", "lr": "0.000676768", "gnorm": "0.676", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "107742"} [2022-08-02 07:24:54,796][train_inner][INFO] - {"epoch": 7, "update": 6.416, "loss": "2.26", "ppl": "4.79", "wps": "363978", "ups": "3.07", "wpb": "118544", "bsz": "256", "num_updates": "330200", "lr": "0.000676566", "gnorm": "0.668", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.4", "wall": "107807"} [2022-08-02 07:25:59,706][train_inner][INFO] - {"epoch": 7, "update": 6.42, "loss": "2.27", "ppl": "4.82", "wps": "364564", "ups": "3.08", "wpb": "118317", "bsz": "256", "num_updates": "330400", "lr": "0.000676364", "gnorm": "0.678", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.5", "wall": "107872"} [2022-08-02 07:27:04,283][train_inner][INFO] - {"epoch": 7, "update": 6.423, "loss": "2.265", "ppl": "4.81", "wps": "364994", "ups": "3.1", "wpb": "117849", "bsz": "256", "num_updates": "330600", "lr": "0.000676162", "gnorm": "0.671", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.4", "wall": "107937"} [2022-08-02 07:27:28,744][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-02 07:28:09,177][train_inner][INFO] - {"epoch": 7, "update": 6.427, "loss": "2.263", "ppl": "4.8", "wps": "363755", "ups": "3.08", "wpb": "118027", "bsz": "256", "num_updates": "330800", "lr": "0.00067596", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.2", "wall": "108001"} [2022-08-02 07:29:15,011][train_inner][INFO] - {"epoch": 7, "update": 6.431, "loss": "2.263", "ppl": "4.8", "wps": "359080", "ups": "3.04", "wpb": "118196", "bsz": "256", "num_updates": "331000", "lr": "0.000675758", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "108067"} [2022-08-02 07:30:20,333][train_inner][INFO] - {"epoch": 7, "update": 6.435, "loss": "2.268", "ppl": "4.82", "wps": "361067", "ups": "3.06", "wpb": "117928", "bsz": "256", "num_updates": "331200", "lr": "0.000675556", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.8", "wall": "108133"} [2022-08-02 07:31:25,874][train_inner][INFO] - {"epoch": 7, "update": 6.439, "loss": "2.259", "ppl": "4.79", "wps": "360981", "ups": "3.05", "wpb": "118293", "bsz": "256", "num_updates": "331400", "lr": "0.000675354", "gnorm": "0.672", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "108198"} [2022-08-02 07:32:31,128][train_inner][INFO] - {"epoch": 7, "update": 6.443, "loss": "2.256", "ppl": "4.78", "wps": "363597", "ups": "3.06", "wpb": "118629", "bsz": "256", "num_updates": "331600", "lr": "0.000675152", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.3", "wall": "108263"} [2022-08-02 07:33:36,123][train_inner][INFO] - {"epoch": 7, "update": 6.447, "loss": "2.261", "ppl": "4.79", "wps": "363361", "ups": "3.08", "wpb": "118082", "bsz": "256", "num_updates": "331800", "lr": "0.000674949", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "26.7", "wall": "108328"} [2022-08-02 07:34:40,611][train_inner][INFO] - {"epoch": 7, "update": 6.451, "loss": "2.265", "ppl": "4.81", "wps": "365506", "ups": "3.1", "wpb": "117852", "bsz": "256", "num_updates": "332000", "lr": "0.000674747", "gnorm": "0.672", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "24.2", "wall": "108393"} [2022-08-02 07:35:45,603][train_inner][INFO] - {"epoch": 7, "update": 6.455, "loss": "2.263", "ppl": "4.8", "wps": "364496", "ups": "3.08", "wpb": "118443", "bsz": "256", "num_updates": "332200", "lr": "0.000674545", "gnorm": "0.672", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "108458"} [2022-08-02 07:36:50,106][train_inner][INFO] - {"epoch": 7, "update": 6.458, "loss": "2.267", "ppl": "4.81", "wps": "365102", "ups": "3.1", "wpb": "117749", "bsz": "256", "num_updates": "332400", "lr": "0.000674343", "gnorm": "0.673", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "27.7", "wall": "108522"} [2022-08-02 07:37:54,566][train_inner][INFO] - {"epoch": 7, "update": 6.462, "loss": "2.263", "ppl": "4.8", "wps": "367073", "ups": "3.1", "wpb": "118306", "bsz": "256", "num_updates": "332600", "lr": "0.000674141", "gnorm": "0.67", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "24.3", "wall": "108587"} [2022-08-02 07:38:59,263][train_inner][INFO] - {"epoch": 7, "update": 6.466, "loss": "2.268", "ppl": "4.82", "wps": "365386", "ups": "3.09", "wpb": "118194", "bsz": "256", "num_updates": "332800", "lr": "0.000673939", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.7", "wall": "108652"} [2022-08-02 07:40:02,320][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-02 07:40:04,241][train_inner][INFO] - {"epoch": 7, "update": 6.47, "loss": "2.261", "ppl": "4.79", "wps": "362156", "ups": "3.08", "wpb": "117660", "bsz": "256", "num_updates": "333000", "lr": "0.000673737", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.5", "wall": "108717"} [2022-08-02 07:41:08,871][train_inner][INFO] - {"epoch": 7, "update": 6.474, "loss": "2.252", "ppl": "4.76", "wps": "367728", "ups": "3.09", "wpb": "118831", "bsz": "255.9", "num_updates": "333200", "lr": "0.000673535", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "26", "wall": "108781"} [2022-08-02 07:42:13,873][train_inner][INFO] - {"epoch": 7, "update": 6.478, "loss": "2.266", "ppl": "4.81", "wps": "363261", "ups": "3.08", "wpb": "118062", "bsz": "256", "num_updates": "333400", "lr": "0.000673333", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.3", "wall": "108846"} [2022-08-02 07:43:18,374][train_inner][INFO] - {"epoch": 7, "update": 6.482, "loss": "2.263", "ppl": "4.8", "wps": "368396", "ups": "3.1", "wpb": "118807", "bsz": "256", "num_updates": "333600", "lr": "0.000673131", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "24.9", "wall": "108911"} [2022-08-02 07:43:52,100][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 [2022-08-02 07:44:23,394][train_inner][INFO] - {"epoch": 7, "update": 6.486, "loss": "2.251", "ppl": "4.76", "wps": "365554", "ups": "3.08", "wpb": "118840", "bsz": "256", "num_updates": "333800", "lr": "0.000672929", "gnorm": "0.668", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.7", "wall": "108976"} [2022-08-02 07:45:27,839][train_inner][INFO] - {"epoch": 7, "update": 6.49, "loss": "2.266", "ppl": "4.81", "wps": "366509", "ups": "3.1", "wpb": "118096", "bsz": "256", "num_updates": "334000", "lr": "0.000672727", "gnorm": "0.67", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.5", "wall": "109040"} [2022-08-02 07:46:32,631][train_inner][INFO] - {"epoch": 7, "update": 6.493, "loss": "2.269", "ppl": "4.82", "wps": "364152", "ups": "3.09", "wpb": "117969", "bsz": "256", "num_updates": "334200", "lr": "0.000672525", "gnorm": "0.689", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "24.3", "wall": "109105"} [2022-08-02 07:47:37,495][train_inner][INFO] - {"epoch": 7, "update": 6.497, "loss": "2.266", "ppl": "4.81", "wps": "365746", "ups": "3.08", "wpb": "118618", "bsz": "256", "num_updates": "334400", "lr": "0.000672323", "gnorm": "0.673", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.5", "wall": "109170"} [2022-08-02 07:48:41,949][train_inner][INFO] - {"epoch": 7, "update": 6.501, "loss": "2.262", "ppl": "4.8", "wps": "366396", "ups": "3.1", "wpb": "118077", "bsz": "256", "num_updates": "334600", "lr": "0.000672121", "gnorm": "0.67", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21", "wall": "109234"} [2022-08-02 07:49:46,706][train_inner][INFO] - {"epoch": 7, "update": 6.505, "loss": "2.259", "ppl": "4.79", "wps": "365771", "ups": "3.09", "wpb": "118430", "bsz": "256", "num_updates": "334800", "lr": "0.000671919", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.8", "wall": "109299"} [2022-08-02 07:50:51,864][train_inner][INFO] - {"epoch": 7, "update": 6.509, "loss": "2.259", "ppl": "4.79", "wps": "362959", "ups": "3.07", "wpb": "118247", "bsz": "256", "num_updates": "335000", "lr": "0.000671717", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.4", "wall": "109364"} [2022-08-02 07:51:56,836][train_inner][INFO] - {"epoch": 7, "update": 6.513, "loss": "2.262", "ppl": "4.8", "wps": "363481", "ups": "3.08", "wpb": "118077", "bsz": "256", "num_updates": "335200", "lr": "0.000671515", "gnorm": "0.675", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.9", "wall": "109429"} [2022-08-02 07:53:02,041][train_inner][INFO] - {"epoch": 7, "update": 6.517, "loss": "2.264", "ppl": "4.8", "wps": "361667", "ups": "3.07", "wpb": "117912", "bsz": "256", "num_updates": "335400", "lr": "0.000671313", "gnorm": "0.668", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.1", "wall": "109494"} [2022-08-02 07:54:06,527][train_inner][INFO] - {"epoch": 7, "update": 6.521, "loss": "2.266", "ppl": "4.81", "wps": "364002", "ups": "3.1", "wpb": "117363", "bsz": "256", "num_updates": "335600", "lr": "0.000671111", "gnorm": "0.679", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.2", "wall": "109559"} [2022-08-02 07:55:11,129][train_inner][INFO] - {"epoch": 7, "update": 6.524, "loss": "2.258", "ppl": "4.78", "wps": "367398", "ups": "3.1", "wpb": "118671", "bsz": "256", "num_updates": "335800", "lr": "0.000670909", "gnorm": "0.665", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "24.9", "wall": "109623"} [2022-08-02 07:56:16,024][train_inner][INFO] - {"epoch": 7, "update": 6.528, "loss": "2.257", "ppl": "4.78", "wps": "366088", "ups": "3.08", "wpb": "118786", "bsz": "256", "num_updates": "336000", "lr": "0.000670707", "gnorm": "0.664", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "109688"} [2022-08-02 07:57:20,982][train_inner][INFO] - {"epoch": 7, "update": 6.532, "loss": "2.263", "ppl": "4.8", "wps": "363043", "ups": "3.08", "wpb": "117910", "bsz": "256", "num_updates": "336200", "lr": "0.000670505", "gnorm": "0.672", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "109753"} [2022-08-02 07:58:25,812][train_inner][INFO] - {"epoch": 7, "update": 6.536, "loss": "2.265", "ppl": "4.81", "wps": "363392", "ups": "3.09", "wpb": "117792", "bsz": "256", "num_updates": "336400", "lr": "0.000670303", "gnorm": "0.683", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.5", "wall": "109818"} [2022-08-02 07:59:30,762][train_inner][INFO] - {"epoch": 7, "update": 6.54, "loss": "2.253", "ppl": "4.77", "wps": "364593", "ups": "3.08", "wpb": "118400", "bsz": "256", "num_updates": "336600", "lr": "0.000670101", "gnorm": "0.67", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "109883"} [2022-08-02 08:00:36,025][train_inner][INFO] - {"epoch": 7, "update": 6.544, "loss": "2.262", "ppl": "4.8", "wps": "364395", "ups": "3.06", "wpb": "118906", "bsz": "256", "num_updates": "336800", "lr": "0.000669899", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "109948"} [2022-08-02 08:01:40,442][train_inner][INFO] - {"epoch": 7, "update": 6.548, "loss": "2.26", "ppl": "4.79", "wps": "366553", "ups": "3.1", "wpb": "118059", "bsz": "256", "num_updates": "337000", "lr": "0.000669697", "gnorm": "0.67", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.7", "wall": "110013"} [2022-08-02 08:02:45,738][train_inner][INFO] - {"epoch": 7, "update": 6.552, "loss": "2.256", "ppl": "4.78", "wps": "363085", "ups": "3.06", "wpb": "118537", "bsz": "256", "num_updates": "337200", "lr": "0.000669495", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.5", "wall": "110078"} [2022-08-02 08:03:50,606][train_inner][INFO] - {"epoch": 7, "update": 6.556, "loss": "2.26", "ppl": "4.79", "wps": "365975", "ups": "3.08", "wpb": "118699", "bsz": "256", "num_updates": "337400", "lr": "0.000669293", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "110143"} [2022-08-02 08:04:55,299][train_inner][INFO] - {"epoch": 7, "update": 6.559, "loss": "2.255", "ppl": "4.77", "wps": "365240", "ups": "3.09", "wpb": "118141", "bsz": "256", "num_updates": "337600", "lr": "0.000669091", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.4", "wall": "110208"} [2022-08-02 08:06:00,149][train_inner][INFO] - {"epoch": 7, "update": 6.563, "loss": "2.257", "ppl": "4.78", "wps": "365540", "ups": "3.08", "wpb": "118524", "bsz": "256", "num_updates": "337800", "lr": "0.000668889", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.1", "wall": "110272"} [2022-08-02 08:07:04,915][train_inner][INFO] - {"epoch": 7, "update": 6.567, "loss": "2.265", "ppl": "4.81", "wps": "364439", "ups": "3.09", "wpb": "118015", "bsz": "256", "num_updates": "338000", "lr": "0.000668687", "gnorm": "0.671", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "25.1", "wall": "110337"} [2022-08-02 08:08:10,049][train_inner][INFO] - {"epoch": 7, "update": 6.571, "loss": "2.255", "ppl": "4.77", "wps": "364334", "ups": "3.07", "wpb": "118650", "bsz": "256", "num_updates": "338200", "lr": "0.000668485", "gnorm": "0.674", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.2", "wall": "110402"} [2022-08-02 08:09:15,375][train_inner][INFO] - {"epoch": 7, "update": 6.575, "loss": "2.256", "ppl": "4.78", "wps": "362696", "ups": "3.06", "wpb": "118467", "bsz": "256", "num_updates": "338400", "lr": "0.000668283", "gnorm": "0.671", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.2", "wall": "110468"} [2022-08-02 08:10:20,178][train_inner][INFO] - {"epoch": 7, "update": 6.579, "loss": "2.259", "ppl": "4.79", "wps": "364296", "ups": "3.09", "wpb": "118034", "bsz": "256", "num_updates": "338600", "lr": "0.000668081", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.1", "wall": "110532"} [2022-08-02 08:10:38,406][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-02 08:11:25,079][train_inner][INFO] - {"epoch": 7, "update": 6.583, "loss": "2.259", "ppl": "4.79", "wps": "362278", "ups": "3.08", "wpb": "117560", "bsz": "256", "num_updates": "338800", "lr": "0.000667879", "gnorm": "0.674", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.7", "wall": "110597"} [2022-08-02 08:12:30,007][train_inner][INFO] - {"epoch": 7, "update": 6.587, "loss": "2.264", "ppl": "4.8", "wps": "362212", "ups": "3.08", "wpb": "117588", "bsz": "256", "num_updates": "339000", "lr": "0.000667677", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21", "wall": "110662"} [2022-08-02 08:13:35,105][train_inner][INFO] - {"epoch": 7, "update": 6.591, "loss": "2.258", "ppl": "4.78", "wps": "363955", "ups": "3.07", "wpb": "118461", "bsz": "256", "num_updates": "339200", "lr": "0.000667475", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "26.1", "wall": "110727"} [2022-08-02 08:14:39,702][train_inner][INFO] - {"epoch": 7, "update": 6.594, "loss": "2.253", "ppl": "4.77", "wps": "368769", "ups": "3.1", "wpb": "119106", "bsz": "256", "num_updates": "339400", "lr": "0.000667273", "gnorm": "0.665", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.1", "wall": "110792"} [2022-08-02 08:15:45,017][train_inner][INFO] - {"epoch": 7, "update": 6.598, "loss": "2.259", "ppl": "4.79", "wps": "361306", "ups": "3.06", "wpb": "117991", "bsz": "256", "num_updates": "339600", "lr": "0.000667071", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "110857"} [2022-08-02 08:16:49,583][train_inner][INFO] - {"epoch": 7, "update": 6.602, "loss": "2.259", "ppl": "4.79", "wps": "366766", "ups": "3.1", "wpb": "118401", "bsz": "256", "num_updates": "339800", "lr": "0.000666869", "gnorm": "0.681", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.8", "wall": "110922"} [2022-08-02 08:17:54,308][train_inner][INFO] - {"epoch": 7, "update": 6.606, "loss": "2.257", "ppl": "4.78", "wps": "365978", "ups": "3.09", "wpb": "118438", "bsz": "256", "num_updates": "340000", "lr": "0.000666667", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.5", "wall": "110987"} [2022-08-02 08:18:59,575][train_inner][INFO] - {"epoch": 7, "update": 6.61, "loss": "2.255", "ppl": "4.77", "wps": "364542", "ups": "3.06", "wpb": "118961", "bsz": "256", "num_updates": "340200", "lr": "0.000666465", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.3", "wall": "111052"} [2022-08-02 08:20:04,579][train_inner][INFO] - {"epoch": 7, "update": 6.614, "loss": "2.259", "ppl": "4.79", "wps": "364725", "ups": "3.08", "wpb": "118541", "bsz": "256", "num_updates": "340400", "lr": "0.000666263", "gnorm": "0.67", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.3", "wall": "111117"} [2022-08-02 08:21:09,733][train_inner][INFO] - {"epoch": 7, "update": 6.618, "loss": "2.254", "ppl": "4.77", "wps": "364370", "ups": "3.07", "wpb": "118699", "bsz": "256", "num_updates": "340600", "lr": "0.000666061", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "111182"} [2022-08-02 08:22:14,306][train_inner][INFO] - {"epoch": 7, "update": 6.622, "loss": "2.257", "ppl": "4.78", "wps": "366857", "ups": "3.1", "wpb": "118443", "bsz": "256", "num_updates": "340800", "lr": "0.000665859", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.4", "wall": "111247"} [2022-08-02 08:23:19,125][train_inner][INFO] - {"epoch": 7, "update": 6.625, "loss": "2.26", "ppl": "4.79", "wps": "365224", "ups": "3.09", "wpb": "118366", "bsz": "256", "num_updates": "341000", "lr": "0.000665657", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "28.2", "wall": "111311"} [2022-08-02 08:24:24,137][train_inner][INFO] - {"epoch": 7, "update": 6.629, "loss": "2.257", "ppl": "4.78", "wps": "363184", "ups": "3.08", "wpb": "118056", "bsz": "256", "num_updates": "341200", "lr": "0.000665455", "gnorm": "0.673", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22", "wall": "111376"} [2022-08-02 08:25:28,928][train_inner][INFO] - {"epoch": 7, "update": 6.633, "loss": "2.265", "ppl": "4.81", "wps": "363526", "ups": "3.09", "wpb": "117764", "bsz": "256", "num_updates": "341400", "lr": "0.000665253", "gnorm": "0.672", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.3", "wall": "111441"} [2022-08-02 08:26:34,007][train_inner][INFO] - {"epoch": 7, "update": 6.637, "loss": "2.251", "ppl": "4.76", "wps": "365555", "ups": "3.07", "wpb": "118948", "bsz": "256", "num_updates": "341600", "lr": "0.000665051", "gnorm": "0.671", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.8", "wall": "111506"} [2022-08-02 08:27:38,880][train_inner][INFO] - {"epoch": 7, "update": 6.641, "loss": "2.253", "ppl": "4.77", "wps": "366004", "ups": "3.08", "wpb": "118718", "bsz": "256", "num_updates": "341800", "lr": "0.000664848", "gnorm": "0.666", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.3", "wall": "111571"} [2022-08-02 08:28:43,549][train_inner][INFO] - {"epoch": 7, "update": 6.645, "loss": "2.26", "ppl": "4.79", "wps": "365596", "ups": "3.09", "wpb": "118211", "bsz": "256", "num_updates": "342000", "lr": "0.000664646", "gnorm": "0.672", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.9", "wall": "111636"} [2022-08-02 08:29:48,595][train_inner][INFO] - {"epoch": 7, "update": 6.649, "loss": "2.254", "ppl": "4.77", "wps": "364255", "ups": "3.07", "wpb": "118466", "bsz": "256", "num_updates": "342200", "lr": "0.000664444", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "26.4", "wall": "111701"} [2022-08-02 08:30:53,169][train_inner][INFO] - {"epoch": 7, "update": 6.653, "loss": "2.261", "ppl": "4.79", "wps": "363920", "ups": "3.1", "wpb": "117497", "bsz": "256", "num_updates": "342400", "lr": "0.000664242", "gnorm": "0.672", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.4", "wall": "111765"} [2022-08-02 08:31:58,147][train_inner][INFO] - {"epoch": 7, "update": 6.657, "loss": "2.256", "ppl": "4.78", "wps": "367091", "ups": "3.08", "wpb": "119261", "bsz": "256", "num_updates": "342600", "lr": "0.00066404", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.7", "wall": "111830"} [2022-08-02 08:32:50,749][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-02 08:33:03,321][train_inner][INFO] - {"epoch": 7, "update": 6.66, "loss": "2.263", "ppl": "4.8", "wps": "361323", "ups": "3.07", "wpb": "117743", "bsz": "256", "num_updates": "342800", "lr": "0.000663838", "gnorm": "0.693", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "26.9", "wall": "111896"} [2022-08-02 08:33:34,980][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-02 08:34:07,361][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 [2022-08-02 08:34:08,672][train_inner][INFO] - {"epoch": 7, "update": 6.664, "loss": "2.257", "ppl": "4.78", "wps": "362072", "ups": "3.06", "wpb": "118308", "bsz": "256", "num_updates": "343000", "lr": "0.000663636", "gnorm": "0.745", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.2", "wall": "111961"} [2022-08-02 08:34:16,692][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 [2022-08-02 08:34:18,293][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 [2022-08-02 08:34:23,047][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5 [2022-08-02 08:34:41,783][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.25 [2022-08-02 08:34:42,080][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.125 [2022-08-02 08:35:14,826][train_inner][INFO] - {"epoch": 7, "update": 6.668, "loss": "2.283", "ppl": "4.87", "wps": "358054", "ups": "3.02", "wpb": "118433", "bsz": "256", "num_updates": "343200", "lr": "0.000663434", "gnorm": "1.374", "clip": "2.5", "loss_scale": "0.125", "train_wall": "66", "gb_free": "24.3", "wall": "112027"} [2022-08-02 08:36:19,904][train_inner][INFO] - {"epoch": 7, "update": 6.672, "loss": "2.247", "ppl": "4.75", "wps": "362676", "ups": "3.07", "wpb": "118011", "bsz": "256", "num_updates": "343400", "lr": "0.000663232", "gnorm": "0.669", "clip": "0", "loss_scale": "0.125", "train_wall": "65", "gb_free": "22.2", "wall": "112092"} [2022-08-02 08:37:24,830][train_inner][INFO] - {"epoch": 7, "update": 6.676, "loss": "2.248", "ppl": "4.75", "wps": "366303", "ups": "3.08", "wpb": "118912", "bsz": "256", "num_updates": "343600", "lr": "0.00066303", "gnorm": "0.665", "clip": "0", "loss_scale": "0.125", "train_wall": "65", "gb_free": "19.7", "wall": "112157"} [2022-08-02 08:38:29,677][train_inner][INFO] - {"epoch": 7, "update": 6.68, "loss": "2.26", "ppl": "4.79", "wps": "366230", "ups": "3.08", "wpb": "118739", "bsz": "256", "num_updates": "343800", "lr": "0.000662828", "gnorm": "0.667", "clip": "0", "loss_scale": "0.125", "train_wall": "65", "gb_free": "25", "wall": "112222"} [2022-08-02 08:39:34,405][train_inner][INFO] - {"epoch": 7, "update": 6.684, "loss": "2.259", "ppl": "4.79", "wps": "367266", "ups": "3.09", "wpb": "118860", "bsz": "256", "num_updates": "344000", "lr": "0.000662626", "gnorm": "0.667", "clip": "0", "loss_scale": "0.125", "train_wall": "64", "gb_free": "22.1", "wall": "112287"} [2022-08-02 08:40:39,033][train_inner][INFO] - {"epoch": 7, "update": 6.688, "loss": "2.261", "ppl": "4.79", "wps": "366037", "ups": "3.09", "wpb": "118279", "bsz": "256", "num_updates": "344200", "lr": "0.000662424", "gnorm": "0.67", "clip": "0", "loss_scale": "0.125", "train_wall": "64", "gb_free": "21.2", "wall": "112351"} [2022-08-02 08:41:43,734][train_inner][INFO] - {"epoch": 7, "update": 6.692, "loss": "2.252", "ppl": "4.76", "wps": "365918", "ups": "3.09", "wpb": "118375", "bsz": "256", "num_updates": "344400", "lr": "0.000662222", "gnorm": "0.669", "clip": "0", "loss_scale": "0.125", "train_wall": "64", "gb_free": "21.4", "wall": "112416"} [2022-08-02 08:42:48,721][train_inner][INFO] - {"epoch": 7, "update": 6.696, "loss": "2.25", "ppl": "4.76", "wps": "363850", "ups": "3.08", "wpb": "118226", "bsz": "256", "num_updates": "344600", "lr": "0.00066202", "gnorm": "0.67", "clip": "0", "loss_scale": "0.125", "train_wall": "65", "gb_free": "20", "wall": "112481"} [2022-08-02 08:43:53,589][train_inner][INFO] - {"epoch": 7, "update": 6.699, "loss": "2.257", "ppl": "4.78", "wps": "364483", "ups": "3.08", "wpb": "118216", "bsz": "256", "num_updates": "344800", "lr": "0.000661818", "gnorm": "0.671", "clip": "0", "loss_scale": "0.125", "train_wall": "65", "gb_free": "22.2", "wall": "112546"} [2022-08-02 08:44:58,168][train_inner][INFO] - {"epoch": 7, "update": 6.703, "loss": "2.26", "ppl": "4.79", "wps": "364490", "ups": "3.1", "wpb": "117690", "bsz": "256", "num_updates": "345000", "lr": "0.000661616", "gnorm": "0.672", "clip": "0", "loss_scale": "0.125", "train_wall": "64", "gb_free": "20", "wall": "112610"} [2022-08-02 08:46:03,026][train_inner][INFO] - {"epoch": 7, "update": 6.707, "loss": "2.257", "ppl": "4.78", "wps": "365334", "ups": "3.08", "wpb": "118472", "bsz": "256", "num_updates": "345200", "lr": "0.000661414", "gnorm": "0.671", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "21.5", "wall": "112675"} [2022-08-02 08:47:07,945][train_inner][INFO] - {"epoch": 7, "update": 6.711, "loss": "2.255", "ppl": "4.77", "wps": "364376", "ups": "3.08", "wpb": "118274", "bsz": "256", "num_updates": "345400", "lr": "0.000661212", "gnorm": "0.673", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "26.6", "wall": "112740"} [2022-08-02 08:48:12,715][train_inner][INFO] - {"epoch": 7, "update": 6.715, "loss": "2.259", "ppl": "4.79", "wps": "363657", "ups": "3.09", "wpb": "117768", "bsz": "256", "num_updates": "345600", "lr": "0.00066101", "gnorm": "0.674", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "20", "wall": "112805"} [2022-08-02 08:49:18,673][train_inner][INFO] - {"epoch": 7, "update": 6.719, "loss": "2.257", "ppl": "4.78", "wps": "358272", "ups": "3.03", "wpb": "118154", "bsz": "256", "num_updates": "345800", "lr": "0.000660808", "gnorm": "0.674", "clip": "0", "loss_scale": "0.25", "train_wall": "66", "gb_free": "22.8", "wall": "112871"} [2022-08-02 08:50:23,775][train_inner][INFO] - {"epoch": 7, "update": 6.723, "loss": "2.257", "ppl": "4.78", "wps": "362867", "ups": "3.07", "wpb": "118114", "bsz": "256", "num_updates": "346000", "lr": "0.000660606", "gnorm": "0.67", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "22.9", "wall": "112936"} [2022-08-02 08:51:28,460][train_inner][INFO] - {"epoch": 7, "update": 6.727, "loss": "2.251", "ppl": "4.76", "wps": "367298", "ups": "3.09", "wpb": "118793", "bsz": "256", "num_updates": "346200", "lr": "0.000660404", "gnorm": "0.67", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "23.7", "wall": "113001"} [2022-08-02 08:52:32,934][train_inner][INFO] - {"epoch": 7, "update": 6.731, "loss": "2.259", "ppl": "4.79", "wps": "367133", "ups": "3.1", "wpb": "118351", "bsz": "256", "num_updates": "346400", "lr": "0.000660202", "gnorm": "0.839", "clip": "1", "loss_scale": "0.25", "train_wall": "64", "gb_free": "27", "wall": "113065"} [2022-08-02 08:53:37,923][train_inner][INFO] - {"epoch": 7, "update": 6.734, "loss": "2.259", "ppl": "4.79", "wps": "364073", "ups": "3.08", "wpb": "118302", "bsz": "256", "num_updates": "346600", "lr": "0.00066", "gnorm": "0.683", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "21", "wall": "113130"} [2022-08-02 08:54:42,906][train_inner][INFO] - {"epoch": 7, "update": 6.738, "loss": "2.26", "ppl": "4.79", "wps": "364111", "ups": "3.08", "wpb": "118304", "bsz": "256", "num_updates": "346800", "lr": "0.000659798", "gnorm": "0.67", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "20.2", "wall": "113195"} [2022-08-02 08:55:47,900][train_inner][INFO] - {"epoch": 7, "update": 6.742, "loss": "2.25", "ppl": "4.76", "wps": "363447", "ups": "3.08", "wpb": "118106", "bsz": "256", "num_updates": "347000", "lr": "0.000659596", "gnorm": "0.671", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "21.3", "wall": "113260"} [2022-08-02 08:56:53,819][train_inner][INFO] - {"epoch": 7, "update": 6.746, "loss": "2.255", "ppl": "4.77", "wps": "359142", "ups": "3.03", "wpb": "118370", "bsz": "256", "num_updates": "347200", "lr": "0.000659394", "gnorm": "0.669", "clip": "0", "loss_scale": "0.5", "train_wall": "66", "gb_free": "28.2", "wall": "113326"} [2022-08-02 08:57:58,924][train_inner][INFO] - {"epoch": 7, "update": 6.75, "loss": "2.252", "ppl": "4.76", "wps": "364544", "ups": "3.07", "wpb": "118667", "bsz": "256", "num_updates": "347400", "lr": "0.000659192", "gnorm": "0.668", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "26.3", "wall": "113391"} [2022-08-02 08:59:03,436][train_inner][INFO] - {"epoch": 7, "update": 6.754, "loss": "2.272", "ppl": "4.83", "wps": "366060", "ups": "3.1", "wpb": "118074", "bsz": "256", "num_updates": "347600", "lr": "0.00065899", "gnorm": "0.88", "clip": "0.5", "loss_scale": "0.5", "train_wall": "64", "gb_free": "23.6", "wall": "113456"} [2022-08-02 09:00:07,919][train_inner][INFO] - {"epoch": 7, "update": 6.758, "loss": "2.251", "ppl": "4.76", "wps": "366605", "ups": "3.1", "wpb": "118197", "bsz": "256", "num_updates": "347800", "lr": "0.000658788", "gnorm": "0.669", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "24.1", "wall": "113520"} [2022-08-02 09:01:12,962][train_inner][INFO] - {"epoch": 7, "update": 6.762, "loss": "2.254", "ppl": "4.77", "wps": "363629", "ups": "3.07", "wpb": "118254", "bsz": "256", "num_updates": "348000", "lr": "0.000658586", "gnorm": "0.674", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.3", "wall": "113585"} [2022-08-02 09:02:17,831][train_inner][INFO] - {"epoch": 7, "update": 6.765, "loss": "2.252", "ppl": "4.76", "wps": "364770", "ups": "3.08", "wpb": "118309", "bsz": "256", "num_updates": "348200", "lr": "0.000658384", "gnorm": "0.67", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.8", "wall": "113650"} [2022-08-02 09:03:22,594][train_inner][INFO] - {"epoch": 7, "update": 6.769, "loss": "2.256", "ppl": "4.78", "wps": "365708", "ups": "3.09", "wpb": "118421", "bsz": "256", "num_updates": "348400", "lr": "0.000658182", "gnorm": "0.669", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "19.9", "wall": "113715"} [2022-08-02 09:04:27,816][train_inner][INFO] - {"epoch": 7, "update": 6.773, "loss": "2.251", "ppl": "4.76", "wps": "363921", "ups": "3.07", "wpb": "118677", "bsz": "256", "num_updates": "348600", "lr": "0.00065798", "gnorm": "0.678", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.9", "wall": "113780"} [2022-08-02 09:05:32,777][train_inner][INFO] - {"epoch": 7, "update": 6.777, "loss": "2.248", "ppl": "4.75", "wps": "363279", "ups": "3.08", "wpb": "117992", "bsz": "256", "num_updates": "348800", "lr": "0.000657778", "gnorm": "0.67", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "19.8", "wall": "113845"} [2022-08-02 09:06:38,605][train_inner][INFO] - {"epoch": 7, "update": 6.781, "loss": "2.252", "ppl": "4.76", "wps": "358769", "ups": "3.04", "wpb": "118084", "bsz": "256", "num_updates": "349000", "lr": "0.000657576", "gnorm": "0.671", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "19.8", "wall": "113911"} [2022-08-02 09:07:43,254][train_inner][INFO] - {"epoch": 7, "update": 6.785, "loss": "2.25", "ppl": "4.76", "wps": "367928", "ups": "3.09", "wpb": "118928", "bsz": "256", "num_updates": "349200", "lr": "0.000657374", "gnorm": "0.803", "clip": "0.5", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20.6", "wall": "113976"} [2022-08-02 09:08:48,126][train_inner][INFO] - {"epoch": 7, "update": 6.789, "loss": "2.253", "ppl": "4.77", "wps": "364894", "ups": "3.08", "wpb": "118355", "bsz": "256", "num_updates": "349400", "lr": "0.000657172", "gnorm": "0.738", "clip": "0.5", "loss_scale": "1", "train_wall": "64", "gb_free": "22.9", "wall": "114040"} [2022-08-02 09:09:53,128][train_inner][INFO] - {"epoch": 7, "update": 6.793, "loss": "2.25", "ppl": "4.76", "wps": "362301", "ups": "3.08", "wpb": "117750", "bsz": "256", "num_updates": "349600", "lr": "0.00065697", "gnorm": "0.677", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.7", "wall": "114105"} [2022-08-02 09:10:58,314][train_inner][INFO] - {"epoch": 7, "update": 6.797, "loss": "2.258", "ppl": "4.78", "wps": "363041", "ups": "3.07", "wpb": "118323", "bsz": "256", "num_updates": "349800", "lr": "0.000656768", "gnorm": "0.67", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.1", "wall": "114171"} [2022-08-02 09:12:03,490][train_inner][INFO] - {"epoch": 7, "update": 6.8, "loss": "2.249", "ppl": "4.75", "wps": "363076", "ups": "3.07", "wpb": "118317", "bsz": "256", "num_updates": "350000", "lr": "0.000656566", "gnorm": "0.667", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "22.1", "wall": "114236"} [2022-08-02 09:12:03,491][fairseq_cli.train][INFO] - begin validation on "valid" subset [2022-08-02 09:12:26,104][valid][INFO] - {"epoch": 7, "valid_loss": "2.156", "valid_ppl": "4.46", "valid_wps": "1.61428e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "350000", "valid_best_loss": "2.156"} [2022-08-02 09:12:26,107][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 7 @ 350000 updates [2022-08-02 09:12:26,108][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_7_350000.pt [2022-08-02 09:12:35,852][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_7_350000.pt [2022-08-02 09:13:10,129][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/adan2/checkpoint_7_350000.pt (epoch 7 @ 350000 updates, score 2.156) (writing took 44.022107725963 seconds) [2022-08-02 09:14:14,772][train_inner][INFO] - {"epoch": 7, "update": 6.804, "loss": "2.246", "ppl": "4.74", "wps": "180791", "ups": "1.52", "wpb": "118673", "bsz": "256", "num_updates": "350200", "lr": "0.000656364", "gnorm": "0.667", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "24.3", "wall": "114367"} [2022-08-02 09:15:19,631][train_inner][INFO] - {"epoch": 7, "update": 6.808, "loss": "2.252", "ppl": "4.76", "wps": "366209", "ups": "3.08", "wpb": "118758", "bsz": "255.9", "num_updates": "350400", "lr": "0.000656162", "gnorm": "0.669", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "23.2", "wall": "114432"} [2022-08-02 09:16:24,415][train_inner][INFO] - {"epoch": 7, "update": 6.812, "loss": "2.256", "ppl": "4.78", "wps": "364319", "ups": "3.09", "wpb": "118007", "bsz": "256", "num_updates": "350600", "lr": "0.00065596", "gnorm": "0.671", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "22.7", "wall": "114497"} [2022-08-02 09:17:28,770][train_inner][INFO] - {"epoch": 7, "update": 6.816, "loss": "2.256", "ppl": "4.78", "wps": "366979", "ups": "3.11", "wpb": "118084", "bsz": "256", "num_updates": "350800", "lr": "0.000655758", "gnorm": "0.681", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "22.2", "wall": "114561"} [2022-08-02 09:18:33,494][train_inner][INFO] - {"epoch": 7, "update": 6.82, "loss": "2.257", "ppl": "4.78", "wps": "364630", "ups": "3.09", "wpb": "117999", "bsz": "256", "num_updates": "351000", "lr": "0.000655556", "gnorm": "0.676", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "23", "wall": "114626"} [2022-08-02 09:19:38,484][train_inner][INFO] - {"epoch": 7, "update": 6.824, "loss": "2.248", "ppl": "4.75", "wps": "364988", "ups": "3.08", "wpb": "118600", "bsz": "256", "num_updates": "351200", "lr": "0.000655354", "gnorm": "0.666", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "24.1", "wall": "114691"} [2022-08-02 09:20:43,180][train_inner][INFO] - {"epoch": 7, "update": 6.828, "loss": "2.253", "ppl": "4.77", "wps": "364834", "ups": "3.09", "wpb": "118016", "bsz": "256", "num_updates": "351400", "lr": "0.000655152", "gnorm": "0.67", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "23.5", "wall": "114756"} [2022-08-02 09:21:48,069][train_inner][INFO] - {"epoch": 7, "update": 6.832, "loss": "2.25", "ppl": "4.76", "wps": "365634", "ups": "3.08", "wpb": "118625", "bsz": "256", "num_updates": "351600", "lr": "0.000654949", "gnorm": "0.668", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "26.4", "wall": "114820"} [2022-08-02 09:22:53,967][train_inner][INFO] - {"epoch": 7, "update": 6.835, "loss": "2.25", "ppl": "4.76", "wps": "358651", "ups": "3.04", "wpb": "118170", "bsz": "256", "num_updates": "351800", "lr": "0.000654747", "gnorm": "0.67", "clip": "0", "loss_scale": "2", "train_wall": "66", "gb_free": "26.1", "wall": "114886"} [2022-08-02 09:23:59,376][train_inner][INFO] - {"epoch": 7, "update": 6.839, "loss": "2.251", "ppl": "4.76", "wps": "361600", "ups": "3.06", "wpb": "118258", "bsz": "256", "num_updates": "352000", "lr": "0.000654545", "gnorm": "0.667", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.3", "wall": "114952"} [2022-08-02 09:25:03,904][train_inner][INFO] - {"epoch": 7, "update": 6.843, "loss": "2.254", "ppl": "4.77", "wps": "366366", "ups": "3.1", "wpb": "118202", "bsz": "256", "num_updates": "352200", "lr": "0.000654343", "gnorm": "0.668", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.1", "wall": "115016"} [2022-08-02 09:26:08,615][train_inner][INFO] - {"epoch": 7, "update": 6.847, "loss": "2.251", "ppl": "4.76", "wps": "364998", "ups": "3.09", "wpb": "118096", "bsz": "256", "num_updates": "352400", "lr": "0.000654141", "gnorm": "0.669", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.8", "wall": "115081"} [2022-08-02 09:27:13,810][train_inner][INFO] - {"epoch": 7, "update": 6.851, "loss": "2.25", "ppl": "4.76", "wps": "365282", "ups": "3.07", "wpb": "119070", "bsz": "256", "num_updates": "352600", "lr": "0.000653939", "gnorm": "0.668", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "25.7", "wall": "115146"} [2022-08-02 09:28:18,368][train_inner][INFO] - {"epoch": 7, "update": 6.855, "loss": "2.25", "ppl": "4.76", "wps": "364923", "ups": "3.1", "wpb": "117792", "bsz": "256", "num_updates": "352800", "lr": "0.000653737", "gnorm": "0.668", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "19.8", "wall": "115211"} [2022-08-02 09:29:23,182][train_inner][INFO] - {"epoch": 7, "update": 6.859, "loss": "2.256", "ppl": "4.78", "wps": "363480", "ups": "3.09", "wpb": "117791", "bsz": "256", "num_updates": "353000", "lr": "0.000653535", "gnorm": "0.672", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.6", "wall": "115276"} [2022-08-02 09:30:28,327][train_inner][INFO] - {"epoch": 7, "update": 6.863, "loss": "2.25", "ppl": "4.76", "wps": "363746", "ups": "3.07", "wpb": "118479", "bsz": "256", "num_updates": "353200", "lr": "0.000653333", "gnorm": "0.666", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.2", "wall": "115341"} [2022-08-02 09:31:33,432][train_inner][INFO] - {"epoch": 7, "update": 6.866, "loss": "2.251", "ppl": "4.76", "wps": "363797", "ups": "3.07", "wpb": "118424", "bsz": "256", "num_updates": "353400", "lr": "0.000653131", "gnorm": "0.668", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.7", "wall": "115406"} [2022-08-02 09:32:38,763][train_inner][INFO] - {"epoch": 7, "update": 6.87, "loss": "2.249", "ppl": "4.75", "wps": "365366", "ups": "3.06", "wpb": "119348", "bsz": "256", "num_updates": "353600", "lr": "0.000652929", "gnorm": "0.666", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22", "wall": "115471"} [2022-08-02 09:33:43,813][train_inner][INFO] - {"epoch": 7, "update": 6.874, "loss": "2.247", "ppl": "4.75", "wps": "363992", "ups": "3.07", "wpb": "118386", "bsz": "256", "num_updates": "353800", "lr": "0.000652727", "gnorm": "0.668", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "24.9", "wall": "115536"} [2022-08-02 09:34:48,868][train_inner][INFO] - {"epoch": 7, "update": 6.878, "loss": "2.249", "ppl": "4.75", "wps": "363538", "ups": "3.07", "wpb": "118247", "bsz": "256", "num_updates": "354000", "lr": "0.000652525", "gnorm": "0.67", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.1", "wall": "115601"} [2022-08-02 09:35:54,223][train_inner][INFO] - {"epoch": 7, "update": 6.882, "loss": "2.249", "ppl": "4.75", "wps": "363239", "ups": "3.06", "wpb": "118697", "bsz": "256", "num_updates": "354200", "lr": "0.000652323", "gnorm": "0.668", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.2", "wall": "115667"} [2022-08-02 09:37:00,261][train_inner][INFO] - {"epoch": 7, "update": 6.886, "loss": "2.25", "ppl": "4.76", "wps": "359934", "ups": "3.03", "wpb": "118844", "bsz": "256", "num_updates": "354400", "lr": "0.000652121", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "66", "gb_free": "23", "wall": "115733"} [2022-08-02 09:38:04,916][train_inner][INFO] - {"epoch": 7, "update": 6.89, "loss": "2.248", "ppl": "4.75", "wps": "366315", "ups": "3.09", "wpb": "118420", "bsz": "256", "num_updates": "354600", "lr": "0.000651919", "gnorm": "0.669", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.7", "wall": "115797"} [2022-08-02 09:39:09,859][train_inner][INFO] - {"epoch": 7, "update": 6.894, "loss": "2.247", "ppl": "4.75", "wps": "364479", "ups": "3.08", "wpb": "118350", "bsz": "256", "num_updates": "354800", "lr": "0.000651717", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20", "wall": "115862"} [2022-08-02 09:40:14,810][train_inner][INFO] - {"epoch": 7, "update": 6.898, "loss": "2.246", "ppl": "4.74", "wps": "364723", "ups": "3.08", "wpb": "118444", "bsz": "256", "num_updates": "355000", "lr": "0.000651515", "gnorm": "0.668", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.1", "wall": "115927"} [2022-08-02 09:41:19,685][train_inner][INFO] - {"epoch": 7, "update": 6.901, "loss": "2.255", "ppl": "4.77", "wps": "366395", "ups": "3.08", "wpb": "118848", "bsz": "256", "num_updates": "355200", "lr": "0.000651313", "gnorm": "0.72", "clip": "0.5", "loss_scale": "4", "train_wall": "65", "gb_free": "20.2", "wall": "115992"} [2022-08-02 09:42:24,692][train_inner][INFO] - {"epoch": 7, "update": 6.905, "loss": "2.25", "ppl": "4.76", "wps": "363366", "ups": "3.08", "wpb": "118103", "bsz": "256", "num_updates": "355400", "lr": "0.000651111", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.3", "wall": "116057"} [2022-08-02 09:43:29,860][train_inner][INFO] - {"epoch": 7, "update": 6.909, "loss": "2.243", "ppl": "4.73", "wps": "363731", "ups": "3.07", "wpb": "118516", "bsz": "256", "num_updates": "355600", "lr": "0.000650909", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.3", "wall": "116122"} [2022-08-02 09:44:34,766][train_inner][INFO] - {"epoch": 7, "update": 6.913, "loss": "2.247", "ppl": "4.75", "wps": "363896", "ups": "3.08", "wpb": "118095", "bsz": "256", "num_updates": "355800", "lr": "0.000650707", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.8", "wall": "116187"} [2022-08-02 09:45:39,605][train_inner][INFO] - {"epoch": 7, "update": 6.917, "loss": "2.249", "ppl": "4.75", "wps": "363372", "ups": "3.08", "wpb": "117800", "bsz": "256", "num_updates": "356000", "lr": "0.000650505", "gnorm": "0.67", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23.8", "wall": "116252"} [2022-08-02 09:46:44,471][train_inner][INFO] - {"epoch": 7, "update": 6.921, "loss": "2.246", "ppl": "4.74", "wps": "364839", "ups": "3.08", "wpb": "118327", "bsz": "256", "num_updates": "356200", "lr": "0.000650303", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21", "wall": "116317"} [2022-08-02 09:47:49,526][train_inner][INFO] - {"epoch": 7, "update": 6.925, "loss": "2.245", "ppl": "4.74", "wps": "363529", "ups": "3.07", "wpb": "118244", "bsz": "256", "num_updates": "356400", "lr": "0.000650101", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.8", "wall": "116382"} [2022-08-02 09:48:54,337][train_inner][INFO] - {"epoch": 7, "update": 6.929, "loss": "2.247", "ppl": "4.75", "wps": "365434", "ups": "3.09", "wpb": "118421", "bsz": "256", "num_updates": "356600", "lr": "0.000649899", "gnorm": "0.665", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23", "wall": "116447"} [2022-08-02 09:50:00,157][train_inner][INFO] - {"epoch": 7, "update": 6.933, "loss": "2.251", "ppl": "4.76", "wps": "358068", "ups": "3.04", "wpb": "117838", "bsz": "256", "num_updates": "356800", "lr": "0.000649697", "gnorm": "0.67", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "116512"} [2022-08-02 09:51:05,038][train_inner][INFO] - {"epoch": 7, "update": 6.936, "loss": "2.244", "ppl": "4.74", "wps": "364814", "ups": "3.08", "wpb": "118345", "bsz": "256", "num_updates": "357000", "lr": "0.000649495", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21", "wall": "116577"} [2022-08-02 09:52:10,249][train_inner][INFO] - {"epoch": 7, "update": 6.94, "loss": "2.248", "ppl": "4.75", "wps": "363998", "ups": "3.07", "wpb": "118681", "bsz": "256", "num_updates": "357200", "lr": "0.000649293", "gnorm": "0.664", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.5", "wall": "116643"} [2022-08-02 09:53:14,984][train_inner][INFO] - {"epoch": 7, "update": 6.944, "loss": "2.246", "ppl": "4.74", "wps": "365318", "ups": "3.09", "wpb": "118242", "bsz": "256", "num_updates": "357400", "lr": "0.000649091", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.1", "wall": "116707"} [2022-08-02 09:54:20,131][train_inner][INFO] - {"epoch": 7, "update": 6.948, "loss": "2.247", "ppl": "4.75", "wps": "364978", "ups": "3.07", "wpb": "118886", "bsz": "256", "num_updates": "357600", "lr": "0.000648889", "gnorm": "0.666", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.1", "wall": "116772"} [2022-08-02 09:55:24,831][train_inner][INFO] - {"epoch": 7, "update": 6.952, "loss": "2.24", "ppl": "4.72", "wps": "366792", "ups": "3.09", "wpb": "118654", "bsz": "256", "num_updates": "357800", "lr": "0.000648687", "gnorm": "0.667", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.8", "wall": "116837"} [2022-08-02 09:56:29,608][train_inner][INFO] - {"epoch": 7, "update": 6.956, "loss": "2.242", "ppl": "4.73", "wps": "367052", "ups": "3.09", "wpb": "118882", "bsz": "256", "num_updates": "358000", "lr": "0.000648485", "gnorm": "0.683", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "116902"} [2022-08-02 09:57:34,739][train_inner][INFO] - {"epoch": 7, "update": 6.96, "loss": "2.243", "ppl": "4.73", "wps": "363703", "ups": "3.07", "wpb": "118438", "bsz": "256", "num_updates": "358200", "lr": "0.000648283", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "28.2", "wall": "116967"} [2022-08-02 09:58:39,465][train_inner][INFO] - {"epoch": 7, "update": 6.964, "loss": "2.246", "ppl": "4.74", "wps": "365227", "ups": "3.09", "wpb": "118198", "bsz": "256", "num_updates": "358400", "lr": "0.000648081", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.7", "wall": "117032"} [2022-08-02 09:59:44,206][train_inner][INFO] - {"epoch": 7, "update": 6.967, "loss": "2.247", "ppl": "4.75", "wps": "367064", "ups": "3.09", "wpb": "118818", "bsz": "256", "num_updates": "358600", "lr": "0.000647879", "gnorm": "0.667", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "117097"} [2022-08-02 10:00:05,683][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-02 10:00:14,087][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 [2022-08-02 10:00:49,499][train_inner][INFO] - {"epoch": 7, "update": 6.971, "loss": "2.245", "ppl": "4.74", "wps": "364347", "ups": "3.06", "wpb": "118946", "bsz": "256", "num_updates": "358800", "lr": "0.000647677", "gnorm": "0.67", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21", "wall": "117162"} [2022-08-02 10:01:54,436][train_inner][INFO] - {"epoch": 7, "update": 6.975, "loss": "2.254", "ppl": "4.77", "wps": "362577", "ups": "3.08", "wpb": "117721", "bsz": "256", "num_updates": "359000", "lr": "0.000647475", "gnorm": "0.683", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "26.5", "wall": "117227"} [2022-08-02 10:02:59,078][train_inner][INFO] - {"epoch": 7, "update": 6.979, "loss": "2.241", "ppl": "4.73", "wps": "367020", "ups": "3.09", "wpb": "118624", "bsz": "256", "num_updates": "359200", "lr": "0.000647273", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.9", "wall": "117291"} [2022-08-02 10:04:04,069][train_inner][INFO] - {"epoch": 7, "update": 6.983, "loss": "2.246", "ppl": "4.74", "wps": "362349", "ups": "3.08", "wpb": "117745", "bsz": "256", "num_updates": "359400", "lr": "0.000647071", "gnorm": "0.688", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.9", "wall": "117356"} [2022-08-02 10:05:08,622][train_inner][INFO] - {"epoch": 7, "update": 6.987, "loss": "2.238", "ppl": "4.72", "wps": "369710", "ups": "3.1", "wpb": "119327", "bsz": "256", "num_updates": "359600", "lr": "0.000646869", "gnorm": "0.665", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.9", "wall": "117421"} [2022-08-02 10:06:13,434][train_inner][INFO] - {"epoch": 7, "update": 6.991, "loss": "2.251", "ppl": "4.76", "wps": "363168", "ups": "3.09", "wpb": "117686", "bsz": "256", "num_updates": "359800", "lr": "0.000646667", "gnorm": "0.671", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.1", "wall": "117486"} [2022-08-02 10:07:18,356][train_inner][INFO] - {"epoch": 7, "update": 6.995, "loss": "2.245", "ppl": "4.74", "wps": "365030", "ups": "3.08", "wpb": "118491", "bsz": "256", "num_updates": "360000", "lr": "0.000646465", "gnorm": "0.669", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.6", "wall": "117551"} [2022-08-02 10:08:23,021][train_inner][INFO] - {"epoch": 7, "update": 6.999, "loss": "2.247", "ppl": "4.75", "wps": "365725", "ups": "3.09", "wpb": "118246", "bsz": "256", "num_updates": "360200", "lr": "0.000646263", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.6", "wall": "117615"} [2022-08-02 10:08:46,673][fairseq_cli.train][INFO] - end of epoch 7 (average epoch stats below) [2022-08-02 10:08:46,674][train][INFO] - {"epoch": 7, "train_loss": "2.262", "train_ppl": "4.8", "train_wps": "362538", "train_ups": "3.06", "train_wpb": "118297", "train_bsz": "256", "train_num_updates": "360273", "train_lr": "0.000646189", "train_gnorm": "0.677", "train_clip": "0", "train_loss_scale": "4", "train_train_wall": "16628", "train_gb_free": "20.5", "train_wall": "117639"} [2022-08-02 10:08:46,787][fairseq.data.iterators][INFO] - grouped total_num_itrs = 51488 [2022-08-02 10:08:46,790][fairseq.trainer][INFO] - begin training epoch 8 [2022-08-02 10:08:46,790][fairseq_cli.train][INFO] - Start iterating over samples [2022-08-02 10:09:42,204][train_inner][INFO] - {"epoch": 8, "update": 7.002, "loss": "2.248", "ppl": "4.75", "wps": "297619", "ups": "2.53", "wpb": "117831", "bsz": "255.4", "num_updates": "360400", "lr": "0.000646061", "gnorm": "0.672", "clip": "0", "loss_scale": "4", "train_wall": "67", "gb_free": "20.1", "wall": "117695"} [2022-08-02 10:10:47,103][train_inner][INFO] - {"epoch": 8, "update": 7.006, "loss": "2.244", "ppl": "4.74", "wps": "362919", "ups": "3.08", "wpb": "117765", "bsz": "256", "num_updates": "360600", "lr": "0.000645859", "gnorm": "0.669", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.8", "wall": "117759"} [2022-08-02 10:11:51,790][train_inner][INFO] - {"epoch": 8, "update": 7.01, "loss": "2.246", "ppl": "4.75", "wps": "366016", "ups": "3.09", "wpb": "118380", "bsz": "256", "num_updates": "360800", "lr": "0.000645657", "gnorm": "0.672", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20", "wall": "117824"} [2022-08-02 10:12:56,650][train_inner][INFO] - {"epoch": 8, "update": 7.014, "loss": "2.24", "ppl": "4.72", "wps": "362696", "ups": "3.08", "wpb": "117621", "bsz": "256", "num_updates": "361000", "lr": "0.000645455", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.7", "wall": "117889"} [2022-08-02 10:14:01,753][train_inner][INFO] - {"epoch": 8, "update": 7.018, "loss": "2.243", "ppl": "4.73", "wps": "364172", "ups": "3.07", "wpb": "118541", "bsz": "255.9", "num_updates": "361200", "lr": "0.000645253", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "117954"} [2022-08-02 10:15:06,903][train_inner][INFO] - {"epoch": 8, "update": 7.022, "loss": "2.243", "ppl": "4.73", "wps": "361486", "ups": "3.07", "wpb": "117752", "bsz": "256", "num_updates": "361400", "lr": "0.000645051", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.5", "wall": "118019"} [2022-08-02 10:16:11,961][train_inner][INFO] - {"epoch": 8, "update": 7.026, "loss": "2.242", "ppl": "4.73", "wps": "362869", "ups": "3.07", "wpb": "118035", "bsz": "256", "num_updates": "361600", "lr": "0.000644848", "gnorm": "0.679", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21", "wall": "118084"} [2022-08-02 10:17:16,781][train_inner][INFO] - {"epoch": 8, "update": 7.03, "loss": "2.242", "ppl": "4.73", "wps": "365451", "ups": "3.09", "wpb": "118442", "bsz": "256", "num_updates": "361800", "lr": "0.000644646", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.1", "wall": "118149"} [2022-08-02 10:17:42,933][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 [2022-08-02 10:18:22,100][train_inner][INFO] - {"epoch": 8, "update": 7.034, "loss": "2.245", "ppl": "4.74", "wps": "362025", "ups": "3.06", "wpb": "118234", "bsz": "256", "num_updates": "362000", "lr": "0.000644444", "gnorm": "0.674", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.9", "wall": "118214"} [2022-08-02 10:19:26,831][train_inner][INFO] - {"epoch": 8, "update": 7.037, "loss": "2.236", "ppl": "4.71", "wps": "367702", "ups": "3.09", "wpb": "119008", "bsz": "256", "num_updates": "362200", "lr": "0.000644242", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.5", "wall": "118279"} [2022-08-02 10:20:31,905][train_inner][INFO] - {"epoch": 8, "update": 7.041, "loss": "2.242", "ppl": "4.73", "wps": "364066", "ups": "3.07", "wpb": "118456", "bsz": "256", "num_updates": "362400", "lr": "0.00064404", "gnorm": "0.67", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.9", "wall": "118344"} [2022-08-02 10:21:36,409][train_inner][INFO] - {"epoch": 8, "update": 7.045, "loss": "2.249", "ppl": "4.76", "wps": "363919", "ups": "3.1", "wpb": "117368", "bsz": "256", "num_updates": "362600", "lr": "0.000643838", "gnorm": "0.672", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.7", "wall": "118409"} [2022-08-02 10:22:41,627][train_inner][INFO] - {"epoch": 8, "update": 7.049, "loss": "2.247", "ppl": "4.75", "wps": "361353", "ups": "3.07", "wpb": "117832", "bsz": "256", "num_updates": "362800", "lr": "0.000643636", "gnorm": "0.67", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "28.2", "wall": "118474"} [2022-08-02 10:23:46,684][train_inner][INFO] - {"epoch": 8, "update": 7.053, "loss": "2.244", "ppl": "4.74", "wps": "365819", "ups": "3.07", "wpb": "118995", "bsz": "256", "num_updates": "363000", "lr": "0.000643434", "gnorm": "0.666", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.7", "wall": "118539"} [2022-08-02 10:24:51,365][train_inner][INFO] - {"epoch": 8, "update": 7.057, "loss": "2.245", "ppl": "4.74", "wps": "365470", "ups": "3.09", "wpb": "118192", "bsz": "256", "num_updates": "363200", "lr": "0.000643232", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.5", "wall": "118604"} [2022-08-02 10:25:57,091][train_inner][INFO] - {"epoch": 8, "update": 7.061, "loss": "2.247", "ppl": "4.75", "wps": "360772", "ups": "3.04", "wpb": "118559", "bsz": "256", "num_updates": "363400", "lr": "0.00064303", "gnorm": "0.681", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.5", "wall": "118669"} [2022-08-02 10:27:02,423][train_inner][INFO] - {"epoch": 8, "update": 7.065, "loss": "2.239", "ppl": "4.72", "wps": "364553", "ups": "3.06", "wpb": "119083", "bsz": "256", "num_updates": "363600", "lr": "0.000642828", "gnorm": "0.666", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.9", "wall": "118735"} [2022-08-02 10:28:07,330][train_inner][INFO] - {"epoch": 8, "update": 7.069, "loss": "2.238", "ppl": "4.72", "wps": "364757", "ups": "3.08", "wpb": "118374", "bsz": "256", "num_updates": "363800", "lr": "0.000642626", "gnorm": "0.669", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "26.7", "wall": "118800"} [2022-08-02 10:29:12,264][train_inner][INFO] - {"epoch": 8, "update": 7.072, "loss": "2.244", "ppl": "4.74", "wps": "364808", "ups": "3.08", "wpb": "118441", "bsz": "256", "num_updates": "364000", "lr": "0.000642424", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.3", "wall": "118865"} [2022-08-02 10:30:16,776][train_inner][INFO] - {"epoch": 8, "update": 7.076, "loss": "2.238", "ppl": "4.72", "wps": "366773", "ups": "3.1", "wpb": "118303", "bsz": "256", "num_updates": "364200", "lr": "0.000642222", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.1", "wall": "118929"} [2022-08-02 10:31:21,720][train_inner][INFO] - {"epoch": 8, "update": 7.08, "loss": "2.242", "ppl": "4.73", "wps": "364398", "ups": "3.08", "wpb": "118326", "bsz": "256", "num_updates": "364400", "lr": "0.00064202", "gnorm": "0.67", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.6", "wall": "118994"} [2022-08-02 10:32:26,944][train_inner][INFO] - {"epoch": 8, "update": 7.084, "loss": "2.236", "ppl": "4.71", "wps": "364279", "ups": "3.07", "wpb": "118797", "bsz": "256", "num_updates": "364600", "lr": "0.000641818", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "119059"} [2022-08-02 10:33:31,597][train_inner][INFO] - {"epoch": 8, "update": 7.088, "loss": "2.239", "ppl": "4.72", "wps": "365903", "ups": "3.09", "wpb": "118282", "bsz": "256", "num_updates": "364800", "lr": "0.000641616", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.7", "wall": "119124"} [2022-08-02 10:34:36,324][train_inner][INFO] - {"epoch": 8, "update": 7.092, "loss": "2.241", "ppl": "4.73", "wps": "363436", "ups": "3.09", "wpb": "117619", "bsz": "256", "num_updates": "365000", "lr": "0.000641414", "gnorm": "0.67", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.2", "wall": "119189"} [2022-08-02 10:35:41,304][train_inner][INFO] - {"epoch": 8, "update": 7.096, "loss": "2.236", "ppl": "4.71", "wps": "365073", "ups": "3.08", "wpb": "118610", "bsz": "256", "num_updates": "365200", "lr": "0.000641212", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21", "wall": "119254"} [2022-08-02 10:36:46,187][train_inner][INFO] - {"epoch": 8, "update": 7.1, "loss": "2.239", "ppl": "4.72", "wps": "365202", "ups": "3.08", "wpb": "118475", "bsz": "256", "num_updates": "365400", "lr": "0.00064101", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.5", "wall": "119319"} [2022-08-02 10:37:51,291][train_inner][INFO] - {"epoch": 8, "update": 7.103, "loss": "2.24", "ppl": "4.72", "wps": "362046", "ups": "3.07", "wpb": "117851", "bsz": "256", "num_updates": "365600", "lr": "0.000640808", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.3", "wall": "119384"} [2022-08-02 10:38:56,312][train_inner][INFO] - {"epoch": 8, "update": 7.107, "loss": "2.232", "ppl": "4.7", "wps": "363555", "ups": "3.08", "wpb": "118192", "bsz": "256", "num_updates": "365800", "lr": "0.000640606", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.5", "wall": "119449"} [2022-08-02 10:40:00,820][train_inner][INFO] - {"epoch": 8, "update": 7.111, "loss": "2.248", "ppl": "4.75", "wps": "365384", "ups": "3.1", "wpb": "117850", "bsz": "256", "num_updates": "366000", "lr": "0.000640404", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "119513"} [2022-08-02 10:41:05,818][train_inner][INFO] - {"epoch": 8, "update": 7.115, "loss": "2.238", "ppl": "4.72", "wps": "364503", "ups": "3.08", "wpb": "118459", "bsz": "256", "num_updates": "366200", "lr": "0.000640202", "gnorm": "0.668", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "119578"} [2022-08-02 10:42:10,689][train_inner][INFO] - {"epoch": 8, "update": 7.119, "loss": "2.246", "ppl": "4.74", "wps": "364242", "ups": "3.08", "wpb": "118141", "bsz": "256", "num_updates": "366400", "lr": "0.00064", "gnorm": "0.672", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24", "wall": "119643"} [2022-08-02 10:43:15,540][train_inner][INFO] - {"epoch": 8, "update": 7.123, "loss": "2.238", "ppl": "4.72", "wps": "363514", "ups": "3.08", "wpb": "117871", "bsz": "256", "num_updates": "366600", "lr": "0.000639798", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.7", "wall": "119708"} [2022-08-02 10:44:20,366][train_inner][INFO] - {"epoch": 8, "update": 7.127, "loss": "2.24", "ppl": "4.72", "wps": "364497", "ups": "3.09", "wpb": "118142", "bsz": "256", "num_updates": "366800", "lr": "0.000639596", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21", "wall": "119773"} [2022-08-02 10:45:25,057][train_inner][INFO] - {"epoch": 8, "update": 7.131, "loss": "2.242", "ppl": "4.73", "wps": "364134", "ups": "3.09", "wpb": "117778", "bsz": "256", "num_updates": "367000", "lr": "0.000639394", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.4", "wall": "119837"} [2022-08-02 10:46:30,202][train_inner][INFO] - {"epoch": 8, "update": 7.135, "loss": "2.244", "ppl": "4.74", "wps": "363816", "ups": "3.07", "wpb": "118504", "bsz": "256", "num_updates": "367200", "lr": "0.000639192", "gnorm": "0.668", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "119903"} [2022-08-02 10:47:34,990][train_inner][INFO] - {"epoch": 8, "update": 7.138, "loss": "2.247", "ppl": "4.75", "wps": "363841", "ups": "3.09", "wpb": "117860", "bsz": "256", "num_updates": "367400", "lr": "0.00063899", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.7", "wall": "119967"} [2022-08-02 10:48:40,231][train_inner][INFO] - {"epoch": 8, "update": 7.142, "loss": "2.24", "ppl": "4.72", "wps": "362273", "ups": "3.07", "wpb": "118173", "bsz": "256", "num_updates": "367600", "lr": "0.000638788", "gnorm": "0.668", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.6", "wall": "120033"} [2022-08-02 10:49:44,986][train_inner][INFO] - {"epoch": 8, "update": 7.146, "loss": "2.237", "ppl": "4.71", "wps": "366262", "ups": "3.09", "wpb": "118586", "bsz": "256", "num_updates": "367800", "lr": "0.000638586", "gnorm": "0.679", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.3", "wall": "120097"} [2022-08-02 10:50:49,573][train_inner][INFO] - {"epoch": 8, "update": 7.15, "loss": "2.234", "ppl": "4.71", "wps": "368034", "ups": "3.1", "wpb": "118849", "bsz": "256", "num_updates": "368000", "lr": "0.000638384", "gnorm": "0.666", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.8", "wall": "120162"} [2022-08-02 10:51:54,535][train_inner][INFO] - {"epoch": 8, "update": 7.154, "loss": "2.237", "ppl": "4.72", "wps": "364844", "ups": "3.08", "wpb": "118502", "bsz": "256", "num_updates": "368200", "lr": "0.000638182", "gnorm": "0.665", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "19.7", "wall": "120227"} [2022-08-02 10:53:00,944][train_inner][INFO] - {"epoch": 8, "update": 7.158, "loss": "2.235", "ppl": "4.71", "wps": "356166", "ups": "3.01", "wpb": "118261", "bsz": "256", "num_updates": "368400", "lr": "0.00063798", "gnorm": "0.669", "clip": "0", "loss_scale": "32", "train_wall": "66", "gb_free": "19.9", "wall": "120293"} [2022-08-02 10:54:05,802][train_inner][INFO] - {"epoch": 8, "update": 7.162, "loss": "2.242", "ppl": "4.73", "wps": "362295", "ups": "3.08", "wpb": "117488", "bsz": "256", "num_updates": "368600", "lr": "0.000637778", "gnorm": "0.67", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20.1", "wall": "120358"} [2022-08-02 10:55:10,655][train_inner][INFO] - {"epoch": 8, "update": 7.166, "loss": "2.239", "ppl": "4.72", "wps": "363290", "ups": "3.08", "wpb": "117801", "bsz": "256", "num_updates": "368800", "lr": "0.000637576", "gnorm": "0.669", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20.6", "wall": "120423"} [2022-08-02 10:55:22,162][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-02 10:56:16,957][train_inner][INFO] - {"epoch": 8, "update": 7.17, "loss": "2.238", "ppl": "4.72", "wps": "357094", "ups": "3.02", "wpb": "118378", "bsz": "256", "num_updates": "369000", "lr": "0.000637374", "gnorm": "0.672", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "20", "wall": "120489"} [2022-08-02 10:57:21,153][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-02 10:57:21,732][train_inner][INFO] - {"epoch": 8, "update": 7.173, "loss": "2.229", "ppl": "4.69", "wps": "365071", "ups": "3.09", "wpb": "118237", "bsz": "256", "num_updates": "369200", "lr": "0.000637172", "gnorm": "0.67", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.9", "wall": "120554"} [2022-08-02 10:57:33,778][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 [2022-08-02 10:58:27,114][train_inner][INFO] - {"epoch": 8, "update": 7.177, "loss": "2.244", "ppl": "4.74", "wps": "362040", "ups": "3.06", "wpb": "118354", "bsz": "256", "num_updates": "369400", "lr": "0.00063697", "gnorm": "0.673", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.3", "wall": "120619"} [2022-08-02 10:59:31,810][train_inner][INFO] - {"epoch": 8, "update": 7.181, "loss": "2.241", "ppl": "4.73", "wps": "363960", "ups": "3.09", "wpb": "117733", "bsz": "256", "num_updates": "369600", "lr": "0.000636768", "gnorm": "0.671", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.9", "wall": "120684"} [2022-08-02 11:00:36,347][train_inner][INFO] - {"epoch": 8, "update": 7.185, "loss": "2.243", "ppl": "4.73", "wps": "366402", "ups": "3.1", "wpb": "118230", "bsz": "256", "num_updates": "369800", "lr": "0.000636566", "gnorm": "0.668", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.7", "wall": "120749"} [2022-08-02 11:01:41,414][train_inner][INFO] - {"epoch": 8, "update": 7.189, "loss": "2.237", "ppl": "4.71", "wps": "363842", "ups": "3.07", "wpb": "118370", "bsz": "256", "num_updates": "370000", "lr": "0.000636364", "gnorm": "0.668", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.1", "wall": "120814"} [2022-08-02 11:02:46,283][train_inner][INFO] - {"epoch": 8, "update": 7.193, "loss": "2.238", "ppl": "4.72", "wps": "363014", "ups": "3.08", "wpb": "117739", "bsz": "256", "num_updates": "370200", "lr": "0.000636162", "gnorm": "0.671", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20", "wall": "120879"} [2022-08-02 11:03:51,388][train_inner][INFO] - {"epoch": 8, "update": 7.197, "loss": "2.24", "ppl": "4.73", "wps": "362909", "ups": "3.07", "wpb": "118134", "bsz": "256", "num_updates": "370400", "lr": "0.00063596", "gnorm": "0.669", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "24", "wall": "120944"} [2022-08-02 11:04:56,059][train_inner][INFO] - {"epoch": 8, "update": 7.201, "loss": "2.232", "ppl": "4.7", "wps": "367052", "ups": "3.09", "wpb": "118686", "bsz": "256", "num_updates": "370600", "lr": "0.000635758", "gnorm": "0.686", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "23.1", "wall": "121008"} [2022-08-02 11:06:00,754][train_inner][INFO] - {"epoch": 8, "update": 7.205, "loss": "2.246", "ppl": "4.74", "wps": "364930", "ups": "3.09", "wpb": "118045", "bsz": "256", "num_updates": "370800", "lr": "0.000635556", "gnorm": "0.671", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.7", "wall": "121073"} [2022-08-02 11:07:05,385][train_inner][INFO] - {"epoch": 8, "update": 7.208, "loss": "2.239", "ppl": "4.72", "wps": "367103", "ups": "3.09", "wpb": "118629", "bsz": "256", "num_updates": "371000", "lr": "0.000635354", "gnorm": "0.665", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.2", "wall": "121138"} [2022-08-02 11:08:09,942][train_inner][INFO] - {"epoch": 8, "update": 7.212, "loss": "2.235", "ppl": "4.71", "wps": "366362", "ups": "3.1", "wpb": "118254", "bsz": "256", "num_updates": "371200", "lr": "0.000635152", "gnorm": "0.669", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.8", "wall": "121202"} [2022-08-02 11:09:07,751][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 [2022-08-02 11:09:14,922][train_inner][INFO] - {"epoch": 8, "update": 7.216, "loss": "2.244", "ppl": "4.74", "wps": "363420", "ups": "3.08", "wpb": "118075", "bsz": "256", "num_updates": "371400", "lr": "0.000634949", "gnorm": "0.679", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "26.8", "wall": "121267"} [2022-08-02 11:10:19,848][train_inner][INFO] - {"epoch": 8, "update": 7.22, "loss": "2.24", "ppl": "4.73", "wps": "364150", "ups": "3.08", "wpb": "118212", "bsz": "256", "num_updates": "371600", "lr": "0.000634747", "gnorm": "0.672", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.9", "wall": "121332"} [2022-08-02 11:11:24,321][train_inner][INFO] - {"epoch": 8, "update": 7.224, "loss": "2.237", "ppl": "4.72", "wps": "367252", "ups": "3.1", "wpb": "118387", "bsz": "256", "num_updates": "371800", "lr": "0.000634545", "gnorm": "0.673", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.3", "wall": "121397"} [2022-08-02 11:12:29,507][train_inner][INFO] - {"epoch": 8, "update": 7.228, "loss": "2.24", "ppl": "4.73", "wps": "363982", "ups": "3.07", "wpb": "118630", "bsz": "256", "num_updates": "372000", "lr": "0.000634343", "gnorm": "0.668", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.9", "wall": "121462"} [2022-08-02 11:13:34,692][train_inner][INFO] - {"epoch": 8, "update": 7.232, "loss": "2.239", "ppl": "4.72", "wps": "364091", "ups": "3.07", "wpb": "118665", "bsz": "256", "num_updates": "372200", "lr": "0.000634141", "gnorm": "0.713", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "26.2", "wall": "121527"} [2022-08-02 11:14:39,207][train_inner][INFO] - {"epoch": 8, "update": 7.236, "loss": "2.239", "ppl": "4.72", "wps": "364685", "ups": "3.1", "wpb": "117637", "bsz": "256", "num_updates": "372400", "lr": "0.000633939", "gnorm": "0.68", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22.3", "wall": "121592"} [2022-08-02 11:15:43,978][train_inner][INFO] - {"epoch": 8, "update": 7.24, "loss": "2.229", "ppl": "4.69", "wps": "364323", "ups": "3.09", "wpb": "117984", "bsz": "256", "num_updates": "372600", "lr": "0.000633737", "gnorm": "0.668", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "23", "wall": "121656"} [2022-08-02 11:16:48,591][train_inner][INFO] - {"epoch": 8, "update": 7.243, "loss": "2.233", "ppl": "4.7", "wps": "367080", "ups": "3.1", "wpb": "118590", "bsz": "256", "num_updates": "372800", "lr": "0.000633535", "gnorm": "0.666", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.2", "wall": "121721"} [2022-08-02 11:17:53,327][train_inner][INFO] - {"epoch": 8, "update": 7.247, "loss": "2.237", "ppl": "4.72", "wps": "364741", "ups": "3.09", "wpb": "118057", "bsz": "256", "num_updates": "373000", "lr": "0.000633333", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.5", "wall": "121786"} [2022-08-02 11:18:58,154][train_inner][INFO] - {"epoch": 8, "update": 7.251, "loss": "2.237", "ppl": "4.71", "wps": "364039", "ups": "3.09", "wpb": "117995", "bsz": "256", "num_updates": "373200", "lr": "0.000633131", "gnorm": "0.668", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20", "wall": "121850"} [2022-08-02 11:20:02,908][train_inner][INFO] - {"epoch": 8, "update": 7.255, "loss": "2.241", "ppl": "4.73", "wps": "365073", "ups": "3.09", "wpb": "118199", "bsz": "256", "num_updates": "373400", "lr": "0.000632929", "gnorm": "0.672", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.8", "wall": "121915"} [2022-08-02 11:21:08,212][train_inner][INFO] - {"epoch": 8, "update": 7.259, "loss": "2.239", "ppl": "4.72", "wps": "360473", "ups": "3.06", "wpb": "117699", "bsz": "256", "num_updates": "373600", "lr": "0.000632727", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.7", "wall": "121981"} [2022-08-02 11:22:13,386][train_inner][INFO] - {"epoch": 8, "update": 7.263, "loss": "2.239", "ppl": "4.72", "wps": "362402", "ups": "3.07", "wpb": "118094", "bsz": "256", "num_updates": "373800", "lr": "0.000632525", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.6", "wall": "122046"} [2022-08-02 11:23:18,236][train_inner][INFO] - {"epoch": 8, "update": 7.267, "loss": "2.24", "ppl": "4.72", "wps": "364593", "ups": "3.08", "wpb": "118219", "bsz": "256", "num_updates": "374000", "lr": "0.000632323", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.3", "wall": "122111"} [2022-08-02 11:24:22,896][train_inner][INFO] - {"epoch": 8, "update": 7.271, "loss": "2.233", "ppl": "4.7", "wps": "366963", "ups": "3.09", "wpb": "118637", "bsz": "256", "num_updates": "374200", "lr": "0.000632121", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.9", "wall": "122175"} [2022-08-02 11:25:27,904][train_inner][INFO] - {"epoch": 8, "update": 7.274, "loss": "2.239", "ppl": "4.72", "wps": "364185", "ups": "3.08", "wpb": "118372", "bsz": "256", "num_updates": "374400", "lr": "0.000631919", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.3", "wall": "122240"} [2022-08-02 11:26:32,531][train_inner][INFO] - {"epoch": 8, "update": 7.278, "loss": "2.238", "ppl": "4.72", "wps": "365163", "ups": "3.09", "wpb": "117995", "bsz": "256", "num_updates": "374600", "lr": "0.000631717", "gnorm": "0.673", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22", "wall": "122305"} [2022-08-02 11:27:37,431][train_inner][INFO] - {"epoch": 8, "update": 7.282, "loss": "2.235", "ppl": "4.71", "wps": "364421", "ups": "3.08", "wpb": "118252", "bsz": "256", "num_updates": "374800", "lr": "0.000631515", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.2", "wall": "122370"} [2022-08-02 11:28:42,451][train_inner][INFO] - {"epoch": 8, "update": 7.286, "loss": "2.239", "ppl": "4.72", "wps": "365376", "ups": "3.08", "wpb": "118781", "bsz": "256", "num_updates": "375000", "lr": "0.000631313", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.9", "wall": "122435"} [2022-08-02 11:29:47,489][train_inner][INFO] - {"epoch": 8, "update": 7.29, "loss": "2.232", "ppl": "4.7", "wps": "364336", "ups": "3.08", "wpb": "118478", "bsz": "256", "num_updates": "375200", "lr": "0.000631111", "gnorm": "0.67", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21", "wall": "122500"} [2022-08-02 11:30:52,364][train_inner][INFO] - {"epoch": 8, "update": 7.294, "loss": "2.231", "ppl": "4.7", "wps": "366170", "ups": "3.08", "wpb": "118774", "bsz": "256", "num_updates": "375400", "lr": "0.000630909", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "122565"} [2022-08-02 11:31:57,189][train_inner][INFO] - {"epoch": 8, "update": 7.298, "loss": "2.24", "ppl": "4.72", "wps": "364146", "ups": "3.09", "wpb": "118027", "bsz": "256", "num_updates": "375600", "lr": "0.000630707", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "122630"} [2022-08-02 11:33:02,034][train_inner][INFO] - {"epoch": 8, "update": 7.302, "loss": "2.237", "ppl": "4.72", "wps": "364490", "ups": "3.08", "wpb": "118174", "bsz": "256", "num_updates": "375800", "lr": "0.000630505", "gnorm": "0.672", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.3", "wall": "122694"} [2022-08-02 11:33:28,293][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-02 11:34:07,351][train_inner][INFO] - {"epoch": 8, "update": 7.306, "loss": "2.234", "ppl": "4.71", "wps": "361594", "ups": "3.06", "wpb": "118090", "bsz": "256", "num_updates": "376000", "lr": "0.000630303", "gnorm": "0.67", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.4", "wall": "122760"} [2022-08-02 11:35:06,870][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 [2022-08-02 11:35:12,459][train_inner][INFO] - {"epoch": 8, "update": 7.309, "loss": "2.234", "ppl": "4.7", "wps": "363751", "ups": "3.07", "wpb": "118415", "bsz": "256", "num_updates": "376200", "lr": "0.000630101", "gnorm": "0.666", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.9", "wall": "122825"} [2022-08-02 11:36:17,490][train_inner][INFO] - {"epoch": 8, "update": 7.313, "loss": "2.235", "ppl": "4.71", "wps": "363832", "ups": "3.08", "wpb": "118301", "bsz": "256", "num_updates": "376400", "lr": "0.000629899", "gnorm": "0.669", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.5", "wall": "122890"} [2022-08-02 11:37:22,384][train_inner][INFO] - {"epoch": 8, "update": 7.317, "loss": "2.241", "ppl": "4.73", "wps": "364448", "ups": "3.08", "wpb": "118251", "bsz": "256", "num_updates": "376600", "lr": "0.000629697", "gnorm": "0.671", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "23.5", "wall": "122955"} [2022-08-02 11:38:27,303][train_inner][INFO] - {"epoch": 8, "update": 7.321, "loss": "2.233", "ppl": "4.7", "wps": "365570", "ups": "3.08", "wpb": "118660", "bsz": "256", "num_updates": "376800", "lr": "0.000629495", "gnorm": "0.664", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.5", "wall": "123020"} [2022-08-02 11:39:32,239][train_inner][INFO] - {"epoch": 8, "update": 7.325, "loss": "2.232", "ppl": "4.7", "wps": "363851", "ups": "3.08", "wpb": "118134", "bsz": "256", "num_updates": "377000", "lr": "0.000629293", "gnorm": "0.666", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20", "wall": "123085"} [2022-08-02 11:40:37,038][train_inner][INFO] - {"epoch": 8, "update": 7.329, "loss": "2.232", "ppl": "4.7", "wps": "363624", "ups": "3.09", "wpb": "117811", "bsz": "256", "num_updates": "377200", "lr": "0.000629091", "gnorm": "0.668", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.8", "wall": "123149"} [2022-08-02 11:41:41,836][train_inner][INFO] - {"epoch": 8, "update": 7.333, "loss": "2.236", "ppl": "4.71", "wps": "365469", "ups": "3.09", "wpb": "118406", "bsz": "256", "num_updates": "377400", "lr": "0.000628889", "gnorm": "0.673", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.9", "wall": "123214"} [2022-08-02 11:42:46,839][train_inner][INFO] - {"epoch": 8, "update": 7.337, "loss": "2.235", "ppl": "4.71", "wps": "364604", "ups": "3.08", "wpb": "118498", "bsz": "256", "num_updates": "377600", "lr": "0.000628687", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "27.6", "wall": "123279"} [2022-08-02 11:43:51,864][train_inner][INFO] - {"epoch": 8, "update": 7.341, "loss": "2.231", "ppl": "4.69", "wps": "363729", "ups": "3.08", "wpb": "118256", "bsz": "256", "num_updates": "377800", "lr": "0.000628485", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.7", "wall": "123344"} [2022-08-02 11:44:56,960][train_inner][INFO] - {"epoch": 8, "update": 7.344, "loss": "2.233", "ppl": "4.7", "wps": "365092", "ups": "3.07", "wpb": "118829", "bsz": "256", "num_updates": "378000", "lr": "0.000628283", "gnorm": "0.665", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.6", "wall": "123409"} [2022-08-02 11:46:02,165][train_inner][INFO] - {"epoch": 8, "update": 7.348, "loss": "2.233", "ppl": "4.7", "wps": "362270", "ups": "3.07", "wpb": "118106", "bsz": "256", "num_updates": "378200", "lr": "0.000628081", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "28.2", "wall": "123474"} [2022-08-02 11:47:06,853][train_inner][INFO] - {"epoch": 8, "update": 7.352, "loss": "2.233", "ppl": "4.7", "wps": "367548", "ups": "3.09", "wpb": "118878", "bsz": "256", "num_updates": "378400", "lr": "0.000627879", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.5", "wall": "123539"} [2022-08-02 11:48:12,259][train_inner][INFO] - {"epoch": 8, "update": 7.356, "loss": "2.23", "ppl": "4.69", "wps": "362229", "ups": "3.06", "wpb": "118459", "bsz": "256", "num_updates": "378600", "lr": "0.000627677", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.8", "wall": "123605"} [2022-08-02 11:49:16,839][train_inner][INFO] - {"epoch": 8, "update": 7.36, "loss": "2.232", "ppl": "4.7", "wps": "365480", "ups": "3.1", "wpb": "118009", "bsz": "256", "num_updates": "378800", "lr": "0.000627475", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.3", "wall": "123669"} [2022-08-02 11:50:22,048][train_inner][INFO] - {"epoch": 8, "update": 7.364, "loss": "2.23", "ppl": "4.69", "wps": "362618", "ups": "3.07", "wpb": "118228", "bsz": "256", "num_updates": "379000", "lr": "0.000627273", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "123734"} [2022-08-02 11:51:26,704][train_inner][INFO] - {"epoch": 8, "update": 7.368, "loss": "2.238", "ppl": "4.72", "wps": "364570", "ups": "3.09", "wpb": "117856", "bsz": "256", "num_updates": "379200", "lr": "0.000627071", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23.7", "wall": "123799"} [2022-08-02 11:52:31,172][train_inner][INFO] - {"epoch": 8, "update": 7.372, "loss": "2.238", "ppl": "4.72", "wps": "365930", "ups": "3.1", "wpb": "117952", "bsz": "256", "num_updates": "379400", "lr": "0.000626869", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.2", "wall": "123863"} [2022-08-02 11:53:35,797][train_inner][INFO] - {"epoch": 8, "update": 7.376, "loss": "2.233", "ppl": "4.7", "wps": "365968", "ups": "3.09", "wpb": "118253", "bsz": "256", "num_updates": "379600", "lr": "0.000626667", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23.3", "wall": "123928"} [2022-08-02 11:54:40,334][train_inner][INFO] - {"epoch": 8, "update": 7.379, "loss": "2.232", "ppl": "4.7", "wps": "367219", "ups": "3.1", "wpb": "118494", "bsz": "256", "num_updates": "379800", "lr": "0.000626465", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "24.9", "wall": "123993"} [2022-08-02 11:55:45,396][train_inner][INFO] - {"epoch": 8, "update": 7.383, "loss": "2.233", "ppl": "4.7", "wps": "365257", "ups": "3.07", "wpb": "118819", "bsz": "256", "num_updates": "380000", "lr": "0.000626263", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "124058"} [2022-08-02 11:56:51,423][train_inner][INFO] - {"epoch": 8, "update": 7.387, "loss": "2.237", "ppl": "4.71", "wps": "358438", "ups": "3.03", "wpb": "118332", "bsz": "256", "num_updates": "380200", "lr": "0.000626061", "gnorm": "0.689", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "20", "wall": "124124"} [2022-08-02 11:57:56,509][train_inner][INFO] - {"epoch": 8, "update": 7.391, "loss": "2.228", "ppl": "4.69", "wps": "364620", "ups": "3.07", "wpb": "118655", "bsz": "256", "num_updates": "380400", "lr": "0.000625859", "gnorm": "0.666", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.3", "wall": "124189"} [2022-08-02 11:59:01,541][train_inner][INFO] - {"epoch": 8, "update": 7.395, "loss": "2.231", "ppl": "4.7", "wps": "364460", "ups": "3.08", "wpb": "118507", "bsz": "256", "num_updates": "380600", "lr": "0.000625657", "gnorm": "0.667", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.9", "wall": "124254"} [2022-08-02 12:00:06,348][train_inner][INFO] - {"epoch": 8, "update": 7.399, "loss": "2.232", "ppl": "4.7", "wps": "364752", "ups": "3.09", "wpb": "118190", "bsz": "256", "num_updates": "380800", "lr": "0.000625455", "gnorm": "0.667", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.1", "wall": "124319"} [2022-08-02 12:01:11,019][train_inner][INFO] - {"epoch": 8, "update": 7.403, "loss": "2.235", "ppl": "4.71", "wps": "364552", "ups": "3.09", "wpb": "117878", "bsz": "256", "num_updates": "381000", "lr": "0.000625253", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "124383"} [2022-08-02 12:02:15,918][train_inner][INFO] - {"epoch": 8, "update": 7.407, "loss": "2.233", "ppl": "4.7", "wps": "363529", "ups": "3.08", "wpb": "117962", "bsz": "256", "num_updates": "381200", "lr": "0.000625051", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.3", "wall": "124448"} [2022-08-02 12:03:20,475][train_inner][INFO] - {"epoch": 8, "update": 7.41, "loss": "2.233", "ppl": "4.7", "wps": "366839", "ups": "3.1", "wpb": "118409", "bsz": "256", "num_updates": "381400", "lr": "0.000624848", "gnorm": "0.668", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.7", "wall": "124513"} [2022-08-02 12:04:25,576][train_inner][INFO] - {"epoch": 8, "update": 7.414, "loss": "2.23", "ppl": "4.69", "wps": "364698", "ups": "3.07", "wpb": "118708", "bsz": "256", "num_updates": "381600", "lr": "0.000624646", "gnorm": "0.666", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.1", "wall": "124578"} [2022-08-02 12:05:30,419][train_inner][INFO] - {"epoch": 8, "update": 7.418, "loss": "2.236", "ppl": "4.71", "wps": "365304", "ups": "3.08", "wpb": "118436", "bsz": "256", "num_updates": "381800", "lr": "0.000624444", "gnorm": "0.668", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.1", "wall": "124643"} [2022-08-02 12:06:35,288][train_inner][INFO] - {"epoch": 8, "update": 7.422, "loss": "2.235", "ppl": "4.71", "wps": "364476", "ups": "3.08", "wpb": "118214", "bsz": "256", "num_updates": "382000", "lr": "0.000624242", "gnorm": "0.667", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "27.7", "wall": "124708"} [2022-08-02 12:07:40,162][train_inner][INFO] - {"epoch": 8, "update": 7.426, "loss": "2.234", "ppl": "4.7", "wps": "365899", "ups": "3.08", "wpb": "118685", "bsz": "256", "num_updates": "382200", "lr": "0.00062404", "gnorm": "0.666", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.6", "wall": "124772"} [2022-08-02 12:08:44,948][train_inner][INFO] - {"epoch": 8, "update": 7.43, "loss": "2.231", "ppl": "4.69", "wps": "365052", "ups": "3.09", "wpb": "118250", "bsz": "256", "num_updates": "382400", "lr": "0.000623838", "gnorm": "0.668", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "22.9", "wall": "124837"} [2022-08-02 12:09:50,006][train_inner][INFO] - {"epoch": 8, "update": 7.434, "loss": "2.223", "ppl": "4.67", "wps": "366722", "ups": "3.07", "wpb": "119289", "bsz": "256", "num_updates": "382600", "lr": "0.000623636", "gnorm": "0.663", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.3", "wall": "124902"} [2022-08-02 12:10:55,069][train_inner][INFO] - {"epoch": 8, "update": 7.438, "loss": "2.231", "ppl": "4.69", "wps": "362669", "ups": "3.07", "wpb": "117980", "bsz": "256", "num_updates": "382800", "lr": "0.000623434", "gnorm": "0.668", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.6", "wall": "124967"} [2022-08-02 12:11:56,465][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-02 12:12:00,398][train_inner][INFO] - {"epoch": 8, "update": 7.442, "loss": "2.23", "ppl": "4.69", "wps": "362072", "ups": "3.06", "wpb": "118269", "bsz": "256", "num_updates": "383000", "lr": "0.000623232", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "125033"} [2022-08-02 12:12:05,239][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-02 12:13:05,897][train_inner][INFO] - {"epoch": 8, "update": 7.445, "loss": "2.222", "ppl": "4.67", "wps": "361917", "ups": "3.05", "wpb": "118525", "bsz": "256", "num_updates": "383200", "lr": "0.00062303", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "125098"} [2022-08-02 12:14:10,659][train_inner][INFO] - {"epoch": 8, "update": 7.449, "loss": "2.231", "ppl": "4.69", "wps": "366476", "ups": "3.09", "wpb": "118667", "bsz": "256", "num_updates": "383400", "lr": "0.000622828", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.2", "wall": "125163"} [2022-08-02 12:15:15,252][train_inner][INFO] - {"epoch": 8, "update": 7.453, "loss": "2.232", "ppl": "4.7", "wps": "364998", "ups": "3.1", "wpb": "117880", "bsz": "256", "num_updates": "383600", "lr": "0.000622626", "gnorm": "0.67", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21", "wall": "125228"} [2022-08-02 12:16:20,225][train_inner][INFO] - {"epoch": 8, "update": 7.457, "loss": "2.235", "ppl": "4.71", "wps": "363554", "ups": "3.08", "wpb": "118104", "bsz": "256", "num_updates": "383800", "lr": "0.000622424", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21", "wall": "125293"} [2022-08-02 12:17:25,027][train_inner][INFO] - {"epoch": 8, "update": 7.461, "loss": "2.226", "ppl": "4.68", "wps": "364150", "ups": "3.09", "wpb": "117987", "bsz": "256", "num_updates": "384000", "lr": "0.000622222", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21", "wall": "125357"} [2022-08-02 12:18:29,581][train_inner][INFO] - {"epoch": 8, "update": 7.465, "loss": "2.229", "ppl": "4.69", "wps": "366682", "ups": "3.1", "wpb": "118353", "bsz": "256", "num_updates": "384200", "lr": "0.00062202", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.1", "wall": "125422"} [2022-08-02 12:19:34,049][train_inner][INFO] - {"epoch": 8, "update": 7.469, "loss": "2.229", "ppl": "4.69", "wps": "367156", "ups": "3.1", "wpb": "118347", "bsz": "256", "num_updates": "384400", "lr": "0.000621818", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.4", "wall": "125486"} [2022-08-02 12:20:38,869][train_inner][INFO] - {"epoch": 8, "update": 7.473, "loss": "2.236", "ppl": "4.71", "wps": "363337", "ups": "3.09", "wpb": "117755", "bsz": "256", "num_updates": "384600", "lr": "0.000621616", "gnorm": "0.673", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.2", "wall": "125551"} [2022-08-02 12:21:43,859][train_inner][INFO] - {"epoch": 8, "update": 7.477, "loss": "2.233", "ppl": "4.7", "wps": "364782", "ups": "3.08", "wpb": "118535", "bsz": "256", "num_updates": "384800", "lr": "0.000621414", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "125616"} [2022-08-02 12:22:48,847][train_inner][INFO] - {"epoch": 8, "update": 7.48, "loss": "2.235", "ppl": "4.71", "wps": "364790", "ups": "3.08", "wpb": "118533", "bsz": "256", "num_updates": "385000", "lr": "0.000621212", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.8", "wall": "125681"} [2022-08-02 12:23:53,521][train_inner][INFO] - {"epoch": 8, "update": 7.484, "loss": "2.232", "ppl": "4.7", "wps": "364344", "ups": "3.09", "wpb": "117816", "bsz": "256", "num_updates": "385200", "lr": "0.00062101", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.4", "wall": "125746"} [2022-08-02 12:24:58,044][train_inner][INFO] - {"epoch": 8, "update": 7.488, "loss": "2.228", "ppl": "4.68", "wps": "364943", "ups": "3.1", "wpb": "117734", "bsz": "256", "num_updates": "385400", "lr": "0.000620808", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.3", "wall": "125810"} [2022-08-02 12:26:02,767][train_inner][INFO] - {"epoch": 8, "update": 7.492, "loss": "2.222", "ppl": "4.66", "wps": "365988", "ups": "3.09", "wpb": "118437", "bsz": "256", "num_updates": "385600", "lr": "0.000620606", "gnorm": "0.665", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.9", "wall": "125875"} [2022-08-02 12:27:07,523][train_inner][INFO] - {"epoch": 8, "update": 7.496, "loss": "2.227", "ppl": "4.68", "wps": "366166", "ups": "3.09", "wpb": "118556", "bsz": "256", "num_updates": "385800", "lr": "0.000620404", "gnorm": "0.666", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.6", "wall": "125940"} [2022-08-02 12:28:12,131][train_inner][INFO] - {"epoch": 8, "update": 7.5, "loss": "2.224", "ppl": "4.67", "wps": "367433", "ups": "3.1", "wpb": "118695", "bsz": "256", "num_updates": "386000", "lr": "0.000620202", "gnorm": "0.665", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.2", "wall": "126004"} [2022-08-02 12:29:17,952][train_inner][INFO] - {"epoch": 8, "update": 7.504, "loss": "2.225", "ppl": "4.67", "wps": "359508", "ups": "3.04", "wpb": "118313", "bsz": "256", "num_updates": "386200", "lr": "0.00062", "gnorm": "0.667", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.1", "wall": "126070"} [2022-08-02 12:30:22,754][train_inner][INFO] - {"epoch": 8, "update": 7.508, "loss": "2.23", "ppl": "4.69", "wps": "365521", "ups": "3.09", "wpb": "118430", "bsz": "256", "num_updates": "386400", "lr": "0.000619798", "gnorm": "0.665", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.3", "wall": "126135"} [2022-08-02 12:31:28,413][train_inner][INFO] - {"epoch": 8, "update": 7.511, "loss": "2.229", "ppl": "4.69", "wps": "360514", "ups": "3.05", "wpb": "118354", "bsz": "256", "num_updates": "386600", "lr": "0.000619596", "gnorm": "0.676", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.4", "wall": "126201"} [2022-08-02 12:32:33,343][train_inner][INFO] - {"epoch": 8, "update": 7.515, "loss": "2.228", "ppl": "4.68", "wps": "364967", "ups": "3.08", "wpb": "118484", "bsz": "256", "num_updates": "386800", "lr": "0.000619394", "gnorm": "0.667", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.6", "wall": "126266"} [2022-08-02 12:33:38,702][train_inner][INFO] - {"epoch": 8, "update": 7.519, "loss": "2.226", "ppl": "4.68", "wps": "361765", "ups": "3.06", "wpb": "118223", "bsz": "256", "num_updates": "387000", "lr": "0.000619192", "gnorm": "0.665", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.8", "wall": "126331"} [2022-08-02 12:34:43,579][train_inner][INFO] - {"epoch": 8, "update": 7.523, "loss": "2.224", "ppl": "4.67", "wps": "365929", "ups": "3.08", "wpb": "118700", "bsz": "256", "num_updates": "387200", "lr": "0.00061899", "gnorm": "0.663", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "25.9", "wall": "126396"} [2022-08-02 12:35:09,190][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-02 12:35:21,248][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-02 12:35:37,337][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 [2022-08-02 12:35:49,052][train_inner][INFO] - {"epoch": 8, "update": 7.527, "loss": "2.229", "ppl": "4.69", "wps": "361202", "ups": "3.05", "wpb": "118243", "bsz": "256", "num_updates": "387400", "lr": "0.000618788", "gnorm": "0.67", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.5", "wall": "126461"} [2022-08-02 12:36:53,737][train_inner][INFO] - {"epoch": 8, "update": 7.531, "loss": "2.236", "ppl": "4.71", "wps": "364207", "ups": "3.09", "wpb": "117792", "bsz": "256", "num_updates": "387600", "lr": "0.000618586", "gnorm": "0.672", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22.1", "wall": "126526"} [2022-08-02 12:37:58,435][train_inner][INFO] - {"epoch": 8, "update": 7.535, "loss": "2.227", "ppl": "4.68", "wps": "366062", "ups": "3.09", "wpb": "118415", "bsz": "256", "num_updates": "387800", "lr": "0.000618384", "gnorm": "0.666", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.9", "wall": "126591"} [2022-08-02 12:39:03,255][train_inner][INFO] - {"epoch": 8, "update": 7.539, "loss": "2.227", "ppl": "4.68", "wps": "364755", "ups": "3.09", "wpb": "118216", "bsz": "256", "num_updates": "388000", "lr": "0.000618182", "gnorm": "0.669", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22.9", "wall": "126656"} [2022-08-02 12:40:07,935][train_inner][INFO] - {"epoch": 8, "update": 7.543, "loss": "2.229", "ppl": "4.69", "wps": "363413", "ups": "3.09", "wpb": "117527", "bsz": "256", "num_updates": "388200", "lr": "0.00061798", "gnorm": "0.673", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.4", "wall": "126720"} [2022-08-02 12:41:12,580][train_inner][INFO] - {"epoch": 8, "update": 7.547, "loss": "2.226", "ppl": "4.68", "wps": "365587", "ups": "3.09", "wpb": "118166", "bsz": "256", "num_updates": "388400", "lr": "0.000617778", "gnorm": "0.673", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20", "wall": "126785"} [2022-08-02 12:42:17,611][train_inner][INFO] - {"epoch": 8, "update": 7.55, "loss": "2.224", "ppl": "4.67", "wps": "364273", "ups": "3.08", "wpb": "118443", "bsz": "256", "num_updates": "388600", "lr": "0.000617576", "gnorm": "0.668", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.8", "wall": "126850"} [2022-08-02 12:42:22,957][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 [2022-08-02 12:43:22,498][train_inner][INFO] - {"epoch": 8, "update": 7.554, "loss": "2.224", "ppl": "4.67", "wps": "364719", "ups": "3.08", "wpb": "118326", "bsz": "256", "num_updates": "388800", "lr": "0.000617374", "gnorm": "0.667", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "22", "wall": "126915"} [2022-08-02 12:44:26,994][train_inner][INFO] - {"epoch": 8, "update": 7.558, "loss": "2.228", "ppl": "4.68", "wps": "366917", "ups": "3.1", "wpb": "118321", "bsz": "256", "num_updates": "389000", "lr": "0.000617172", "gnorm": "0.668", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "23.6", "wall": "126979"} [2022-08-02 12:45:31,821][train_inner][INFO] - {"epoch": 8, "update": 7.562, "loss": "2.231", "ppl": "4.69", "wps": "364008", "ups": "3.09", "wpb": "117986", "bsz": "256", "num_updates": "389200", "lr": "0.00061697", "gnorm": "0.676", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "22.4", "wall": "127044"} [2022-08-02 12:46:38,746][train_inner][INFO] - {"epoch": 8, "update": 7.566, "loss": "2.229", "ppl": "4.69", "wps": "353890", "ups": "2.99", "wpb": "118420", "bsz": "256", "num_updates": "389400", "lr": "0.000616768", "gnorm": "0.669", "clip": "0", "loss_scale": "2", "train_wall": "67", "gb_free": "20", "wall": "127111"} [2022-08-02 12:47:43,667][train_inner][INFO] - {"epoch": 8, "update": 7.57, "loss": "2.227", "ppl": "4.68", "wps": "363691", "ups": "3.08", "wpb": "118054", "bsz": "256", "num_updates": "389600", "lr": "0.000616566", "gnorm": "0.666", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "23.7", "wall": "127176"} [2022-08-02 12:48:48,196][train_inner][INFO] - {"epoch": 8, "update": 7.574, "loss": "2.223", "ppl": "4.67", "wps": "367975", "ups": "3.1", "wpb": "118724", "bsz": "256", "num_updates": "389800", "lr": "0.000616364", "gnorm": "0.666", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21", "wall": "127241"} [2022-08-02 12:49:53,072][train_inner][INFO] - {"epoch": 8, "update": 7.578, "loss": "2.23", "ppl": "4.69", "wps": "364203", "ups": "3.08", "wpb": "118138", "bsz": "256", "num_updates": "390000", "lr": "0.000616162", "gnorm": "0.668", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.1", "wall": "127305"} [2022-08-02 12:50:57,774][train_inner][INFO] - {"epoch": 8, "update": 7.581, "loss": "2.225", "ppl": "4.67", "wps": "364978", "ups": "3.09", "wpb": "118073", "bsz": "256", "num_updates": "390200", "lr": "0.00061596", "gnorm": "0.67", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "24.1", "wall": "127370"} [2022-08-02 12:52:02,553][train_inner][INFO] - {"epoch": 8, "update": 7.585, "loss": "2.219", "ppl": "4.66", "wps": "365916", "ups": "3.09", "wpb": "118515", "bsz": "256", "num_updates": "390400", "lr": "0.000615758", "gnorm": "0.664", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.8", "wall": "127435"} [2022-08-02 12:53:07,516][train_inner][INFO] - {"epoch": 8, "update": 7.589, "loss": "2.223", "ppl": "4.67", "wps": "364967", "ups": "3.08", "wpb": "118546", "bsz": "256", "num_updates": "390600", "lr": "0.000615556", "gnorm": "0.667", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.9", "wall": "127500"} [2022-08-02 12:54:12,656][train_inner][INFO] - {"epoch": 8, "update": 7.593, "loss": "2.225", "ppl": "4.68", "wps": "363579", "ups": "3.07", "wpb": "118414", "bsz": "256", "num_updates": "390800", "lr": "0.000615354", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "27", "wall": "127565"} [2022-08-02 12:55:17,799][train_inner][INFO] - {"epoch": 8, "update": 7.597, "loss": "2.232", "ppl": "4.7", "wps": "363388", "ups": "3.07", "wpb": "118360", "bsz": "256", "num_updates": "391000", "lr": "0.000615152", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.9", "wall": "127630"} [2022-08-02 12:56:22,662][train_inner][INFO] - {"epoch": 8, "update": 7.601, "loss": "2.227", "ppl": "4.68", "wps": "364844", "ups": "3.08", "wpb": "118324", "bsz": "256", "num_updates": "391200", "lr": "0.000614949", "gnorm": "0.679", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20", "wall": "127695"} [2022-08-02 12:57:27,380][train_inner][INFO] - {"epoch": 8, "update": 7.605, "loss": "2.227", "ppl": "4.68", "wps": "367353", "ups": "3.09", "wpb": "118870", "bsz": "256", "num_updates": "391400", "lr": "0.000614747", "gnorm": "0.676", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.1", "wall": "127760"} [2022-08-02 12:58:32,375][train_inner][INFO] - {"epoch": 8, "update": 7.609, "loss": "2.226", "ppl": "4.68", "wps": "365630", "ups": "3.08", "wpb": "118819", "bsz": "256", "num_updates": "391600", "lr": "0.000614545", "gnorm": "0.668", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20", "wall": "127825"} [2022-08-02 12:59:37,331][train_inner][INFO] - {"epoch": 8, "update": 7.613, "loss": "2.225", "ppl": "4.68", "wps": "366118", "ups": "3.08", "wpb": "118906", "bsz": "256", "num_updates": "391800", "lr": "0.000614343", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20", "wall": "127890"} [2022-08-02 13:00:42,946][train_inner][INFO] - {"epoch": 8, "update": 7.616, "loss": "2.231", "ppl": "4.7", "wps": "360694", "ups": "3.05", "wpb": "118333", "bsz": "256", "num_updates": "392000", "lr": "0.000614141", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "25.2", "wall": "127955"} [2022-08-02 13:01:47,309][train_inner][INFO] - {"epoch": 8, "update": 7.62, "loss": "2.23", "ppl": "4.69", "wps": "366088", "ups": "3.11", "wpb": "117811", "bsz": "256", "num_updates": "392200", "lr": "0.000613939", "gnorm": "0.668", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20", "wall": "128020"} [2022-08-02 13:02:51,568][train_inner][INFO] - {"epoch": 8, "update": 7.624, "loss": "2.228", "ppl": "4.69", "wps": "367198", "ups": "3.11", "wpb": "117976", "bsz": "256", "num_updates": "392400", "lr": "0.000613737", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.4", "wall": "128084"} [2022-08-02 13:03:56,393][train_inner][INFO] - {"epoch": 8, "update": 7.628, "loss": "2.227", "ppl": "4.68", "wps": "365841", "ups": "3.09", "wpb": "118578", "bsz": "256", "num_updates": "392600", "lr": "0.000613535", "gnorm": "0.668", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21", "wall": "128149"} [2022-08-02 13:05:01,129][train_inner][INFO] - {"epoch": 8, "update": 7.632, "loss": "2.224", "ppl": "4.67", "wps": "366439", "ups": "3.09", "wpb": "118606", "bsz": "256", "num_updates": "392800", "lr": "0.000613333", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.9", "wall": "128213"} [2022-08-02 13:06:05,164][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 [2022-08-02 13:06:06,152][train_inner][INFO] - {"epoch": 8, "update": 7.636, "loss": "2.222", "ppl": "4.66", "wps": "363518", "ups": "3.08", "wpb": "118184", "bsz": "256", "num_updates": "393000", "lr": "0.000613131", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.9", "wall": "128278"} [2022-08-02 13:07:11,224][train_inner][INFO] - {"epoch": 8, "update": 7.64, "loss": "2.227", "ppl": "4.68", "wps": "365497", "ups": "3.07", "wpb": "118917", "bsz": "256", "num_updates": "393200", "lr": "0.000612929", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.5", "wall": "128344"} [2022-08-02 13:08:16,247][train_inner][INFO] - {"epoch": 8, "update": 7.644, "loss": "2.22", "ppl": "4.66", "wps": "364793", "ups": "3.08", "wpb": "118597", "bsz": "256", "num_updates": "393400", "lr": "0.000612727", "gnorm": "0.665", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.2", "wall": "128409"} [2022-08-02 13:09:21,190][train_inner][INFO] - {"epoch": 8, "update": 7.648, "loss": "2.223", "ppl": "4.67", "wps": "363898", "ups": "3.08", "wpb": "118162", "bsz": "256", "num_updates": "393600", "lr": "0.000612525", "gnorm": "0.672", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.5", "wall": "128474"} [2022-08-02 13:10:25,703][train_inner][INFO] - {"epoch": 8, "update": 7.651, "loss": "2.225", "ppl": "4.68", "wps": "365876", "ups": "3.1", "wpb": "118016", "bsz": "256", "num_updates": "393800", "lr": "0.000612323", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22.2", "wall": "128538"} [2022-08-02 13:11:30,293][train_inner][INFO] - {"epoch": 8, "update": 7.655, "loss": "2.229", "ppl": "4.69", "wps": "367136", "ups": "3.1", "wpb": "118565", "bsz": "256", "num_updates": "394000", "lr": "0.000612121", "gnorm": "0.683", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.9", "wall": "128603"} [2022-08-02 13:12:37,448][train_inner][INFO] - {"epoch": 8, "update": 7.659, "loss": "2.226", "ppl": "4.68", "wps": "351703", "ups": "2.98", "wpb": "118092", "bsz": "256", "num_updates": "394200", "lr": "0.000611919", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "67", "gb_free": "28.2", "wall": "128670"} [2022-08-02 13:13:42,547][train_inner][INFO] - {"epoch": 8, "update": 7.663, "loss": "2.218", "ppl": "4.65", "wps": "364886", "ups": "3.07", "wpb": "118760", "bsz": "256", "num_updates": "394400", "lr": "0.000611717", "gnorm": "0.666", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.3", "wall": "128735"} [2022-08-02 13:14:47,374][train_inner][INFO] - {"epoch": 8, "update": 7.667, "loss": "2.224", "ppl": "4.67", "wps": "364408", "ups": "3.09", "wpb": "118116", "bsz": "256", "num_updates": "394600", "lr": "0.000611515", "gnorm": "0.668", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.8", "wall": "128800"} [2022-08-02 13:15:52,275][train_inner][INFO] - {"epoch": 8, "update": 7.671, "loss": "2.214", "ppl": "4.64", "wps": "366254", "ups": "3.08", "wpb": "118849", "bsz": "256", "num_updates": "394800", "lr": "0.000611313", "gnorm": "0.664", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.8", "wall": "128865"} [2022-08-02 13:16:56,979][train_inner][INFO] - {"epoch": 8, "update": 7.675, "loss": "2.223", "ppl": "4.67", "wps": "366232", "ups": "3.09", "wpb": "118481", "bsz": "256", "num_updates": "395000", "lr": "0.000611111", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.2", "wall": "128929"} [2022-08-02 13:18:01,777][train_inner][INFO] - {"epoch": 8, "update": 7.679, "loss": "2.225", "ppl": "4.68", "wps": "364884", "ups": "3.09", "wpb": "118218", "bsz": "256", "num_updates": "395200", "lr": "0.000610909", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.4", "wall": "128994"} [2022-08-02 13:19:06,630][train_inner][INFO] - {"epoch": 8, "update": 7.683, "loss": "2.219", "ppl": "4.66", "wps": "364530", "ups": "3.08", "wpb": "118202", "bsz": "256", "num_updates": "395400", "lr": "0.000610707", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.9", "wall": "129059"} [2022-08-02 13:20:11,036][train_inner][INFO] - {"epoch": 8, "update": 7.686, "loss": "2.222", "ppl": "4.66", "wps": "368801", "ups": "3.11", "wpb": "118763", "bsz": "256", "num_updates": "395600", "lr": "0.000610505", "gnorm": "0.665", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.4", "wall": "129123"} [2022-08-02 13:21:15,902][train_inner][INFO] - {"epoch": 8, "update": 7.69, "loss": "2.219", "ppl": "4.66", "wps": "366158", "ups": "3.08", "wpb": "118755", "bsz": "256", "num_updates": "395800", "lr": "0.000610303", "gnorm": "0.665", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.2", "wall": "129188"} [2022-08-02 13:22:20,673][train_inner][INFO] - {"epoch": 8, "update": 7.694, "loss": "2.226", "ppl": "4.68", "wps": "365332", "ups": "3.09", "wpb": "118313", "bsz": "256", "num_updates": "396000", "lr": "0.000610101", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "24.1", "wall": "129253"} [2022-08-02 13:23:25,581][train_inner][INFO] - {"epoch": 8, "update": 7.698, "loss": "2.229", "ppl": "4.69", "wps": "364094", "ups": "3.08", "wpb": "118160", "bsz": "256", "num_updates": "396200", "lr": "0.000609899", "gnorm": "0.678", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.5", "wall": "129318"} [2022-08-02 13:24:30,673][train_inner][INFO] - {"epoch": 8, "update": 7.702, "loss": "2.22", "ppl": "4.66", "wps": "364101", "ups": "3.07", "wpb": "118500", "bsz": "256", "num_updates": "396400", "lr": "0.000609697", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.2", "wall": "129383"} [2022-08-02 13:25:36,029][train_inner][INFO] - {"epoch": 8, "update": 7.706, "loss": "2.219", "ppl": "4.66", "wps": "362744", "ups": "3.06", "wpb": "118536", "bsz": "256", "num_updates": "396600", "lr": "0.000609495", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.4", "wall": "129448"} [2022-08-02 13:26:40,896][train_inner][INFO] - {"epoch": 8, "update": 7.71, "loss": "2.22", "ppl": "4.66", "wps": "365015", "ups": "3.08", "wpb": "118385", "bsz": "256", "num_updates": "396800", "lr": "0.000609293", "gnorm": "0.674", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.3", "wall": "129513"} [2022-08-02 13:27:45,673][train_inner][INFO] - {"epoch": 8, "update": 7.714, "loss": "2.223", "ppl": "4.67", "wps": "364110", "ups": "3.09", "wpb": "117929", "bsz": "256", "num_updates": "397000", "lr": "0.000609091", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23.4", "wall": "129578"} [2022-08-02 13:28:50,239][train_inner][INFO] - {"epoch": 8, "update": 7.717, "loss": "2.226", "ppl": "4.68", "wps": "365181", "ups": "3.1", "wpb": "117889", "bsz": "256", "num_updates": "397200", "lr": "0.000608889", "gnorm": "0.672", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20", "wall": "129643"} [2022-08-02 13:29:55,142][train_inner][INFO] - {"epoch": 8, "update": 7.721, "loss": "2.227", "ppl": "4.68", "wps": "364299", "ups": "3.08", "wpb": "118218", "bsz": "256", "num_updates": "397400", "lr": "0.000608687", "gnorm": "0.668", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "25.7", "wall": "129707"} [2022-08-02 13:30:59,816][train_inner][INFO] - {"epoch": 8, "update": 7.725, "loss": "2.219", "ppl": "4.66", "wps": "366403", "ups": "3.09", "wpb": "118482", "bsz": "256", "num_updates": "397600", "lr": "0.000608485", "gnorm": "0.665", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.8", "wall": "129772"} [2022-08-02 13:31:46,455][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-02 13:32:01,531][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 [2022-08-02 13:32:05,007][train_inner][INFO] - {"epoch": 8, "update": 7.729, "loss": "2.221", "ppl": "4.66", "wps": "362574", "ups": "3.07", "wpb": "118181", "bsz": "256", "num_updates": "397800", "lr": "0.000608283", "gnorm": "0.669", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.7", "wall": "129837"} [2022-08-02 13:33:09,940][train_inner][INFO] - {"epoch": 8, "update": 7.733, "loss": "2.218", "ppl": "4.65", "wps": "365943", "ups": "3.08", "wpb": "118809", "bsz": "256", "num_updates": "398000", "lr": "0.000608081", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.9", "wall": "129902"} [2022-08-02 13:34:15,451][train_inner][INFO] - {"epoch": 8, "update": 7.737, "loss": "2.221", "ppl": "4.66", "wps": "360883", "ups": "3.05", "wpb": "118208", "bsz": "256", "num_updates": "398200", "lr": "0.000607879", "gnorm": "0.682", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.2", "wall": "129968"} [2022-08-02 13:35:21,209][train_inner][INFO] - {"epoch": 8, "update": 7.741, "loss": "2.22", "ppl": "4.66", "wps": "359926", "ups": "3.04", "wpb": "118337", "bsz": "256", "num_updates": "398400", "lr": "0.000607677", "gnorm": "0.67", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.6", "wall": "130034"} [2022-08-02 13:36:25,903][train_inner][INFO] - {"epoch": 8, "update": 7.745, "loss": "2.221", "ppl": "4.66", "wps": "365975", "ups": "3.09", "wpb": "118381", "bsz": "256", "num_updates": "398600", "lr": "0.000607475", "gnorm": "0.691", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.9", "wall": "130098"} [2022-08-02 13:37:30,605][train_inner][INFO] - {"epoch": 8, "update": 7.749, "loss": "2.224", "ppl": "4.67", "wps": "363727", "ups": "3.09", "wpb": "117666", "bsz": "256", "num_updates": "398800", "lr": "0.000607273", "gnorm": "0.672", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.2", "wall": "130163"} [2022-08-02 13:38:36,444][train_inner][INFO] - {"epoch": 8, "update": 7.752, "loss": "2.223", "ppl": "4.67", "wps": "359439", "ups": "3.04", "wpb": "118325", "bsz": "256", "num_updates": "399000", "lr": "0.000607071", "gnorm": "0.67", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "27.1", "wall": "130229"} [2022-08-02 13:39:41,485][train_inner][INFO] - {"epoch": 8, "update": 7.756, "loss": "2.222", "ppl": "4.66", "wps": "364110", "ups": "3.08", "wpb": "118408", "bsz": "256", "num_updates": "399200", "lr": "0.000606869", "gnorm": "0.669", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.8", "wall": "130294"} [2022-08-02 13:40:46,392][train_inner][INFO] - {"epoch": 8, "update": 7.76, "loss": "2.221", "ppl": "4.66", "wps": "365943", "ups": "3.08", "wpb": "118760", "bsz": "256", "num_updates": "399400", "lr": "0.000606667", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20", "wall": "130359"} [2022-08-02 13:41:51,157][train_inner][INFO] - {"epoch": 8, "update": 7.764, "loss": "2.217", "ppl": "4.65", "wps": "365545", "ups": "3.09", "wpb": "118370", "bsz": "256", "num_updates": "399600", "lr": "0.000606465", "gnorm": "0.666", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.9", "wall": "130423"} [2022-08-02 13:42:55,953][train_inner][INFO] - {"epoch": 8, "update": 7.768, "loss": "2.219", "ppl": "4.66", "wps": "364951", "ups": "3.09", "wpb": "118236", "bsz": "256", "num_updates": "399800", "lr": "0.000606263", "gnorm": "0.668", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20", "wall": "130488"} [2022-08-02 13:44:00,709][train_inner][INFO] - {"epoch": 8, "update": 7.772, "loss": "2.218", "ppl": "4.65", "wps": "366009", "ups": "3.09", "wpb": "118504", "bsz": "256", "num_updates": "400000", "lr": "0.000606061", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21", "wall": "130553"} [2022-08-02 13:44:00,710][fairseq_cli.train][INFO] - begin validation on "valid" subset [2022-08-02 13:44:23,413][valid][INFO] - {"epoch": 8, "valid_loss": "2.122", "valid_ppl": "4.35", "valid_wps": "1.52207e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "400000", "valid_best_loss": "2.122"} [2022-08-02 13:44:23,415][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 8 @ 400000 updates [2022-08-02 13:44:23,416][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_8_400000.pt [2022-08-02 13:44:32,906][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_8_400000.pt [2022-08-02 13:44:59,303][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/adan2/checkpoint_8_400000.pt (epoch 8 @ 400000 updates, score 2.122) (writing took 35.88700763322413 seconds) [2022-08-02 13:46:04,127][train_inner][INFO] - {"epoch": 8, "update": 7.776, "loss": "2.221", "ppl": "4.66", "wps": "191311", "ups": "1.62", "wpb": "118055", "bsz": "256", "num_updates": "400200", "lr": "0.000605859", "gnorm": "0.673", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "25.2", "wall": "130676"} [2022-08-02 13:47:08,817][train_inner][INFO] - {"epoch": 8, "update": 7.78, "loss": "2.229", "ppl": "4.69", "wps": "363458", "ups": "3.09", "wpb": "117558", "bsz": "256", "num_updates": "400400", "lr": "0.000605657", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.5", "wall": "130741"} [2022-08-02 13:48:13,619][train_inner][INFO] - {"epoch": 8, "update": 7.784, "loss": "2.226", "ppl": "4.68", "wps": "363757", "ups": "3.09", "wpb": "117861", "bsz": "256", "num_updates": "400600", "lr": "0.000605455", "gnorm": "0.67", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.9", "wall": "130806"} [2022-08-02 13:49:18,339][train_inner][INFO] - {"epoch": 8, "update": 7.787, "loss": "2.214", "ppl": "4.64", "wps": "366756", "ups": "3.09", "wpb": "118680", "bsz": "256", "num_updates": "400800", "lr": "0.000605253", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.2", "wall": "130871"} [2022-08-02 13:50:24,206][train_inner][INFO] - {"epoch": 8, "update": 7.791, "loss": "2.212", "ppl": "4.63", "wps": "359174", "ups": "3.04", "wpb": "118287", "bsz": "256", "num_updates": "401000", "lr": "0.000605051", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "19.8", "wall": "130937"} [2022-08-02 13:51:28,840][train_inner][INFO] - {"epoch": 8, "update": 7.795, "loss": "2.218", "ppl": "4.65", "wps": "366153", "ups": "3.09", "wpb": "118328", "bsz": "256", "num_updates": "401200", "lr": "0.000604848", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "28.2", "wall": "131001"} [2022-08-02 13:52:33,535][train_inner][INFO] - {"epoch": 8, "update": 7.799, "loss": "2.219", "ppl": "4.66", "wps": "365731", "ups": "3.09", "wpb": "118304", "bsz": "256", "num_updates": "401400", "lr": "0.000604646", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.2", "wall": "131066"} [2022-08-02 13:53:38,040][train_inner][INFO] - {"epoch": 8, "update": 7.803, "loss": "2.221", "ppl": "4.66", "wps": "366489", "ups": "3.1", "wpb": "118198", "bsz": "256", "num_updates": "401600", "lr": "0.000604444", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.2", "wall": "131130"} [2022-08-02 13:54:43,053][train_inner][INFO] - {"epoch": 8, "update": 7.807, "loss": "2.215", "ppl": "4.64", "wps": "365586", "ups": "3.08", "wpb": "118838", "bsz": "256", "num_updates": "401800", "lr": "0.000604242", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "131195"} [2022-08-02 13:55:47,529][train_inner][INFO] - {"epoch": 8, "update": 7.811, "loss": "2.222", "ppl": "4.67", "wps": "367715", "ups": "3.1", "wpb": "118543", "bsz": "256", "num_updates": "402000", "lr": "0.00060404", "gnorm": "0.668", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.3", "wall": "131260"} [2022-08-02 13:56:52,233][train_inner][INFO] - {"epoch": 8, "update": 7.815, "loss": "2.221", "ppl": "4.66", "wps": "364437", "ups": "3.09", "wpb": "117902", "bsz": "256", "num_updates": "402200", "lr": "0.000603838", "gnorm": "0.671", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.9", "wall": "131325"} [2022-08-02 13:57:56,986][train_inner][INFO] - {"epoch": 8, "update": 7.819, "loss": "2.218", "ppl": "4.65", "wps": "365143", "ups": "3.09", "wpb": "118219", "bsz": "256", "num_updates": "402400", "lr": "0.000603636", "gnorm": "0.666", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.7", "wall": "131389"} [2022-08-02 13:58:48,917][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-02 13:59:03,395][train_inner][INFO] - {"epoch": 8, "update": 7.822, "loss": "2.218", "ppl": "4.65", "wps": "357153", "ups": "3.01", "wpb": "118589", "bsz": "256", "num_updates": "402600", "lr": "0.000603434", "gnorm": "0.673", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "24.2", "wall": "131456"} [2022-08-02 14:00:08,565][train_inner][INFO] - {"epoch": 8, "update": 7.826, "loss": "2.223", "ppl": "4.67", "wps": "362936", "ups": "3.07", "wpb": "118261", "bsz": "256", "num_updates": "402800", "lr": "0.000603232", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.9", "wall": "131521"} [2022-08-02 14:01:14,553][train_inner][INFO] - {"epoch": 8, "update": 7.83, "loss": "2.218", "ppl": "4.65", "wps": "359479", "ups": "3.03", "wpb": "118605", "bsz": "256", "num_updates": "403000", "lr": "0.00060303", "gnorm": "0.665", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "19.9", "wall": "131587"} [2022-08-02 14:02:19,006][train_inner][INFO] - {"epoch": 8, "update": 7.834, "loss": "2.221", "ppl": "4.66", "wps": "367151", "ups": "3.1", "wpb": "118318", "bsz": "256", "num_updates": "403200", "lr": "0.000602828", "gnorm": "0.674", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21", "wall": "131651"} [2022-08-02 14:03:23,914][train_inner][INFO] - {"epoch": 8, "update": 7.838, "loss": "2.22", "ppl": "4.66", "wps": "363558", "ups": "3.08", "wpb": "117987", "bsz": "256", "num_updates": "403400", "lr": "0.000602626", "gnorm": "0.67", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.8", "wall": "131716"} [2022-08-02 14:04:28,861][train_inner][INFO] - {"epoch": 8, "update": 7.842, "loss": "2.218", "ppl": "4.65", "wps": "364802", "ups": "3.08", "wpb": "118462", "bsz": "256", "num_updates": "403600", "lr": "0.000602424", "gnorm": "0.665", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.8", "wall": "131781"} [2022-08-02 14:05:33,591][train_inner][INFO] - {"epoch": 8, "update": 7.846, "loss": "2.218", "ppl": "4.65", "wps": "364883", "ups": "3.09", "wpb": "118092", "bsz": "256", "num_updates": "403800", "lr": "0.000602222", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.2", "wall": "131846"} [2022-08-02 14:06:38,417][train_inner][INFO] - {"epoch": 8, "update": 7.85, "loss": "2.216", "ppl": "4.65", "wps": "365391", "ups": "3.09", "wpb": "118433", "bsz": "256", "num_updates": "404000", "lr": "0.00060202", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.6", "wall": "131911"} [2022-08-02 14:07:42,862][train_inner][INFO] - {"epoch": 8, "update": 7.853, "loss": "2.209", "ppl": "4.62", "wps": "367185", "ups": "3.1", "wpb": "118315", "bsz": "256", "num_updates": "404200", "lr": "0.000601818", "gnorm": "0.665", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.2", "wall": "131975"} [2022-08-02 14:08:48,078][train_inner][INFO] - {"epoch": 8, "update": 7.857, "loss": "2.217", "ppl": "4.65", "wps": "364706", "ups": "3.07", "wpb": "118922", "bsz": "256", "num_updates": "404400", "lr": "0.000601616", "gnorm": "0.665", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "132040"} [2022-08-02 14:09:34,861][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 [2022-08-02 14:09:53,498][train_inner][INFO] - {"epoch": 8, "update": 7.861, "loss": "2.215", "ppl": "4.64", "wps": "360913", "ups": "3.06", "wpb": "118052", "bsz": "256", "num_updates": "404600", "lr": "0.000601414", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.8", "wall": "132106"} [2022-08-02 14:10:58,678][train_inner][INFO] - {"epoch": 8, "update": 7.865, "loss": "2.218", "ppl": "4.65", "wps": "363683", "ups": "3.07", "wpb": "118524", "bsz": "256", "num_updates": "404800", "lr": "0.000601212", "gnorm": "0.669", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.7", "wall": "132171"} [2022-08-02 14:12:03,572][train_inner][INFO] - {"epoch": 8, "update": 7.869, "loss": "2.219", "ppl": "4.65", "wps": "366199", "ups": "3.08", "wpb": "118819", "bsz": "256", "num_updates": "405000", "lr": "0.00060101", "gnorm": "0.666", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.9", "wall": "132236"} [2022-08-02 14:13:08,488][train_inner][INFO] - {"epoch": 8, "update": 7.873, "loss": "2.209", "ppl": "4.62", "wps": "365097", "ups": "3.08", "wpb": "118501", "bsz": "256", "num_updates": "405200", "lr": "0.000600808", "gnorm": "0.664", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "25.5", "wall": "132301"} [2022-08-02 14:14:13,782][train_inner][INFO] - {"epoch": 8, "update": 7.877, "loss": "2.214", "ppl": "4.64", "wps": "364816", "ups": "3.06", "wpb": "119100", "bsz": "256", "num_updates": "405400", "lr": "0.000600606", "gnorm": "0.664", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.7", "wall": "132366"} [2022-08-02 14:15:18,642][train_inner][INFO] - {"epoch": 8, "update": 7.881, "loss": "2.215", "ppl": "4.64", "wps": "365022", "ups": "3.08", "wpb": "118374", "bsz": "256", "num_updates": "405600", "lr": "0.000600404", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.5", "wall": "132431"} [2022-08-02 14:16:23,437][train_inner][INFO] - {"epoch": 8, "update": 7.885, "loss": "2.213", "ppl": "4.64", "wps": "364630", "ups": "3.09", "wpb": "118131", "bsz": "256", "num_updates": "405800", "lr": "0.000600202", "gnorm": "0.666", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.4", "wall": "132496"} [2022-08-02 14:17:28,027][train_inner][INFO] - {"epoch": 8, "update": 7.888, "loss": "2.221", "ppl": "4.66", "wps": "366672", "ups": "3.1", "wpb": "118415", "bsz": "256", "num_updates": "406000", "lr": "0.0006", "gnorm": "0.666", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "25.2", "wall": "132560"} [2022-08-02 14:18:32,841][train_inner][INFO] - {"epoch": 8, "update": 7.892, "loss": "2.222", "ppl": "4.66", "wps": "364821", "ups": "3.09", "wpb": "118225", "bsz": "255.9", "num_updates": "406200", "lr": "0.000599798", "gnorm": "0.668", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.1", "wall": "132625"} [2022-08-02 14:19:36,934][train_inner][INFO] - {"epoch": 8, "update": 7.896, "loss": "2.219", "ppl": "4.66", "wps": "368212", "ups": "3.12", "wpb": "117998", "bsz": "256", "num_updates": "406400", "lr": "0.000599596", "gnorm": "0.668", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.9", "wall": "132689"} [2022-08-02 14:20:42,054][train_inner][INFO] - {"epoch": 8, "update": 7.9, "loss": "2.21", "ppl": "4.63", "wps": "364589", "ups": "3.07", "wpb": "118708", "bsz": "256", "num_updates": "406600", "lr": "0.000599394", "gnorm": "0.664", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.9", "wall": "132754"} [2022-08-02 14:21:46,698][train_inner][INFO] - {"epoch": 8, "update": 7.904, "loss": "2.219", "ppl": "4.65", "wps": "365228", "ups": "3.09", "wpb": "118048", "bsz": "256", "num_updates": "406800", "lr": "0.000599192", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.2", "wall": "132819"} [2022-08-02 14:22:51,366][train_inner][INFO] - {"epoch": 8, "update": 7.908, "loss": "2.218", "ppl": "4.65", "wps": "365562", "ups": "3.09", "wpb": "118199", "bsz": "256", "num_updates": "407000", "lr": "0.00059899", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.7", "wall": "132884"} [2022-08-02 14:23:56,257][train_inner][INFO] - {"epoch": 8, "update": 7.912, "loss": "2.214", "ppl": "4.64", "wps": "365564", "ups": "3.08", "wpb": "118606", "bsz": "256", "num_updates": "407200", "lr": "0.000598788", "gnorm": "0.664", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "132949"} [2022-08-02 14:25:00,934][train_inner][INFO] - {"epoch": 8, "update": 7.916, "loss": "2.213", "ppl": "4.64", "wps": "366211", "ups": "3.09", "wpb": "118426", "bsz": "256", "num_updates": "407400", "lr": "0.000598586", "gnorm": "0.665", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.2", "wall": "133013"} [2022-08-02 14:26:05,740][train_inner][INFO] - {"epoch": 8, "update": 7.92, "loss": "2.218", "ppl": "4.65", "wps": "364373", "ups": "3.09", "wpb": "118065", "bsz": "256", "num_updates": "407600", "lr": "0.000598384", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.4", "wall": "133078"} [2022-08-02 14:27:10,567][train_inner][INFO] - {"epoch": 8, "update": 7.923, "loss": "2.212", "ppl": "4.63", "wps": "366632", "ups": "3.09", "wpb": "118837", "bsz": "256", "num_updates": "407800", "lr": "0.000598182", "gnorm": "0.664", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.9", "wall": "133143"} [2022-08-02 14:28:15,365][train_inner][INFO] - {"epoch": 8, "update": 7.927, "loss": "2.218", "ppl": "4.65", "wps": "363981", "ups": "3.09", "wpb": "117924", "bsz": "256", "num_updates": "408000", "lr": "0.00059798", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.9", "wall": "133208"} [2022-08-02 14:29:21,009][train_inner][INFO] - {"epoch": 8, "update": 7.931, "loss": "2.212", "ppl": "4.63", "wps": "360428", "ups": "3.05", "wpb": "118299", "bsz": "256", "num_updates": "408200", "lr": "0.000597778", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.5", "wall": "133273"} [2022-08-02 14:30:26,006][train_inner][INFO] - {"epoch": 8, "update": 7.935, "loss": "2.215", "ppl": "4.64", "wps": "363324", "ups": "3.08", "wpb": "118073", "bsz": "256", "num_updates": "408400", "lr": "0.000597576", "gnorm": "0.665", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "133338"} [2022-08-02 14:31:30,573][train_inner][INFO] - {"epoch": 8, "update": 7.939, "loss": "2.211", "ppl": "4.63", "wps": "365356", "ups": "3.1", "wpb": "117948", "bsz": "256", "num_updates": "408600", "lr": "0.000597374", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.3", "wall": "133403"} [2022-08-02 14:32:35,565][train_inner][INFO] - {"epoch": 8, "update": 7.943, "loss": "2.214", "ppl": "4.64", "wps": "364112", "ups": "3.08", "wpb": "118321", "bsz": "256", "num_updates": "408800", "lr": "0.000597172", "gnorm": "0.666", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.2", "wall": "133468"} [2022-08-02 14:33:40,574][train_inner][INFO] - {"epoch": 8, "update": 7.947, "loss": "2.21", "ppl": "4.63", "wps": "364249", "ups": "3.08", "wpb": "118395", "bsz": "256", "num_updates": "409000", "lr": "0.00059697", "gnorm": "0.665", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.6", "wall": "133533"} [2022-08-02 14:34:45,336][train_inner][INFO] - {"epoch": 8, "update": 7.951, "loss": "2.218", "ppl": "4.65", "wps": "364701", "ups": "3.09", "wpb": "118091", "bsz": "256", "num_updates": "409200", "lr": "0.000596768", "gnorm": "0.668", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20", "wall": "133598"} [2022-08-02 14:35:50,169][train_inner][INFO] - {"epoch": 8, "update": 7.954, "loss": "2.213", "ppl": "4.64", "wps": "364850", "ups": "3.08", "wpb": "118270", "bsz": "256", "num_updates": "409400", "lr": "0.000596566", "gnorm": "0.668", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.2", "wall": "133662"} [2022-08-02 14:36:55,274][train_inner][INFO] - {"epoch": 8, "update": 7.958, "loss": "2.212", "ppl": "4.63", "wps": "362228", "ups": "3.07", "wpb": "117913", "bsz": "256", "num_updates": "409600", "lr": "0.000596364", "gnorm": "0.667", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21", "wall": "133728"} [2022-08-02 14:38:00,229][train_inner][INFO] - {"epoch": 8, "update": 7.962, "loss": "2.21", "ppl": "4.63", "wps": "364150", "ups": "3.08", "wpb": "118262", "bsz": "256", "num_updates": "409800", "lr": "0.000596162", "gnorm": "0.666", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.2", "wall": "133793"} [2022-08-02 14:39:05,310][train_inner][INFO] - {"epoch": 8, "update": 7.966, "loss": "2.206", "ppl": "4.61", "wps": "366302", "ups": "3.07", "wpb": "119185", "bsz": "256", "num_updates": "410000", "lr": "0.00059596", "gnorm": "0.664", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.3", "wall": "133858"} [2022-08-02 14:40:10,023][train_inner][INFO] - {"epoch": 8, "update": 7.97, "loss": "2.209", "ppl": "4.62", "wps": "365927", "ups": "3.09", "wpb": "118400", "bsz": "256", "num_updates": "410200", "lr": "0.000595758", "gnorm": "0.667", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.1", "wall": "133922"} [2022-08-02 14:40:39,110][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-02 14:41:14,929][train_inner][INFO] - {"epoch": 8, "update": 7.974, "loss": "2.213", "ppl": "4.64", "wps": "361649", "ups": "3.08", "wpb": "117364", "bsz": "256", "num_updates": "410400", "lr": "0.000595556", "gnorm": "0.674", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "133987"} [2022-08-02 14:42:19,609][train_inner][INFO] - {"epoch": 8, "update": 7.978, "loss": "2.217", "ppl": "4.65", "wps": "366552", "ups": "3.09", "wpb": "118543", "bsz": "256", "num_updates": "410600", "lr": "0.000595354", "gnorm": "0.67", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "24.4", "wall": "134052"} [2022-08-02 14:43:24,445][train_inner][INFO] - {"epoch": 8, "update": 7.982, "loss": "2.218", "ppl": "4.65", "wps": "363426", "ups": "3.08", "wpb": "117814", "bsz": "256", "num_updates": "410800", "lr": "0.000595152", "gnorm": "0.67", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.4", "wall": "134117"} [2022-08-02 14:44:29,271][train_inner][INFO] - {"epoch": 8, "update": 7.986, "loss": "2.217", "ppl": "4.65", "wps": "364605", "ups": "3.09", "wpb": "118177", "bsz": "256", "num_updates": "411000", "lr": "0.000594949", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "28.2", "wall": "134182"} [2022-08-02 14:45:34,100][train_inner][INFO] - {"epoch": 8, "update": 7.989, "loss": "2.215", "ppl": "4.64", "wps": "364440", "ups": "3.09", "wpb": "118129", "bsz": "256", "num_updates": "411200", "lr": "0.000594747", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.8", "wall": "134246"} [2022-08-02 14:46:39,124][train_inner][INFO] - {"epoch": 8, "update": 7.993, "loss": "2.214", "ppl": "4.64", "wps": "366656", "ups": "3.08", "wpb": "119206", "bsz": "256", "num_updates": "411400", "lr": "0.000594545", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.3", "wall": "134311"} [2022-08-02 14:47:43,963][train_inner][INFO] - {"epoch": 8, "update": 7.997, "loss": "2.214", "ppl": "4.64", "wps": "362809", "ups": "3.08", "wpb": "117618", "bsz": "256", "num_updates": "411600", "lr": "0.000594343", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23.8", "wall": "134376"} [2022-08-02 14:48:29,816][fairseq_cli.train][INFO] - end of epoch 8 (average epoch stats below) [2022-08-02 14:48:29,816][train][INFO] - {"epoch": 8, "train_loss": "2.229", "train_ppl": "4.69", "train_wps": "362786", "train_ups": "3.07", "train_wpb": "118298", "train_bsz": "256", "train_num_updates": "411742", "train_lr": "0.0005942", "train_gnorm": "0.669", "train_clip": "0", "train_loss_scale": "8", "train_train_wall": "16619", "train_gb_free": "21.4", "train_wall": "134422"} [2022-08-02 14:48:29,907][fairseq.data.iterators][INFO] - grouped total_num_itrs = 51488 [2022-08-02 14:48:29,910][fairseq.trainer][INFO] - begin training epoch 9 [2022-08-02 14:48:29,910][fairseq_cli.train][INFO] - Start iterating over samples [2022-08-02 14:48:58,989][train_inner][INFO] - {"epoch": 9, "update": 8.001, "loss": "2.219", "ppl": "4.66", "wps": "312441", "ups": "2.67", "wpb": "117204", "bsz": "255.4", "num_updates": "411800", "lr": "0.000594141", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "28.2", "wall": "134451"} [2022-08-02 14:50:03,881][train_inner][INFO] - {"epoch": 9, "update": 8.005, "loss": "2.213", "ppl": "4.64", "wps": "365932", "ups": "3.08", "wpb": "118730", "bsz": "256", "num_updates": "412000", "lr": "0.000593939", "gnorm": "0.665", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.1", "wall": "134516"} [2022-08-02 14:51:08,617][train_inner][INFO] - {"epoch": 9, "update": 8.009, "loss": "2.203", "ppl": "4.6", "wps": "368276", "ups": "3.09", "wpb": "119201", "bsz": "256", "num_updates": "412200", "lr": "0.000593737", "gnorm": "0.662", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.6", "wall": "134581"} [2022-08-02 14:52:13,596][train_inner][INFO] - {"epoch": 9, "update": 8.013, "loss": "2.208", "ppl": "4.62", "wps": "365087", "ups": "3.08", "wpb": "118613", "bsz": "256", "num_updates": "412400", "lr": "0.000593535", "gnorm": "0.666", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "134646"} [2022-08-02 14:53:18,400][train_inner][INFO] - {"epoch": 9, "update": 8.017, "loss": "2.21", "ppl": "4.63", "wps": "364883", "ups": "3.09", "wpb": "118227", "bsz": "256", "num_updates": "412600", "lr": "0.000593333", "gnorm": "0.668", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.1", "wall": "134711"} [2022-08-02 14:54:23,271][train_inner][INFO] - {"epoch": 9, "update": 8.021, "loss": "2.219", "ppl": "4.65", "wps": "363210", "ups": "3.08", "wpb": "117808", "bsz": "256", "num_updates": "412800", "lr": "0.000593131", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.9", "wall": "134776"} [2022-08-02 14:55:28,118][train_inner][INFO] - {"epoch": 9, "update": 8.024, "loss": "2.207", "ppl": "4.62", "wps": "365322", "ups": "3.08", "wpb": "118449", "bsz": "256", "num_updates": "413000", "lr": "0.000592929", "gnorm": "0.665", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.8", "wall": "134840"} [2022-08-02 14:55:56,463][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-02 14:56:33,763][train_inner][INFO] - {"epoch": 9, "update": 8.028, "loss": "2.21", "ppl": "4.63", "wps": "361094", "ups": "3.05", "wpb": "118518", "bsz": "256", "num_updates": "413200", "lr": "0.000592727", "gnorm": "0.674", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.5", "wall": "134906"} [2022-08-02 14:57:04,839][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 [2022-08-02 14:57:38,511][train_inner][INFO] - {"epoch": 9, "update": 8.032, "loss": "2.208", "ppl": "4.62", "wps": "366209", "ups": "3.09", "wpb": "118556", "bsz": "256", "num_updates": "413400", "lr": "0.000592525", "gnorm": "0.665", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.6", "wall": "134971"} [2022-08-02 14:58:43,787][train_inner][INFO] - {"epoch": 9, "update": 8.036, "loss": "2.204", "ppl": "4.61", "wps": "365175", "ups": "3.06", "wpb": "119185", "bsz": "256", "num_updates": "413600", "lr": "0.000592323", "gnorm": "0.697", "clip": "0.5", "loss_scale": "4", "train_wall": "65", "gb_free": "22.3", "wall": "135036"} [2022-08-02 14:59:48,570][train_inner][INFO] - {"epoch": 9, "update": 8.04, "loss": "2.213", "ppl": "4.64", "wps": "363816", "ups": "3.09", "wpb": "117844", "bsz": "256", "num_updates": "413800", "lr": "0.000592121", "gnorm": "0.669", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.7", "wall": "135101"} [2022-08-02 15:00:53,478][train_inner][INFO] - {"epoch": 9, "update": 8.044, "loss": "2.209", "ppl": "4.62", "wps": "365564", "ups": "3.08", "wpb": "118639", "bsz": "256", "num_updates": "414000", "lr": "0.000591919", "gnorm": "0.666", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20", "wall": "135166"} [2022-08-02 15:01:58,627][train_inner][INFO] - {"epoch": 9, "update": 8.048, "loss": "2.213", "ppl": "4.64", "wps": "364813", "ups": "3.07", "wpb": "118833", "bsz": "256", "num_updates": "414200", "lr": "0.000591717", "gnorm": "0.671", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.4", "wall": "135231"} [2022-08-02 15:03:03,559][train_inner][INFO] - {"epoch": 9, "update": 8.052, "loss": "2.21", "ppl": "4.63", "wps": "364390", "ups": "3.08", "wpb": "118301", "bsz": "256", "num_updates": "414400", "lr": "0.000591515", "gnorm": "0.67", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.1", "wall": "135296"} [2022-08-02 15:04:08,432][train_inner][INFO] - {"epoch": 9, "update": 8.056, "loss": "2.208", "ppl": "4.62", "wps": "364683", "ups": "3.08", "wpb": "118289", "bsz": "256", "num_updates": "414600", "lr": "0.000591313", "gnorm": "0.669", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.7", "wall": "135361"} [2022-08-02 15:05:13,452][train_inner][INFO] - {"epoch": 9, "update": 8.059, "loss": "2.212", "ppl": "4.63", "wps": "365586", "ups": "3.08", "wpb": "118849", "bsz": "256", "num_updates": "414800", "lr": "0.000591111", "gnorm": "0.668", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.9", "wall": "135426"} [2022-08-02 15:06:18,512][train_inner][INFO] - {"epoch": 9, "update": 8.063, "loss": "2.211", "ppl": "4.63", "wps": "364207", "ups": "3.07", "wpb": "118476", "bsz": "256", "num_updates": "415000", "lr": "0.000590909", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.6", "wall": "135491"} [2022-08-02 15:07:24,643][train_inner][INFO] - {"epoch": 9, "update": 8.067, "loss": "2.219", "ppl": "4.66", "wps": "356970", "ups": "3.02", "wpb": "118031", "bsz": "256", "num_updates": "415200", "lr": "0.000590707", "gnorm": "0.669", "clip": "0", "loss_scale": "4", "train_wall": "66", "gb_free": "20.1", "wall": "135557"} [2022-08-02 15:08:29,171][train_inner][INFO] - {"epoch": 9, "update": 8.071, "loss": "2.206", "ppl": "4.62", "wps": "366416", "ups": "3.1", "wpb": "118221", "bsz": "256", "num_updates": "415400", "lr": "0.000590505", "gnorm": "0.673", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.1", "wall": "135621"} [2022-08-02 15:09:34,959][train_inner][INFO] - {"epoch": 9, "update": 8.075, "loss": "2.208", "ppl": "4.62", "wps": "359706", "ups": "3.04", "wpb": "118319", "bsz": "256", "num_updates": "415600", "lr": "0.000590303", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.9", "wall": "135687"} [2022-08-02 15:10:39,775][train_inner][INFO] - {"epoch": 9, "update": 8.079, "loss": "2.212", "ppl": "4.63", "wps": "365891", "ups": "3.09", "wpb": "118577", "bsz": "256", "num_updates": "415800", "lr": "0.000590101", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "25.7", "wall": "135752"} [2022-08-02 15:11:44,627][train_inner][INFO] - {"epoch": 9, "update": 8.083, "loss": "2.21", "ppl": "4.63", "wps": "364336", "ups": "3.08", "wpb": "118137", "bsz": "256", "num_updates": "416000", "lr": "0.000589899", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.5", "wall": "135817"} [2022-08-02 15:12:49,702][train_inner][INFO] - {"epoch": 9, "update": 8.087, "loss": "2.216", "ppl": "4.65", "wps": "363688", "ups": "3.07", "wpb": "118334", "bsz": "256", "num_updates": "416200", "lr": "0.000589697", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.9", "wall": "135882"} [2022-08-02 15:13:54,550][train_inner][INFO] - {"epoch": 9, "update": 8.091, "loss": "2.21", "ppl": "4.63", "wps": "363861", "ups": "3.08", "wpb": "117977", "bsz": "256", "num_updates": "416400", "lr": "0.000589495", "gnorm": "0.679", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.3", "wall": "135947"} [2022-08-02 15:14:59,352][train_inner][INFO] - {"epoch": 9, "update": 8.094, "loss": "2.204", "ppl": "4.61", "wps": "366122", "ups": "3.09", "wpb": "118625", "bsz": "256", "num_updates": "416600", "lr": "0.000589293", "gnorm": "0.664", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.1", "wall": "136012"} [2022-08-02 15:16:04,448][train_inner][INFO] - {"epoch": 9, "update": 8.098, "loss": "2.208", "ppl": "4.62", "wps": "363756", "ups": "3.07", "wpb": "118393", "bsz": "256", "num_updates": "416800", "lr": "0.000589091", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23", "wall": "136077"} [2022-08-02 15:17:08,989][train_inner][INFO] - {"epoch": 9, "update": 8.102, "loss": "2.216", "ppl": "4.65", "wps": "365519", "ups": "3.1", "wpb": "117954", "bsz": "256", "num_updates": "417000", "lr": "0.000588889", "gnorm": "0.672", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "26.7", "wall": "136141"} [2022-08-02 15:18:14,166][train_inner][INFO] - {"epoch": 9, "update": 8.106, "loss": "2.204", "ppl": "4.61", "wps": "363889", "ups": "3.07", "wpb": "118583", "bsz": "256", "num_updates": "417200", "lr": "0.000588687", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.7", "wall": "136206"} [2022-08-02 15:19:19,235][train_inner][INFO] - {"epoch": 9, "update": 8.11, "loss": "2.209", "ppl": "4.62", "wps": "363143", "ups": "3.07", "wpb": "118145", "bsz": "256", "num_updates": "417400", "lr": "0.000588485", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.6", "wall": "136272"} [2022-08-02 15:19:28,606][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-02 15:20:24,383][train_inner][INFO] - {"epoch": 9, "update": 8.114, "loss": "2.207", "ppl": "4.62", "wps": "363602", "ups": "3.07", "wpb": "118438", "bsz": "256", "num_updates": "417600", "lr": "0.000588283", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.3", "wall": "136337"} [2022-08-02 15:21:30,021][train_inner][INFO] - {"epoch": 9, "update": 8.118, "loss": "2.208", "ppl": "4.62", "wps": "360001", "ups": "3.05", "wpb": "118149", "bsz": "256", "num_updates": "417800", "lr": "0.000588081", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.5", "wall": "136402"} [2022-08-02 15:22:34,546][train_inner][INFO] - {"epoch": 9, "update": 8.122, "loss": "2.217", "ppl": "4.65", "wps": "363146", "ups": "3.1", "wpb": "117157", "bsz": "256", "num_updates": "418000", "lr": "0.000587879", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "28.2", "wall": "136467"} [2022-08-02 15:23:39,433][train_inner][INFO] - {"epoch": 9, "update": 8.125, "loss": "2.207", "ppl": "4.62", "wps": "363229", "ups": "3.08", "wpb": "117843", "bsz": "256", "num_updates": "418200", "lr": "0.000587677", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25", "wall": "136532"} [2022-08-02 15:24:44,331][train_inner][INFO] - {"epoch": 9, "update": 8.129, "loss": "2.21", "ppl": "4.63", "wps": "364274", "ups": "3.08", "wpb": "118201", "bsz": "256", "num_updates": "418400", "lr": "0.000587475", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.9", "wall": "136597"} [2022-08-02 15:25:49,177][train_inner][INFO] - {"epoch": 9, "update": 8.133, "loss": "2.197", "ppl": "4.59", "wps": "366848", "ups": "3.08", "wpb": "118942", "bsz": "256", "num_updates": "418600", "lr": "0.000587273", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "27.9", "wall": "136661"} [2022-08-02 15:26:54,085][train_inner][INFO] - {"epoch": 9, "update": 8.137, "loss": "2.207", "ppl": "4.62", "wps": "364259", "ups": "3.08", "wpb": "118214", "bsz": "256", "num_updates": "418800", "lr": "0.000587071", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.4", "wall": "136726"} [2022-08-02 15:27:59,044][train_inner][INFO] - {"epoch": 9, "update": 8.141, "loss": "2.207", "ppl": "4.62", "wps": "361512", "ups": "3.08", "wpb": "117416", "bsz": "256", "num_updates": "419000", "lr": "0.000586869", "gnorm": "0.672", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "136791"} [2022-08-02 15:29:04,079][train_inner][INFO] - {"epoch": 9, "update": 8.145, "loss": "2.209", "ppl": "4.62", "wps": "364317", "ups": "3.08", "wpb": "118465", "bsz": "256", "num_updates": "419200", "lr": "0.000586667", "gnorm": "0.67", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "136856"} [2022-08-02 15:30:08,846][train_inner][INFO] - {"epoch": 9, "update": 8.149, "loss": "2.208", "ppl": "4.62", "wps": "364174", "ups": "3.09", "wpb": "117929", "bsz": "256", "num_updates": "419400", "lr": "0.000586465", "gnorm": "0.673", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.7", "wall": "136921"} [2022-08-02 15:30:32,004][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 [2022-08-02 15:31:14,053][train_inner][INFO] - {"epoch": 9, "update": 8.153, "loss": "2.212", "ppl": "4.63", "wps": "361224", "ups": "3.07", "wpb": "117771", "bsz": "256", "num_updates": "419600", "lr": "0.000586263", "gnorm": "0.694", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.6", "wall": "136986"} [2022-08-02 15:31:16,297][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 [2022-08-02 15:32:19,481][train_inner][INFO] - {"epoch": 9, "update": 8.157, "loss": "2.213", "ppl": "4.64", "wps": "362591", "ups": "3.06", "wpb": "118617", "bsz": "256", "num_updates": "419800", "lr": "0.000586061", "gnorm": "0.818", "clip": "0.5", "loss_scale": "2", "train_wall": "65", "gb_free": "21.1", "wall": "137052"} [2022-08-02 15:33:24,367][train_inner][INFO] - {"epoch": 9, "update": 8.16, "loss": "2.206", "ppl": "4.61", "wps": "365146", "ups": "3.08", "wpb": "118463", "bsz": "256", "num_updates": "420000", "lr": "0.000585859", "gnorm": "0.668", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.5", "wall": "137117"} [2022-08-02 15:34:30,182][train_inner][INFO] - {"epoch": 9, "update": 8.164, "loss": "2.201", "ppl": "4.6", "wps": "359218", "ups": "3.04", "wpb": "118208", "bsz": "256", "num_updates": "420200", "lr": "0.000585657", "gnorm": "0.668", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "26.1", "wall": "137183"} [2022-08-02 15:35:35,217][train_inner][INFO] - {"epoch": 9, "update": 8.168, "loss": "2.203", "ppl": "4.6", "wps": "364374", "ups": "3.08", "wpb": "118483", "bsz": "256", "num_updates": "420400", "lr": "0.000585455", "gnorm": "0.665", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.3", "wall": "137248"} [2022-08-02 15:36:40,281][train_inner][INFO] - {"epoch": 9, "update": 8.172, "loss": "2.209", "ppl": "4.62", "wps": "363141", "ups": "3.07", "wpb": "118135", "bsz": "256", "num_updates": "420600", "lr": "0.000585253", "gnorm": "0.669", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.9", "wall": "137313"} [2022-08-02 15:37:44,876][train_inner][INFO] - {"epoch": 9, "update": 8.176, "loss": "2.206", "ppl": "4.61", "wps": "366965", "ups": "3.1", "wpb": "118520", "bsz": "256", "num_updates": "420800", "lr": "0.000585051", "gnorm": "0.668", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "23.6", "wall": "137377"} [2022-08-02 15:38:49,835][train_inner][INFO] - {"epoch": 9, "update": 8.18, "loss": "2.206", "ppl": "4.61", "wps": "364810", "ups": "3.08", "wpb": "118486", "bsz": "256", "num_updates": "421000", "lr": "0.000584848", "gnorm": "0.667", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.8", "wall": "137442"} [2022-08-02 15:39:55,136][train_inner][INFO] - {"epoch": 9, "update": 8.184, "loss": "2.21", "ppl": "4.63", "wps": "361254", "ups": "3.06", "wpb": "117950", "bsz": "256", "num_updates": "421200", "lr": "0.000584646", "gnorm": "0.668", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "22.6", "wall": "137507"} [2022-08-02 15:40:59,973][train_inner][INFO] - {"epoch": 9, "update": 8.188, "loss": "2.208", "ppl": "4.62", "wps": "363834", "ups": "3.08", "wpb": "117948", "bsz": "256", "num_updates": "421400", "lr": "0.000584444", "gnorm": "0.67", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.3", "wall": "137572"} [2022-08-02 15:42:02,015][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 [2022-08-02 15:42:05,244][train_inner][INFO] - {"epoch": 9, "update": 8.192, "loss": "2.221", "ppl": "4.66", "wps": "361395", "ups": "3.06", "wpb": "117940", "bsz": "256", "num_updates": "421600", "lr": "0.000584242", "gnorm": "0.712", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "19.8", "wall": "137638"} [2022-08-02 15:43:09,867][train_inner][INFO] - {"epoch": 9, "update": 8.195, "loss": "2.209", "ppl": "4.62", "wps": "366630", "ups": "3.09", "wpb": "118464", "bsz": "256", "num_updates": "421800", "lr": "0.00058404", "gnorm": "0.718", "clip": "0.5", "loss_scale": "1", "train_wall": "64", "gb_free": "23.7", "wall": "137702"} [2022-08-02 15:44:14,621][train_inner][INFO] - {"epoch": 9, "update": 8.199, "loss": "2.212", "ppl": "4.63", "wps": "365024", "ups": "3.09", "wpb": "118182", "bsz": "256", "num_updates": "422000", "lr": "0.000583838", "gnorm": "0.671", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20.8", "wall": "137767"} [2022-08-02 15:45:19,598][train_inner][INFO] - {"epoch": 9, "update": 8.203, "loss": "2.211", "ppl": "4.63", "wps": "363881", "ups": "3.08", "wpb": "118216", "bsz": "256", "num_updates": "422200", "lr": "0.000583636", "gnorm": "0.669", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20", "wall": "137832"} [2022-08-02 15:46:24,886][train_inner][INFO] - {"epoch": 9, "update": 8.207, "loss": "2.207", "ppl": "4.62", "wps": "363051", "ups": "3.06", "wpb": "118514", "bsz": "256", "num_updates": "422400", "lr": "0.000583434", "gnorm": "0.672", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "23.1", "wall": "137897"} [2022-08-02 15:47:29,961][train_inner][INFO] - {"epoch": 9, "update": 8.211, "loss": "2.205", "ppl": "4.61", "wps": "362130", "ups": "3.07", "wpb": "117825", "bsz": "256", "num_updates": "422600", "lr": "0.000583232", "gnorm": "0.67", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20", "wall": "137962"} [2022-08-02 15:48:35,147][train_inner][INFO] - {"epoch": 9, "update": 8.215, "loss": "2.206", "ppl": "4.62", "wps": "361756", "ups": "3.07", "wpb": "117906", "bsz": "256", "num_updates": "422800", "lr": "0.00058303", "gnorm": "0.668", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.3", "wall": "138027"} [2022-08-02 15:49:40,107][train_inner][INFO] - {"epoch": 9, "update": 8.219, "loss": "2.211", "ppl": "4.63", "wps": "362248", "ups": "3.08", "wpb": "117656", "bsz": "256", "num_updates": "423000", "lr": "0.000582828", "gnorm": "0.684", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "19.8", "wall": "138092"} [2022-08-02 15:50:44,911][train_inner][INFO] - {"epoch": 9, "update": 8.223, "loss": "2.203", "ppl": "4.6", "wps": "364414", "ups": "3.09", "wpb": "118076", "bsz": "255.9", "num_updates": "423200", "lr": "0.000582626", "gnorm": "0.668", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20.2", "wall": "138157"} [2022-08-02 15:51:49,933][train_inner][INFO] - {"epoch": 9, "update": 8.227, "loss": "2.206", "ppl": "4.61", "wps": "364549", "ups": "3.08", "wpb": "118517", "bsz": "256", "num_updates": "423400", "lr": "0.000582424", "gnorm": "0.668", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.6", "wall": "138222"} [2022-08-02 15:52:54,751][train_inner][INFO] - {"epoch": 9, "update": 8.23, "loss": "2.203", "ppl": "4.61", "wps": "366127", "ups": "3.09", "wpb": "118656", "bsz": "256", "num_updates": "423600", "lr": "0.000582222", "gnorm": "0.666", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "22.9", "wall": "138287"} [2022-08-02 15:53:59,886][train_inner][INFO] - {"epoch": 9, "update": 8.234, "loss": "2.202", "ppl": "4.6", "wps": "365430", "ups": "3.07", "wpb": "119010", "bsz": "256", "num_updates": "423800", "lr": "0.00058202", "gnorm": "0.665", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.8", "wall": "138352"} [2022-08-02 15:55:04,717][train_inner][INFO] - {"epoch": 9, "update": 8.238, "loss": "2.213", "ppl": "4.64", "wps": "363095", "ups": "3.08", "wpb": "117697", "bsz": "256", "num_updates": "424000", "lr": "0.000581818", "gnorm": "0.671", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.1", "wall": "138417"} [2022-08-02 15:56:10,507][train_inner][INFO] - {"epoch": 9, "update": 8.242, "loss": "2.207", "ppl": "4.62", "wps": "361308", "ups": "3.04", "wpb": "118852", "bsz": "256", "num_updates": "424200", "lr": "0.000581616", "gnorm": "0.669", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.1", "wall": "138483"} [2022-08-02 15:57:15,093][train_inner][INFO] - {"epoch": 9, "update": 8.246, "loss": "2.201", "ppl": "4.6", "wps": "364664", "ups": "3.1", "wpb": "117758", "bsz": "256", "num_updates": "424400", "lr": "0.000581414", "gnorm": "0.671", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.6", "wall": "138547"} [2022-08-02 15:58:19,475][train_inner][INFO] - {"epoch": 9, "update": 8.25, "loss": "2.213", "ppl": "4.64", "wps": "367937", "ups": "3.11", "wpb": "118442", "bsz": "256", "num_updates": "424600", "lr": "0.000581212", "gnorm": "0.668", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.5", "wall": "138612"} [2022-08-02 15:59:23,912][train_inner][INFO] - {"epoch": 9, "update": 8.254, "loss": "2.201", "ppl": "4.6", "wps": "365440", "ups": "3.1", "wpb": "117737", "bsz": "256", "num_updates": "424800", "lr": "0.00058101", "gnorm": "0.669", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "27.4", "wall": "138676"} [2022-08-02 16:00:28,472][train_inner][INFO] - {"epoch": 9, "update": 8.258, "loss": "2.204", "ppl": "4.61", "wps": "366665", "ups": "3.1", "wpb": "118357", "bsz": "256", "num_updates": "425000", "lr": "0.000580808", "gnorm": "0.667", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.2", "wall": "138741"} [2022-08-02 16:01:33,386][train_inner][INFO] - {"epoch": 9, "update": 8.261, "loss": "2.209", "ppl": "4.62", "wps": "365329", "ups": "3.08", "wpb": "118574", "bsz": "256", "num_updates": "425200", "lr": "0.000580606", "gnorm": "0.67", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "23.7", "wall": "138806"} [2022-08-02 16:02:37,504][train_inner][INFO] - {"epoch": 9, "update": 8.265, "loss": "2.202", "ppl": "4.6", "wps": "368884", "ups": "3.12", "wpb": "118258", "bsz": "256", "num_updates": "425400", "lr": "0.000580404", "gnorm": "0.67", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.7", "wall": "138870"} [2022-08-02 16:03:42,250][train_inner][INFO] - {"epoch": 9, "update": 8.269, "loss": "2.21", "ppl": "4.63", "wps": "365366", "ups": "3.09", "wpb": "118280", "bsz": "256", "num_updates": "425600", "lr": "0.000580202", "gnorm": "0.667", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.2", "wall": "138935"} [2022-08-02 16:04:46,808][train_inner][INFO] - {"epoch": 9, "update": 8.273, "loss": "2.205", "ppl": "4.61", "wps": "365050", "ups": "3.1", "wpb": "117833", "bsz": "256", "num_updates": "425800", "lr": "0.00058", "gnorm": "0.669", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22.7", "wall": "138999"} [2022-08-02 16:05:51,746][train_inner][INFO] - {"epoch": 9, "update": 8.277, "loss": "2.204", "ppl": "4.61", "wps": "364847", "ups": "3.08", "wpb": "118460", "bsz": "256", "num_updates": "426000", "lr": "0.000579798", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.1", "wall": "139064"} [2022-08-02 16:06:56,475][train_inner][INFO] - {"epoch": 9, "update": 8.281, "loss": "2.206", "ppl": "4.61", "wps": "364796", "ups": "3.09", "wpb": "118064", "bsz": "256", "num_updates": "426200", "lr": "0.000579596", "gnorm": "0.669", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.7", "wall": "139129"} [2022-08-02 16:08:01,609][train_inner][INFO] - {"epoch": 9, "update": 8.285, "loss": "2.207", "ppl": "4.62", "wps": "362787", "ups": "3.07", "wpb": "118148", "bsz": "256", "num_updates": "426400", "lr": "0.000579394", "gnorm": "0.669", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20", "wall": "139194"} [2022-08-02 16:09:06,180][train_inner][INFO] - {"epoch": 9, "update": 8.289, "loss": "2.205", "ppl": "4.61", "wps": "366550", "ups": "3.1", "wpb": "118340", "bsz": "256", "num_updates": "426600", "lr": "0.000579192", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.6", "wall": "139259"} [2022-08-02 16:10:10,862][train_inner][INFO] - {"epoch": 9, "update": 8.293, "loss": "2.206", "ppl": "4.61", "wps": "363886", "ups": "3.09", "wpb": "117683", "bsz": "256", "num_updates": "426800", "lr": "0.00057899", "gnorm": "0.67", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "23.2", "wall": "139323"} [2022-08-02 16:11:15,587][train_inner][INFO] - {"epoch": 9, "update": 8.296, "loss": "2.206", "ppl": "4.61", "wps": "366445", "ups": "3.09", "wpb": "118588", "bsz": "256", "num_updates": "427000", "lr": "0.000578788", "gnorm": "0.669", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.7", "wall": "139388"} [2022-08-02 16:12:20,284][train_inner][INFO] - {"epoch": 9, "update": 8.3, "loss": "2.204", "ppl": "4.61", "wps": "365513", "ups": "3.09", "wpb": "118237", "bsz": "256", "num_updates": "427200", "lr": "0.000578586", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.1", "wall": "139453"} [2022-08-02 16:13:25,313][train_inner][INFO] - {"epoch": 9, "update": 8.304, "loss": "2.2", "ppl": "4.59", "wps": "364509", "ups": "3.08", "wpb": "118516", "bsz": "256", "num_updates": "427400", "lr": "0.000578384", "gnorm": "0.667", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.8", "wall": "139518"} [2022-08-02 16:14:29,918][train_inner][INFO] - {"epoch": 9, "update": 8.308, "loss": "2.206", "ppl": "4.61", "wps": "366807", "ups": "3.1", "wpb": "118486", "bsz": "256", "num_updates": "427600", "lr": "0.000578182", "gnorm": "0.669", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.9", "wall": "139582"} [2022-08-02 16:15:35,838][train_inner][INFO] - {"epoch": 9, "update": 8.312, "loss": "2.202", "ppl": "4.6", "wps": "358820", "ups": "3.03", "wpb": "118265", "bsz": "256", "num_updates": "427800", "lr": "0.00057798", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "20.2", "wall": "139648"} [2022-08-02 16:16:40,731][train_inner][INFO] - {"epoch": 9, "update": 8.316, "loss": "2.196", "ppl": "4.58", "wps": "365120", "ups": "3.08", "wpb": "118466", "bsz": "256", "num_updates": "428000", "lr": "0.000577778", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "139713"} [2022-08-02 16:17:45,584][train_inner][INFO] - {"epoch": 9, "update": 8.32, "loss": "2.206", "ppl": "4.61", "wps": "365032", "ups": "3.08", "wpb": "118365", "bsz": "256", "num_updates": "428200", "lr": "0.000577576", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "139778"} [2022-08-02 16:18:50,251][train_inner][INFO] - {"epoch": 9, "update": 8.324, "loss": "2.202", "ppl": "4.6", "wps": "364811", "ups": "3.09", "wpb": "117955", "bsz": "256", "num_updates": "428400", "lr": "0.000577374", "gnorm": "0.67", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.7", "wall": "139843"} [2022-08-02 16:19:54,909][train_inner][INFO] - {"epoch": 9, "update": 8.328, "loss": "2.205", "ppl": "4.61", "wps": "365081", "ups": "3.09", "wpb": "118026", "bsz": "256", "num_updates": "428600", "lr": "0.000577172", "gnorm": "0.67", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20", "wall": "139907"} [2022-08-02 16:20:59,738][train_inner][INFO] - {"epoch": 9, "update": 8.331, "loss": "2.205", "ppl": "4.61", "wps": "363333", "ups": "3.09", "wpb": "117771", "bsz": "256", "num_updates": "428800", "lr": "0.00057697", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "26", "wall": "139972"} [2022-08-02 16:22:04,736][train_inner][INFO] - {"epoch": 9, "update": 8.335, "loss": "2.208", "ppl": "4.62", "wps": "363670", "ups": "3.08", "wpb": "118188", "bsz": "256", "num_updates": "429000", "lr": "0.000576768", "gnorm": "0.67", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.7", "wall": "140037"} [2022-08-02 16:23:09,776][train_inner][INFO] - {"epoch": 9, "update": 8.339, "loss": "2.208", "ppl": "4.62", "wps": "361939", "ups": "3.08", "wpb": "117700", "bsz": "256", "num_updates": "429200", "lr": "0.000576566", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.6", "wall": "140102"} [2022-08-02 16:24:14,488][train_inner][INFO] - {"epoch": 9, "update": 8.343, "loss": "2.196", "ppl": "4.58", "wps": "366257", "ups": "3.09", "wpb": "118504", "bsz": "256", "num_updates": "429400", "lr": "0.000576364", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "28.2", "wall": "140167"} [2022-08-02 16:25:19,366][train_inner][INFO] - {"epoch": 9, "update": 8.347, "loss": "2.209", "ppl": "4.62", "wps": "365042", "ups": "3.08", "wpb": "118415", "bsz": "256", "num_updates": "429600", "lr": "0.000576162", "gnorm": "0.68", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "140232"} [2022-08-02 16:26:24,365][train_inner][INFO] - {"epoch": 9, "update": 8.351, "loss": "2.202", "ppl": "4.6", "wps": "363037", "ups": "3.08", "wpb": "117984", "bsz": "255.9", "num_updates": "429800", "lr": "0.00057596", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.8", "wall": "140297"} [2022-08-02 16:27:29,123][train_inner][INFO] - {"epoch": 9, "update": 8.355, "loss": "2.205", "ppl": "4.61", "wps": "365147", "ups": "3.09", "wpb": "118229", "bsz": "256", "num_updates": "430000", "lr": "0.000575758", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.9", "wall": "140361"} [2022-08-02 16:28:34,245][train_inner][INFO] - {"epoch": 9, "update": 8.359, "loss": "2.203", "ppl": "4.6", "wps": "362782", "ups": "3.07", "wpb": "118124", "bsz": "256", "num_updates": "430200", "lr": "0.000575556", "gnorm": "0.671", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.9", "wall": "140427"} [2022-08-02 16:29:38,662][train_inner][INFO] - {"epoch": 9, "update": 8.362, "loss": "2.207", "ppl": "4.62", "wps": "365953", "ups": "3.1", "wpb": "117866", "bsz": "256", "num_updates": "430400", "lr": "0.000575354", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "140491"} [2022-08-02 16:30:43,528][train_inner][INFO] - {"epoch": 9, "update": 8.366, "loss": "2.205", "ppl": "4.61", "wps": "365447", "ups": "3.08", "wpb": "118524", "bsz": "256", "num_updates": "430600", "lr": "0.000575152", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "27.7", "wall": "140556"} [2022-08-02 16:31:48,534][train_inner][INFO] - {"epoch": 9, "update": 8.37, "loss": "2.202", "ppl": "4.6", "wps": "363286", "ups": "3.08", "wpb": "118076", "bsz": "256", "num_updates": "430800", "lr": "0.000574949", "gnorm": "0.674", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.7", "wall": "140621"} [2022-08-02 16:32:53,202][train_inner][INFO] - {"epoch": 9, "update": 8.374, "loss": "2.206", "ppl": "4.61", "wps": "365526", "ups": "3.09", "wpb": "118188", "bsz": "256", "num_updates": "431000", "lr": "0.000574747", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.7", "wall": "140686"} [2022-08-02 16:33:18,622][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-02 16:33:58,257][train_inner][INFO] - {"epoch": 9, "update": 8.378, "loss": "2.204", "ppl": "4.61", "wps": "363936", "ups": "3.07", "wpb": "118378", "bsz": "256", "num_updates": "431200", "lr": "0.000574545", "gnorm": "0.688", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "140751"} [2022-08-02 16:35:03,558][train_inner][INFO] - {"epoch": 9, "update": 8.382, "loss": "2.204", "ppl": "4.61", "wps": "363039", "ups": "3.06", "wpb": "118533", "bsz": "256", "num_updates": "431400", "lr": "0.000574343", "gnorm": "0.665", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.4", "wall": "140816"} [2022-08-02 16:36:08,100][train_inner][INFO] - {"epoch": 9, "update": 8.386, "loss": "2.202", "ppl": "4.6", "wps": "367070", "ups": "3.1", "wpb": "118455", "bsz": "256", "num_updates": "431600", "lr": "0.000574141", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.2", "wall": "140880"} [2022-08-02 16:37:12,912][train_inner][INFO] - {"epoch": 9, "update": 8.39, "loss": "2.197", "ppl": "4.59", "wps": "366297", "ups": "3.09", "wpb": "118701", "bsz": "256", "num_updates": "431800", "lr": "0.000573939", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20", "wall": "140945"} [2022-08-02 16:38:17,893][train_inner][INFO] - {"epoch": 9, "update": 8.394, "loss": "2.21", "ppl": "4.63", "wps": "362383", "ups": "3.08", "wpb": "117738", "bsz": "256", "num_updates": "432000", "lr": "0.000573737", "gnorm": "0.678", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "141010"} [2022-08-02 16:39:22,598][train_inner][INFO] - {"epoch": 9, "update": 8.397, "loss": "2.202", "ppl": "4.6", "wps": "366667", "ups": "3.09", "wpb": "118624", "bsz": "256", "num_updates": "432200", "lr": "0.000573535", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20", "wall": "141075"} [2022-08-02 16:40:27,490][train_inner][INFO] - {"epoch": 9, "update": 8.401, "loss": "2.197", "ppl": "4.58", "wps": "363653", "ups": "3.08", "wpb": "117989", "bsz": "256", "num_updates": "432400", "lr": "0.000573333", "gnorm": "0.672", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.5", "wall": "141140"} [2022-08-02 16:41:32,177][train_inner][INFO] - {"epoch": 9, "update": 8.405, "loss": "2.204", "ppl": "4.61", "wps": "363873", "ups": "3.09", "wpb": "117689", "bsz": "256", "num_updates": "432600", "lr": "0.000573131", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20", "wall": "141204"} [2022-08-02 16:42:36,643][train_inner][INFO] - {"epoch": 9, "update": 8.409, "loss": "2.203", "ppl": "4.6", "wps": "367303", "ups": "3.1", "wpb": "118390", "bsz": "256", "num_updates": "432800", "lr": "0.000572929", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.7", "wall": "141269"} [2022-08-02 16:43:41,616][train_inner][INFO] - {"epoch": 9, "update": 8.413, "loss": "2.198", "ppl": "4.59", "wps": "364257", "ups": "3.08", "wpb": "118334", "bsz": "256", "num_updates": "433000", "lr": "0.000572727", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "141334"} [2022-08-02 16:44:46,825][train_inner][INFO] - {"epoch": 9, "update": 8.417, "loss": "2.196", "ppl": "4.58", "wps": "363403", "ups": "3.07", "wpb": "118484", "bsz": "256", "num_updates": "433200", "lr": "0.000572525", "gnorm": "0.673", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24", "wall": "141399"} [2022-08-02 16:45:51,744][train_inner][INFO] - {"epoch": 9, "update": 8.421, "loss": "2.202", "ppl": "4.6", "wps": "363652", "ups": "3.08", "wpb": "118036", "bsz": "256", "num_updates": "433400", "lr": "0.000572323", "gnorm": "0.672", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.7", "wall": "141464"} [2022-08-02 16:46:55,839][train_inner][INFO] - {"epoch": 9, "update": 8.425, "loss": "2.205", "ppl": "4.61", "wps": "368641", "ups": "3.12", "wpb": "118139", "bsz": "256", "num_updates": "433600", "lr": "0.000572121", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "141528"} [2022-08-02 16:48:00,157][train_inner][INFO] - {"epoch": 9, "update": 8.429, "loss": "2.198", "ppl": "4.59", "wps": "366729", "ups": "3.11", "wpb": "117935", "bsz": "256", "num_updates": "433800", "lr": "0.000571919", "gnorm": "0.668", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.3", "wall": "141592"} [2022-08-02 16:49:04,521][train_inner][INFO] - {"epoch": 9, "update": 8.432, "loss": "2.195", "ppl": "4.58", "wps": "366125", "ups": "3.11", "wpb": "117825", "bsz": "256", "num_updates": "434000", "lr": "0.000571717", "gnorm": "0.668", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.4", "wall": "141657"} [2022-08-02 16:50:09,385][train_inner][INFO] - {"epoch": 9, "update": 8.436, "loss": "2.202", "ppl": "4.6", "wps": "364608", "ups": "3.08", "wpb": "118248", "bsz": "256", "num_updates": "434200", "lr": "0.000571515", "gnorm": "0.668", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "141722"} [2022-08-02 16:51:13,590][train_inner][INFO] - {"epoch": 9, "update": 8.44, "loss": "2.202", "ppl": "4.6", "wps": "368410", "ups": "3.12", "wpb": "118268", "bsz": "256", "num_updates": "434400", "lr": "0.000571313", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.2", "wall": "141786"} [2022-08-02 16:52:18,724][train_inner][INFO] - {"epoch": 9, "update": 8.444, "loss": "2.196", "ppl": "4.58", "wps": "363728", "ups": "3.07", "wpb": "118453", "bsz": "256", "num_updates": "434600", "lr": "0.000571111", "gnorm": "0.672", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.6", "wall": "141851"} [2022-08-02 16:53:23,971][train_inner][INFO] - {"epoch": 9, "update": 8.448, "loss": "2.195", "ppl": "4.58", "wps": "361899", "ups": "3.07", "wpb": "118063", "bsz": "256", "num_updates": "434800", "lr": "0.000570909", "gnorm": "0.668", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "25", "wall": "141916"} [2022-08-02 16:54:29,468][train_inner][INFO] - {"epoch": 9, "update": 8.452, "loss": "2.2", "ppl": "4.59", "wps": "360879", "ups": "3.05", "wpb": "118179", "bsz": "256", "num_updates": "435000", "lr": "0.000570707", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.7", "wall": "141982"} [2022-08-02 16:55:34,309][train_inner][INFO] - {"epoch": 9, "update": 8.456, "loss": "2.197", "ppl": "4.58", "wps": "365632", "ups": "3.08", "wpb": "118538", "bsz": "256", "num_updates": "435200", "lr": "0.000570505", "gnorm": "0.671", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20.8", "wall": "142047"} [2022-08-02 16:56:39,209][train_inner][INFO] - {"epoch": 9, "update": 8.46, "loss": "2.192", "ppl": "4.57", "wps": "366444", "ups": "3.08", "wpb": "118909", "bsz": "256", "num_updates": "435400", "lr": "0.000570303", "gnorm": "0.664", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "21.5", "wall": "142112"} [2022-08-02 16:57:44,271][train_inner][INFO] - {"epoch": 9, "update": 8.464, "loss": "2.204", "ppl": "4.61", "wps": "362980", "ups": "3.07", "wpb": "118080", "bsz": "256", "num_updates": "435600", "lr": "0.000570101", "gnorm": "0.67", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "21.5", "wall": "142177"} [2022-08-02 16:58:48,942][train_inner][INFO] - {"epoch": 9, "update": 8.467, "loss": "2.195", "ppl": "4.58", "wps": "366853", "ups": "3.09", "wpb": "118623", "bsz": "256", "num_updates": "435800", "lr": "0.000569899", "gnorm": "0.667", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20.4", "wall": "142241"} [2022-08-02 16:59:53,763][train_inner][INFO] - {"epoch": 9, "update": 8.471, "loss": "2.198", "ppl": "4.59", "wps": "365233", "ups": "3.09", "wpb": "118371", "bsz": "256", "num_updates": "436000", "lr": "0.000569697", "gnorm": "0.668", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20.3", "wall": "142306"} [2022-08-02 17:00:58,373][train_inner][INFO] - {"epoch": 9, "update": 8.475, "loss": "2.194", "ppl": "4.58", "wps": "365341", "ups": "3.1", "wpb": "118018", "bsz": "256", "num_updates": "436200", "lr": "0.000569495", "gnorm": "0.669", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20.3", "wall": "142371"} [2022-08-02 17:02:03,779][train_inner][INFO] - {"epoch": 9, "update": 8.479, "loss": "2.2", "ppl": "4.6", "wps": "361522", "ups": "3.06", "wpb": "118227", "bsz": "256", "num_updates": "436400", "lr": "0.000569293", "gnorm": "0.669", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "21.1", "wall": "142436"} [2022-08-02 17:03:08,725][train_inner][INFO] - {"epoch": 9, "update": 8.483, "loss": "2.19", "ppl": "4.56", "wps": "365762", "ups": "3.08", "wpb": "118771", "bsz": "256", "num_updates": "436600", "lr": "0.000569091", "gnorm": "0.665", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.1", "wall": "142501"} [2022-08-02 17:03:23,041][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-02 17:04:13,873][train_inner][INFO] - {"epoch": 9, "update": 8.487, "loss": "2.199", "ppl": "4.59", "wps": "363337", "ups": "3.07", "wpb": "118353", "bsz": "256", "num_updates": "436800", "lr": "0.000568889", "gnorm": "0.668", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.8", "wall": "142566"} [2022-08-02 17:05:18,100][train_inner][INFO] - {"epoch": 9, "update": 8.491, "loss": "2.202", "ppl": "4.6", "wps": "368161", "ups": "3.11", "wpb": "118228", "bsz": "256", "num_updates": "437000", "lr": "0.000568687", "gnorm": "0.668", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.1", "wall": "142630"} [2022-08-02 17:06:23,051][train_inner][INFO] - {"epoch": 9, "update": 8.495, "loss": "2.195", "ppl": "4.58", "wps": "364475", "ups": "3.08", "wpb": "118365", "bsz": "256", "num_updates": "437200", "lr": "0.000568485", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.1", "wall": "142695"} [2022-08-02 17:07:27,935][train_inner][INFO] - {"epoch": 9, "update": 8.498, "loss": "2.195", "ppl": "4.58", "wps": "364828", "ups": "3.08", "wpb": "118355", "bsz": "256", "num_updates": "437400", "lr": "0.000568283", "gnorm": "0.668", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.6", "wall": "142760"} [2022-08-02 17:08:32,552][train_inner][INFO] - {"epoch": 9, "update": 8.502, "loss": "2.196", "ppl": "4.58", "wps": "365648", "ups": "3.1", "wpb": "118134", "bsz": "256", "num_updates": "437600", "lr": "0.000568081", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.9", "wall": "142825"} [2022-08-02 17:09:37,460][train_inner][INFO] - {"epoch": 9, "update": 8.506, "loss": "2.203", "ppl": "4.6", "wps": "364211", "ups": "3.08", "wpb": "118199", "bsz": "256", "num_updates": "437800", "lr": "0.000567879", "gnorm": "0.671", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21", "wall": "142890"} [2022-08-02 17:10:42,133][train_inner][INFO] - {"epoch": 9, "update": 8.51, "loss": "2.195", "ppl": "4.58", "wps": "365994", "ups": "3.09", "wpb": "118350", "bsz": "256", "num_updates": "438000", "lr": "0.000567677", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.3", "wall": "142954"} [2022-08-02 17:11:47,003][train_inner][INFO] - {"epoch": 9, "update": 8.514, "loss": "2.197", "ppl": "4.59", "wps": "362776", "ups": "3.08", "wpb": "117663", "bsz": "256", "num_updates": "438200", "lr": "0.000567475", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "28.2", "wall": "143019"} [2022-08-02 17:12:51,757][train_inner][INFO] - {"epoch": 9, "update": 8.518, "loss": "2.192", "ppl": "4.57", "wps": "366338", "ups": "3.09", "wpb": "118609", "bsz": "256", "num_updates": "438400", "lr": "0.000567273", "gnorm": "0.666", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.8", "wall": "143084"} [2022-08-02 17:13:56,367][train_inner][INFO] - {"epoch": 9, "update": 8.522, "loss": "2.198", "ppl": "4.59", "wps": "364919", "ups": "3.1", "wpb": "117884", "bsz": "256", "num_updates": "438600", "lr": "0.000567071", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.7", "wall": "143149"} [2022-08-02 17:15:01,174][train_inner][INFO] - {"epoch": 9, "update": 8.526, "loss": "2.198", "ppl": "4.59", "wps": "365515", "ups": "3.09", "wpb": "118438", "bsz": "256", "num_updates": "438800", "lr": "0.000566869", "gnorm": "0.668", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "23.3", "wall": "143213"} [2022-08-02 17:16:01,236][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-02 17:16:06,095][train_inner][INFO] - {"epoch": 9, "update": 8.53, "loss": "2.198", "ppl": "4.59", "wps": "363680", "ups": "3.08", "wpb": "118051", "bsz": "256", "num_updates": "439000", "lr": "0.000566667", "gnorm": "0.668", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.5", "wall": "143278"} [2022-08-02 17:16:37,227][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-02 17:17:11,290][train_inner][INFO] - {"epoch": 9, "update": 8.533, "loss": "2.191", "ppl": "4.57", "wps": "362863", "ups": "3.07", "wpb": "118284", "bsz": "256", "num_updates": "439200", "lr": "0.000566465", "gnorm": "0.672", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "143344"} [2022-08-02 17:18:16,569][train_inner][INFO] - {"epoch": 9, "update": 8.537, "loss": "2.198", "ppl": "4.59", "wps": "362206", "ups": "3.06", "wpb": "118221", "bsz": "256", "num_updates": "439400", "lr": "0.000566263", "gnorm": "0.677", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "143409"} [2022-08-02 17:19:21,430][train_inner][INFO] - {"epoch": 9, "update": 8.541, "loss": "2.192", "ppl": "4.57", "wps": "365726", "ups": "3.08", "wpb": "118605", "bsz": "256", "num_updates": "439600", "lr": "0.000566061", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "143474"} [2022-08-02 17:20:26,242][train_inner][INFO] - {"epoch": 9, "update": 8.545, "loss": "2.197", "ppl": "4.59", "wps": "366008", "ups": "3.09", "wpb": "118608", "bsz": "256", "num_updates": "439800", "lr": "0.000565859", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.7", "wall": "143539"} [2022-08-02 17:21:31,136][train_inner][INFO] - {"epoch": 9, "update": 8.549, "loss": "2.194", "ppl": "4.58", "wps": "366285", "ups": "3.08", "wpb": "118846", "bsz": "256", "num_updates": "440000", "lr": "0.000565657", "gnorm": "0.666", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.7", "wall": "143603"} [2022-08-02 17:22:35,872][train_inner][INFO] - {"epoch": 9, "update": 8.553, "loss": "2.193", "ppl": "4.57", "wps": "367252", "ups": "3.09", "wpb": "118871", "bsz": "256", "num_updates": "440200", "lr": "0.000565455", "gnorm": "0.669", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "24", "wall": "143668"} [2022-08-02 17:23:40,538][train_inner][INFO] - {"epoch": 9, "update": 8.557, "loss": "2.199", "ppl": "4.59", "wps": "366795", "ups": "3.09", "wpb": "118594", "bsz": "256", "num_updates": "440400", "lr": "0.000565253", "gnorm": "0.667", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.1", "wall": "143733"} [2022-08-02 17:24:45,222][train_inner][INFO] - {"epoch": 9, "update": 8.561, "loss": "2.195", "ppl": "4.58", "wps": "366785", "ups": "3.09", "wpb": "118625", "bsz": "256", "num_updates": "440600", "lr": "0.000565051", "gnorm": "0.685", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.7", "wall": "143798"} [2022-08-02 17:25:50,153][train_inner][INFO] - {"epoch": 9, "update": 8.565, "loss": "2.198", "ppl": "4.59", "wps": "364463", "ups": "3.08", "wpb": "118321", "bsz": "256", "num_updates": "440800", "lr": "0.000564848", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.7", "wall": "143862"} [2022-08-02 17:26:54,176][train_inner][INFO] - {"epoch": 9, "update": 8.568, "loss": "2.199", "ppl": "4.59", "wps": "368537", "ups": "3.12", "wpb": "117972", "bsz": "256", "num_updates": "441000", "lr": "0.000564646", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.9", "wall": "143926"} [2022-08-02 17:27:58,976][train_inner][INFO] - {"epoch": 9, "update": 8.572, "loss": "2.187", "ppl": "4.55", "wps": "366763", "ups": "3.09", "wpb": "118831", "bsz": "256", "num_updates": "441200", "lr": "0.000564444", "gnorm": "0.667", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.7", "wall": "143991"} [2022-08-02 17:29:03,966][train_inner][INFO] - {"epoch": 9, "update": 8.576, "loss": "2.203", "ppl": "4.6", "wps": "363417", "ups": "3.08", "wpb": "118089", "bsz": "256", "num_updates": "441400", "lr": "0.000564242", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "144056"} [2022-08-02 17:30:08,704][train_inner][INFO] - {"epoch": 9, "update": 8.58, "loss": "2.195", "ppl": "4.58", "wps": "364524", "ups": "3.09", "wpb": "117992", "bsz": "256", "num_updates": "441600", "lr": "0.00056404", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23", "wall": "144121"} [2022-08-02 17:31:13,487][train_inner][INFO] - {"epoch": 9, "update": 8.584, "loss": "2.197", "ppl": "4.58", "wps": "365785", "ups": "3.09", "wpb": "118482", "bsz": "256", "num_updates": "441800", "lr": "0.000563838", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.6", "wall": "144186"} [2022-08-02 17:32:18,560][train_inner][INFO] - {"epoch": 9, "update": 8.588, "loss": "2.195", "ppl": "4.58", "wps": "361934", "ups": "3.07", "wpb": "117758", "bsz": "256", "num_updates": "442000", "lr": "0.000563636", "gnorm": "0.673", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "26.8", "wall": "144251"} [2022-08-02 17:33:22,915][train_inner][INFO] - {"epoch": 9, "update": 8.592, "loss": "2.2", "ppl": "4.59", "wps": "368250", "ups": "3.11", "wpb": "118492", "bsz": "256", "num_updates": "442200", "lr": "0.000563434", "gnorm": "0.668", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "27.9", "wall": "144315"} [2022-08-02 17:34:27,744][train_inner][INFO] - {"epoch": 9, "update": 8.596, "loss": "2.198", "ppl": "4.59", "wps": "367556", "ups": "3.09", "wpb": "119139", "bsz": "256", "num_updates": "442400", "lr": "0.000563232", "gnorm": "0.665", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.6", "wall": "144380"} [2022-08-02 17:35:32,824][train_inner][INFO] - {"epoch": 9, "update": 8.6, "loss": "2.201", "ppl": "4.6", "wps": "362919", "ups": "3.07", "wpb": "118091", "bsz": "256", "num_updates": "442600", "lr": "0.00056303", "gnorm": "0.672", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.7", "wall": "144445"} [2022-08-02 17:36:37,933][train_inner][INFO] - {"epoch": 9, "update": 8.603, "loss": "2.192", "ppl": "4.57", "wps": "363161", "ups": "3.07", "wpb": "118225", "bsz": "256", "num_updates": "442800", "lr": "0.000562828", "gnorm": "0.666", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "144510"} [2022-08-02 17:37:43,069][train_inner][INFO] - {"epoch": 9, "update": 8.607, "loss": "2.195", "ppl": "4.58", "wps": "364004", "ups": "3.07", "wpb": "118546", "bsz": "256", "num_updates": "443000", "lr": "0.000562626", "gnorm": "0.671", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "144575"} [2022-08-02 17:38:47,882][train_inner][INFO] - {"epoch": 9, "update": 8.611, "loss": "2.191", "ppl": "4.57", "wps": "366596", "ups": "3.09", "wpb": "118798", "bsz": "256", "num_updates": "443200", "lr": "0.000562424", "gnorm": "0.669", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "22.5", "wall": "144640"} [2022-08-02 17:39:52,881][train_inner][INFO] - {"epoch": 9, "update": 8.615, "loss": "2.192", "ppl": "4.57", "wps": "365104", "ups": "3.08", "wpb": "118655", "bsz": "256", "num_updates": "443400", "lr": "0.000562222", "gnorm": "0.668", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "19.8", "wall": "144705"} [2022-08-02 17:40:58,095][train_inner][INFO] - {"epoch": 9, "update": 8.619, "loss": "2.189", "ppl": "4.56", "wps": "363833", "ups": "3.07", "wpb": "118633", "bsz": "256", "num_updates": "443600", "lr": "0.00056202", "gnorm": "0.666", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.6", "wall": "144770"} [2022-08-02 17:42:03,393][train_inner][INFO] - {"epoch": 9, "update": 8.623, "loss": "2.186", "ppl": "4.55", "wps": "362448", "ups": "3.06", "wpb": "118335", "bsz": "256", "num_updates": "443800", "lr": "0.000561818", "gnorm": "0.669", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.4", "wall": "144836"} [2022-08-02 17:42:29,570][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-02 17:43:08,626][train_inner][INFO] - {"epoch": 9, "update": 8.627, "loss": "2.197", "ppl": "4.58", "wps": "361531", "ups": "3.07", "wpb": "117916", "bsz": "256", "num_updates": "444000", "lr": "0.000561616", "gnorm": "0.675", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "144901"} [2022-08-02 17:44:13,457][train_inner][INFO] - {"epoch": 9, "update": 8.631, "loss": "2.189", "ppl": "4.56", "wps": "366414", "ups": "3.08", "wpb": "118775", "bsz": "256", "num_updates": "444200", "lr": "0.000561414", "gnorm": "0.666", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.9", "wall": "144966"} [2022-08-02 17:45:18,354][train_inner][INFO] - {"epoch": 9, "update": 8.634, "loss": "2.193", "ppl": "4.57", "wps": "363992", "ups": "3.08", "wpb": "118107", "bsz": "256", "num_updates": "444400", "lr": "0.000561212", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "145031"} [2022-08-02 17:46:23,065][train_inner][INFO] - {"epoch": 9, "update": 8.638, "loss": "2.196", "ppl": "4.58", "wps": "365344", "ups": "3.09", "wpb": "118207", "bsz": "256", "num_updates": "444600", "lr": "0.00056101", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.3", "wall": "145095"} [2022-08-02 17:47:27,778][train_inner][INFO] - {"epoch": 9, "update": 8.642, "loss": "2.198", "ppl": "4.59", "wps": "364979", "ups": "3.09", "wpb": "118093", "bsz": "256", "num_updates": "444800", "lr": "0.000560808", "gnorm": "0.671", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20", "wall": "145160"} [2022-08-02 17:48:32,982][train_inner][INFO] - {"epoch": 9, "update": 8.646, "loss": "2.19", "ppl": "4.56", "wps": "363406", "ups": "3.07", "wpb": "118475", "bsz": "256", "num_updates": "445000", "lr": "0.000560606", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.8", "wall": "145225"} [2022-08-02 17:49:38,012][train_inner][INFO] - {"epoch": 9, "update": 8.65, "loss": "2.186", "ppl": "4.55", "wps": "365062", "ups": "3.08", "wpb": "118698", "bsz": "256", "num_updates": "445200", "lr": "0.000560404", "gnorm": "0.672", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.4", "wall": "145290"} [2022-08-02 17:50:42,761][train_inner][INFO] - {"epoch": 9, "update": 8.654, "loss": "2.191", "ppl": "4.57", "wps": "366842", "ups": "3.09", "wpb": "118762", "bsz": "256", "num_updates": "445400", "lr": "0.000560202", "gnorm": "0.667", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "26.6", "wall": "145355"} [2022-08-02 17:51:47,657][train_inner][INFO] - {"epoch": 9, "update": 8.658, "loss": "2.193", "ppl": "4.57", "wps": "364344", "ups": "3.08", "wpb": "118220", "bsz": "256", "num_updates": "445600", "lr": "0.00056", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "145420"} [2022-08-02 17:52:52,479][train_inner][INFO] - {"epoch": 9, "update": 8.662, "loss": "2.187", "ppl": "4.55", "wps": "365632", "ups": "3.09", "wpb": "118503", "bsz": "256", "num_updates": "445800", "lr": "0.000559798", "gnorm": "0.668", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.1", "wall": "145485"} [2022-08-02 17:53:45,817][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-02 17:53:57,523][train_inner][INFO] - {"epoch": 9, "update": 8.666, "loss": "2.192", "ppl": "4.57", "wps": "364142", "ups": "3.07", "wpb": "118425", "bsz": "256", "num_updates": "446000", "lr": "0.000559596", "gnorm": "0.668", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.3", "wall": "145550"} [2022-08-02 17:55:02,683][train_inner][INFO] - {"epoch": 9, "update": 8.669, "loss": "2.191", "ppl": "4.57", "wps": "362818", "ups": "3.07", "wpb": "118206", "bsz": "256", "num_updates": "446200", "lr": "0.000559394", "gnorm": "0.679", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.4", "wall": "145615"} [2022-08-02 17:56:07,715][train_inner][INFO] - {"epoch": 9, "update": 8.673, "loss": "2.191", "ppl": "4.57", "wps": "364183", "ups": "3.08", "wpb": "118415", "bsz": "256", "num_updates": "446400", "lr": "0.000559192", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "26.7", "wall": "145680"} [2022-08-02 17:57:12,413][train_inner][INFO] - {"epoch": 9, "update": 8.677, "loss": "2.192", "ppl": "4.57", "wps": "366216", "ups": "3.09", "wpb": "118464", "bsz": "256", "num_updates": "446600", "lr": "0.00055899", "gnorm": "0.668", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "28.2", "wall": "145745"} [2022-08-02 17:58:17,433][train_inner][INFO] - {"epoch": 9, "update": 8.681, "loss": "2.187", "ppl": "4.55", "wps": "363005", "ups": "3.08", "wpb": "118011", "bsz": "256", "num_updates": "446800", "lr": "0.000558788", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "145810"} [2022-08-02 17:59:22,259][train_inner][INFO] - {"epoch": 9, "update": 8.685, "loss": "2.195", "ppl": "4.58", "wps": "365566", "ups": "3.09", "wpb": "118489", "bsz": "256", "num_updates": "447000", "lr": "0.000558586", "gnorm": "0.681", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "26", "wall": "145875"} [2022-08-02 18:00:27,476][train_inner][INFO] - {"epoch": 9, "update": 8.689, "loss": "2.192", "ppl": "4.57", "wps": "363347", "ups": "3.07", "wpb": "118481", "bsz": "256", "num_updates": "447200", "lr": "0.000558384", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.5", "wall": "145940"} [2022-08-02 18:01:32,708][train_inner][INFO] - {"epoch": 9, "update": 8.693, "loss": "2.196", "ppl": "4.58", "wps": "361309", "ups": "3.07", "wpb": "117843", "bsz": "256", "num_updates": "447400", "lr": "0.000558182", "gnorm": "0.671", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "25.2", "wall": "146005"} [2022-08-02 18:02:37,745][train_inner][INFO] - {"epoch": 9, "update": 8.697, "loss": "2.193", "ppl": "4.57", "wps": "362755", "ups": "3.08", "wpb": "117961", "bsz": "256", "num_updates": "447600", "lr": "0.00055798", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "146070"} [2022-08-02 18:03:42,693][train_inner][INFO] - {"epoch": 9, "update": 8.701, "loss": "2.192", "ppl": "4.57", "wps": "365739", "ups": "3.08", "wpb": "118768", "bsz": "256", "num_updates": "447800", "lr": "0.000557778", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21", "wall": "146135"} [2022-08-02 18:04:47,603][train_inner][INFO] - {"epoch": 9, "update": 8.704, "loss": "2.188", "ppl": "4.56", "wps": "364510", "ups": "3.08", "wpb": "118299", "bsz": "256", "num_updates": "448000", "lr": "0.000557576", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.5", "wall": "146200"} [2022-08-02 18:05:52,394][train_inner][INFO] - {"epoch": 9, "update": 8.708, "loss": "2.189", "ppl": "4.56", "wps": "364077", "ups": "3.09", "wpb": "117943", "bsz": "256", "num_updates": "448200", "lr": "0.000557374", "gnorm": "0.671", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20.2", "wall": "146265"} [2022-08-02 18:06:57,544][train_inner][INFO] - {"epoch": 9, "update": 8.712, "loss": "2.188", "ppl": "4.56", "wps": "363553", "ups": "3.07", "wpb": "118426", "bsz": "256", "num_updates": "448400", "lr": "0.000557172", "gnorm": "0.667", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "23.8", "wall": "146330"} [2022-08-02 18:07:57,977][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-02 18:08:02,553][train_inner][INFO] - {"epoch": 9, "update": 8.716, "loss": "2.191", "ppl": "4.57", "wps": "362777", "ups": "3.08", "wpb": "117917", "bsz": "256", "num_updates": "448600", "lr": "0.00055697", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.5", "wall": "146395"} [2022-08-02 18:09:07,525][train_inner][INFO] - {"epoch": 9, "update": 8.72, "loss": "2.19", "ppl": "4.56", "wps": "364387", "ups": "3.08", "wpb": "118374", "bsz": "256", "num_updates": "448800", "lr": "0.000556768", "gnorm": "0.674", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.7", "wall": "146460"} [2022-08-02 18:10:12,343][train_inner][INFO] - {"epoch": 9, "update": 8.724, "loss": "2.183", "ppl": "4.54", "wps": "366297", "ups": "3.09", "wpb": "118711", "bsz": "256", "num_updates": "449000", "lr": "0.000556566", "gnorm": "0.668", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.8", "wall": "146525"} [2022-08-02 18:11:17,310][train_inner][INFO] - {"epoch": 9, "update": 8.728, "loss": "2.188", "ppl": "4.56", "wps": "365667", "ups": "3.08", "wpb": "118779", "bsz": "256", "num_updates": "449200", "lr": "0.000556364", "gnorm": "0.667", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "146590"} [2022-08-02 18:12:22,378][train_inner][INFO] - {"epoch": 9, "update": 8.732, "loss": "2.192", "ppl": "4.57", "wps": "364680", "ups": "3.07", "wpb": "118642", "bsz": "256", "num_updates": "449400", "lr": "0.000556162", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.4", "wall": "146655"} [2022-08-02 18:13:27,045][train_inner][INFO] - {"epoch": 9, "update": 8.736, "loss": "2.188", "ppl": "4.56", "wps": "365980", "ups": "3.09", "wpb": "118333", "bsz": "256", "num_updates": "449600", "lr": "0.00055596", "gnorm": "0.666", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "25.6", "wall": "146719"} [2022-08-02 18:14:31,886][train_inner][INFO] - {"epoch": 9, "update": 8.739, "loss": "2.189", "ppl": "4.56", "wps": "365841", "ups": "3.08", "wpb": "118606", "bsz": "256", "num_updates": "449800", "lr": "0.000555758", "gnorm": "0.667", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23", "wall": "146784"} [2022-08-02 18:15:36,652][train_inner][INFO] - {"epoch": 9, "update": 8.743, "loss": "2.189", "ppl": "4.56", "wps": "365496", "ups": "3.09", "wpb": "118356", "bsz": "256", "num_updates": "450000", "lr": "0.000555556", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.4", "wall": "146849"} [2022-08-02 18:15:36,653][fairseq_cli.train][INFO] - begin validation on "valid" subset [2022-08-02 18:15:59,444][valid][INFO] - {"epoch": 9, "valid_loss": "2.094", "valid_ppl": "4.27", "valid_wps": "1.57319e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "450000", "valid_best_loss": "2.094"} [2022-08-02 18:15:59,448][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 9 @ 450000 updates [2022-08-02 18:15:59,449][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_9_450000.pt [2022-08-02 18:16:09,444][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_9_450000.pt [2022-08-02 18:16:41,724][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/adan2/checkpoint_9_450000.pt (epoch 9 @ 450000 updates, score 2.094) (writing took 42.27624590694904 seconds) [2022-08-02 18:17:46,958][train_inner][INFO] - {"epoch": 9, "update": 8.747, "loss": "2.187", "ppl": "4.55", "wps": "181827", "ups": "1.53", "wpb": "118465", "bsz": "256", "num_updates": "450200", "lr": "0.000555354", "gnorm": "0.668", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.6", "wall": "146979"} [2022-08-02 18:18:52,216][train_inner][INFO] - {"epoch": 9, "update": 8.751, "loss": "2.194", "ppl": "4.58", "wps": "360156", "ups": "3.06", "wpb": "117513", "bsz": "256", "num_updates": "450400", "lr": "0.000555152", "gnorm": "0.671", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "147045"} [2022-08-02 18:19:56,863][train_inner][INFO] - {"epoch": 9, "update": 8.755, "loss": "2.194", "ppl": "4.58", "wps": "365883", "ups": "3.09", "wpb": "118265", "bsz": "256", "num_updates": "450600", "lr": "0.000554949", "gnorm": "0.668", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.8", "wall": "147109"} [2022-08-02 18:21:01,963][train_inner][INFO] - {"epoch": 9, "update": 8.759, "loss": "2.188", "ppl": "4.56", "wps": "363512", "ups": "3.07", "wpb": "118320", "bsz": "256", "num_updates": "450800", "lr": "0.000554747", "gnorm": "0.668", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "25.9", "wall": "147174"} [2022-08-02 18:22:06,567][train_inner][INFO] - {"epoch": 9, "update": 8.763, "loss": "2.191", "ppl": "4.57", "wps": "363690", "ups": "3.1", "wpb": "117477", "bsz": "256", "num_updates": "451000", "lr": "0.000554545", "gnorm": "0.67", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "19.9", "wall": "147239"} [2022-08-02 18:23:11,543][train_inner][INFO] - {"epoch": 9, "update": 8.767, "loss": "2.195", "ppl": "4.58", "wps": "363973", "ups": "3.08", "wpb": "118247", "bsz": "256", "num_updates": "451200", "lr": "0.000554343", "gnorm": "0.671", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.2", "wall": "147304"} [2022-08-02 18:24:16,455][train_inner][INFO] - {"epoch": 9, "update": 8.77, "loss": "2.188", "ppl": "4.56", "wps": "363717", "ups": "3.08", "wpb": "118045", "bsz": "256", "num_updates": "451400", "lr": "0.000554141", "gnorm": "0.674", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "21.8", "wall": "147369"} [2022-08-02 18:25:14,947][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-02 18:25:21,883][train_inner][INFO] - {"epoch": 9, "update": 8.774, "loss": "2.187", "ppl": "4.55", "wps": "362122", "ups": "3.06", "wpb": "118464", "bsz": "256", "num_updates": "451600", "lr": "0.000553939", "gnorm": "0.667", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23", "wall": "147434"} [2022-08-02 18:26:27,021][train_inner][INFO] - {"epoch": 9, "update": 8.778, "loss": "2.193", "ppl": "4.57", "wps": "362194", "ups": "3.07", "wpb": "117961", "bsz": "256", "num_updates": "451800", "lr": "0.000553737", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "147499"} [2022-08-02 18:27:32,279][train_inner][INFO] - {"epoch": 9, "update": 8.782, "loss": "2.192", "ppl": "4.57", "wps": "364140", "ups": "3.06", "wpb": "118814", "bsz": "256", "num_updates": "452000", "lr": "0.000553535", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "147565"} [2022-08-02 18:28:37,436][train_inner][INFO] - {"epoch": 9, "update": 8.786, "loss": "2.187", "ppl": "4.55", "wps": "362593", "ups": "3.07", "wpb": "118125", "bsz": "256", "num_updates": "452200", "lr": "0.000553333", "gnorm": "0.671", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.4", "wall": "147630"} [2022-08-02 18:29:42,501][train_inner][INFO] - {"epoch": 9, "update": 8.79, "loss": "2.191", "ppl": "4.57", "wps": "365563", "ups": "3.07", "wpb": "118926", "bsz": "256", "num_updates": "452400", "lr": "0.000553131", "gnorm": "0.673", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "147695"} [2022-08-02 18:30:47,406][train_inner][INFO] - {"epoch": 9, "update": 8.794, "loss": "2.192", "ppl": "4.57", "wps": "364174", "ups": "3.08", "wpb": "118180", "bsz": "256", "num_updates": "452600", "lr": "0.000552929", "gnorm": "0.671", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "147760"} [2022-08-02 18:31:52,110][train_inner][INFO] - {"epoch": 9, "update": 8.798, "loss": "2.194", "ppl": "4.57", "wps": "364980", "ups": "3.09", "wpb": "118077", "bsz": "256", "num_updates": "452800", "lr": "0.000552727", "gnorm": "0.671", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.8", "wall": "147824"} [2022-08-02 18:32:57,293][train_inner][INFO] - {"epoch": 9, "update": 8.802, "loss": "2.19", "ppl": "4.56", "wps": "364109", "ups": "3.07", "wpb": "118667", "bsz": "256", "num_updates": "453000", "lr": "0.000552525", "gnorm": "0.666", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.4", "wall": "147890"} [2022-08-02 18:34:02,253][train_inner][INFO] - {"epoch": 9, "update": 8.805, "loss": "2.191", "ppl": "4.57", "wps": "365222", "ups": "3.08", "wpb": "118622", "bsz": "256", "num_updates": "453200", "lr": "0.000552323", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.3", "wall": "147955"} [2022-08-02 18:35:07,248][train_inner][INFO] - {"epoch": 9, "update": 8.809, "loss": "2.186", "ppl": "4.55", "wps": "365246", "ups": "3.08", "wpb": "118695", "bsz": "256", "num_updates": "453400", "lr": "0.000552121", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "148020"} [2022-08-02 18:36:12,276][train_inner][INFO] - {"epoch": 9, "update": 8.813, "loss": "2.186", "ppl": "4.55", "wps": "365020", "ups": "3.08", "wpb": "118681", "bsz": "256", "num_updates": "453600", "lr": "0.000551919", "gnorm": "0.666", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "148085"} [2022-08-02 18:37:17,274][train_inner][INFO] - {"epoch": 9, "update": 8.817, "loss": "2.188", "ppl": "4.56", "wps": "364598", "ups": "3.08", "wpb": "118487", "bsz": "256", "num_updates": "453800", "lr": "0.000551717", "gnorm": "0.666", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.4", "wall": "148150"} [2022-08-02 18:38:22,157][train_inner][INFO] - {"epoch": 9, "update": 8.821, "loss": "2.186", "ppl": "4.55", "wps": "365543", "ups": "3.08", "wpb": "118587", "bsz": "256", "num_updates": "454000", "lr": "0.000551515", "gnorm": "0.67", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.6", "wall": "148214"} [2022-08-02 18:39:27,156][train_inner][INFO] - {"epoch": 9, "update": 8.825, "loss": "2.187", "ppl": "4.55", "wps": "365115", "ups": "3.08", "wpb": "118659", "bsz": "256", "num_updates": "454200", "lr": "0.000551313", "gnorm": "0.67", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "26", "wall": "148279"} [2022-08-02 18:40:30,305][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-02 18:40:32,565][train_inner][INFO] - {"epoch": 9, "update": 8.829, "loss": "2.184", "ppl": "4.54", "wps": "362341", "ups": "3.06", "wpb": "118499", "bsz": "256", "num_updates": "454400", "lr": "0.000551111", "gnorm": "0.669", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "27.2", "wall": "148345"} [2022-08-02 18:41:38,378][train_inner][INFO] - {"epoch": 9, "update": 8.833, "loss": "2.189", "ppl": "4.56", "wps": "357292", "ups": "3.04", "wpb": "117571", "bsz": "256", "num_updates": "454600", "lr": "0.000550909", "gnorm": "0.673", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "148411"} [2022-08-02 18:42:43,399][train_inner][INFO] - {"epoch": 9, "update": 8.837, "loss": "2.186", "ppl": "4.55", "wps": "363739", "ups": "3.08", "wpb": "118252", "bsz": "256", "num_updates": "454800", "lr": "0.000550707", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.3", "wall": "148476"} [2022-08-02 18:43:48,383][train_inner][INFO] - {"epoch": 9, "update": 8.84, "loss": "2.183", "ppl": "4.54", "wps": "366151", "ups": "3.08", "wpb": "118968", "bsz": "256", "num_updates": "455000", "lr": "0.000550505", "gnorm": "0.666", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "148541"} [2022-08-02 18:44:53,361][train_inner][INFO] - {"epoch": 9, "update": 8.844, "loss": "2.186", "ppl": "4.55", "wps": "362759", "ups": "3.08", "wpb": "117855", "bsz": "256", "num_updates": "455200", "lr": "0.000550303", "gnorm": "0.672", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.6", "wall": "148606"} [2022-08-02 18:45:18,414][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-02 18:45:20,692][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 [2022-08-02 18:45:21,288][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 [2022-08-02 18:45:21,584][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 [2022-08-02 18:45:21,882][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5 [2022-08-02 18:45:22,496][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.25 [2022-08-02 18:45:59,965][train_inner][INFO] - {"epoch": 9, "update": 8.848, "loss": "2.199", "ppl": "4.59", "wps": "353759", "ups": "3", "wpb": "117806", "bsz": "256", "num_updates": "455400", "lr": "0.000550101", "gnorm": "0.892", "clip": "1", "loss_scale": "0.25", "train_wall": "66", "gb_free": "19.7", "wall": "148672"} [2022-08-02 18:47:04,963][train_inner][INFO] - {"epoch": 9, "update": 8.852, "loss": "2.188", "ppl": "4.56", "wps": "365390", "ups": "3.08", "wpb": "118748", "bsz": "256", "num_updates": "455600", "lr": "0.000549899", "gnorm": "0.67", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "24.1", "wall": "148737"} [2022-08-02 18:48:09,507][train_inner][INFO] - {"epoch": 9, "update": 8.856, "loss": "2.19", "ppl": "4.56", "wps": "366192", "ups": "3.1", "wpb": "118175", "bsz": "256", "num_updates": "455800", "lr": "0.000549697", "gnorm": "0.677", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "22.8", "wall": "148802"} [2022-08-02 18:49:14,696][train_inner][INFO] - {"epoch": 9, "update": 8.86, "loss": "2.183", "ppl": "4.54", "wps": "365650", "ups": "3.07", "wpb": "119181", "bsz": "256", "num_updates": "456000", "lr": "0.000549495", "gnorm": "0.666", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "19.9", "wall": "148867"} [2022-08-02 18:50:19,729][train_inner][INFO] - {"epoch": 9, "update": 8.864, "loss": "2.189", "ppl": "4.56", "wps": "362411", "ups": "3.08", "wpb": "117842", "bsz": "256", "num_updates": "456200", "lr": "0.000549293", "gnorm": "0.671", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "19.9", "wall": "148932"} [2022-08-02 18:51:24,398][train_inner][INFO] - {"epoch": 9, "update": 8.868, "loss": "2.189", "ppl": "4.56", "wps": "364045", "ups": "3.09", "wpb": "117709", "bsz": "256", "num_updates": "456400", "lr": "0.000549091", "gnorm": "0.672", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "20.8", "wall": "148997"} [2022-08-02 18:52:29,083][train_inner][INFO] - {"epoch": 9, "update": 8.872, "loss": "2.187", "ppl": "4.55", "wps": "364687", "ups": "3.09", "wpb": "117947", "bsz": "256", "num_updates": "456600", "lr": "0.000548889", "gnorm": "0.67", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "21", "wall": "149061"} [2022-08-02 18:53:33,940][train_inner][INFO] - {"epoch": 9, "update": 8.876, "loss": "2.193", "ppl": "4.57", "wps": "363080", "ups": "3.08", "wpb": "117741", "bsz": "256", "num_updates": "456800", "lr": "0.000548687", "gnorm": "0.674", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "21.4", "wall": "149126"} [2022-08-02 18:54:38,889][train_inner][INFO] - {"epoch": 9, "update": 8.879, "loss": "2.192", "ppl": "4.57", "wps": "362149", "ups": "3.08", "wpb": "117603", "bsz": "256", "num_updates": "457000", "lr": "0.000548485", "gnorm": "0.674", "clip": "0", "loss_scale": "0.25", "train_wall": "65", "gb_free": "23.9", "wall": "149191"} [2022-08-02 18:55:43,711][train_inner][INFO] - {"epoch": 9, "update": 8.883, "loss": "2.191", "ppl": "4.57", "wps": "365443", "ups": "3.09", "wpb": "118442", "bsz": "256", "num_updates": "457200", "lr": "0.000548283", "gnorm": "0.673", "clip": "0", "loss_scale": "0.25", "train_wall": "64", "gb_free": "22.1", "wall": "149256"} [2022-08-02 18:56:48,883][train_inner][INFO] - {"epoch": 9, "update": 8.887, "loss": "2.186", "ppl": "4.55", "wps": "365561", "ups": "3.07", "wpb": "119121", "bsz": "256", "num_updates": "457400", "lr": "0.000548081", "gnorm": "0.668", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "19.9", "wall": "149321"} [2022-08-02 18:57:53,620][train_inner][INFO] - {"epoch": 9, "update": 8.891, "loss": "2.186", "ppl": "4.55", "wps": "366554", "ups": "3.09", "wpb": "118646", "bsz": "256", "num_updates": "457600", "lr": "0.000547879", "gnorm": "0.669", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "28.2", "wall": "149386"} [2022-08-02 18:58:58,705][train_inner][INFO] - {"epoch": 9, "update": 8.895, "loss": "2.186", "ppl": "4.55", "wps": "365355", "ups": "3.07", "wpb": "118893", "bsz": "256", "num_updates": "457800", "lr": "0.000547677", "gnorm": "0.668", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "26.9", "wall": "149451"} [2022-08-02 19:00:03,616][train_inner][INFO] - {"epoch": 9, "update": 8.899, "loss": "2.182", "ppl": "4.54", "wps": "366000", "ups": "3.08", "wpb": "118786", "bsz": "256", "num_updates": "458000", "lr": "0.000547475", "gnorm": "0.667", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "19.9", "wall": "149516"} [2022-08-02 19:01:08,222][train_inner][INFO] - {"epoch": 9, "update": 8.903, "loss": "2.183", "ppl": "4.54", "wps": "367086", "ups": "3.1", "wpb": "118578", "bsz": "256", "num_updates": "458200", "lr": "0.000547273", "gnorm": "0.686", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "24", "wall": "149581"} [2022-08-02 19:02:13,400][train_inner][INFO] - {"epoch": 9, "update": 8.907, "loss": "2.184", "ppl": "4.54", "wps": "364189", "ups": "3.07", "wpb": "118684", "bsz": "256", "num_updates": "458400", "lr": "0.000547071", "gnorm": "0.673", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.1", "wall": "149646"} [2022-08-02 19:03:18,403][train_inner][INFO] - {"epoch": 9, "update": 8.91, "loss": "2.19", "ppl": "4.56", "wps": "363286", "ups": "3.08", "wpb": "118071", "bsz": "256", "num_updates": "458600", "lr": "0.000546869", "gnorm": "0.674", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "22.6", "wall": "149711"} [2022-08-02 19:04:23,271][train_inner][INFO] - {"epoch": 9, "update": 8.914, "loss": "2.183", "ppl": "4.54", "wps": "365341", "ups": "3.08", "wpb": "118493", "bsz": "256", "num_updates": "458800", "lr": "0.000546667", "gnorm": "0.669", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "19.9", "wall": "149776"} [2022-08-02 19:05:28,213][train_inner][INFO] - {"epoch": 9, "update": 8.918, "loss": "2.19", "ppl": "4.56", "wps": "363567", "ups": "3.08", "wpb": "118052", "bsz": "256", "num_updates": "459000", "lr": "0.000546465", "gnorm": "0.679", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "28.2", "wall": "149841"} [2022-08-02 19:06:33,235][train_inner][INFO] - {"epoch": 9, "update": 8.922, "loss": "2.185", "ppl": "4.55", "wps": "363890", "ups": "3.08", "wpb": "118303", "bsz": "256", "num_updates": "459200", "lr": "0.000546263", "gnorm": "0.673", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20", "wall": "149906"} [2022-08-02 19:07:38,032][train_inner][INFO] - {"epoch": 9, "update": 8.926, "loss": "2.186", "ppl": "4.55", "wps": "364844", "ups": "3.09", "wpb": "118202", "bsz": "256", "num_updates": "459400", "lr": "0.000546061", "gnorm": "0.68", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20.5", "wall": "149970"} [2022-08-02 19:08:42,559][train_inner][INFO] - {"epoch": 9, "update": 8.93, "loss": "2.189", "ppl": "4.56", "wps": "366175", "ups": "3.1", "wpb": "118140", "bsz": "256", "num_updates": "459600", "lr": "0.000545859", "gnorm": "0.673", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "22.2", "wall": "150035"} [2022-08-02 19:09:47,367][train_inner][INFO] - {"epoch": 9, "update": 8.934, "loss": "2.188", "ppl": "4.56", "wps": "363728", "ups": "3.09", "wpb": "117860", "bsz": "256", "num_updates": "459800", "lr": "0.000545657", "gnorm": "0.67", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "19.7", "wall": "150100"} [2022-08-02 19:10:52,413][train_inner][INFO] - {"epoch": 9, "update": 8.938, "loss": "2.185", "ppl": "4.55", "wps": "363900", "ups": "3.07", "wpb": "118350", "bsz": "256", "num_updates": "460000", "lr": "0.000545455", "gnorm": "0.671", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "19.9", "wall": "150165"} [2022-08-02 19:11:57,098][train_inner][INFO] - {"epoch": 9, "update": 8.942, "loss": "2.186", "ppl": "4.55", "wps": "364085", "ups": "3.09", "wpb": "117751", "bsz": "256", "num_updates": "460200", "lr": "0.000545253", "gnorm": "0.673", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "19.7", "wall": "150229"} [2022-08-02 19:13:01,352][train_inner][INFO] - {"epoch": 9, "update": 8.945, "loss": "2.185", "ppl": "4.55", "wps": "367536", "ups": "3.11", "wpb": "118076", "bsz": "256", "num_updates": "460400", "lr": "0.000545051", "gnorm": "0.696", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "21.1", "wall": "150294"} [2022-08-02 19:14:06,306][train_inner][INFO] - {"epoch": 9, "update": 8.949, "loss": "2.193", "ppl": "4.57", "wps": "364202", "ups": "3.08", "wpb": "118273", "bsz": "256", "num_updates": "460600", "lr": "0.000544848", "gnorm": "0.672", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.8", "wall": "150359"} [2022-08-02 19:15:11,331][train_inner][INFO] - {"epoch": 9, "update": 8.953, "loss": "2.181", "ppl": "4.53", "wps": "364033", "ups": "3.08", "wpb": "118354", "bsz": "256", "num_updates": "460800", "lr": "0.000544646", "gnorm": "0.672", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.1", "wall": "150424"} [2022-08-02 19:15:26,497][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5 [2022-08-02 19:16:16,408][train_inner][INFO] - {"epoch": 9, "update": 8.957, "loss": "2.184", "ppl": "4.55", "wps": "362157", "ups": "3.07", "wpb": "117839", "bsz": "256", "num_updates": "461000", "lr": "0.000544444", "gnorm": "0.673", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "22", "wall": "150489"} [2022-08-02 19:17:21,128][train_inner][INFO] - {"epoch": 9, "update": 8.961, "loss": "2.191", "ppl": "4.56", "wps": "366157", "ups": "3.09", "wpb": "118488", "bsz": "256", "num_updates": "461200", "lr": "0.000544242", "gnorm": "0.673", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "28.2", "wall": "150553"} [2022-08-02 19:18:26,087][train_inner][INFO] - {"epoch": 9, "update": 8.965, "loss": "2.187", "ppl": "4.56", "wps": "363577", "ups": "3.08", "wpb": "118086", "bsz": "256", "num_updates": "461400", "lr": "0.00054404", "gnorm": "0.671", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "22.4", "wall": "150618"} [2022-08-02 19:19:31,012][train_inner][INFO] - {"epoch": 9, "update": 8.969, "loss": "2.186", "ppl": "4.55", "wps": "363182", "ups": "3.08", "wpb": "117895", "bsz": "256", "num_updates": "461600", "lr": "0.000543838", "gnorm": "0.673", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "23.3", "wall": "150683"} [2022-08-02 19:20:36,070][train_inner][INFO] - {"epoch": 9, "update": 8.973, "loss": "2.184", "ppl": "4.54", "wps": "365193", "ups": "3.08", "wpb": "118731", "bsz": "256", "num_updates": "461800", "lr": "0.000543636", "gnorm": "0.671", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "23.4", "wall": "150748"} [2022-08-02 19:21:40,583][train_inner][INFO] - {"epoch": 9, "update": 8.977, "loss": "2.187", "ppl": "4.55", "wps": "364733", "ups": "3.1", "wpb": "117647", "bsz": "256", "num_updates": "462000", "lr": "0.000543434", "gnorm": "0.672", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "22.1", "wall": "150813"} [2022-08-02 19:22:45,569][train_inner][INFO] - {"epoch": 9, "update": 8.98, "loss": "2.182", "ppl": "4.54", "wps": "364947", "ups": "3.08", "wpb": "118581", "bsz": "256", "num_updates": "462200", "lr": "0.000543232", "gnorm": "0.669", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "24.3", "wall": "150878"} [2022-08-02 19:23:50,274][train_inner][INFO] - {"epoch": 9, "update": 8.984, "loss": "2.175", "ppl": "4.52", "wps": "367006", "ups": "3.09", "wpb": "118735", "bsz": "256", "num_updates": "462400", "lr": "0.00054303", "gnorm": "0.67", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20.7", "wall": "150943"} [2022-08-02 19:24:55,296][train_inner][INFO] - {"epoch": 9, "update": 8.988, "loss": "2.185", "ppl": "4.55", "wps": "363673", "ups": "3.08", "wpb": "118231", "bsz": "256", "num_updates": "462600", "lr": "0.000542828", "gnorm": "0.673", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "26.2", "wall": "151008"} [2022-08-02 19:26:00,168][train_inner][INFO] - {"epoch": 9, "update": 8.992, "loss": "2.191", "ppl": "4.57", "wps": "362679", "ups": "3.08", "wpb": "117637", "bsz": "256", "num_updates": "462800", "lr": "0.000542626", "gnorm": "0.673", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20.2", "wall": "151072"} [2022-08-02 19:27:05,231][train_inner][INFO] - {"epoch": 9, "update": 8.996, "loss": "2.184", "ppl": "4.54", "wps": "364438", "ups": "3.07", "wpb": "118555", "bsz": "256", "num_updates": "463000", "lr": "0.000542424", "gnorm": "0.672", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "20.5", "wall": "151138"} [2022-08-02 19:28:10,311][train_inner][INFO] - {"epoch": 9, "update": 9.0, "loss": "2.187", "ppl": "4.55", "wps": "363316", "ups": "3.07", "wpb": "118222", "bsz": "256", "num_updates": "463200", "lr": "0.000542222", "gnorm": "0.671", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "25.7", "wall": "151203"} [2022-08-02 19:28:12,983][fairseq_cli.train][INFO] - end of epoch 9 (average epoch stats below) [2022-08-02 19:28:12,983][train][INFO] - {"epoch": 9, "train_loss": "2.198", "train_ppl": "4.59", "train_wps": "362762", "train_ups": "3.07", "train_wpb": "118298", "train_bsz": "256", "train_num_updates": "463208", "train_lr": "0.000542214", "train_gnorm": "0.672", "train_clip": "0", "train_loss_scale": "1", "train_train_wall": "16614", "train_gb_free": "22.9", "train_wall": "151205"} [2022-08-02 19:28:13,092][fairseq.data.iterators][INFO] - grouped total_num_itrs = 51488 [2022-08-02 19:28:13,096][fairseq.trainer][INFO] - begin training epoch 10 [2022-08-02 19:28:13,096][fairseq_cli.train][INFO] - Start iterating over samples [2022-08-02 19:29:28,946][train_inner][INFO] - {"epoch": 10, "update": 9.004, "loss": "2.179", "ppl": "4.53", "wps": "299898", "ups": "2.54", "wpb": "117910", "bsz": "255.4", "num_updates": "463400", "lr": "0.00054202", "gnorm": "0.672", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.9", "wall": "151281"} [2022-08-02 19:30:34,137][train_inner][INFO] - {"epoch": 10, "update": 9.008, "loss": "2.176", "ppl": "4.52", "wps": "362740", "ups": "3.07", "wpb": "118236", "bsz": "256", "num_updates": "463600", "lr": "0.000541818", "gnorm": "0.672", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.8", "wall": "151346"} [2022-08-02 19:31:38,837][train_inner][INFO] - {"epoch": 10, "update": 9.011, "loss": "2.186", "ppl": "4.55", "wps": "364688", "ups": "3.09", "wpb": "117975", "bsz": "256", "num_updates": "463800", "lr": "0.000541616", "gnorm": "0.674", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "21.7", "wall": "151411"} [2022-08-02 19:32:43,589][train_inner][INFO] - {"epoch": 10, "update": 9.015, "loss": "2.184", "ppl": "4.55", "wps": "364183", "ups": "3.09", "wpb": "117906", "bsz": "256", "num_updates": "464000", "lr": "0.000541414", "gnorm": "0.673", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "21.1", "wall": "151476"} [2022-08-02 19:33:48,436][train_inner][INFO] - {"epoch": 10, "update": 9.019, "loss": "2.179", "ppl": "4.53", "wps": "365063", "ups": "3.08", "wpb": "118365", "bsz": "256", "num_updates": "464200", "lr": "0.000541212", "gnorm": "0.674", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "22.5", "wall": "151541"} [2022-08-02 19:34:53,132][train_inner][INFO] - {"epoch": 10, "update": 9.023, "loss": "2.185", "ppl": "4.55", "wps": "364496", "ups": "3.09", "wpb": "117906", "bsz": "256", "num_updates": "464400", "lr": "0.00054101", "gnorm": "0.679", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20.6", "wall": "151605"} [2022-08-02 19:35:58,030][train_inner][INFO] - {"epoch": 10, "update": 9.027, "loss": "2.179", "ppl": "4.53", "wps": "362974", "ups": "3.08", "wpb": "117780", "bsz": "256", "num_updates": "464600", "lr": "0.000540808", "gnorm": "0.673", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "19.9", "wall": "151670"} [2022-08-02 19:37:02,945][train_inner][INFO] - {"epoch": 10, "update": 9.031, "loss": "2.183", "ppl": "4.54", "wps": "365366", "ups": "3.08", "wpb": "118586", "bsz": "256", "num_updates": "464800", "lr": "0.000540606", "gnorm": "0.671", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.1", "wall": "151735"} [2022-08-02 19:38:07,526][train_inner][INFO] - {"epoch": 10, "update": 9.035, "loss": "2.18", "ppl": "4.53", "wps": "366724", "ups": "3.1", "wpb": "118415", "bsz": "256", "num_updates": "465000", "lr": "0.000540404", "gnorm": "0.673", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "22", "wall": "151800"} [2022-08-02 19:39:12,189][train_inner][INFO] - {"epoch": 10, "update": 9.039, "loss": "2.174", "ppl": "4.51", "wps": "365891", "ups": "3.09", "wpb": "118297", "bsz": "256", "num_updates": "465200", "lr": "0.000540202", "gnorm": "0.67", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "19.9", "wall": "151865"} [2022-08-02 19:40:16,810][train_inner][INFO] - {"epoch": 10, "update": 9.043, "loss": "2.179", "ppl": "4.53", "wps": "366800", "ups": "3.1", "wpb": "118513", "bsz": "256", "num_updates": "465400", "lr": "0.00054", "gnorm": "0.68", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "19.8", "wall": "151929"} [2022-08-02 19:41:21,698][train_inner][INFO] - {"epoch": 10, "update": 9.046, "loss": "2.184", "ppl": "4.54", "wps": "364009", "ups": "3.08", "wpb": "118098", "bsz": "256", "num_updates": "465600", "lr": "0.000539798", "gnorm": "0.672", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.5", "wall": "151994"} [2022-08-02 19:42:26,557][train_inner][INFO] - {"epoch": 10, "update": 9.05, "loss": "2.175", "ppl": "4.52", "wps": "365812", "ups": "3.08", "wpb": "118629", "bsz": "256", "num_updates": "465800", "lr": "0.000539596", "gnorm": "0.671", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20", "wall": "152059"} [2022-08-02 19:43:31,158][train_inner][INFO] - {"epoch": 10, "update": 9.054, "loss": "2.176", "ppl": "4.52", "wps": "367106", "ups": "3.1", "wpb": "118574", "bsz": "256", "num_updates": "466000", "lr": "0.000539394", "gnorm": "0.671", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.5", "wall": "152123"} [2022-08-02 19:44:36,221][train_inner][INFO] - {"epoch": 10, "update": 9.058, "loss": "2.178", "ppl": "4.53", "wps": "364194", "ups": "3.07", "wpb": "118476", "bsz": "256", "num_updates": "466200", "lr": "0.000539192", "gnorm": "0.673", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.5", "wall": "152189"} [2022-08-02 19:45:41,425][train_inner][INFO] - {"epoch": 10, "update": 9.062, "loss": "2.183", "ppl": "4.54", "wps": "363231", "ups": "3.07", "wpb": "118420", "bsz": "256", "num_updates": "466400", "lr": "0.00053899", "gnorm": "0.673", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.5", "wall": "152254"} [2022-08-02 19:46:46,593][train_inner][INFO] - {"epoch": 10, "update": 9.066, "loss": "2.174", "ppl": "4.51", "wps": "364330", "ups": "3.07", "wpb": "118711", "bsz": "256", "num_updates": "466600", "lr": "0.000538788", "gnorm": "0.672", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "22.7", "wall": "152319"} [2022-08-02 19:47:51,211][train_inner][INFO] - {"epoch": 10, "update": 9.07, "loss": "2.18", "ppl": "4.53", "wps": "366549", "ups": "3.1", "wpb": "118427", "bsz": "256", "num_updates": "466800", "lr": "0.000538586", "gnorm": "0.672", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.1", "wall": "152384"} [2022-08-02 19:48:55,756][train_inner][INFO] - {"epoch": 10, "update": 9.074, "loss": "2.179", "ppl": "4.53", "wps": "366112", "ups": "3.1", "wpb": "118152", "bsz": "256", "num_updates": "467000", "lr": "0.000538384", "gnorm": "0.673", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.9", "wall": "152448"} [2022-08-02 19:50:00,791][train_inner][INFO] - {"epoch": 10, "update": 9.078, "loss": "2.177", "ppl": "4.52", "wps": "362793", "ups": "3.08", "wpb": "117970", "bsz": "256", "num_updates": "467200", "lr": "0.000538182", "gnorm": "0.672", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.1", "wall": "152513"} [2022-08-02 19:51:05,618][train_inner][INFO] - {"epoch": 10, "update": 9.081, "loss": "2.178", "ppl": "4.53", "wps": "365818", "ups": "3.09", "wpb": "118571", "bsz": "256", "num_updates": "467400", "lr": "0.00053798", "gnorm": "0.673", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.9", "wall": "152578"} [2022-08-02 19:52:10,600][train_inner][INFO] - {"epoch": 10, "update": 9.085, "loss": "2.177", "ppl": "4.52", "wps": "364839", "ups": "3.08", "wpb": "118540", "bsz": "256", "num_updates": "467600", "lr": "0.000537778", "gnorm": "0.672", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.1", "wall": "152643"} [2022-08-02 19:53:16,834][train_inner][INFO] - {"epoch": 10, "update": 9.089, "loss": "2.177", "ppl": "4.52", "wps": "356441", "ups": "3.02", "wpb": "118039", "bsz": "256", "num_updates": "467800", "lr": "0.000537576", "gnorm": "0.695", "clip": "0", "loss_scale": "4", "train_wall": "66", "gb_free": "24.5", "wall": "152709"} [2022-08-02 19:54:21,926][train_inner][INFO] - {"epoch": 10, "update": 9.093, "loss": "2.183", "ppl": "4.54", "wps": "362982", "ups": "3.07", "wpb": "118136", "bsz": "256", "num_updates": "468000", "lr": "0.000537374", "gnorm": "0.674", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.3", "wall": "152774"} [2022-08-02 19:55:26,806][train_inner][INFO] - {"epoch": 10, "update": 9.097, "loss": "2.184", "ppl": "4.54", "wps": "362291", "ups": "3.08", "wpb": "117524", "bsz": "256", "num_updates": "468200", "lr": "0.000537172", "gnorm": "0.676", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.9", "wall": "152839"} [2022-08-02 19:56:31,438][train_inner][INFO] - {"epoch": 10, "update": 9.101, "loss": "2.184", "ppl": "4.54", "wps": "365272", "ups": "3.09", "wpb": "118040", "bsz": "256", "num_updates": "468400", "lr": "0.00053697", "gnorm": "0.674", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.3", "wall": "152904"} [2022-08-02 19:57:37,307][train_inner][INFO] - {"epoch": 10, "update": 9.105, "loss": "2.179", "ppl": "4.53", "wps": "360618", "ups": "3.04", "wpb": "118766", "bsz": "256", "num_updates": "468600", "lr": "0.000536768", "gnorm": "0.669", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21", "wall": "152970"} [2022-08-02 19:58:42,357][train_inner][INFO] - {"epoch": 10, "update": 9.109, "loss": "2.172", "ppl": "4.51", "wps": "365559", "ups": "3.07", "wpb": "118897", "bsz": "256", "num_updates": "468800", "lr": "0.000536566", "gnorm": "0.67", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.8", "wall": "153035"} [2022-08-02 19:59:47,412][train_inner][INFO] - {"epoch": 10, "update": 9.112, "loss": "2.176", "ppl": "4.52", "wps": "364101", "ups": "3.07", "wpb": "118430", "bsz": "256", "num_updates": "469000", "lr": "0.000536364", "gnorm": "0.671", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "25.7", "wall": "153100"} [2022-08-02 20:00:52,386][train_inner][INFO] - {"epoch": 10, "update": 9.116, "loss": "2.178", "ppl": "4.53", "wps": "366140", "ups": "3.08", "wpb": "118946", "bsz": "256", "num_updates": "469200", "lr": "0.000536162", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.8", "wall": "153165"} [2022-08-02 20:01:57,487][train_inner][INFO] - {"epoch": 10, "update": 9.12, "loss": "2.174", "ppl": "4.51", "wps": "364180", "ups": "3.07", "wpb": "118541", "bsz": "256", "num_updates": "469400", "lr": "0.00053596", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.2", "wall": "153230"} [2022-08-02 20:03:02,345][train_inner][INFO] - {"epoch": 10, "update": 9.124, "loss": "2.18", "ppl": "4.53", "wps": "365958", "ups": "3.08", "wpb": "118674", "bsz": "256", "num_updates": "469600", "lr": "0.000535758", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.7", "wall": "153295"} [2022-08-02 20:04:07,660][train_inner][INFO] - {"epoch": 10, "update": 9.128, "loss": "2.173", "ppl": "4.51", "wps": "365081", "ups": "3.06", "wpb": "119225", "bsz": "256", "num_updates": "469800", "lr": "0.000535556", "gnorm": "0.668", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "153360"} [2022-08-02 20:05:12,639][train_inner][INFO] - {"epoch": 10, "update": 9.132, "loss": "2.181", "ppl": "4.53", "wps": "364171", "ups": "3.08", "wpb": "118316", "bsz": "256", "num_updates": "470000", "lr": "0.000535354", "gnorm": "0.673", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "153425"} [2022-08-02 20:06:17,714][train_inner][INFO] - {"epoch": 10, "update": 9.136, "loss": "2.175", "ppl": "4.52", "wps": "363071", "ups": "3.07", "wpb": "118132", "bsz": "256", "num_updates": "470200", "lr": "0.000535152", "gnorm": "0.69", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.1", "wall": "153490"} [2022-08-02 20:07:22,639][train_inner][INFO] - {"epoch": 10, "update": 9.14, "loss": "2.179", "ppl": "4.53", "wps": "365056", "ups": "3.08", "wpb": "118506", "bsz": "256", "num_updates": "470400", "lr": "0.000534949", "gnorm": "0.672", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.5", "wall": "153555"} [2022-08-02 20:08:27,520][train_inner][INFO] - {"epoch": 10, "update": 9.144, "loss": "2.187", "ppl": "4.55", "wps": "361806", "ups": "3.08", "wpb": "117370", "bsz": "256", "num_updates": "470600", "lr": "0.000534747", "gnorm": "0.677", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.6", "wall": "153620"} [2022-08-02 20:09:32,478][train_inner][INFO] - {"epoch": 10, "update": 9.147, "loss": "2.183", "ppl": "4.54", "wps": "363280", "ups": "3.08", "wpb": "117987", "bsz": "256", "num_updates": "470800", "lr": "0.000534545", "gnorm": "0.684", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.7", "wall": "153685"} [2022-08-02 20:10:37,727][train_inner][INFO] - {"epoch": 10, "update": 9.151, "loss": "2.177", "ppl": "4.52", "wps": "363264", "ups": "3.07", "wpb": "118512", "bsz": "256", "num_updates": "471000", "lr": "0.000534343", "gnorm": "0.672", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.6", "wall": "153750"} [2022-08-02 20:11:43,072][train_inner][INFO] - {"epoch": 10, "update": 9.155, "loss": "2.176", "ppl": "4.52", "wps": "362719", "ups": "3.06", "wpb": "118508", "bsz": "256", "num_updates": "471200", "lr": "0.000534141", "gnorm": "0.672", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.1", "wall": "153815"} [2022-08-02 20:12:48,139][train_inner][INFO] - {"epoch": 10, "update": 9.159, "loss": "2.18", "ppl": "4.53", "wps": "362933", "ups": "3.07", "wpb": "118073", "bsz": "256", "num_updates": "471400", "lr": "0.000533939", "gnorm": "0.673", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "153880"} [2022-08-02 20:13:53,258][train_inner][INFO] - {"epoch": 10, "update": 9.163, "loss": "2.177", "ppl": "4.52", "wps": "364914", "ups": "3.07", "wpb": "118812", "bsz": "256", "num_updates": "471600", "lr": "0.000533737", "gnorm": "0.673", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "153946"} [2022-08-02 20:14:57,865][train_inner][INFO] - {"epoch": 10, "update": 9.167, "loss": "2.182", "ppl": "4.54", "wps": "365694", "ups": "3.1", "wpb": "118131", "bsz": "256", "num_updates": "471800", "lr": "0.000533535", "gnorm": "0.672", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.9", "wall": "154010"} [2022-08-02 20:16:03,731][train_inner][INFO] - {"epoch": 10, "update": 9.171, "loss": "2.182", "ppl": "4.54", "wps": "359119", "ups": "3.04", "wpb": "118266", "bsz": "256", "num_updates": "472000", "lr": "0.000533333", "gnorm": "0.672", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "21.7", "wall": "154076"} [2022-08-02 20:17:08,532][train_inner][INFO] - {"epoch": 10, "update": 9.175, "loss": "2.177", "ppl": "4.52", "wps": "365283", "ups": "3.09", "wpb": "118352", "bsz": "256", "num_updates": "472200", "lr": "0.000533131", "gnorm": "0.672", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23", "wall": "154141"} [2022-08-02 20:18:13,427][train_inner][INFO] - {"epoch": 10, "update": 9.179, "loss": "2.173", "ppl": "4.51", "wps": "365496", "ups": "3.08", "wpb": "118594", "bsz": "256", "num_updates": "472400", "lr": "0.000532929", "gnorm": "0.672", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "154206"} [2022-08-02 20:19:18,477][train_inner][INFO] - {"epoch": 10, "update": 9.182, "loss": "2.172", "ppl": "4.51", "wps": "365338", "ups": "3.07", "wpb": "118825", "bsz": "256", "num_updates": "472600", "lr": "0.000532727", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "154271"} [2022-08-02 20:20:23,289][train_inner][INFO] - {"epoch": 10, "update": 9.186, "loss": "2.177", "ppl": "4.52", "wps": "363871", "ups": "3.09", "wpb": "117913", "bsz": "256", "num_updates": "472800", "lr": "0.000532525", "gnorm": "0.673", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.6", "wall": "154336"} [2022-08-02 20:21:28,045][train_inner][INFO] - {"epoch": 10, "update": 9.19, "loss": "2.178", "ppl": "4.53", "wps": "365342", "ups": "3.09", "wpb": "118289", "bsz": "256", "num_updates": "473000", "lr": "0.000532323", "gnorm": "0.684", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.8", "wall": "154400"} [2022-08-02 20:22:33,167][train_inner][INFO] - {"epoch": 10, "update": 9.194, "loss": "2.172", "ppl": "4.51", "wps": "363410", "ups": "3.07", "wpb": "118329", "bsz": "256", "num_updates": "473200", "lr": "0.000532121", "gnorm": "0.671", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "19.8", "wall": "154465"} [2022-08-02 20:23:37,990][train_inner][INFO] - {"epoch": 10, "update": 9.198, "loss": "2.182", "ppl": "4.54", "wps": "365000", "ups": "3.09", "wpb": "118300", "bsz": "256", "num_updates": "473400", "lr": "0.000531919", "gnorm": "0.674", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "21.5", "wall": "154530"} [2022-08-02 20:24:42,993][train_inner][INFO] - {"epoch": 10, "update": 9.202, "loss": "2.173", "ppl": "4.51", "wps": "365214", "ups": "3.08", "wpb": "118697", "bsz": "256", "num_updates": "473600", "lr": "0.000531717", "gnorm": "0.669", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "26.1", "wall": "154595"} [2022-08-02 20:25:49,076][train_inner][INFO] - {"epoch": 10, "update": 9.206, "loss": "2.178", "ppl": "4.53", "wps": "358274", "ups": "3.03", "wpb": "118378", "bsz": "256", "num_updates": "473800", "lr": "0.000531515", "gnorm": "0.671", "clip": "0", "loss_scale": "32", "train_wall": "66", "gb_free": "20.2", "wall": "154661"} [2022-08-02 20:26:53,465][train_inner][INFO] - {"epoch": 10, "update": 9.21, "loss": "2.172", "ppl": "4.51", "wps": "365798", "ups": "3.11", "wpb": "117766", "bsz": "256", "num_updates": "474000", "lr": "0.000531313", "gnorm": "0.676", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "27.6", "wall": "154726"} [2022-08-02 20:27:10,819][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-02 20:27:59,047][train_inner][INFO] - {"epoch": 10, "update": 9.214, "loss": "2.175", "ppl": "4.51", "wps": "360239", "ups": "3.05", "wpb": "118123", "bsz": "256", "num_updates": "474200", "lr": "0.000531111", "gnorm": "0.673", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "154791"} [2022-08-02 20:29:03,993][train_inner][INFO] - {"epoch": 10, "update": 9.217, "loss": "2.175", "ppl": "4.52", "wps": "364254", "ups": "3.08", "wpb": "118284", "bsz": "256", "num_updates": "474400", "lr": "0.000530909", "gnorm": "0.673", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "154856"} [2022-08-02 20:30:08,941][train_inner][INFO] - {"epoch": 10, "update": 9.221, "loss": "2.177", "ppl": "4.52", "wps": "363602", "ups": "3.08", "wpb": "118075", "bsz": "256", "num_updates": "474600", "lr": "0.000530707", "gnorm": "0.675", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "154921"} [2022-08-02 20:31:13,741][train_inner][INFO] - {"epoch": 10, "update": 9.225, "loss": "2.177", "ppl": "4.52", "wps": "364915", "ups": "3.09", "wpb": "118231", "bsz": "256", "num_updates": "474800", "lr": "0.000530505", "gnorm": "0.674", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.9", "wall": "154986"} [2022-08-02 20:32:18,960][train_inner][INFO] - {"epoch": 10, "update": 9.229, "loss": "2.175", "ppl": "4.52", "wps": "364065", "ups": "3.07", "wpb": "118717", "bsz": "256", "num_updates": "475000", "lr": "0.000530303", "gnorm": "0.671", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "155051"} [2022-08-02 20:33:23,932][train_inner][INFO] - {"epoch": 10, "update": 9.233, "loss": "2.17", "ppl": "4.5", "wps": "366080", "ups": "3.08", "wpb": "118924", "bsz": "256", "num_updates": "475200", "lr": "0.000530101", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.3", "wall": "155116"} [2022-08-02 20:34:28,818][train_inner][INFO] - {"epoch": 10, "update": 9.237, "loss": "2.175", "ppl": "4.52", "wps": "363262", "ups": "3.08", "wpb": "117850", "bsz": "256", "num_updates": "475400", "lr": "0.000529899", "gnorm": "0.675", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "28", "wall": "155181"} [2022-08-02 20:35:33,803][train_inner][INFO] - {"epoch": 10, "update": 9.241, "loss": "2.172", "ppl": "4.51", "wps": "365241", "ups": "3.08", "wpb": "118675", "bsz": "256", "num_updates": "475600", "lr": "0.000529697", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.4", "wall": "155246"} [2022-08-02 20:36:38,749][train_inner][INFO] - {"epoch": 10, "update": 9.245, "loss": "2.171", "ppl": "4.5", "wps": "364668", "ups": "3.08", "wpb": "118416", "bsz": "256", "num_updates": "475800", "lr": "0.000529495", "gnorm": "0.671", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "155311"} [2022-08-02 20:37:43,258][train_inner][INFO] - {"epoch": 10, "update": 9.248, "loss": "2.171", "ppl": "4.5", "wps": "367541", "ups": "3.1", "wpb": "118548", "bsz": "256", "num_updates": "476000", "lr": "0.000529293", "gnorm": "0.677", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.3", "wall": "155376"} [2022-08-02 20:38:47,956][train_inner][INFO] - {"epoch": 10, "update": 9.252, "loss": "2.175", "ppl": "4.51", "wps": "366471", "ups": "3.09", "wpb": "118546", "bsz": "256", "num_updates": "476200", "lr": "0.000529091", "gnorm": "0.672", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "21.2", "wall": "155440"} [2022-08-02 20:39:52,726][train_inner][INFO] - {"epoch": 10, "update": 9.256, "loss": "2.168", "ppl": "4.49", "wps": "365052", "ups": "3.09", "wpb": "118222", "bsz": "256", "num_updates": "476400", "lr": "0.000528889", "gnorm": "0.671", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "22.7", "wall": "155505"} [2022-08-02 20:40:57,414][train_inner][INFO] - {"epoch": 10, "update": 9.26, "loss": "2.175", "ppl": "4.52", "wps": "365367", "ups": "3.09", "wpb": "118173", "bsz": "256", "num_updates": "476600", "lr": "0.000528687", "gnorm": "0.674", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "24.7", "wall": "155570"} [2022-08-02 20:41:30,636][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-02 20:42:02,588][train_inner][INFO] - {"epoch": 10, "update": 9.264, "loss": "2.171", "ppl": "4.5", "wps": "363548", "ups": "3.07", "wpb": "118468", "bsz": "256", "num_updates": "476800", "lr": "0.000528485", "gnorm": "0.671", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.9", "wall": "155635"} [2022-08-02 20:43:08,709][train_inner][INFO] - {"epoch": 10, "update": 9.268, "loss": "2.173", "ppl": "4.51", "wps": "357640", "ups": "3.02", "wpb": "118236", "bsz": "256", "num_updates": "477000", "lr": "0.000528283", "gnorm": "0.673", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "23.4", "wall": "155701"} [2022-08-02 20:44:13,696][train_inner][INFO] - {"epoch": 10, "update": 9.272, "loss": "2.176", "ppl": "4.52", "wps": "363919", "ups": "3.08", "wpb": "118248", "bsz": "255.9", "num_updates": "477200", "lr": "0.000528081", "gnorm": "0.673", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "155766"} [2022-08-02 20:45:18,735][train_inner][INFO] - {"epoch": 10, "update": 9.276, "loss": "2.171", "ppl": "4.5", "wps": "363803", "ups": "3.08", "wpb": "118306", "bsz": "256", "num_updates": "477400", "lr": "0.000527879", "gnorm": "0.685", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "28", "wall": "155831"} [2022-08-02 20:45:19,030][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-02 20:46:24,051][train_inner][INFO] - {"epoch": 10, "update": 9.28, "loss": "2.18", "ppl": "4.53", "wps": "362166", "ups": "3.06", "wpb": "118273", "bsz": "256", "num_updates": "477600", "lr": "0.000527677", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "155896"} [2022-08-02 20:47:28,986][train_inner][INFO] - {"epoch": 10, "update": 9.283, "loss": "2.172", "ppl": "4.51", "wps": "363259", "ups": "3.08", "wpb": "117940", "bsz": "256", "num_updates": "477800", "lr": "0.000527475", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "155961"} [2022-08-02 20:48:34,045][train_inner][INFO] - {"epoch": 10, "update": 9.287, "loss": "2.176", "ppl": "4.52", "wps": "362464", "ups": "3.07", "wpb": "117907", "bsz": "256", "num_updates": "478000", "lr": "0.000527273", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.7", "wall": "156026"} [2022-08-02 20:49:38,691][train_inner][INFO] - {"epoch": 10, "update": 9.291, "loss": "2.176", "ppl": "4.52", "wps": "365181", "ups": "3.09", "wpb": "118035", "bsz": "256", "num_updates": "478200", "lr": "0.000527071", "gnorm": "0.673", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23.6", "wall": "156091"} [2022-08-02 20:50:43,503][train_inner][INFO] - {"epoch": 10, "update": 9.295, "loss": "2.177", "ppl": "4.52", "wps": "365571", "ups": "3.09", "wpb": "118466", "bsz": "256", "num_updates": "478400", "lr": "0.000526869", "gnorm": "0.673", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.8", "wall": "156156"} [2022-08-02 20:51:48,569][train_inner][INFO] - {"epoch": 10, "update": 9.299, "loss": "2.177", "ppl": "4.52", "wps": "363722", "ups": "3.07", "wpb": "118328", "bsz": "256", "num_updates": "478600", "lr": "0.000526667", "gnorm": "0.687", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "156221"} [2022-08-02 20:52:54,223][train_inner][INFO] - {"epoch": 10, "update": 9.303, "loss": "2.178", "ppl": "4.53", "wps": "359292", "ups": "3.05", "wpb": "117944", "bsz": "256", "num_updates": "478800", "lr": "0.000526465", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.4", "wall": "156287"} [2022-08-02 20:53:59,119][train_inner][INFO] - {"epoch": 10, "update": 9.307, "loss": "2.172", "ppl": "4.51", "wps": "364155", "ups": "3.08", "wpb": "118158", "bsz": "256", "num_updates": "479000", "lr": "0.000526263", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.9", "wall": "156351"} [2022-08-02 20:55:04,009][train_inner][INFO] - {"epoch": 10, "update": 9.311, "loss": "2.175", "ppl": "4.52", "wps": "364231", "ups": "3.08", "wpb": "118174", "bsz": "256", "num_updates": "479200", "lr": "0.000526061", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "156416"} [2022-08-02 20:56:09,101][train_inner][INFO] - {"epoch": 10, "update": 9.315, "loss": "2.174", "ppl": "4.51", "wps": "363113", "ups": "3.07", "wpb": "118176", "bsz": "256", "num_updates": "479400", "lr": "0.000525859", "gnorm": "0.674", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "28.2", "wall": "156481"} [2022-08-02 20:57:14,066][train_inner][INFO] - {"epoch": 10, "update": 9.318, "loss": "2.171", "ppl": "4.5", "wps": "365098", "ups": "3.08", "wpb": "118591", "bsz": "256", "num_updates": "479600", "lr": "0.000525657", "gnorm": "0.673", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21", "wall": "156546"} [2022-08-02 20:58:19,165][train_inner][INFO] - {"epoch": 10, "update": 9.322, "loss": "2.168", "ppl": "4.5", "wps": "362992", "ups": "3.07", "wpb": "118148", "bsz": "256", "num_updates": "479800", "lr": "0.000525455", "gnorm": "0.674", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "156611"} [2022-08-02 20:59:24,051][train_inner][INFO] - {"epoch": 10, "update": 9.326, "loss": "2.18", "ppl": "4.53", "wps": "363281", "ups": "3.08", "wpb": "117848", "bsz": "256", "num_updates": "480000", "lr": "0.000525253", "gnorm": "0.677", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.8", "wall": "156676"} [2022-08-02 21:00:28,824][train_inner][INFO] - {"epoch": 10, "update": 9.33, "loss": "2.173", "ppl": "4.51", "wps": "364942", "ups": "3.09", "wpb": "118190", "bsz": "256", "num_updates": "480200", "lr": "0.000525051", "gnorm": "0.675", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "28.2", "wall": "156741"} [2022-08-02 21:01:33,621][train_inner][INFO] - {"epoch": 10, "update": 9.334, "loss": "2.169", "ppl": "4.5", "wps": "364624", "ups": "3.09", "wpb": "118131", "bsz": "256", "num_updates": "480400", "lr": "0.000524848", "gnorm": "0.673", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22", "wall": "156806"} [2022-08-02 21:02:38,805][train_inner][INFO] - {"epoch": 10, "update": 9.338, "loss": "2.173", "ppl": "4.51", "wps": "366792", "ups": "3.07", "wpb": "119544", "bsz": "256", "num_updates": "480600", "lr": "0.000524646", "gnorm": "0.67", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "156871"} [2022-08-02 21:03:44,791][train_inner][INFO] - {"epoch": 10, "update": 9.342, "loss": "2.174", "ppl": "4.51", "wps": "359414", "ups": "3.03", "wpb": "118580", "bsz": "256", "num_updates": "480800", "lr": "0.000524444", "gnorm": "0.674", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "21.9", "wall": "156937"} [2022-08-02 21:04:49,556][train_inner][INFO] - {"epoch": 10, "update": 9.346, "loss": "2.17", "ppl": "4.5", "wps": "365499", "ups": "3.09", "wpb": "118355", "bsz": "256", "num_updates": "481000", "lr": "0.000524242", "gnorm": "0.673", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "157002"} [2022-08-02 21:05:54,604][train_inner][INFO] - {"epoch": 10, "update": 9.349, "loss": "2.173", "ppl": "4.51", "wps": "363343", "ups": "3.07", "wpb": "118173", "bsz": "256", "num_updates": "481200", "lr": "0.00052404", "gnorm": "0.676", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.5", "wall": "157067"} [2022-08-02 21:06:59,813][train_inner][INFO] - {"epoch": 10, "update": 9.353, "loss": "2.171", "ppl": "4.5", "wps": "364714", "ups": "3.07", "wpb": "118911", "bsz": "256", "num_updates": "481400", "lr": "0.000523838", "gnorm": "0.671", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.8", "wall": "157132"} [2022-08-02 21:07:48,596][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-02 21:08:04,789][train_inner][INFO] - {"epoch": 10, "update": 9.357, "loss": "2.178", "ppl": "4.53", "wps": "362281", "ups": "3.08", "wpb": "117696", "bsz": "256", "num_updates": "481600", "lr": "0.000523636", "gnorm": "0.677", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.8", "wall": "157197"} [2022-08-02 21:08:25,028][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-02 21:09:09,998][train_inner][INFO] - {"epoch": 10, "update": 9.361, "loss": "2.174", "ppl": "4.51", "wps": "363129", "ups": "3.07", "wpb": "118395", "bsz": "256", "num_updates": "481800", "lr": "0.000523434", "gnorm": "0.687", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.8", "wall": "157262"} [2022-08-02 21:10:14,831][train_inner][INFO] - {"epoch": 10, "update": 9.365, "loss": "2.17", "ppl": "4.5", "wps": "366184", "ups": "3.08", "wpb": "118703", "bsz": "256", "num_updates": "482000", "lr": "0.000523232", "gnorm": "0.671", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.2", "wall": "157327"} [2022-08-02 21:11:19,504][train_inner][INFO] - {"epoch": 10, "update": 9.369, "loss": "2.177", "ppl": "4.52", "wps": "366436", "ups": "3.09", "wpb": "118492", "bsz": "256", "num_updates": "482200", "lr": "0.00052303", "gnorm": "0.672", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.3", "wall": "157392"} [2022-08-02 21:12:24,600][train_inner][INFO] - {"epoch": 10, "update": 9.373, "loss": "2.177", "ppl": "4.52", "wps": "363432", "ups": "3.07", "wpb": "118288", "bsz": "256", "num_updates": "482400", "lr": "0.000522828", "gnorm": "0.673", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.5", "wall": "157457"} [2022-08-02 21:13:29,239][train_inner][INFO] - {"epoch": 10, "update": 9.377, "loss": "2.174", "ppl": "4.51", "wps": "362802", "ups": "3.09", "wpb": "117254", "bsz": "256", "num_updates": "482600", "lr": "0.000522626", "gnorm": "0.678", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.2", "wall": "157522"} [2022-08-02 21:14:34,083][train_inner][INFO] - {"epoch": 10, "update": 9.381, "loss": "2.169", "ppl": "4.5", "wps": "364387", "ups": "3.08", "wpb": "118140", "bsz": "256", "num_updates": "482800", "lr": "0.000522424", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.9", "wall": "157586"} [2022-08-02 21:15:38,892][train_inner][INFO] - {"epoch": 10, "update": 9.384, "loss": "2.173", "ppl": "4.51", "wps": "363217", "ups": "3.09", "wpb": "117697", "bsz": "256", "num_updates": "483000", "lr": "0.000522222", "gnorm": "0.677", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.7", "wall": "157651"} [2022-08-02 21:16:43,547][train_inner][INFO] - {"epoch": 10, "update": 9.388, "loss": "2.174", "ppl": "4.51", "wps": "365435", "ups": "3.09", "wpb": "118134", "bsz": "256", "num_updates": "483200", "lr": "0.00052202", "gnorm": "0.673", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.7", "wall": "157716"} [2022-08-02 21:17:48,833][train_inner][INFO] - {"epoch": 10, "update": 9.392, "loss": "2.173", "ppl": "4.51", "wps": "361812", "ups": "3.06", "wpb": "118105", "bsz": "256", "num_updates": "483400", "lr": "0.000521818", "gnorm": "0.678", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.1", "wall": "157781"} [2022-08-02 21:18:53,459][train_inner][INFO] - {"epoch": 10, "update": 9.396, "loss": "2.175", "ppl": "4.52", "wps": "366465", "ups": "3.09", "wpb": "118414", "bsz": "256", "num_updates": "483600", "lr": "0.000521616", "gnorm": "0.676", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.1", "wall": "157846"} [2022-08-02 21:19:58,694][train_inner][INFO] - {"epoch": 10, "update": 9.4, "loss": "2.169", "ppl": "4.5", "wps": "362221", "ups": "3.07", "wpb": "118146", "bsz": "256", "num_updates": "483800", "lr": "0.000521414", "gnorm": "0.675", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.5", "wall": "157911"} [2022-08-02 21:21:03,780][train_inner][INFO] - {"epoch": 10, "update": 9.404, "loss": "2.173", "ppl": "4.51", "wps": "363614", "ups": "3.07", "wpb": "118328", "bsz": "256", "num_updates": "484000", "lr": "0.000521212", "gnorm": "0.676", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.1", "wall": "157976"} [2022-08-02 21:21:15,824][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-02 21:22:09,118][train_inner][INFO] - {"epoch": 10, "update": 9.408, "loss": "2.171", "ppl": "4.5", "wps": "361710", "ups": "3.06", "wpb": "118167", "bsz": "256", "num_updates": "484200", "lr": "0.00052101", "gnorm": "0.684", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.8", "wall": "158041"} [2022-08-02 21:23:14,026][train_inner][INFO] - {"epoch": 10, "update": 9.412, "loss": "2.182", "ppl": "4.54", "wps": "363781", "ups": "3.08", "wpb": "118059", "bsz": "256", "num_updates": "484400", "lr": "0.000520808", "gnorm": "0.678", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "158106"} [2022-08-02 21:24:18,912][train_inner][INFO] - {"epoch": 10, "update": 9.416, "loss": "2.169", "ppl": "4.5", "wps": "364010", "ups": "3.08", "wpb": "118094", "bsz": "256", "num_updates": "484600", "lr": "0.000520606", "gnorm": "0.68", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.1", "wall": "158171"} [2022-08-02 21:25:24,884][train_inner][INFO] - {"epoch": 10, "update": 9.419, "loss": "2.172", "ppl": "4.51", "wps": "358867", "ups": "3.03", "wpb": "118375", "bsz": "256", "num_updates": "484800", "lr": "0.000520404", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "19.9", "wall": "158237"} [2022-08-02 21:26:29,872][train_inner][INFO] - {"epoch": 10, "update": 9.423, "loss": "2.167", "ppl": "4.49", "wps": "365248", "ups": "3.08", "wpb": "118675", "bsz": "256", "num_updates": "485000", "lr": "0.000520202", "gnorm": "0.672", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.2", "wall": "158302"} [2022-08-02 21:27:36,120][train_inner][INFO] - {"epoch": 10, "update": 9.427, "loss": "2.17", "ppl": "4.5", "wps": "357924", "ups": "3.02", "wpb": "118556", "bsz": "256", "num_updates": "485200", "lr": "0.00052", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "20", "wall": "158368"} [2022-08-02 21:28:40,792][train_inner][INFO] - {"epoch": 10, "update": 9.431, "loss": "2.166", "ppl": "4.49", "wps": "365196", "ups": "3.09", "wpb": "118088", "bsz": "256", "num_updates": "485400", "lr": "0.000519798", "gnorm": "0.674", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.8", "wall": "158433"} [2022-08-02 21:29:45,496][train_inner][INFO] - {"epoch": 10, "update": 9.435, "loss": "2.171", "ppl": "4.5", "wps": "367006", "ups": "3.09", "wpb": "118732", "bsz": "256", "num_updates": "485600", "lr": "0.000519596", "gnorm": "0.673", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21", "wall": "158498"} [2022-08-02 21:30:50,247][train_inner][INFO] - {"epoch": 10, "update": 9.439, "loss": "2.167", "ppl": "4.49", "wps": "364720", "ups": "3.09", "wpb": "118077", "bsz": "256", "num_updates": "485800", "lr": "0.000519394", "gnorm": "0.676", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.9", "wall": "158563"} [2022-08-02 21:31:55,082][train_inner][INFO] - {"epoch": 10, "update": 9.443, "loss": "2.167", "ppl": "4.49", "wps": "365288", "ups": "3.08", "wpb": "118416", "bsz": "256", "num_updates": "486000", "lr": "0.000519192", "gnorm": "0.676", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.8", "wall": "158627"} [2022-08-02 21:32:59,574][train_inner][INFO] - {"epoch": 10, "update": 9.447, "loss": "2.167", "ppl": "4.49", "wps": "367487", "ups": "3.1", "wpb": "118486", "bsz": "256", "num_updates": "486200", "lr": "0.00051899", "gnorm": "0.679", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "158692"} [2022-08-02 21:33:47,437][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-02 21:34:04,772][train_inner][INFO] - {"epoch": 10, "update": 9.451, "loss": "2.171", "ppl": "4.5", "wps": "362355", "ups": "3.07", "wpb": "118123", "bsz": "256", "num_updates": "486400", "lr": "0.000518788", "gnorm": "0.674", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "158757"} [2022-08-02 21:35:09,623][train_inner][INFO] - {"epoch": 10, "update": 9.454, "loss": "2.169", "ppl": "4.5", "wps": "365461", "ups": "3.08", "wpb": "118501", "bsz": "256", "num_updates": "486600", "lr": "0.000518586", "gnorm": "0.676", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.2", "wall": "158822"} [2022-08-02 21:36:14,655][train_inner][INFO] - {"epoch": 10, "update": 9.458, "loss": "2.167", "ppl": "4.49", "wps": "363425", "ups": "3.08", "wpb": "118170", "bsz": "256", "num_updates": "486800", "lr": "0.000518384", "gnorm": "0.676", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.8", "wall": "158887"} [2022-08-02 21:37:19,327][train_inner][INFO] - {"epoch": 10, "update": 9.462, "loss": "2.166", "ppl": "4.49", "wps": "366899", "ups": "3.09", "wpb": "118631", "bsz": "256", "num_updates": "487000", "lr": "0.000518182", "gnorm": "0.674", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.3", "wall": "158952"} [2022-08-02 21:38:24,380][train_inner][INFO] - {"epoch": 10, "update": 9.466, "loss": "2.17", "ppl": "4.5", "wps": "364840", "ups": "3.07", "wpb": "118662", "bsz": "256", "num_updates": "487200", "lr": "0.00051798", "gnorm": "0.676", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "159017"} [2022-08-02 21:39:29,129][train_inner][INFO] - {"epoch": 10, "update": 9.47, "loss": "2.172", "ppl": "4.51", "wps": "364738", "ups": "3.09", "wpb": "118081", "bsz": "256", "num_updates": "487400", "lr": "0.000517778", "gnorm": "0.676", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.9", "wall": "159081"} [2022-08-02 21:40:33,916][train_inner][INFO] - {"epoch": 10, "update": 9.474, "loss": "2.172", "ppl": "4.51", "wps": "365216", "ups": "3.09", "wpb": "118304", "bsz": "256", "num_updates": "487600", "lr": "0.000517576", "gnorm": "0.677", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.8", "wall": "159146"} [2022-08-02 21:41:38,829][train_inner][INFO] - {"epoch": 10, "update": 9.478, "loss": "2.169", "ppl": "4.5", "wps": "364988", "ups": "3.08", "wpb": "118461", "bsz": "256", "num_updates": "487800", "lr": "0.000517374", "gnorm": "0.676", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.9", "wall": "159211"} [2022-08-02 21:42:43,164][train_inner][INFO] - {"epoch": 10, "update": 9.482, "loss": "2.168", "ppl": "4.5", "wps": "368839", "ups": "3.11", "wpb": "118644", "bsz": "256", "num_updates": "488000", "lr": "0.000517172", "gnorm": "0.674", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.7", "wall": "159275"} [2022-08-02 21:43:48,222][train_inner][INFO] - {"epoch": 10, "update": 9.486, "loss": "2.166", "ppl": "4.49", "wps": "363424", "ups": "3.07", "wpb": "118217", "bsz": "256", "num_updates": "488200", "lr": "0.00051697", "gnorm": "0.674", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "159341"} [2022-08-02 21:44:53,302][train_inner][INFO] - {"epoch": 10, "update": 9.489, "loss": "2.163", "ppl": "4.48", "wps": "364225", "ups": "3.07", "wpb": "118516", "bsz": "256", "num_updates": "488400", "lr": "0.000516768", "gnorm": "0.674", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.1", "wall": "159406"} [2022-08-02 21:45:58,302][train_inner][INFO] - {"epoch": 10, "update": 9.493, "loss": "2.166", "ppl": "4.49", "wps": "366099", "ups": "3.08", "wpb": "118981", "bsz": "256", "num_updates": "488600", "lr": "0.000516566", "gnorm": "0.673", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.6", "wall": "159471"} [2022-08-02 21:46:54,287][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-02 21:47:03,438][train_inner][INFO] - {"epoch": 10, "update": 9.497, "loss": "2.164", "ppl": "4.48", "wps": "362148", "ups": "3.07", "wpb": "117943", "bsz": "256", "num_updates": "488800", "lr": "0.000516364", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "159536"} [2022-08-02 21:48:08,098][train_inner][INFO] - {"epoch": 10, "update": 9.501, "loss": "2.169", "ppl": "4.5", "wps": "367049", "ups": "3.09", "wpb": "118665", "bsz": "256", "num_updates": "489000", "lr": "0.000516162", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.9", "wall": "159600"} [2022-08-02 21:49:12,675][train_inner][INFO] - {"epoch": 10, "update": 9.505, "loss": "2.173", "ppl": "4.51", "wps": "366947", "ups": "3.1", "wpb": "118481", "bsz": "256", "num_updates": "489200", "lr": "0.00051596", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.2", "wall": "159665"} [2022-08-02 21:50:18,182][train_inner][INFO] - {"epoch": 10, "update": 9.509, "loss": "2.17", "ppl": "4.5", "wps": "360559", "ups": "3.05", "wpb": "118094", "bsz": "256", "num_updates": "489400", "lr": "0.000515758", "gnorm": "0.677", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "159731"} [2022-08-02 21:51:22,990][train_inner][INFO] - {"epoch": 10, "update": 9.513, "loss": "2.168", "ppl": "4.49", "wps": "364363", "ups": "3.09", "wpb": "118066", "bsz": "256", "num_updates": "489600", "lr": "0.000515556", "gnorm": "0.679", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.6", "wall": "159795"} [2022-08-02 21:52:27,549][train_inner][INFO] - {"epoch": 10, "update": 9.517, "loss": "2.169", "ppl": "4.5", "wps": "366878", "ups": "3.1", "wpb": "118424", "bsz": "256", "num_updates": "489800", "lr": "0.000515354", "gnorm": "0.674", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.3", "wall": "159860"} [2022-08-02 21:53:31,850][train_inner][INFO] - {"epoch": 10, "update": 9.521, "loss": "2.168", "ppl": "4.49", "wps": "367155", "ups": "3.11", "wpb": "118040", "bsz": "256", "num_updates": "490000", "lr": "0.000515152", "gnorm": "0.678", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.8", "wall": "159924"} [2022-08-02 21:54:36,273][train_inner][INFO] - {"epoch": 10, "update": 9.524, "loss": "2.171", "ppl": "4.5", "wps": "366080", "ups": "3.1", "wpb": "117919", "bsz": "256", "num_updates": "490200", "lr": "0.000514949", "gnorm": "0.678", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.5", "wall": "159989"} [2022-08-02 21:55:40,832][train_inner][INFO] - {"epoch": 10, "update": 9.528, "loss": "2.167", "ppl": "4.49", "wps": "367157", "ups": "3.1", "wpb": "118516", "bsz": "256", "num_updates": "490400", "lr": "0.000514747", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.9", "wall": "160053"} [2022-08-02 21:56:45,959][train_inner][INFO] - {"epoch": 10, "update": 9.532, "loss": "2.168", "ppl": "4.49", "wps": "362310", "ups": "3.07", "wpb": "117979", "bsz": "255.9", "num_updates": "490600", "lr": "0.000514545", "gnorm": "0.678", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.8", "wall": "160118"} [2022-08-02 21:57:50,460][train_inner][INFO] - {"epoch": 10, "update": 9.536, "loss": "2.166", "ppl": "4.49", "wps": "366510", "ups": "3.1", "wpb": "118200", "bsz": "256", "num_updates": "490800", "lr": "0.000514343", "gnorm": "0.678", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "28.2", "wall": "160183"} [2022-08-02 21:58:54,828][train_inner][INFO] - {"epoch": 10, "update": 9.54, "loss": "2.165", "ppl": "4.48", "wps": "366015", "ups": "3.11", "wpb": "117797", "bsz": "256", "num_updates": "491000", "lr": "0.000514141", "gnorm": "0.68", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.4", "wall": "160247"} [2022-08-02 21:59:59,464][train_inner][INFO] - {"epoch": 10, "update": 9.544, "loss": "2.167", "ppl": "4.49", "wps": "365285", "ups": "3.09", "wpb": "118052", "bsz": "256", "num_updates": "491200", "lr": "0.000513939", "gnorm": "0.678", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21", "wall": "160312"} [2022-08-02 22:00:22,109][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-02 22:01:04,378][train_inner][INFO] - {"epoch": 10, "update": 9.548, "loss": "2.167", "ppl": "4.49", "wps": "364908", "ups": "3.08", "wpb": "118436", "bsz": "256", "num_updates": "491400", "lr": "0.000513737", "gnorm": "0.678", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.9", "wall": "160377"} [2022-08-02 22:01:56,281][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 [2022-08-02 22:02:09,038][train_inner][INFO] - {"epoch": 10, "update": 9.552, "loss": "2.166", "ppl": "4.49", "wps": "363961", "ups": "3.09", "wpb": "117669", "bsz": "256", "num_updates": "491600", "lr": "0.000513535", "gnorm": "0.676", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.1", "wall": "160441"} [2022-08-02 22:03:13,921][train_inner][INFO] - {"epoch": 10, "update": 9.556, "loss": "2.167", "ppl": "4.49", "wps": "365253", "ups": "3.08", "wpb": "118492", "bsz": "256", "num_updates": "491800", "lr": "0.000513333", "gnorm": "0.685", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.5", "wall": "160506"} [2022-08-02 22:04:18,727][train_inner][INFO] - {"epoch": 10, "update": 9.559, "loss": "2.165", "ppl": "4.48", "wps": "366180", "ups": "3.09", "wpb": "118651", "bsz": "256", "num_updates": "492000", "lr": "0.000513131", "gnorm": "0.678", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "24.2", "wall": "160571"} [2022-08-02 22:05:23,849][train_inner][INFO] - {"epoch": 10, "update": 9.563, "loss": "2.17", "ppl": "4.5", "wps": "362494", "ups": "3.07", "wpb": "118030", "bsz": "256", "num_updates": "492200", "lr": "0.000512929", "gnorm": "0.678", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.8", "wall": "160636"} [2022-08-02 22:06:28,675][train_inner][INFO] - {"epoch": 10, "update": 9.567, "loss": "2.167", "ppl": "4.49", "wps": "363935", "ups": "3.09", "wpb": "117961", "bsz": "256", "num_updates": "492400", "lr": "0.000512727", "gnorm": "0.678", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.2", "wall": "160701"} [2022-08-02 22:07:32,974][train_inner][INFO] - {"epoch": 10, "update": 9.571, "loss": "2.17", "ppl": "4.5", "wps": "367052", "ups": "3.11", "wpb": "118004", "bsz": "256", "num_updates": "492600", "lr": "0.000512525", "gnorm": "0.679", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.5", "wall": "160765"} [2022-08-02 22:08:37,588][train_inner][INFO] - {"epoch": 10, "update": 9.575, "loss": "2.172", "ppl": "4.51", "wps": "365803", "ups": "3.1", "wpb": "118178", "bsz": "256", "num_updates": "492800", "lr": "0.000512323", "gnorm": "0.68", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.6", "wall": "160830"} [2022-08-02 22:09:42,690][train_inner][INFO] - {"epoch": 10, "update": 9.579, "loss": "2.167", "ppl": "4.49", "wps": "362414", "ups": "3.07", "wpb": "117967", "bsz": "256", "num_updates": "493000", "lr": "0.000512121", "gnorm": "0.677", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.8", "wall": "160895"} [2022-08-02 22:10:47,352][train_inner][INFO] - {"epoch": 10, "update": 9.583, "loss": "2.172", "ppl": "4.51", "wps": "364848", "ups": "3.09", "wpb": "117958", "bsz": "256", "num_updates": "493200", "lr": "0.000511919", "gnorm": "0.679", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.9", "wall": "160960"} [2022-08-02 22:11:52,137][train_inner][INFO] - {"epoch": 10, "update": 9.587, "loss": "2.165", "ppl": "4.48", "wps": "366624", "ups": "3.09", "wpb": "118755", "bsz": "256", "num_updates": "493400", "lr": "0.000511717", "gnorm": "0.675", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20", "wall": "161024"} [2022-08-02 22:12:56,845][train_inner][INFO] - {"epoch": 10, "update": 9.59, "loss": "2.175", "ppl": "4.52", "wps": "364297", "ups": "3.09", "wpb": "117864", "bsz": "256", "num_updates": "493600", "lr": "0.000511515", "gnorm": "0.688", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22.7", "wall": "161089"} [2022-08-02 22:14:01,767][train_inner][INFO] - {"epoch": 10, "update": 9.594, "loss": "2.169", "ppl": "4.5", "wps": "364553", "ups": "3.08", "wpb": "118335", "bsz": "256", "num_updates": "493800", "lr": "0.000511313", "gnorm": "0.676", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "161154"} [2022-08-02 22:15:06,326][train_inner][INFO] - {"epoch": 10, "update": 9.598, "loss": "2.168", "ppl": "4.49", "wps": "365658", "ups": "3.1", "wpb": "118021", "bsz": "256", "num_updates": "494000", "lr": "0.000511111", "gnorm": "0.677", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23.6", "wall": "161219"} [2022-08-02 22:16:11,297][train_inner][INFO] - {"epoch": 10, "update": 9.602, "loss": "2.167", "ppl": "4.49", "wps": "364419", "ups": "3.08", "wpb": "118381", "bsz": "256", "num_updates": "494200", "lr": "0.000510909", "gnorm": "0.677", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "28.2", "wall": "161284"} [2022-08-02 22:17:16,425][train_inner][INFO] - {"epoch": 10, "update": 9.606, "loss": "2.168", "ppl": "4.49", "wps": "361912", "ups": "3.07", "wpb": "117852", "bsz": "256", "num_updates": "494400", "lr": "0.000510707", "gnorm": "0.679", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.8", "wall": "161349"} [2022-08-02 22:18:21,421][train_inner][INFO] - {"epoch": 10, "update": 9.61, "loss": "2.164", "ppl": "4.48", "wps": "365577", "ups": "3.08", "wpb": "118802", "bsz": "256", "num_updates": "494600", "lr": "0.000510505", "gnorm": "0.677", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "161414"} [2022-08-02 22:19:26,360][train_inner][INFO] - {"epoch": 10, "update": 9.614, "loss": "2.164", "ppl": "4.48", "wps": "365824", "ups": "3.08", "wpb": "118781", "bsz": "256", "num_updates": "494800", "lr": "0.000510303", "gnorm": "0.682", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "161479"} [2022-08-02 22:20:30,866][train_inner][INFO] - {"epoch": 10, "update": 9.618, "loss": "2.163", "ppl": "4.48", "wps": "366646", "ups": "3.1", "wpb": "118252", "bsz": "256", "num_updates": "495000", "lr": "0.000510101", "gnorm": "0.677", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.8", "wall": "161543"} [2022-08-02 22:21:36,776][train_inner][INFO] - {"epoch": 10, "update": 9.622, "loss": "2.166", "ppl": "4.49", "wps": "360343", "ups": "3.03", "wpb": "118749", "bsz": "256", "num_updates": "495200", "lr": "0.000509899", "gnorm": "0.674", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "20.7", "wall": "161609"} [2022-08-02 22:22:41,438][train_inner][INFO] - {"epoch": 10, "update": 9.625, "loss": "2.169", "ppl": "4.5", "wps": "364024", "ups": "3.09", "wpb": "117691", "bsz": "256", "num_updates": "495400", "lr": "0.000509697", "gnorm": "0.681", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21", "wall": "161674"} [2022-08-02 22:23:46,270][train_inner][INFO] - {"epoch": 10, "update": 9.629, "loss": "2.165", "ppl": "4.48", "wps": "364344", "ups": "3.08", "wpb": "118105", "bsz": "256", "num_updates": "495600", "lr": "0.000509495", "gnorm": "0.677", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.1", "wall": "161739"} [2022-08-02 22:24:50,733][train_inner][INFO] - {"epoch": 10, "update": 9.633, "loss": "2.163", "ppl": "4.48", "wps": "366242", "ups": "3.1", "wpb": "118043", "bsz": "256", "num_updates": "495800", "lr": "0.000509293", "gnorm": "0.678", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.3", "wall": "161803"} [2022-08-02 22:25:55,602][train_inner][INFO] - {"epoch": 10, "update": 9.637, "loss": "2.164", "ppl": "4.48", "wps": "364746", "ups": "3.08", "wpb": "118301", "bsz": "256", "num_updates": "496000", "lr": "0.000509091", "gnorm": "0.677", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "161868"} [2022-08-02 22:27:00,613][train_inner][INFO] - {"epoch": 10, "update": 9.641, "loss": "2.168", "ppl": "4.49", "wps": "361859", "ups": "3.08", "wpb": "117623", "bsz": "256", "num_updates": "496200", "lr": "0.000508889", "gnorm": "0.68", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.6", "wall": "161933"} [2022-08-02 22:28:05,328][train_inner][INFO] - {"epoch": 10, "update": 9.645, "loss": "2.159", "ppl": "4.47", "wps": "368185", "ups": "3.09", "wpb": "119134", "bsz": "256", "num_updates": "496400", "lr": "0.000508687", "gnorm": "0.674", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "161998"} [2022-08-02 22:29:09,756][train_inner][INFO] - {"epoch": 10, "update": 9.649, "loss": "2.171", "ppl": "4.5", "wps": "366162", "ups": "3.1", "wpb": "117955", "bsz": "256", "num_updates": "496600", "lr": "0.000508485", "gnorm": "0.678", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.3", "wall": "162062"} [2022-08-02 22:30:14,542][train_inner][INFO] - {"epoch": 10, "update": 9.653, "loss": "2.165", "ppl": "4.49", "wps": "365555", "ups": "3.09", "wpb": "118412", "bsz": "256", "num_updates": "496800", "lr": "0.000508283", "gnorm": "0.677", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.4", "wall": "162127"} [2022-08-02 22:31:19,700][train_inner][INFO] - {"epoch": 10, "update": 9.657, "loss": "2.164", "ppl": "4.48", "wps": "365808", "ups": "3.07", "wpb": "119175", "bsz": "256", "num_updates": "497000", "lr": "0.000508081", "gnorm": "0.675", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.8", "wall": "162192"} [2022-08-02 22:32:24,253][train_inner][INFO] - {"epoch": 10, "update": 9.66, "loss": "2.168", "ppl": "4.49", "wps": "366094", "ups": "3.1", "wpb": "118161", "bsz": "256", "num_updates": "497200", "lr": "0.000507879", "gnorm": "0.677", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "162257"} [2022-08-02 22:33:28,810][train_inner][INFO] - {"epoch": 10, "update": 9.664, "loss": "2.165", "ppl": "4.48", "wps": "366381", "ups": "3.1", "wpb": "118261", "bsz": "256", "num_updates": "497400", "lr": "0.000507677", "gnorm": "0.679", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.1", "wall": "162321"} [2022-08-02 22:34:23,964][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-02 22:34:25,874][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 [2022-08-02 22:34:33,965][train_inner][INFO] - {"epoch": 10, "update": 9.668, "loss": "2.167", "ppl": "4.49", "wps": "363755", "ups": "3.07", "wpb": "118499", "bsz": "256", "num_updates": "497600", "lr": "0.000507475", "gnorm": "0.679", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.9", "wall": "162386"} [2022-08-02 22:35:38,897][train_inner][INFO] - {"epoch": 10, "update": 9.672, "loss": "2.164", "ppl": "4.48", "wps": "364479", "ups": "3.08", "wpb": "118331", "bsz": "256", "num_updates": "497800", "lr": "0.000507273", "gnorm": "0.679", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.1", "wall": "162451"} [2022-08-02 22:36:44,754][train_inner][INFO] - {"epoch": 10, "update": 9.676, "loss": "2.169", "ppl": "4.5", "wps": "358822", "ups": "3.04", "wpb": "118154", "bsz": "256", "num_updates": "498000", "lr": "0.000507071", "gnorm": "0.678", "clip": "0", "loss_scale": "4", "train_wall": "66", "gb_free": "21.8", "wall": "162517"} [2022-08-02 22:37:49,654][train_inner][INFO] - {"epoch": 10, "update": 9.68, "loss": "2.165", "ppl": "4.49", "wps": "365114", "ups": "3.08", "wpb": "118477", "bsz": "256", "num_updates": "498200", "lr": "0.000506869", "gnorm": "0.676", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.3", "wall": "162582"} [2022-08-02 22:38:54,561][train_inner][INFO] - {"epoch": 10, "update": 9.684, "loss": "2.162", "ppl": "4.48", "wps": "365046", "ups": "3.08", "wpb": "118469", "bsz": "256", "num_updates": "498400", "lr": "0.000506667", "gnorm": "0.679", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.9", "wall": "162647"} [2022-08-02 22:39:59,530][train_inner][INFO] - {"epoch": 10, "update": 9.688, "loss": "2.168", "ppl": "4.49", "wps": "362656", "ups": "3.08", "wpb": "117806", "bsz": "256", "num_updates": "498600", "lr": "0.000506465", "gnorm": "0.68", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.5", "wall": "162712"} [2022-08-02 22:41:04,560][train_inner][INFO] - {"epoch": 10, "update": 9.692, "loss": "2.16", "ppl": "4.47", "wps": "364365", "ups": "3.08", "wpb": "118471", "bsz": "256", "num_updates": "498800", "lr": "0.000506263", "gnorm": "0.679", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.5", "wall": "162777"} [2022-08-02 22:42:09,797][train_inner][INFO] - {"epoch": 10, "update": 9.695, "loss": "2.161", "ppl": "4.47", "wps": "362969", "ups": "3.07", "wpb": "118393", "bsz": "256", "num_updates": "499000", "lr": "0.000506061", "gnorm": "0.678", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.2", "wall": "162842"} [2022-08-02 22:43:14,414][train_inner][INFO] - {"epoch": 10, "update": 9.699, "loss": "2.163", "ppl": "4.48", "wps": "364910", "ups": "3.1", "wpb": "117895", "bsz": "256", "num_updates": "499200", "lr": "0.000505859", "gnorm": "0.679", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.1", "wall": "162907"} [2022-08-02 22:44:19,548][train_inner][INFO] - {"epoch": 10, "update": 9.703, "loss": "2.165", "ppl": "4.48", "wps": "363444", "ups": "3.07", "wpb": "118361", "bsz": "256", "num_updates": "499400", "lr": "0.000505657", "gnorm": "0.678", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.4", "wall": "162972"} [2022-08-02 22:45:24,653][train_inner][INFO] - {"epoch": 10, "update": 9.707, "loss": "2.168", "ppl": "4.49", "wps": "361049", "ups": "3.07", "wpb": "117529", "bsz": "256", "num_updates": "499600", "lr": "0.000505455", "gnorm": "0.681", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.3", "wall": "163037"} [2022-08-02 22:46:29,295][train_inner][INFO] - {"epoch": 10, "update": 9.711, "loss": "2.162", "ppl": "4.48", "wps": "368129", "ups": "3.09", "wpb": "118981", "bsz": "256", "num_updates": "499800", "lr": "0.000505253", "gnorm": "0.677", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.9", "wall": "163102"} [2022-08-02 22:47:33,933][train_inner][INFO] - {"epoch": 10, "update": 9.715, "loss": "2.167", "ppl": "4.49", "wps": "364546", "ups": "3.09", "wpb": "117816", "bsz": "256", "num_updates": "500000", "lr": "0.000505051", "gnorm": "0.682", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.9", "wall": "163166"} [2022-08-02 22:47:33,934][fairseq_cli.train][INFO] - begin validation on "valid" subset [2022-08-02 22:47:57,722][valid][INFO] - {"epoch": 10, "valid_loss": "2.069", "valid_ppl": "4.2", "valid_wps": "1.58577e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "500000", "valid_best_loss": "2.069"} [2022-08-02 22:47:57,726][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 10 @ 500000 updates [2022-08-02 22:47:57,727][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_10_500000.pt [2022-08-02 22:48:07,303][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_10_500000.pt [2022-08-02 22:48:33,491][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/adan2/checkpoint_10_500000.pt (epoch 10 @ 500000 updates, score 2.069) (writing took 35.76536318846047 seconds) [2022-08-02 22:49:38,497][train_inner][INFO] - {"epoch": 10, "update": 9.719, "loss": "2.159", "ppl": "4.47", "wps": "190379", "ups": "1.61", "wpb": "118571", "bsz": "256", "num_updates": "500200", "lr": "0.000504848", "gnorm": "0.677", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.4", "wall": "163291"} [2022-08-02 22:50:43,639][train_inner][INFO] - {"epoch": 10, "update": 9.723, "loss": "2.163", "ppl": "4.48", "wps": "363480", "ups": "3.07", "wpb": "118387", "bsz": "256", "num_updates": "500400", "lr": "0.000504646", "gnorm": "0.681", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.3", "wall": "163356"} [2022-08-02 22:51:48,648][train_inner][INFO] - {"epoch": 10, "update": 9.726, "loss": "2.161", "ppl": "4.47", "wps": "365128", "ups": "3.08", "wpb": "118682", "bsz": "256", "num_updates": "500600", "lr": "0.000504444", "gnorm": "0.679", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "163421"} [2022-08-02 22:52:53,781][train_inner][INFO] - {"epoch": 10, "update": 9.73, "loss": "2.158", "ppl": "4.46", "wps": "363902", "ups": "3.07", "wpb": "118508", "bsz": "256", "num_updates": "500800", "lr": "0.000504242", "gnorm": "0.697", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "163486"} [2022-08-02 22:53:58,271][train_inner][INFO] - {"epoch": 10, "update": 9.734, "loss": "2.16", "ppl": "4.47", "wps": "366474", "ups": "3.1", "wpb": "118170", "bsz": "256", "num_updates": "501000", "lr": "0.00050404", "gnorm": "0.68", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "24.4", "wall": "163551"} [2022-08-02 22:55:03,332][train_inner][INFO] - {"epoch": 10, "update": 9.738, "loss": "2.155", "ppl": "4.45", "wps": "365554", "ups": "3.07", "wpb": "118914", "bsz": "256", "num_updates": "501200", "lr": "0.000503838", "gnorm": "0.683", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "28", "wall": "163616"} [2022-08-02 22:56:07,859][train_inner][INFO] - {"epoch": 10, "update": 9.742, "loss": "2.17", "ppl": "4.5", "wps": "366728", "ups": "3.1", "wpb": "118318", "bsz": "256", "num_updates": "501400", "lr": "0.000503636", "gnorm": "0.68", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22", "wall": "163680"} [2022-08-02 22:57:12,843][train_inner][INFO] - {"epoch": 10, "update": 9.746, "loss": "2.162", "ppl": "4.48", "wps": "364429", "ups": "3.08", "wpb": "118408", "bsz": "256", "num_updates": "501600", "lr": "0.000503434", "gnorm": "0.679", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.3", "wall": "163745"} [2022-08-02 22:58:17,760][train_inner][INFO] - {"epoch": 10, "update": 9.75, "loss": "2.168", "ppl": "4.49", "wps": "363918", "ups": "3.08", "wpb": "118121", "bsz": "256", "num_updates": "501800", "lr": "0.000503232", "gnorm": "0.682", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21", "wall": "163810"} [2022-08-02 22:59:23,071][train_inner][INFO] - {"epoch": 10, "update": 9.754, "loss": "2.161", "ppl": "4.47", "wps": "364437", "ups": "3.06", "wpb": "119008", "bsz": "256", "num_updates": "502000", "lr": "0.00050303", "gnorm": "0.676", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.5", "wall": "163875"} [2022-08-02 23:00:28,223][train_inner][INFO] - {"epoch": 10, "update": 9.758, "loss": "2.166", "ppl": "4.49", "wps": "361083", "ups": "3.07", "wpb": "117623", "bsz": "256", "num_updates": "502200", "lr": "0.000502828", "gnorm": "0.683", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.5", "wall": "163941"} [2022-08-02 23:01:33,039][train_inner][INFO] - {"epoch": 10, "update": 9.761, "loss": "2.165", "ppl": "4.48", "wps": "366283", "ups": "3.09", "wpb": "118703", "bsz": "256", "num_updates": "502400", "lr": "0.000502626", "gnorm": "0.681", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "26.8", "wall": "164005"} [2022-08-02 23:02:37,577][train_inner][INFO] - {"epoch": 10, "update": 9.765, "loss": "2.163", "ppl": "4.48", "wps": "365452", "ups": "3.1", "wpb": "117927", "bsz": "256", "num_updates": "502600", "lr": "0.000502424", "gnorm": "0.681", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.7", "wall": "164070"} [2022-08-02 23:02:40,987][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-02 23:03:43,035][train_inner][INFO] - {"epoch": 10, "update": 9.769, "loss": "2.153", "ppl": "4.45", "wps": "362658", "ups": "3.06", "wpb": "118692", "bsz": "256", "num_updates": "502800", "lr": "0.000502222", "gnorm": "0.675", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.5", "wall": "164135"} [2022-08-02 23:04:48,200][train_inner][INFO] - {"epoch": 10, "update": 9.773, "loss": "2.158", "ppl": "4.46", "wps": "362902", "ups": "3.07", "wpb": "118241", "bsz": "256", "num_updates": "503000", "lr": "0.00050202", "gnorm": "0.679", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23", "wall": "164201"} [2022-08-02 23:05:53,015][train_inner][INFO] - {"epoch": 10, "update": 9.777, "loss": "2.161", "ppl": "4.47", "wps": "363425", "ups": "3.09", "wpb": "117775", "bsz": "256", "num_updates": "503200", "lr": "0.000501818", "gnorm": "0.686", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.1", "wall": "164265"} [2022-08-02 23:06:58,280][train_inner][INFO] - {"epoch": 10, "update": 9.781, "loss": "2.167", "ppl": "4.49", "wps": "362314", "ups": "3.06", "wpb": "118230", "bsz": "256", "num_updates": "503400", "lr": "0.000501616", "gnorm": "0.679", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.3", "wall": "164331"} [2022-08-02 23:08:02,963][train_inner][INFO] - {"epoch": 10, "update": 9.785, "loss": "2.168", "ppl": "4.49", "wps": "367494", "ups": "3.09", "wpb": "118851", "bsz": "256", "num_updates": "503600", "lr": "0.000501414", "gnorm": "0.678", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "28.2", "wall": "164395"} [2022-08-02 23:09:08,733][train_inner][INFO] - {"epoch": 10, "update": 9.789, "loss": "2.155", "ppl": "4.45", "wps": "359475", "ups": "3.04", "wpb": "118212", "bsz": "256", "num_updates": "503800", "lr": "0.000501212", "gnorm": "0.678", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.6", "wall": "164461"} [2022-08-02 23:10:13,437][train_inner][INFO] - {"epoch": 10, "update": 9.793, "loss": "2.16", "ppl": "4.47", "wps": "364954", "ups": "3.09", "wpb": "118069", "bsz": "256", "num_updates": "504000", "lr": "0.00050101", "gnorm": "0.681", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.4", "wall": "164526"} [2022-08-02 23:11:18,056][train_inner][INFO] - {"epoch": 10, "update": 9.796, "loss": "2.158", "ppl": "4.46", "wps": "364045", "ups": "3.1", "wpb": "117620", "bsz": "256", "num_updates": "504200", "lr": "0.000500808", "gnorm": "0.68", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.8", "wall": "164590"} [2022-08-02 23:12:22,534][train_inner][INFO] - {"epoch": 10, "update": 9.8, "loss": "2.16", "ppl": "4.47", "wps": "369304", "ups": "3.1", "wpb": "119057", "bsz": "256", "num_updates": "504400", "lr": "0.000500606", "gnorm": "0.676", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.1", "wall": "164655"} [2022-08-02 23:13:27,354][train_inner][INFO] - {"epoch": 10, "update": 9.804, "loss": "2.164", "ppl": "4.48", "wps": "364758", "ups": "3.09", "wpb": "118217", "bsz": "256", "num_updates": "504600", "lr": "0.000500404", "gnorm": "0.682", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.6", "wall": "164720"} [2022-08-02 23:14:33,219][train_inner][INFO] - {"epoch": 10, "update": 9.808, "loss": "2.165", "ppl": "4.48", "wps": "356996", "ups": "3.04", "wpb": "117566", "bsz": "256", "num_updates": "504800", "lr": "0.000500202", "gnorm": "0.68", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "21.6", "wall": "164786"} [2022-08-02 23:15:38,122][train_inner][INFO] - {"epoch": 10, "update": 9.812, "loss": "2.162", "ppl": "4.48", "wps": "364424", "ups": "3.08", "wpb": "118260", "bsz": "256", "num_updates": "505000", "lr": "0.0005", "gnorm": "0.679", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "164850"} [2022-08-02 23:16:43,169][train_inner][INFO] - {"epoch": 10, "update": 9.816, "loss": "2.158", "ppl": "4.46", "wps": "364400", "ups": "3.07", "wpb": "118514", "bsz": "256", "num_updates": "505200", "lr": "0.000499798", "gnorm": "0.677", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "164915"} [2022-08-02 23:17:47,954][train_inner][INFO] - {"epoch": 10, "update": 9.82, "loss": "2.16", "ppl": "4.47", "wps": "364817", "ups": "3.09", "wpb": "118170", "bsz": "256", "num_updates": "505400", "lr": "0.000499596", "gnorm": "0.679", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.6", "wall": "164980"} [2022-08-02 23:18:45,255][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-02 23:18:53,052][train_inner][INFO] - {"epoch": 10, "update": 9.824, "loss": "2.158", "ppl": "4.46", "wps": "362493", "ups": "3.07", "wpb": "117988", "bsz": "256", "num_updates": "505600", "lr": "0.000499394", "gnorm": "0.679", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.6", "wall": "165045"} [2022-08-02 23:19:57,790][train_inner][INFO] - {"epoch": 10, "update": 9.827, "loss": "2.164", "ppl": "4.48", "wps": "364364", "ups": "3.09", "wpb": "117939", "bsz": "256", "num_updates": "505800", "lr": "0.000499192", "gnorm": "0.681", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.8", "wall": "165110"} [2022-08-02 23:21:02,367][train_inner][INFO] - {"epoch": 10, "update": 9.831, "loss": "2.162", "ppl": "4.47", "wps": "364320", "ups": "3.1", "wpb": "117633", "bsz": "256", "num_updates": "506000", "lr": "0.00049899", "gnorm": "0.684", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23.9", "wall": "165175"} [2022-08-02 23:22:07,329][train_inner][INFO] - {"epoch": 10, "update": 9.835, "loss": "2.161", "ppl": "4.47", "wps": "363497", "ups": "3.08", "wpb": "118065", "bsz": "256", "num_updates": "506200", "lr": "0.000498788", "gnorm": "0.68", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.3", "wall": "165240"} [2022-08-02 23:23:11,775][train_inner][INFO] - {"epoch": 10, "update": 9.839, "loss": "2.162", "ppl": "4.48", "wps": "367480", "ups": "3.1", "wpb": "118412", "bsz": "256", "num_updates": "506400", "lr": "0.000498586", "gnorm": "0.679", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.1", "wall": "165304"} [2022-08-02 23:24:16,632][train_inner][INFO] - {"epoch": 10, "update": 9.843, "loss": "2.16", "ppl": "4.47", "wps": "363886", "ups": "3.08", "wpb": "118000", "bsz": "256", "num_updates": "506600", "lr": "0.000498384", "gnorm": "0.683", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "165369"} [2022-08-02 23:25:21,474][train_inner][INFO] - {"epoch": 10, "update": 9.847, "loss": "2.156", "ppl": "4.46", "wps": "366791", "ups": "3.08", "wpb": "118916", "bsz": "256", "num_updates": "506800", "lr": "0.000498182", "gnorm": "0.684", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.5", "wall": "165434"} [2022-08-02 23:26:26,221][train_inner][INFO] - {"epoch": 10, "update": 9.851, "loss": "2.161", "ppl": "4.47", "wps": "367382", "ups": "3.09", "wpb": "118932", "bsz": "256", "num_updates": "507000", "lr": "0.00049798", "gnorm": "0.679", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.5", "wall": "165499"} [2022-08-02 23:27:31,017][train_inner][INFO] - {"epoch": 10, "update": 9.855, "loss": "2.154", "ppl": "4.45", "wps": "366796", "ups": "3.09", "wpb": "118832", "bsz": "256", "num_updates": "507200", "lr": "0.000497778", "gnorm": "0.684", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.2", "wall": "165563"} [2022-08-02 23:28:36,450][train_inner][INFO] - {"epoch": 10, "update": 9.859, "loss": "2.157", "ppl": "4.46", "wps": "359971", "ups": "3.06", "wpb": "117769", "bsz": "256", "num_updates": "507400", "lr": "0.000497576", "gnorm": "0.679", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "165629"} [2022-08-02 23:29:41,406][train_inner][INFO] - {"epoch": 10, "update": 9.862, "loss": "2.159", "ppl": "4.47", "wps": "363973", "ups": "3.08", "wpb": "118209", "bsz": "256", "num_updates": "507600", "lr": "0.000497374", "gnorm": "0.677", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.3", "wall": "165694"} [2022-08-02 23:30:46,544][train_inner][INFO] - {"epoch": 10, "update": 9.866, "loss": "2.158", "ppl": "4.46", "wps": "362230", "ups": "3.07", "wpb": "117972", "bsz": "256", "num_updates": "507800", "lr": "0.000497172", "gnorm": "0.68", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "165759"} [2022-08-02 23:31:51,418][train_inner][INFO] - {"epoch": 10, "update": 9.87, "loss": "2.157", "ppl": "4.46", "wps": "365634", "ups": "3.08", "wpb": "118600", "bsz": "256", "num_updates": "508000", "lr": "0.00049697", "gnorm": "0.677", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "165824"} [2022-08-02 23:32:56,793][train_inner][INFO] - {"epoch": 10, "update": 9.874, "loss": "2.16", "ppl": "4.47", "wps": "361583", "ups": "3.06", "wpb": "118191", "bsz": "256", "num_updates": "508200", "lr": "0.000496768", "gnorm": "0.681", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21", "wall": "165889"} [2022-08-02 23:34:01,773][train_inner][INFO] - {"epoch": 10, "update": 9.878, "loss": "2.16", "ppl": "4.47", "wps": "364505", "ups": "3.08", "wpb": "118425", "bsz": "256", "num_updates": "508400", "lr": "0.000496566", "gnorm": "0.679", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.8", "wall": "165954"} [2022-08-02 23:35:07,758][train_inner][INFO] - {"epoch": 10, "update": 9.882, "loss": "2.154", "ppl": "4.45", "wps": "358576", "ups": "3.03", "wpb": "118303", "bsz": "256", "num_updates": "508600", "lr": "0.000496364", "gnorm": "0.682", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "23.6", "wall": "166020"} [2022-08-02 23:36:12,840][train_inner][INFO] - {"epoch": 10, "update": 9.886, "loss": "2.157", "ppl": "4.46", "wps": "365225", "ups": "3.07", "wpb": "118846", "bsz": "256", "num_updates": "508800", "lr": "0.000496162", "gnorm": "0.68", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.6", "wall": "166085"} [2022-08-02 23:37:17,549][train_inner][INFO] - {"epoch": 10, "update": 9.89, "loss": "2.155", "ppl": "4.45", "wps": "365570", "ups": "3.09", "wpb": "118277", "bsz": "256", "num_updates": "509000", "lr": "0.00049596", "gnorm": "0.68", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.5", "wall": "166150"} [2022-08-02 23:37:41,861][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-02 23:38:22,662][train_inner][INFO] - {"epoch": 10, "update": 9.894, "loss": "2.154", "ppl": "4.45", "wps": "362682", "ups": "3.07", "wpb": "118074", "bsz": "256", "num_updates": "509200", "lr": "0.000495758", "gnorm": "0.679", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.5", "wall": "166215"} [2022-08-02 23:39:27,634][train_inner][INFO] - {"epoch": 10, "update": 9.897, "loss": "2.163", "ppl": "4.48", "wps": "362812", "ups": "3.08", "wpb": "117862", "bsz": "256", "num_updates": "509400", "lr": "0.000495556", "gnorm": "0.682", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.8", "wall": "166280"} [2022-08-02 23:40:32,583][train_inner][INFO] - {"epoch": 10, "update": 9.901, "loss": "2.153", "ppl": "4.45", "wps": "365786", "ups": "3.08", "wpb": "118786", "bsz": "256", "num_updates": "509600", "lr": "0.000495354", "gnorm": "0.677", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.7", "wall": "166345"} [2022-08-02 23:41:37,586][train_inner][INFO] - {"epoch": 10, "update": 9.905, "loss": "2.155", "ppl": "4.45", "wps": "364064", "ups": "3.08", "wpb": "118325", "bsz": "256", "num_updates": "509800", "lr": "0.000495152", "gnorm": "0.678", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.8", "wall": "166410"} [2022-08-02 23:42:42,436][train_inner][INFO] - {"epoch": 10, "update": 9.909, "loss": "2.153", "ppl": "4.45", "wps": "364700", "ups": "3.08", "wpb": "118253", "bsz": "256", "num_updates": "510000", "lr": "0.000494949", "gnorm": "0.679", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20", "wall": "166475"} [2022-08-02 23:43:46,916][train_inner][INFO] - {"epoch": 10, "update": 9.913, "loss": "2.165", "ppl": "4.49", "wps": "366137", "ups": "3.1", "wpb": "118040", "bsz": "256", "num_updates": "510200", "lr": "0.000494747", "gnorm": "0.681", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.5", "wall": "166539"} [2022-08-02 23:44:52,590][train_inner][INFO] - {"epoch": 10, "update": 9.917, "loss": "2.156", "ppl": "4.46", "wps": "359443", "ups": "3.05", "wpb": "118029", "bsz": "256", "num_updates": "510400", "lr": "0.000494545", "gnorm": "0.678", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "166605"} [2022-08-02 23:45:57,503][train_inner][INFO] - {"epoch": 10, "update": 9.921, "loss": "2.156", "ppl": "4.46", "wps": "365012", "ups": "3.08", "wpb": "118468", "bsz": "256", "num_updates": "510600", "lr": "0.000494343", "gnorm": "0.68", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23", "wall": "166670"} [2022-08-02 23:47:02,326][train_inner][INFO] - {"epoch": 10, "update": 9.925, "loss": "2.158", "ppl": "4.46", "wps": "365450", "ups": "3.09", "wpb": "118447", "bsz": "256", "num_updates": "510800", "lr": "0.000494141", "gnorm": "0.678", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.6", "wall": "166735"} [2022-08-02 23:48:07,375][train_inner][INFO] - {"epoch": 10, "update": 9.929, "loss": "2.151", "ppl": "4.44", "wps": "365756", "ups": "3.07", "wpb": "118959", "bsz": "256", "num_updates": "511000", "lr": "0.000493939", "gnorm": "0.674", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.1", "wall": "166800"} [2022-08-02 23:49:11,888][train_inner][INFO] - {"epoch": 10, "update": 9.932, "loss": "2.159", "ppl": "4.46", "wps": "362978", "ups": "3.1", "wpb": "117082", "bsz": "256", "num_updates": "511200", "lr": "0.000493737", "gnorm": "0.686", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.4", "wall": "166864"} [2022-08-02 23:50:17,181][train_inner][INFO] - {"epoch": 10, "update": 9.936, "loss": "2.151", "ppl": "4.44", "wps": "362556", "ups": "3.06", "wpb": "118360", "bsz": "256", "num_updates": "511400", "lr": "0.000493535", "gnorm": "0.678", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "166930"} [2022-08-02 23:51:21,734][train_inner][INFO] - {"epoch": 10, "update": 9.94, "loss": "2.157", "ppl": "4.46", "wps": "365483", "ups": "3.1", "wpb": "117964", "bsz": "256", "num_updates": "511600", "lr": "0.000493333", "gnorm": "0.679", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.3", "wall": "166994"} [2022-08-02 23:52:26,560][train_inner][INFO] - {"epoch": 10, "update": 9.944, "loss": "2.155", "ppl": "4.45", "wps": "364692", "ups": "3.09", "wpb": "118205", "bsz": "256", "num_updates": "511800", "lr": "0.000493131", "gnorm": "0.679", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22", "wall": "167059"} [2022-08-02 23:53:31,222][train_inner][INFO] - {"epoch": 10, "update": 9.948, "loss": "2.156", "ppl": "4.46", "wps": "365962", "ups": "3.09", "wpb": "118318", "bsz": "256", "num_updates": "512000", "lr": "0.000492929", "gnorm": "0.68", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.1", "wall": "167124"} [2022-08-02 23:54:36,026][train_inner][INFO] - {"epoch": 10, "update": 9.952, "loss": "2.151", "ppl": "4.44", "wps": "366499", "ups": "3.09", "wpb": "118751", "bsz": "256", "num_updates": "512200", "lr": "0.000492727", "gnorm": "0.677", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.5", "wall": "167188"} [2022-08-02 23:55:40,873][train_inner][INFO] - {"epoch": 10, "update": 9.956, "loss": "2.152", "ppl": "4.44", "wps": "367068", "ups": "3.08", "wpb": "119015", "bsz": "256", "num_updates": "512400", "lr": "0.000492525", "gnorm": "0.677", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "26.6", "wall": "167253"} [2022-08-02 23:56:45,461][train_inner][INFO] - {"epoch": 10, "update": 9.96, "loss": "2.153", "ppl": "4.45", "wps": "366393", "ups": "3.1", "wpb": "118321", "bsz": "256", "num_updates": "512600", "lr": "0.000492323", "gnorm": "0.678", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.1", "wall": "167318"} [2022-08-02 23:57:50,638][train_inner][INFO] - {"epoch": 10, "update": 9.963, "loss": "2.156", "ppl": "4.46", "wps": "361636", "ups": "3.07", "wpb": "117849", "bsz": "256", "num_updates": "512800", "lr": "0.000492121", "gnorm": "0.679", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.4", "wall": "167383"} [2022-08-02 23:58:55,585][train_inner][INFO] - {"epoch": 10, "update": 9.967, "loss": "2.152", "ppl": "4.45", "wps": "365720", "ups": "3.08", "wpb": "118761", "bsz": "256", "num_updates": "513000", "lr": "0.000491919", "gnorm": "0.677", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.1", "wall": "167448"} [2022-08-03 00:00:01,278][train_inner][INFO] - {"epoch": 10, "update": 9.971, "loss": "2.158", "ppl": "4.46", "wps": "359843", "ups": "3.04", "wpb": "118194", "bsz": "256", "num_updates": "513200", "lr": "0.000491717", "gnorm": "0.68", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "21", "wall": "167514"} [2022-08-03 00:01:06,328][train_inner][INFO] - {"epoch": 10, "update": 9.975, "loss": "2.154", "ppl": "4.45", "wps": "363693", "ups": "3.07", "wpb": "118284", "bsz": "256", "num_updates": "513400", "lr": "0.000491515", "gnorm": "0.682", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "19.9", "wall": "167579"} [2022-08-03 00:01:07,288][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-03 00:01:09,450][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-03 00:01:48,095][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 [2022-08-03 00:02:12,080][train_inner][INFO] - {"epoch": 10, "update": 9.979, "loss": "2.154", "ppl": "4.45", "wps": "360521", "ups": "3.04", "wpb": "118524", "bsz": "256", "num_updates": "513600", "lr": "0.000491313", "gnorm": "0.692", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.2", "wall": "167644"} [2022-08-03 00:03:17,523][train_inner][INFO] - {"epoch": 10, "update": 9.983, "loss": "2.153", "ppl": "4.45", "wps": "364930", "ups": "3.06", "wpb": "119410", "bsz": "256", "num_updates": "513800", "lr": "0.000491111", "gnorm": "0.685", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.4", "wall": "167710"} [2022-08-03 00:04:21,936][train_inner][INFO] - {"epoch": 10, "update": 9.987, "loss": "2.158", "ppl": "4.46", "wps": "366051", "ups": "3.1", "wpb": "117891", "bsz": "256", "num_updates": "514000", "lr": "0.000490909", "gnorm": "0.679", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.5", "wall": "167774"} [2022-08-03 00:05:26,753][train_inner][INFO] - {"epoch": 10, "update": 9.991, "loss": "2.159", "ppl": "4.47", "wps": "366302", "ups": "3.09", "wpb": "118711", "bsz": "256", "num_updates": "514200", "lr": "0.000490707", "gnorm": "0.677", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.8", "wall": "167839"} [2022-08-03 00:06:31,818][train_inner][INFO] - {"epoch": 10, "update": 9.995, "loss": "2.16", "ppl": "4.47", "wps": "362003", "ups": "3.07", "wpb": "117767", "bsz": "256", "num_updates": "514400", "lr": "0.000490505", "gnorm": "0.681", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "24.1", "wall": "167904"} [2022-08-03 00:07:37,798][train_inner][INFO] - {"epoch": 10, "update": 9.998, "loss": "2.152", "ppl": "4.44", "wps": "359162", "ups": "3.03", "wpb": "118487", "bsz": "256", "num_updates": "514600", "lr": "0.000490303", "gnorm": "0.679", "clip": "0", "loss_scale": "4", "train_wall": "66", "gb_free": "24.4", "wall": "167970"} [2022-08-03 00:08:03,090][fairseq_cli.train][INFO] - begin validation on "valid" subset [2022-08-03 00:08:25,701][valid][INFO] - {"epoch": 10, "valid_loss": "2.062", "valid_ppl": "4.17", "valid_wps": "1.56508e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "514678", "valid_best_loss": "2.062"} [2022-08-03 00:08:25,704][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 10 @ 514678 updates [2022-08-03 00:08:25,705][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_best.pt [2022-08-03 00:08:35,643][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_best.pt [2022-08-03 00:08:54,905][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/adan2/checkpoint_best.pt (epoch 10 @ 514678 updates, score 2.062) (writing took 29.200979253277183 seconds) [2022-08-03 00:08:54,905][fairseq_cli.train][INFO] - end of epoch 10 (average epoch stats below) [2022-08-03 00:08:54,906][train][INFO] - {"epoch": 10, "train_loss": "2.168", "train_ppl": "4.5", "train_wps": "361526", "train_ups": "3.06", "train_wpb": "118298", "train_bsz": "256", "train_num_updates": "514678", "train_lr": "0.000490224", "train_gnorm": "0.677", "train_clip": "0", "train_loss_scale": "4", "train_train_wall": "16626", "train_gb_free": "21.1", "train_wall": "168047"} [2022-08-03 00:08:55,017][fairseq.data.iterators][INFO] - grouped total_num_itrs = 51488 [2022-08-03 00:08:55,020][fairseq.trainer][INFO] - begin training epoch 11 [2022-08-03 00:08:55,020][fairseq_cli.train][INFO] - Start iterating over samples [2022-08-03 00:09:44,475][train_inner][INFO] - {"epoch": 11, "update": 10.002, "loss": "2.162", "ppl": "4.48", "wps": "184754", "ups": "1.58", "wpb": "117019", "bsz": "255.4", "num_updates": "514800", "lr": "0.000490101", "gnorm": "0.692", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.7", "wall": "168097"} [2022-08-03 00:10:49,248][train_inner][INFO] - {"epoch": 11, "update": 10.006, "loss": "2.149", "ppl": "4.43", "wps": "364492", "ups": "3.09", "wpb": "118046", "bsz": "256", "num_updates": "515000", "lr": "0.000489899", "gnorm": "0.68", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.6", "wall": "168162"} [2022-08-03 00:11:54,855][train_inner][INFO] - {"epoch": 11, "update": 10.01, "loss": "2.152", "ppl": "4.45", "wps": "360129", "ups": "3.05", "wpb": "118132", "bsz": "256", "num_updates": "515200", "lr": "0.000489697", "gnorm": "0.677", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.3", "wall": "168227"} [2022-08-03 00:12:59,437][train_inner][INFO] - {"epoch": 11, "update": 10.014, "loss": "2.152", "ppl": "4.45", "wps": "364796", "ups": "3.1", "wpb": "117794", "bsz": "256", "num_updates": "515400", "lr": "0.000489495", "gnorm": "0.679", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.6", "wall": "168292"} [2022-08-03 00:14:04,425][train_inner][INFO] - {"epoch": 11, "update": 10.018, "loss": "2.155", "ppl": "4.45", "wps": "363961", "ups": "3.08", "wpb": "118265", "bsz": "256", "num_updates": "515600", "lr": "0.000489293", "gnorm": "0.679", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24", "wall": "168357"} [2022-08-03 00:15:09,076][train_inner][INFO] - {"epoch": 11, "update": 10.022, "loss": "2.152", "ppl": "4.44", "wps": "365150", "ups": "3.09", "wpb": "118034", "bsz": "256", "num_updates": "515800", "lr": "0.000489091", "gnorm": "0.678", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.4", "wall": "168421"} [2022-08-03 00:16:14,096][train_inner][INFO] - {"epoch": 11, "update": 10.026, "loss": "2.152", "ppl": "4.44", "wps": "361982", "ups": "3.08", "wpb": "117679", "bsz": "256", "num_updates": "516000", "lr": "0.000488889", "gnorm": "0.68", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.9", "wall": "168486"} [2022-08-03 00:17:18,646][train_inner][INFO] - {"epoch": 11, "update": 10.03, "loss": "2.162", "ppl": "4.47", "wps": "365505", "ups": "3.1", "wpb": "117964", "bsz": "256", "num_updates": "516200", "lr": "0.000488687", "gnorm": "0.681", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.9", "wall": "168551"} [2022-08-03 00:18:23,562][train_inner][INFO] - {"epoch": 11, "update": 10.033, "loss": "2.146", "ppl": "4.43", "wps": "364560", "ups": "3.08", "wpb": "118327", "bsz": "256", "num_updates": "516400", "lr": "0.000488485", "gnorm": "0.678", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "168616"} [2022-08-03 00:19:28,377][train_inner][INFO] - {"epoch": 11, "update": 10.037, "loss": "2.15", "ppl": "4.44", "wps": "364878", "ups": "3.09", "wpb": "118246", "bsz": "256", "num_updates": "516600", "lr": "0.000488283", "gnorm": "0.677", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20", "wall": "168681"} [2022-08-03 00:20:32,973][train_inner][INFO] - {"epoch": 11, "update": 10.041, "loss": "2.15", "ppl": "4.44", "wps": "365301", "ups": "3.1", "wpb": "117984", "bsz": "256", "num_updates": "516800", "lr": "0.000488081", "gnorm": "0.681", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.1", "wall": "168745"} [2022-08-03 00:21:37,493][train_inner][INFO] - {"epoch": 11, "update": 10.045, "loss": "2.15", "ppl": "4.44", "wps": "365781", "ups": "3.1", "wpb": "117998", "bsz": "256", "num_updates": "517000", "lr": "0.000487879", "gnorm": "0.68", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.7", "wall": "168810"} [2022-08-03 00:22:42,506][train_inner][INFO] - {"epoch": 11, "update": 10.049, "loss": "2.15", "ppl": "4.44", "wps": "365526", "ups": "3.08", "wpb": "118818", "bsz": "256", "num_updates": "517200", "lr": "0.000487677", "gnorm": "0.677", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.6", "wall": "168875"} [2022-08-03 00:23:22,770][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 [2022-08-03 00:23:47,714][train_inner][INFO] - {"epoch": 11, "update": 10.053, "loss": "2.157", "ppl": "4.46", "wps": "363452", "ups": "3.07", "wpb": "118497", "bsz": "256", "num_updates": "517400", "lr": "0.000487475", "gnorm": "0.711", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.8", "wall": "168940"} [2022-08-03 00:24:52,703][train_inner][INFO] - {"epoch": 11, "update": 10.057, "loss": "2.151", "ppl": "4.44", "wps": "365614", "ups": "3.08", "wpb": "118804", "bsz": "256", "num_updates": "517600", "lr": "0.000487273", "gnorm": "0.677", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.8", "wall": "169005"} [2022-08-03 00:25:57,586][train_inner][INFO] - {"epoch": 11, "update": 10.061, "loss": "2.15", "ppl": "4.44", "wps": "365866", "ups": "3.08", "wpb": "118690", "bsz": "256", "num_updates": "517800", "lr": "0.000487071", "gnorm": "0.676", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.9", "wall": "169070"} [2022-08-03 00:27:02,544][train_inner][INFO] - {"epoch": 11, "update": 10.065, "loss": "2.15", "ppl": "4.44", "wps": "364258", "ups": "3.08", "wpb": "118307", "bsz": "256", "num_updates": "518000", "lr": "0.000486869", "gnorm": "0.678", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.4", "wall": "169135"} [2022-08-03 00:28:07,450][train_inner][INFO] - {"epoch": 11, "update": 10.068, "loss": "2.146", "ppl": "4.43", "wps": "364274", "ups": "3.08", "wpb": "118216", "bsz": "256", "num_updates": "518200", "lr": "0.000486667", "gnorm": "0.682", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.6", "wall": "169200"} [2022-08-03 00:29:12,162][train_inner][INFO] - {"epoch": 11, "update": 10.072, "loss": "2.151", "ppl": "4.44", "wps": "365079", "ups": "3.09", "wpb": "118122", "bsz": "256", "num_updates": "518400", "lr": "0.000486465", "gnorm": "0.679", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.9", "wall": "169264"} [2022-08-03 00:30:17,214][train_inner][INFO] - {"epoch": 11, "update": 10.076, "loss": "2.147", "ppl": "4.43", "wps": "364516", "ups": "3.07", "wpb": "118561", "bsz": "256", "num_updates": "518600", "lr": "0.000486263", "gnorm": "0.676", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.6", "wall": "169330"} [2022-08-03 00:31:21,684][train_inner][INFO] - {"epoch": 11, "update": 10.08, "loss": "2.147", "ppl": "4.43", "wps": "366338", "ups": "3.1", "wpb": "118088", "bsz": "256", "num_updates": "518800", "lr": "0.000486061", "gnorm": "0.681", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.5", "wall": "169394"} [2022-08-03 00:32:26,502][train_inner][INFO] - {"epoch": 11, "update": 10.084, "loss": "2.145", "ppl": "4.42", "wps": "365778", "ups": "3.09", "wpb": "118543", "bsz": "256", "num_updates": "519000", "lr": "0.000485859", "gnorm": "0.679", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.2", "wall": "169459"} [2022-08-03 00:33:30,906][train_inner][INFO] - {"epoch": 11, "update": 10.088, "loss": "2.147", "ppl": "4.43", "wps": "367031", "ups": "3.11", "wpb": "118189", "bsz": "256", "num_updates": "519200", "lr": "0.000485657", "gnorm": "0.681", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.9", "wall": "169523"} [2022-08-03 00:34:36,079][train_inner][INFO] - {"epoch": 11, "update": 10.092, "loss": "2.156", "ppl": "4.46", "wps": "361908", "ups": "3.07", "wpb": "117931", "bsz": "256", "num_updates": "519400", "lr": "0.000485455", "gnorm": "0.68", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22", "wall": "169588"} [2022-08-03 00:35:28,287][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 [2022-08-03 00:35:34,368][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 [2022-08-03 00:35:41,569][train_inner][INFO] - {"epoch": 11, "update": 10.096, "loss": "2.151", "ppl": "4.44", "wps": "362689", "ups": "3.05", "wpb": "118761", "bsz": "256", "num_updates": "519600", "lr": "0.000485253", "gnorm": "0.701", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.1", "wall": "169654"} [2022-08-03 00:36:46,264][train_inner][INFO] - {"epoch": 11, "update": 10.1, "loss": "2.145", "ppl": "4.42", "wps": "364826", "ups": "3.09", "wpb": "118012", "bsz": "256", "num_updates": "519800", "lr": "0.000485051", "gnorm": "0.681", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.7", "wall": "169719"} [2022-08-03 00:37:51,264][train_inner][INFO] - {"epoch": 11, "update": 10.103, "loss": "2.147", "ppl": "4.43", "wps": "364445", "ups": "3.08", "wpb": "118442", "bsz": "256", "num_updates": "520000", "lr": "0.000484848", "gnorm": "0.677", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.1", "wall": "169784"} [2022-08-03 00:38:56,516][train_inner][INFO] - {"epoch": 11, "update": 10.107, "loss": "2.143", "ppl": "4.42", "wps": "364692", "ups": "3.07", "wpb": "118983", "bsz": "256", "num_updates": "520200", "lr": "0.000484646", "gnorm": "0.676", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20", "wall": "169849"} [2022-08-03 00:40:01,175][train_inner][INFO] - {"epoch": 11, "update": 10.111, "loss": "2.151", "ppl": "4.44", "wps": "366578", "ups": "3.09", "wpb": "118511", "bsz": "256", "num_updates": "520400", "lr": "0.000484444", "gnorm": "0.679", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.2", "wall": "169913"} [2022-08-03 00:41:06,003][train_inner][INFO] - {"epoch": 11, "update": 10.115, "loss": "2.148", "ppl": "4.43", "wps": "363971", "ups": "3.09", "wpb": "117972", "bsz": "256", "num_updates": "520600", "lr": "0.000484242", "gnorm": "0.681", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "23.7", "wall": "169978"} [2022-08-03 00:42:10,529][train_inner][INFO] - {"epoch": 11, "update": 10.119, "loss": "2.148", "ppl": "4.43", "wps": "367364", "ups": "3.1", "wpb": "118521", "bsz": "256", "num_updates": "520800", "lr": "0.00048404", "gnorm": "0.68", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20", "wall": "170043"} [2022-08-03 00:43:15,062][train_inner][INFO] - {"epoch": 11, "update": 10.123, "loss": "2.146", "ppl": "4.43", "wps": "365650", "ups": "3.1", "wpb": "117980", "bsz": "256", "num_updates": "521000", "lr": "0.000483838", "gnorm": "0.679", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "23.3", "wall": "170107"} [2022-08-03 00:44:19,687][train_inner][INFO] - {"epoch": 11, "update": 10.127, "loss": "2.15", "ppl": "4.44", "wps": "365542", "ups": "3.09", "wpb": "118115", "bsz": "256", "num_updates": "521200", "lr": "0.000483636", "gnorm": "0.679", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.6", "wall": "170172"} [2022-08-03 00:45:24,812][train_inner][INFO] - {"epoch": 11, "update": 10.131, "loss": "2.145", "ppl": "4.42", "wps": "364771", "ups": "3.07", "wpb": "118776", "bsz": "256", "num_updates": "521400", "lr": "0.000483434", "gnorm": "0.675", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.5", "wall": "170237"} [2022-08-03 00:46:30,406][train_inner][INFO] - {"epoch": 11, "update": 10.134, "loss": "2.151", "ppl": "4.44", "wps": "361209", "ups": "3.05", "wpb": "118466", "bsz": "256", "num_updates": "521600", "lr": "0.000483232", "gnorm": "0.679", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.6", "wall": "170303"} [2022-08-03 00:47:34,874][train_inner][INFO] - {"epoch": 11, "update": 10.138, "loss": "2.15", "ppl": "4.44", "wps": "367743", "ups": "3.1", "wpb": "118536", "bsz": "256", "num_updates": "521800", "lr": "0.00048303", "gnorm": "0.677", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "25.4", "wall": "170367"} [2022-08-03 00:48:40,064][train_inner][INFO] - {"epoch": 11, "update": 10.142, "loss": "2.146", "ppl": "4.43", "wps": "363122", "ups": "3.07", "wpb": "118357", "bsz": "256", "num_updates": "522000", "lr": "0.000482828", "gnorm": "0.678", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.7", "wall": "170432"} [2022-08-03 00:49:44,970][train_inner][INFO] - {"epoch": 11, "update": 10.146, "loss": "2.145", "ppl": "4.42", "wps": "364076", "ups": "3.08", "wpb": "118151", "bsz": "256", "num_updates": "522200", "lr": "0.000482626", "gnorm": "0.679", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.2", "wall": "170497"} [2022-08-03 00:50:50,017][train_inner][INFO] - {"epoch": 11, "update": 10.15, "loss": "2.149", "ppl": "4.43", "wps": "363945", "ups": "3.07", "wpb": "118366", "bsz": "256", "num_updates": "522400", "lr": "0.000482424", "gnorm": "0.708", "clip": "0.5", "loss_scale": "4", "train_wall": "65", "gb_free": "21.1", "wall": "170562"} [2022-08-03 00:51:54,624][train_inner][INFO] - {"epoch": 11, "update": 10.154, "loss": "2.142", "ppl": "4.42", "wps": "366753", "ups": "3.1", "wpb": "118473", "bsz": "256", "num_updates": "522600", "lr": "0.000482222", "gnorm": "0.677", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "28.2", "wall": "170627"} [2022-08-03 00:52:59,591][train_inner][INFO] - {"epoch": 11, "update": 10.158, "loss": "2.147", "ppl": "4.43", "wps": "365866", "ups": "3.08", "wpb": "118845", "bsz": "256", "num_updates": "522800", "lr": "0.00048202", "gnorm": "0.676", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20", "wall": "170692"} [2022-08-03 00:54:04,600][train_inner][INFO] - {"epoch": 11, "update": 10.162, "loss": "2.145", "ppl": "4.42", "wps": "363913", "ups": "3.08", "wpb": "118286", "bsz": "256", "num_updates": "523000", "lr": "0.000481818", "gnorm": "0.678", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.8", "wall": "170757"} [2022-08-03 00:55:09,406][train_inner][INFO] - {"epoch": 11, "update": 10.166, "loss": "2.146", "ppl": "4.42", "wps": "367513", "ups": "3.09", "wpb": "119082", "bsz": "256", "num_updates": "523200", "lr": "0.000481616", "gnorm": "0.691", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.9", "wall": "170822"} [2022-08-03 00:56:07,422][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 [2022-08-03 00:56:14,997][train_inner][INFO] - {"epoch": 11, "update": 10.169, "loss": "2.142", "ppl": "4.41", "wps": "362657", "ups": "3.05", "wpb": "118934", "bsz": "256", "num_updates": "523400", "lr": "0.000481414", "gnorm": "0.676", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.3", "wall": "170887"} [2022-08-03 00:57:19,696][train_inner][INFO] - {"epoch": 11, "update": 10.173, "loss": "2.145", "ppl": "4.42", "wps": "364402", "ups": "3.09", "wpb": "117881", "bsz": "256", "num_updates": "523600", "lr": "0.000481212", "gnorm": "0.683", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.4", "wall": "170952"} [2022-08-03 00:58:24,773][train_inner][INFO] - {"epoch": 11, "update": 10.177, "loss": "2.138", "ppl": "4.4", "wps": "366944", "ups": "3.07", "wpb": "119396", "bsz": "256", "num_updates": "523800", "lr": "0.00048101", "gnorm": "0.673", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.9", "wall": "171017"} [2022-08-03 00:59:29,665][train_inner][INFO] - {"epoch": 11, "update": 10.181, "loss": "2.147", "ppl": "4.43", "wps": "364262", "ups": "3.08", "wpb": "118187", "bsz": "256", "num_updates": "524000", "lr": "0.000480808", "gnorm": "0.679", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "25.1", "wall": "171082"} [2022-08-03 01:00:34,291][train_inner][INFO] - {"epoch": 11, "update": 10.185, "loss": "2.143", "ppl": "4.42", "wps": "367343", "ups": "3.09", "wpb": "118698", "bsz": "256", "num_updates": "524200", "lr": "0.000480606", "gnorm": "0.677", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "22", "wall": "171147"} [2022-08-03 01:01:39,199][train_inner][INFO] - {"epoch": 11, "update": 10.189, "loss": "2.152", "ppl": "4.44", "wps": "363052", "ups": "3.08", "wpb": "117823", "bsz": "256", "num_updates": "524400", "lr": "0.000480404", "gnorm": "0.683", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "28.2", "wall": "171212"} [2022-08-03 01:02:44,043][train_inner][INFO] - {"epoch": 11, "update": 10.193, "loss": "2.15", "ppl": "4.44", "wps": "364416", "ups": "3.08", "wpb": "118149", "bsz": "256", "num_updates": "524600", "lr": "0.000480202", "gnorm": "0.681", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.4", "wall": "171276"} [2022-08-03 01:03:48,497][train_inner][INFO] - {"epoch": 11, "update": 10.197, "loss": "2.154", "ppl": "4.45", "wps": "365371", "ups": "3.1", "wpb": "117747", "bsz": "256", "num_updates": "524800", "lr": "0.00048", "gnorm": "0.683", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "23.2", "wall": "171341"} [2022-08-03 01:04:53,611][train_inner][INFO] - {"epoch": 11, "update": 10.201, "loss": "2.148", "ppl": "4.43", "wps": "363007", "ups": "3.07", "wpb": "118184", "bsz": "256", "num_updates": "525000", "lr": "0.000479798", "gnorm": "0.68", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.7", "wall": "171406"} [2022-08-03 01:05:58,307][train_inner][INFO] - {"epoch": 11, "update": 10.204, "loss": "2.149", "ppl": "4.43", "wps": "367648", "ups": "3.09", "wpb": "118924", "bsz": "256", "num_updates": "525200", "lr": "0.000479596", "gnorm": "0.679", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20", "wall": "171471"} [2022-08-03 01:07:02,957][train_inner][INFO] - {"epoch": 11, "update": 10.208, "loss": "2.141", "ppl": "4.41", "wps": "366176", "ups": "3.09", "wpb": "118365", "bsz": "256", "num_updates": "525400", "lr": "0.000479394", "gnorm": "0.677", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "23.1", "wall": "171535"} [2022-08-03 01:08:07,407][train_inner][INFO] - {"epoch": 11, "update": 10.212, "loss": "2.142", "ppl": "4.41", "wps": "367042", "ups": "3.1", "wpb": "118277", "bsz": "256", "num_updates": "525600", "lr": "0.000479192", "gnorm": "0.678", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.1", "wall": "171600"} [2022-08-03 01:09:12,245][train_inner][INFO] - {"epoch": 11, "update": 10.216, "loss": "2.138", "ppl": "4.4", "wps": "366118", "ups": "3.08", "wpb": "118690", "bsz": "256", "num_updates": "525800", "lr": "0.00047899", "gnorm": "0.682", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22.1", "wall": "171665"} [2022-08-03 01:10:17,198][train_inner][INFO] - {"epoch": 11, "update": 10.22, "loss": "2.145", "ppl": "4.42", "wps": "366844", "ups": "3.08", "wpb": "119135", "bsz": "256", "num_updates": "526000", "lr": "0.000478788", "gnorm": "0.677", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "25.9", "wall": "171730"} [2022-08-03 01:11:21,929][train_inner][INFO] - {"epoch": 11, "update": 10.224, "loss": "2.147", "ppl": "4.43", "wps": "366006", "ups": "3.09", "wpb": "118458", "bsz": "256", "num_updates": "526200", "lr": "0.000478586", "gnorm": "0.686", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.7", "wall": "171794"} [2022-08-03 01:12:26,757][train_inner][INFO] - {"epoch": 11, "update": 10.228, "loss": "2.151", "ppl": "4.44", "wps": "365450", "ups": "3.09", "wpb": "118454", "bsz": "256", "num_updates": "526400", "lr": "0.000478384", "gnorm": "0.681", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.9", "wall": "171859"} [2022-08-03 01:13:31,971][train_inner][INFO] - {"epoch": 11, "update": 10.232, "loss": "2.151", "ppl": "4.44", "wps": "362386", "ups": "3.07", "wpb": "118163", "bsz": "256", "num_updates": "526600", "lr": "0.000478182", "gnorm": "0.684", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21", "wall": "171924"} [2022-08-03 01:14:36,535][train_inner][INFO] - {"epoch": 11, "update": 10.236, "loss": "2.152", "ppl": "4.45", "wps": "363576", "ups": "3.1", "wpb": "117367", "bsz": "256", "num_updates": "526800", "lr": "0.00047798", "gnorm": "0.683", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.1", "wall": "171989"} [2022-08-03 01:15:42,089][train_inner][INFO] - {"epoch": 11, "update": 10.239, "loss": "2.143", "ppl": "4.42", "wps": "360675", "ups": "3.05", "wpb": "118216", "bsz": "256", "num_updates": "527000", "lr": "0.000477778", "gnorm": "0.679", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.2", "wall": "172054"} [2022-08-03 01:16:46,973][train_inner][INFO] - {"epoch": 11, "update": 10.243, "loss": "2.147", "ppl": "4.43", "wps": "365363", "ups": "3.08", "wpb": "118530", "bsz": "256", "num_updates": "527200", "lr": "0.000477576", "gnorm": "0.681", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.2", "wall": "172119"} [2022-08-03 01:17:51,598][train_inner][INFO] - {"epoch": 11, "update": 10.247, "loss": "2.148", "ppl": "4.43", "wps": "365171", "ups": "3.09", "wpb": "117993", "bsz": "256", "num_updates": "527400", "lr": "0.000477374", "gnorm": "0.685", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.5", "wall": "172184"} [2022-08-03 01:18:56,426][train_inner][INFO] - {"epoch": 11, "update": 10.251, "loss": "2.138", "ppl": "4.4", "wps": "365819", "ups": "3.09", "wpb": "118575", "bsz": "256", "num_updates": "527600", "lr": "0.000477172", "gnorm": "0.678", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.8", "wall": "172249"} [2022-08-03 01:19:54,326][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 [2022-08-03 01:19:56,260][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 [2022-08-03 01:20:02,012][train_inner][INFO] - {"epoch": 11, "update": 10.255, "loss": "2.142", "ppl": "4.41", "wps": "362311", "ups": "3.05", "wpb": "118810", "bsz": "256", "num_updates": "527800", "lr": "0.00047697", "gnorm": "0.69", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "22", "wall": "172314"} [2022-08-03 01:21:06,887][train_inner][INFO] - {"epoch": 11, "update": 10.259, "loss": "2.141", "ppl": "4.41", "wps": "365881", "ups": "3.08", "wpb": "118683", "bsz": "256", "num_updates": "528000", "lr": "0.000476768", "gnorm": "0.685", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.9", "wall": "172379"} [2022-08-03 01:22:11,519][train_inner][INFO] - {"epoch": 11, "update": 10.263, "loss": "2.145", "ppl": "4.42", "wps": "366023", "ups": "3.09", "wpb": "118283", "bsz": "256", "num_updates": "528200", "lr": "0.000476566", "gnorm": "0.693", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "24.2", "wall": "172444"} [2022-08-03 01:23:16,111][train_inner][INFO] - {"epoch": 11, "update": 10.267, "loss": "2.14", "ppl": "4.41", "wps": "367503", "ups": "3.1", "wpb": "118686", "bsz": "256", "num_updates": "528400", "lr": "0.000476364", "gnorm": "0.678", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.2", "wall": "172508"} [2022-08-03 01:24:20,962][train_inner][INFO] - {"epoch": 11, "update": 10.271, "loss": "2.152", "ppl": "4.44", "wps": "364164", "ups": "3.08", "wpb": "118082", "bsz": "256", "num_updates": "528600", "lr": "0.000476162", "gnorm": "0.683", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.8", "wall": "172573"} [2022-08-03 01:25:25,815][train_inner][INFO] - {"epoch": 11, "update": 10.274, "loss": "2.146", "ppl": "4.43", "wps": "364359", "ups": "3.08", "wpb": "118145", "bsz": "256", "num_updates": "528800", "lr": "0.00047596", "gnorm": "0.683", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "28.2", "wall": "172638"} [2022-08-03 01:26:30,472][train_inner][INFO] - {"epoch": 11, "update": 10.278, "loss": "2.141", "ppl": "4.41", "wps": "365161", "ups": "3.09", "wpb": "118050", "bsz": "256", "num_updates": "529000", "lr": "0.000475758", "gnorm": "0.688", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "23.6", "wall": "172703"} [2022-08-03 01:27:35,310][train_inner][INFO] - {"epoch": 11, "update": 10.282, "loss": "2.14", "ppl": "4.41", "wps": "365134", "ups": "3.08", "wpb": "118371", "bsz": "256", "num_updates": "529200", "lr": "0.000475556", "gnorm": "0.679", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.4", "wall": "172768"} [2022-08-03 01:28:12,183][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 [2022-08-03 01:28:40,454][train_inner][INFO] - {"epoch": 11, "update": 10.286, "loss": "2.143", "ppl": "4.42", "wps": "363070", "ups": "3.07", "wpb": "118257", "bsz": "256", "num_updates": "529400", "lr": "0.000475354", "gnorm": "0.688", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "21.3", "wall": "172833"} [2022-08-03 01:29:45,332][train_inner][INFO] - {"epoch": 11, "update": 10.29, "loss": "2.144", "ppl": "4.42", "wps": "364447", "ups": "3.08", "wpb": "118223", "bsz": "256", "num_updates": "529600", "lr": "0.000475152", "gnorm": "0.681", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "22.3", "wall": "172898"} [2022-08-03 01:30:27,178][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 0.5 [2022-08-03 01:30:50,682][train_inner][INFO] - {"epoch": 11, "update": 10.294, "loss": "2.148", "ppl": "4.43", "wps": "361406", "ups": "3.06", "wpb": "118088", "bsz": "256", "num_updates": "529800", "lr": "0.000474949", "gnorm": "0.698", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "21.8", "wall": "172963"} [2022-08-03 01:31:55,431][train_inner][INFO] - {"epoch": 11, "update": 10.298, "loss": "2.141", "ppl": "4.41", "wps": "365812", "ups": "3.09", "wpb": "118430", "bsz": "256", "num_updates": "530000", "lr": "0.000474747", "gnorm": "0.681", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20.2", "wall": "173028"} [2022-08-03 01:33:00,194][train_inner][INFO] - {"epoch": 11, "update": 10.302, "loss": "2.138", "ppl": "4.4", "wps": "367096", "ups": "3.09", "wpb": "118869", "bsz": "256", "num_updates": "530200", "lr": "0.000474545", "gnorm": "0.681", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "23.7", "wall": "173093"} [2022-08-03 01:34:05,227][train_inner][INFO] - {"epoch": 11, "update": 10.306, "loss": "2.139", "ppl": "4.4", "wps": "363275", "ups": "3.08", "wpb": "118123", "bsz": "256", "num_updates": "530400", "lr": "0.000474343", "gnorm": "0.683", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.5", "wall": "173158"} [2022-08-03 01:35:10,198][train_inner][INFO] - {"epoch": 11, "update": 10.309, "loss": "2.152", "ppl": "4.45", "wps": "361448", "ups": "3.08", "wpb": "117417", "bsz": "256", "num_updates": "530600", "lr": "0.000474141", "gnorm": "0.77", "clip": "0.5", "loss_scale": "0.5", "train_wall": "65", "gb_free": "22.7", "wall": "173223"} [2022-08-03 01:36:14,934][train_inner][INFO] - {"epoch": 11, "update": 10.313, "loss": "2.136", "ppl": "4.4", "wps": "367054", "ups": "3.09", "wpb": "118807", "bsz": "256", "num_updates": "530800", "lr": "0.000473939", "gnorm": "0.678", "clip": "0", "loss_scale": "0.5", "train_wall": "64", "gb_free": "20.5", "wall": "173287"} [2022-08-03 01:37:19,866][train_inner][INFO] - {"epoch": 11, "update": 10.317, "loss": "2.146", "ppl": "4.42", "wps": "363155", "ups": "3.08", "wpb": "117900", "bsz": "256", "num_updates": "531000", "lr": "0.000473737", "gnorm": "0.683", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "20.3", "wall": "173352"} [2022-08-03 01:38:24,839][train_inner][INFO] - {"epoch": 11, "update": 10.321, "loss": "2.143", "ppl": "4.42", "wps": "364787", "ups": "3.08", "wpb": "118504", "bsz": "256", "num_updates": "531200", "lr": "0.000473535", "gnorm": "0.683", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "21.3", "wall": "173417"} [2022-08-03 01:39:29,835][train_inner][INFO] - {"epoch": 11, "update": 10.325, "loss": "2.142", "ppl": "4.41", "wps": "362911", "ups": "3.08", "wpb": "117937", "bsz": "256", "num_updates": "531400", "lr": "0.000473333", "gnorm": "0.681", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "19.8", "wall": "173482"} [2022-08-03 01:40:34,716][train_inner][INFO] - {"epoch": 11, "update": 10.329, "loss": "2.146", "ppl": "4.42", "wps": "364124", "ups": "3.08", "wpb": "118121", "bsz": "256", "num_updates": "531600", "lr": "0.000473131", "gnorm": "0.68", "clip": "0", "loss_scale": "0.5", "train_wall": "65", "gb_free": "26.4", "wall": "173547"} [2022-08-03 01:41:39,436][train_inner][INFO] - {"epoch": 11, "update": 10.333, "loss": "2.147", "ppl": "4.43", "wps": "364988", "ups": "3.09", "wpb": "118109", "bsz": "256", "num_updates": "531800", "lr": "0.000472929", "gnorm": "0.682", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "24.4", "wall": "173612"} [2022-08-03 01:42:44,349][train_inner][INFO] - {"epoch": 11, "update": 10.337, "loss": "2.14", "ppl": "4.41", "wps": "365977", "ups": "3.08", "wpb": "118781", "bsz": "256", "num_updates": "532000", "lr": "0.000472727", "gnorm": "0.679", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "22.5", "wall": "173677"} [2022-08-03 01:43:49,255][train_inner][INFO] - {"epoch": 11, "update": 10.34, "loss": "2.139", "ppl": "4.4", "wps": "365127", "ups": "3.08", "wpb": "118492", "bsz": "256", "num_updates": "532200", "lr": "0.000472525", "gnorm": "0.682", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "22.5", "wall": "173742"} [2022-08-03 01:44:53,844][train_inner][INFO] - {"epoch": 11, "update": 10.344, "loss": "2.142", "ppl": "4.41", "wps": "365844", "ups": "3.1", "wpb": "118147", "bsz": "256", "num_updates": "532400", "lr": "0.000472323", "gnorm": "0.681", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "28.2", "wall": "173806"} [2022-08-03 01:45:59,400][train_inner][INFO] - {"epoch": 11, "update": 10.348, "loss": "2.146", "ppl": "4.42", "wps": "360825", "ups": "3.05", "wpb": "118269", "bsz": "256", "num_updates": "532600", "lr": "0.000472121", "gnorm": "0.683", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "19.8", "wall": "173872"} [2022-08-03 01:47:04,069][train_inner][INFO] - {"epoch": 11, "update": 10.352, "loss": "2.145", "ppl": "4.42", "wps": "365000", "ups": "3.09", "wpb": "118018", "bsz": "256", "num_updates": "532800", "lr": "0.000471919", "gnorm": "0.682", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "24.1", "wall": "173936"} [2022-08-03 01:48:09,055][train_inner][INFO] - {"epoch": 11, "update": 10.356, "loss": "2.143", "ppl": "4.42", "wps": "364500", "ups": "3.08", "wpb": "118436", "bsz": "256", "num_updates": "533000", "lr": "0.000471717", "gnorm": "0.68", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "19.7", "wall": "174001"} [2022-08-03 01:49:13,791][train_inner][INFO] - {"epoch": 11, "update": 10.36, "loss": "2.139", "ppl": "4.4", "wps": "368031", "ups": "3.09", "wpb": "119121", "bsz": "256", "num_updates": "533200", "lr": "0.000471515", "gnorm": "0.678", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "23.6", "wall": "174066"} [2022-08-03 01:50:18,639][train_inner][INFO] - {"epoch": 11, "update": 10.364, "loss": "2.137", "ppl": "4.4", "wps": "367418", "ups": "3.08", "wpb": "119130", "bsz": "256", "num_updates": "533400", "lr": "0.000471313", "gnorm": "0.676", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "19.8", "wall": "174131"} [2022-08-03 01:51:23,319][train_inner][INFO] - {"epoch": 11, "update": 10.368, "loss": "2.141", "ppl": "4.41", "wps": "365485", "ups": "3.09", "wpb": "118196", "bsz": "256", "num_updates": "533600", "lr": "0.000471111", "gnorm": "0.68", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "19.8", "wall": "174196"} [2022-08-03 01:52:28,036][train_inner][INFO] - {"epoch": 11, "update": 10.372, "loss": "2.144", "ppl": "4.42", "wps": "366244", "ups": "3.09", "wpb": "118510", "bsz": "256", "num_updates": "533800", "lr": "0.000470909", "gnorm": "0.699", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20.6", "wall": "174260"} [2022-08-03 01:53:32,607][train_inner][INFO] - {"epoch": 11, "update": 10.375, "loss": "2.151", "ppl": "4.44", "wps": "365716", "ups": "3.1", "wpb": "118070", "bsz": "256", "num_updates": "534000", "lr": "0.000470707", "gnorm": "0.69", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.3", "wall": "174325"} [2022-08-03 01:54:37,245][train_inner][INFO] - {"epoch": 11, "update": 10.379, "loss": "2.142", "ppl": "4.41", "wps": "364807", "ups": "3.09", "wpb": "117902", "bsz": "256", "num_updates": "534200", "lr": "0.000470505", "gnorm": "0.683", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "19.8", "wall": "174390"} [2022-08-03 01:55:42,178][train_inner][INFO] - {"epoch": 11, "update": 10.383, "loss": "2.139", "ppl": "4.4", "wps": "364109", "ups": "3.08", "wpb": "118211", "bsz": "256", "num_updates": "534400", "lr": "0.000470303", "gnorm": "0.681", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.9", "wall": "174455"} [2022-08-03 01:56:47,118][train_inner][INFO] - {"epoch": 11, "update": 10.387, "loss": "2.145", "ppl": "4.42", "wps": "364719", "ups": "3.08", "wpb": "118423", "bsz": "256", "num_updates": "534600", "lr": "0.000470101", "gnorm": "0.681", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "28.2", "wall": "174519"} [2022-08-03 01:57:51,825][train_inner][INFO] - {"epoch": 11, "update": 10.391, "loss": "2.138", "ppl": "4.4", "wps": "363939", "ups": "3.09", "wpb": "117745", "bsz": "256", "num_updates": "534800", "lr": "0.000469899", "gnorm": "0.683", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.4", "wall": "174584"} [2022-08-03 01:58:56,873][train_inner][INFO] - {"epoch": 11, "update": 10.395, "loss": "2.146", "ppl": "4.43", "wps": "362537", "ups": "3.07", "wpb": "117909", "bsz": "256", "num_updates": "535000", "lr": "0.000469697", "gnorm": "0.685", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "26.7", "wall": "174649"} [2022-08-03 02:00:01,568][train_inner][INFO] - {"epoch": 11, "update": 10.399, "loss": "2.14", "ppl": "4.41", "wps": "366068", "ups": "3.09", "wpb": "118411", "bsz": "256", "num_updates": "535200", "lr": "0.000469495", "gnorm": "0.68", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.6", "wall": "174714"} [2022-08-03 02:01:06,577][train_inner][INFO] - {"epoch": 11, "update": 10.403, "loss": "2.142", "ppl": "4.41", "wps": "363080", "ups": "3.08", "wpb": "118016", "bsz": "256", "num_updates": "535400", "lr": "0.000469293", "gnorm": "0.683", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.2", "wall": "174779"} [2022-08-03 02:02:11,386][train_inner][INFO] - {"epoch": 11, "update": 10.407, "loss": "2.14", "ppl": "4.41", "wps": "364956", "ups": "3.09", "wpb": "118260", "bsz": "256", "num_updates": "535600", "lr": "0.000469091", "gnorm": "0.68", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "22.7", "wall": "174844"} [2022-08-03 02:03:16,176][train_inner][INFO] - {"epoch": 11, "update": 10.41, "loss": "2.139", "ppl": "4.41", "wps": "365909", "ups": "3.09", "wpb": "118535", "bsz": "256", "num_updates": "535800", "lr": "0.000468889", "gnorm": "0.68", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.7", "wall": "174908"} [2022-08-03 02:04:20,727][train_inner][INFO] - {"epoch": 11, "update": 10.414, "loss": "2.143", "ppl": "4.42", "wps": "366796", "ups": "3.1", "wpb": "118382", "bsz": "256", "num_updates": "536000", "lr": "0.000468687", "gnorm": "0.684", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.9", "wall": "174973"} [2022-08-03 02:05:25,923][train_inner][INFO] - {"epoch": 11, "update": 10.418, "loss": "2.137", "ppl": "4.4", "wps": "363140", "ups": "3.07", "wpb": "118374", "bsz": "256", "num_updates": "536200", "lr": "0.000468485", "gnorm": "0.683", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.1", "wall": "175038"} [2022-08-03 02:06:30,761][train_inner][INFO] - {"epoch": 11, "update": 10.422, "loss": "2.141", "ppl": "4.41", "wps": "365922", "ups": "3.08", "wpb": "118627", "bsz": "255.9", "num_updates": "536400", "lr": "0.000468283", "gnorm": "0.681", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20", "wall": "175103"} [2022-08-03 02:07:35,419][train_inner][INFO] - {"epoch": 11, "update": 10.426, "loss": "2.142", "ppl": "4.42", "wps": "365344", "ups": "3.09", "wpb": "118109", "bsz": "256", "num_updates": "536600", "lr": "0.000468081", "gnorm": "0.684", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.9", "wall": "175168"} [2022-08-03 02:08:40,379][train_inner][INFO] - {"epoch": 11, "update": 10.43, "loss": "2.144", "ppl": "4.42", "wps": "363233", "ups": "3.08", "wpb": "117977", "bsz": "256", "num_updates": "536800", "lr": "0.000467879", "gnorm": "0.682", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.2", "wall": "175233"} [2022-08-03 02:09:45,075][train_inner][INFO] - {"epoch": 11, "update": 10.434, "loss": "2.147", "ppl": "4.43", "wps": "364991", "ups": "3.09", "wpb": "118065", "bsz": "256", "num_updates": "537000", "lr": "0.000467677", "gnorm": "0.686", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "23.7", "wall": "175297"} [2022-08-03 02:10:50,788][train_inner][INFO] - {"epoch": 11, "update": 10.438, "loss": "2.136", "ppl": "4.4", "wps": "358635", "ups": "3.04", "wpb": "117833", "bsz": "256", "num_updates": "537200", "lr": "0.000467475", "gnorm": "0.686", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.2", "wall": "175363"} [2022-08-03 02:11:55,530][train_inner][INFO] - {"epoch": 11, "update": 10.441, "loss": "2.139", "ppl": "4.4", "wps": "364180", "ups": "3.09", "wpb": "117887", "bsz": "256", "num_updates": "537400", "lr": "0.000467273", "gnorm": "0.683", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.9", "wall": "175428"} [2022-08-03 02:13:00,704][train_inner][INFO] - {"epoch": 11, "update": 10.445, "loss": "2.141", "ppl": "4.41", "wps": "364348", "ups": "3.07", "wpb": "118728", "bsz": "256", "num_updates": "537600", "lr": "0.000467071", "gnorm": "0.687", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.8", "wall": "175493"} [2022-08-03 02:14:05,963][train_inner][INFO] - {"epoch": 11, "update": 10.449, "loss": "2.139", "ppl": "4.41", "wps": "362862", "ups": "3.06", "wpb": "118399", "bsz": "256", "num_updates": "537800", "lr": "0.000466869", "gnorm": "0.681", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20", "wall": "175558"} [2022-08-03 02:15:10,951][train_inner][INFO] - {"epoch": 11, "update": 10.453, "loss": "2.136", "ppl": "4.4", "wps": "363873", "ups": "3.08", "wpb": "118234", "bsz": "256", "num_updates": "538000", "lr": "0.000466667", "gnorm": "0.686", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "175623"} [2022-08-03 02:16:15,617][train_inner][INFO] - {"epoch": 11, "update": 10.457, "loss": "2.149", "ppl": "4.44", "wps": "364432", "ups": "3.09", "wpb": "117830", "bsz": "256", "num_updates": "538200", "lr": "0.000466465", "gnorm": "0.685", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "24.3", "wall": "175688"} [2022-08-03 02:17:20,467][train_inner][INFO] - {"epoch": 11, "update": 10.461, "loss": "2.147", "ppl": "4.43", "wps": "364312", "ups": "3.08", "wpb": "118127", "bsz": "256", "num_updates": "538400", "lr": "0.000466263", "gnorm": "0.683", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.7", "wall": "175753"} [2022-08-03 02:18:25,432][train_inner][INFO] - {"epoch": 11, "update": 10.465, "loss": "2.143", "ppl": "4.42", "wps": "361619", "ups": "3.08", "wpb": "117460", "bsz": "256", "num_updates": "538600", "lr": "0.000466061", "gnorm": "0.685", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.1", "wall": "175818"} [2022-08-03 02:19:30,566][train_inner][INFO] - {"epoch": 11, "update": 10.469, "loss": "2.135", "ppl": "4.39", "wps": "364342", "ups": "3.07", "wpb": "118653", "bsz": "256", "num_updates": "538800", "lr": "0.000465859", "gnorm": "0.681", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "26", "wall": "175883"} [2022-08-03 02:20:36,020][train_inner][INFO] - {"epoch": 11, "update": 10.473, "loss": "2.136", "ppl": "4.4", "wps": "362831", "ups": "3.06", "wpb": "118742", "bsz": "256", "num_updates": "539000", "lr": "0.000465657", "gnorm": "0.683", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "175948"} [2022-08-03 02:21:40,860][train_inner][INFO] - {"epoch": 11, "update": 10.476, "loss": "2.139", "ppl": "4.4", "wps": "363962", "ups": "3.08", "wpb": "117996", "bsz": "256", "num_updates": "539200", "lr": "0.000465455", "gnorm": "0.683", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.4", "wall": "176013"} [2022-08-03 02:22:45,786][train_inner][INFO] - {"epoch": 11, "update": 10.48, "loss": "2.14", "ppl": "4.41", "wps": "364467", "ups": "3.08", "wpb": "118314", "bsz": "256", "num_updates": "539400", "lr": "0.000465253", "gnorm": "0.679", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.8", "wall": "176078"} [2022-08-03 02:23:50,573][train_inner][INFO] - {"epoch": 11, "update": 10.484, "loss": "2.138", "ppl": "4.4", "wps": "365719", "ups": "3.09", "wpb": "118468", "bsz": "256", "num_updates": "539600", "lr": "0.000465051", "gnorm": "0.681", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20", "wall": "176143"} [2022-08-03 02:24:55,327][train_inner][INFO] - {"epoch": 11, "update": 10.488, "loss": "2.146", "ppl": "4.43", "wps": "364744", "ups": "3.09", "wpb": "118091", "bsz": "256", "num_updates": "539800", "lr": "0.000464848", "gnorm": "0.692", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "24.4", "wall": "176208"} [2022-08-03 02:25:59,989][train_inner][INFO] - {"epoch": 11, "update": 10.492, "loss": "2.135", "ppl": "4.39", "wps": "365234", "ups": "3.09", "wpb": "118082", "bsz": "256", "num_updates": "540000", "lr": "0.000464646", "gnorm": "0.682", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "25.2", "wall": "176272"} [2022-08-03 02:27:04,948][train_inner][INFO] - {"epoch": 11, "update": 10.496, "loss": "2.135", "ppl": "4.39", "wps": "365603", "ups": "3.08", "wpb": "118745", "bsz": "256", "num_updates": "540200", "lr": "0.000464444", "gnorm": "0.682", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "25.3", "wall": "176337"} [2022-08-03 02:27:09,432][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-03 02:28:09,926][train_inner][INFO] - {"epoch": 11, "update": 10.5, "loss": "2.136", "ppl": "4.39", "wps": "363042", "ups": "3.08", "wpb": "117948", "bsz": "256", "num_updates": "540400", "lr": "0.000464242", "gnorm": "0.685", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "26.7", "wall": "176402"} [2022-08-03 02:29:14,941][train_inner][INFO] - {"epoch": 11, "update": 10.504, "loss": "2.144", "ppl": "4.42", "wps": "364342", "ups": "3.08", "wpb": "118437", "bsz": "256", "num_updates": "540600", "lr": "0.00046404", "gnorm": "0.682", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.8", "wall": "176467"} [2022-08-03 02:30:21,130][train_inner][INFO] - {"epoch": 11, "update": 10.508, "loss": "2.137", "ppl": "4.4", "wps": "355969", "ups": "3.02", "wpb": "117804", "bsz": "256", "num_updates": "540800", "lr": "0.000463838", "gnorm": "0.684", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "22.2", "wall": "176533"} [2022-08-03 02:31:25,926][train_inner][INFO] - {"epoch": 11, "update": 10.511, "loss": "2.138", "ppl": "4.4", "wps": "367325", "ups": "3.09", "wpb": "119004", "bsz": "256", "num_updates": "541000", "lr": "0.000463636", "gnorm": "0.688", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.4", "wall": "176598"} [2022-08-03 02:32:30,745][train_inner][INFO] - {"epoch": 11, "update": 10.515, "loss": "2.132", "ppl": "4.38", "wps": "364006", "ups": "3.09", "wpb": "117970", "bsz": "256", "num_updates": "541200", "lr": "0.000463434", "gnorm": "0.682", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "24.5", "wall": "176663"} [2022-08-03 02:33:35,431][train_inner][INFO] - {"epoch": 11, "update": 10.519, "loss": "2.138", "ppl": "4.4", "wps": "364252", "ups": "3.09", "wpb": "117808", "bsz": "256", "num_updates": "541400", "lr": "0.000463232", "gnorm": "0.686", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.2", "wall": "176728"} [2022-08-03 02:34:40,186][train_inner][INFO] - {"epoch": 11, "update": 10.523, "loss": "2.129", "ppl": "4.37", "wps": "367713", "ups": "3.09", "wpb": "119054", "bsz": "256", "num_updates": "541600", "lr": "0.00046303", "gnorm": "0.68", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.4", "wall": "176793"} [2022-08-03 02:35:46,284][train_inner][INFO] - {"epoch": 11, "update": 10.527, "loss": "2.141", "ppl": "4.41", "wps": "356801", "ups": "3.03", "wpb": "117918", "bsz": "256", "num_updates": "541800", "lr": "0.000462828", "gnorm": "0.686", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "21.2", "wall": "176859"} [2022-08-03 02:36:48,474][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 [2022-08-03 02:36:51,415][train_inner][INFO] - {"epoch": 11, "update": 10.531, "loss": "2.144", "ppl": "4.42", "wps": "363282", "ups": "3.07", "wpb": "118302", "bsz": "256", "num_updates": "542000", "lr": "0.000462626", "gnorm": "0.707", "clip": "0.5", "loss_scale": "4", "train_wall": "65", "gb_free": "20.5", "wall": "176924"} [2022-08-03 02:37:12,118][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 [2022-08-03 02:37:12,738][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 [2022-08-03 02:37:57,166][train_inner][INFO] - {"epoch": 11, "update": 10.535, "loss": "2.141", "ppl": "4.41", "wps": "359558", "ups": "3.04", "wpb": "118206", "bsz": "256", "num_updates": "542200", "lr": "0.000462424", "gnorm": "0.703", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "19.7", "wall": "176989"} [2022-08-03 02:39:01,916][train_inner][INFO] - {"epoch": 11, "update": 10.539, "loss": "2.139", "ppl": "4.41", "wps": "366923", "ups": "3.09", "wpb": "118791", "bsz": "256", "num_updates": "542400", "lr": "0.000462222", "gnorm": "0.69", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20.2", "wall": "177054"} [2022-08-03 02:40:06,743][train_inner][INFO] - {"epoch": 11, "update": 10.543, "loss": "2.136", "ppl": "4.4", "wps": "363939", "ups": "3.09", "wpb": "117963", "bsz": "256", "num_updates": "542600", "lr": "0.00046202", "gnorm": "0.685", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "20", "wall": "177119"} [2022-08-03 02:41:11,822][train_inner][INFO] - {"epoch": 11, "update": 10.546, "loss": "2.134", "ppl": "4.39", "wps": "364406", "ups": "3.07", "wpb": "118575", "bsz": "256", "num_updates": "542800", "lr": "0.000461818", "gnorm": "0.682", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "19.9", "wall": "177184"} [2022-08-03 02:42:16,246][train_inner][INFO] - {"epoch": 11, "update": 10.55, "loss": "2.14", "ppl": "4.41", "wps": "366179", "ups": "3.1", "wpb": "117951", "bsz": "256", "num_updates": "543000", "lr": "0.000461616", "gnorm": "0.683", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "21.8", "wall": "177249"} [2022-08-03 02:43:21,401][train_inner][INFO] - {"epoch": 11, "update": 10.554, "loss": "2.138", "ppl": "4.4", "wps": "362587", "ups": "3.07", "wpb": "118121", "bsz": "256", "num_updates": "543200", "lr": "0.000461414", "gnorm": "0.684", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "22.6", "wall": "177314"} [2022-08-03 02:44:26,283][train_inner][INFO] - {"epoch": 11, "update": 10.558, "loss": "2.137", "ppl": "4.4", "wps": "363363", "ups": "3.08", "wpb": "117876", "bsz": "256", "num_updates": "543400", "lr": "0.000461212", "gnorm": "0.694", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "24.1", "wall": "177379"} [2022-08-03 02:45:30,848][train_inner][INFO] - {"epoch": 11, "update": 10.562, "loss": "2.138", "ppl": "4.4", "wps": "365769", "ups": "3.1", "wpb": "118078", "bsz": "256", "num_updates": "543600", "lr": "0.00046101", "gnorm": "0.684", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "24", "wall": "177443"} [2022-08-03 02:46:35,948][train_inner][INFO] - {"epoch": 11, "update": 10.566, "loss": "2.135", "ppl": "4.39", "wps": "363290", "ups": "3.07", "wpb": "118248", "bsz": "256", "num_updates": "543800", "lr": "0.000460808", "gnorm": "0.685", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "22.6", "wall": "177508"} [2022-08-03 02:47:40,561][train_inner][INFO] - {"epoch": 11, "update": 10.57, "loss": "2.133", "ppl": "4.39", "wps": "365930", "ups": "3.1", "wpb": "118217", "bsz": "256", "num_updates": "544000", "lr": "0.000460606", "gnorm": "0.696", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "23.1", "wall": "177573"} [2022-08-03 02:48:45,671][train_inner][INFO] - {"epoch": 11, "update": 10.574, "loss": "2.139", "ppl": "4.4", "wps": "364204", "ups": "3.07", "wpb": "118565", "bsz": "256", "num_updates": "544200", "lr": "0.000460404", "gnorm": "0.685", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "22.8", "wall": "177638"} [2022-08-03 02:49:50,666][train_inner][INFO] - {"epoch": 11, "update": 10.577, "loss": "2.132", "ppl": "4.38", "wps": "365174", "ups": "3.08", "wpb": "118671", "bsz": "256", "num_updates": "544400", "lr": "0.000460202", "gnorm": "0.681", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "24.3", "wall": "177703"} [2022-08-03 02:50:55,432][train_inner][INFO] - {"epoch": 11, "update": 10.581, "loss": "2.135", "ppl": "4.39", "wps": "366422", "ups": "3.09", "wpb": "118656", "bsz": "256", "num_updates": "544600", "lr": "0.00046", "gnorm": "0.695", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "22", "wall": "177768"} [2022-08-03 02:52:01,515][train_inner][INFO] - {"epoch": 11, "update": 10.585, "loss": "2.132", "ppl": "4.38", "wps": "357371", "ups": "3.03", "wpb": "118080", "bsz": "256", "num_updates": "544800", "lr": "0.000459798", "gnorm": "0.683", "clip": "0", "loss_scale": "2", "train_wall": "66", "gb_free": "20", "wall": "177834"} [2022-08-03 02:53:06,448][train_inner][INFO] - {"epoch": 11, "update": 10.589, "loss": "2.138", "ppl": "4.4", "wps": "365660", "ups": "3.08", "wpb": "118715", "bsz": "256", "num_updates": "545000", "lr": "0.000459596", "gnorm": "0.682", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.7", "wall": "177899"} [2022-08-03 02:54:11,482][train_inner][INFO] - {"epoch": 11, "update": 10.593, "loss": "2.133", "ppl": "4.39", "wps": "365091", "ups": "3.08", "wpb": "118715", "bsz": "256", "num_updates": "545200", "lr": "0.000459394", "gnorm": "0.682", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.7", "wall": "177964"} [2022-08-03 02:55:16,717][train_inner][INFO] - {"epoch": 11, "update": 10.597, "loss": "2.128", "ppl": "4.37", "wps": "364633", "ups": "3.07", "wpb": "118932", "bsz": "256", "num_updates": "545400", "lr": "0.000459192", "gnorm": "0.683", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.8", "wall": "178029"} [2022-08-03 02:56:21,157][train_inner][INFO] - {"epoch": 11, "update": 10.601, "loss": "2.136", "ppl": "4.39", "wps": "369204", "ups": "3.1", "wpb": "118955", "bsz": "256", "num_updates": "545600", "lr": "0.00045899", "gnorm": "0.683", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "25.7", "wall": "178093"} [2022-08-03 02:57:25,839][train_inner][INFO] - {"epoch": 11, "update": 10.605, "loss": "2.139", "ppl": "4.4", "wps": "366535", "ups": "3.09", "wpb": "118540", "bsz": "256", "num_updates": "545800", "lr": "0.000458788", "gnorm": "0.684", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "22.6", "wall": "178158"} [2022-08-03 02:58:31,190][train_inner][INFO] - {"epoch": 11, "update": 10.609, "loss": "2.131", "ppl": "4.38", "wps": "364062", "ups": "3.06", "wpb": "118957", "bsz": "256", "num_updates": "546000", "lr": "0.000458586", "gnorm": "0.682", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.8", "wall": "178224"} [2022-08-03 02:59:35,979][train_inner][INFO] - {"epoch": 11, "update": 10.612, "loss": "2.129", "ppl": "4.37", "wps": "366460", "ups": "3.09", "wpb": "118712", "bsz": "256", "num_updates": "546200", "lr": "0.000458384", "gnorm": "0.682", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "24.4", "wall": "178288"} [2022-08-03 03:00:41,004][train_inner][INFO] - {"epoch": 11, "update": 10.616, "loss": "2.137", "ppl": "4.4", "wps": "362458", "ups": "3.08", "wpb": "117842", "bsz": "256", "num_updates": "546400", "lr": "0.000458182", "gnorm": "0.685", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.8", "wall": "178353"} [2022-08-03 03:01:45,828][train_inner][INFO] - {"epoch": 11, "update": 10.62, "loss": "2.143", "ppl": "4.42", "wps": "363045", "ups": "3.09", "wpb": "117669", "bsz": "256", "num_updates": "546600", "lr": "0.00045798", "gnorm": "0.688", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22.2", "wall": "178418"} [2022-08-03 03:02:50,784][train_inner][INFO] - {"epoch": 11, "update": 10.624, "loss": "2.135", "ppl": "4.39", "wps": "363947", "ups": "3.08", "wpb": "118201", "bsz": "256", "num_updates": "546800", "lr": "0.000457778", "gnorm": "0.682", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.5", "wall": "178483"} [2022-08-03 03:03:55,563][train_inner][INFO] - {"epoch": 11, "update": 10.628, "loss": "2.139", "ppl": "4.4", "wps": "365465", "ups": "3.09", "wpb": "118369", "bsz": "256", "num_updates": "547000", "lr": "0.000457576", "gnorm": "0.687", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21", "wall": "178548"} [2022-08-03 03:05:00,332][train_inner][INFO] - {"epoch": 11, "update": 10.632, "loss": "2.131", "ppl": "4.38", "wps": "364777", "ups": "3.09", "wpb": "118130", "bsz": "256", "num_updates": "547200", "lr": "0.000457374", "gnorm": "0.686", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.8", "wall": "178613"} [2022-08-03 03:06:05,306][train_inner][INFO] - {"epoch": 11, "update": 10.636, "loss": "2.136", "ppl": "4.39", "wps": "362276", "ups": "3.08", "wpb": "117690", "bsz": "256", "num_updates": "547400", "lr": "0.000457172", "gnorm": "0.684", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.9", "wall": "178678"} [2022-08-03 03:07:09,690][train_inner][INFO] - {"epoch": 11, "update": 10.64, "loss": "2.133", "ppl": "4.38", "wps": "367504", "ups": "3.11", "wpb": "118306", "bsz": "256", "num_updates": "547600", "lr": "0.00045697", "gnorm": "0.684", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21", "wall": "178742"} [2022-08-03 03:08:14,217][train_inner][INFO] - {"epoch": 11, "update": 10.644, "loss": "2.135", "ppl": "4.39", "wps": "368183", "ups": "3.1", "wpb": "118785", "bsz": "256", "num_updates": "547800", "lr": "0.000456768", "gnorm": "0.683", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.9", "wall": "178807"} [2022-08-03 03:09:19,309][train_inner][INFO] - {"epoch": 11, "update": 10.647, "loss": "2.132", "ppl": "4.38", "wps": "362579", "ups": "3.07", "wpb": "118003", "bsz": "256", "num_updates": "548000", "lr": "0.000456566", "gnorm": "0.685", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.2", "wall": "178872"} [2022-08-03 03:10:23,992][train_inner][INFO] - {"epoch": 11, "update": 10.651, "loss": "2.136", "ppl": "4.4", "wps": "365346", "ups": "3.09", "wpb": "118157", "bsz": "256", "num_updates": "548200", "lr": "0.000456364", "gnorm": "0.685", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "26.5", "wall": "178936"} [2022-08-03 03:11:29,033][train_inner][INFO] - {"epoch": 11, "update": 10.655, "loss": "2.142", "ppl": "4.41", "wps": "362020", "ups": "3.08", "wpb": "117729", "bsz": "256", "num_updates": "548400", "lr": "0.000456162", "gnorm": "0.687", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.3", "wall": "179001"} [2022-08-03 03:12:33,915][train_inner][INFO] - {"epoch": 11, "update": 10.659, "loss": "2.131", "ppl": "4.38", "wps": "365746", "ups": "3.08", "wpb": "118650", "bsz": "256", "num_updates": "548600", "lr": "0.00045596", "gnorm": "0.683", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.8", "wall": "179066"} [2022-08-03 03:13:38,624][train_inner][INFO] - {"epoch": 11, "update": 10.663, "loss": "2.13", "ppl": "4.38", "wps": "366184", "ups": "3.09", "wpb": "118475", "bsz": "256", "num_updates": "548800", "lr": "0.000455758", "gnorm": "0.683", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.3", "wall": "179131"} [2022-08-03 03:14:43,337][train_inner][INFO] - {"epoch": 11, "update": 10.667, "loss": "2.138", "ppl": "4.4", "wps": "364450", "ups": "3.09", "wpb": "117921", "bsz": "256", "num_updates": "549000", "lr": "0.000455556", "gnorm": "0.686", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21", "wall": "179196"} [2022-08-03 03:15:48,518][train_inner][INFO] - {"epoch": 11, "update": 10.671, "loss": "2.139", "ppl": "4.41", "wps": "362550", "ups": "3.07", "wpb": "118156", "bsz": "256", "num_updates": "549200", "lr": "0.000455354", "gnorm": "0.684", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.1", "wall": "179261"} [2022-08-03 03:16:53,327][train_inner][INFO] - {"epoch": 11, "update": 10.675, "loss": "2.131", "ppl": "4.38", "wps": "364528", "ups": "3.09", "wpb": "118121", "bsz": "256", "num_updates": "549400", "lr": "0.000455152", "gnorm": "0.684", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.8", "wall": "179326"} [2022-08-03 03:17:58,536][train_inner][INFO] - {"epoch": 11, "update": 10.678, "loss": "2.134", "ppl": "4.39", "wps": "363812", "ups": "3.07", "wpb": "118618", "bsz": "256", "num_updates": "549600", "lr": "0.000454949", "gnorm": "0.683", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "179391"} [2022-08-03 03:19:03,404][train_inner][INFO] - {"epoch": 11, "update": 10.682, "loss": "2.126", "ppl": "4.37", "wps": "364262", "ups": "3.08", "wpb": "118142", "bsz": "256", "num_updates": "549800", "lr": "0.000454747", "gnorm": "0.683", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "179456"} [2022-08-03 03:20:08,030][train_inner][INFO] - {"epoch": 11, "update": 10.686, "loss": "2.13", "ppl": "4.38", "wps": "366355", "ups": "3.09", "wpb": "118378", "bsz": "256", "num_updates": "550000", "lr": "0.000454545", "gnorm": "0.685", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.5", "wall": "179520"} [2022-08-03 03:20:08,031][fairseq_cli.train][INFO] - begin validation on "valid" subset [2022-08-03 03:20:31,019][valid][INFO] - {"epoch": 11, "valid_loss": "2.038", "valid_ppl": "4.11", "valid_wps": "1.60422e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "550000", "valid_best_loss": "2.038"} [2022-08-03 03:20:31,022][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 11 @ 550000 updates [2022-08-03 03:20:31,023][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_11_550000.pt [2022-08-03 03:20:41,449][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_11_550000.pt [2022-08-03 03:21:09,064][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/adan2/checkpoint_11_550000.pt (epoch 11 @ 550000 updates, score 2.038) (writing took 38.042021503672004 seconds) [2022-08-03 03:22:13,960][train_inner][INFO] - {"epoch": 11, "update": 10.69, "loss": "2.135", "ppl": "4.39", "wps": "187689", "ups": "1.59", "wpb": "118178", "bsz": "256", "num_updates": "550200", "lr": "0.000454343", "gnorm": "0.685", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.1", "wall": "179646"} [2022-08-03 03:23:18,856][train_inner][INFO] - {"epoch": 11, "update": 10.694, "loss": "2.135", "ppl": "4.39", "wps": "364203", "ups": "3.08", "wpb": "118174", "bsz": "256", "num_updates": "550400", "lr": "0.000454141", "gnorm": "0.685", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "25.1", "wall": "179711"} [2022-08-03 03:24:23,911][train_inner][INFO] - {"epoch": 11, "update": 10.698, "loss": "2.136", "ppl": "4.39", "wps": "363633", "ups": "3.07", "wpb": "118278", "bsz": "256", "num_updates": "550600", "lr": "0.000453939", "gnorm": "0.686", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.3", "wall": "179776"} [2022-08-03 03:25:28,404][train_inner][INFO] - {"epoch": 11, "update": 10.702, "loss": "2.14", "ppl": "4.41", "wps": "365965", "ups": "3.1", "wpb": "118011", "bsz": "255.9", "num_updates": "550800", "lr": "0.000453737", "gnorm": "0.687", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.9", "wall": "179841"} [2022-08-03 03:26:33,422][train_inner][INFO] - {"epoch": 11, "update": 10.706, "loss": "2.132", "ppl": "4.38", "wps": "365357", "ups": "3.08", "wpb": "118772", "bsz": "256", "num_updates": "551000", "lr": "0.000453535", "gnorm": "0.685", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "179906"} [2022-08-03 03:27:38,414][train_inner][INFO] - {"epoch": 11, "update": 10.71, "loss": "2.136", "ppl": "4.4", "wps": "362092", "ups": "3.08", "wpb": "117664", "bsz": "256", "num_updates": "551200", "lr": "0.000453333", "gnorm": "0.688", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "179971"} [2022-08-03 03:28:43,260][train_inner][INFO] - {"epoch": 11, "update": 10.713, "loss": "2.133", "ppl": "4.39", "wps": "364733", "ups": "3.08", "wpb": "118256", "bsz": "256", "num_updates": "551400", "lr": "0.000453131", "gnorm": "0.685", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.7", "wall": "180036"} [2022-08-03 03:29:48,190][train_inner][INFO] - {"epoch": 11, "update": 10.717, "loss": "2.133", "ppl": "4.39", "wps": "363619", "ups": "3.08", "wpb": "118046", "bsz": "256", "num_updates": "551600", "lr": "0.000452929", "gnorm": "0.688", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "27.4", "wall": "180101"} [2022-08-03 03:30:53,326][train_inner][INFO] - {"epoch": 11, "update": 10.721, "loss": "2.13", "ppl": "4.38", "wps": "362775", "ups": "3.07", "wpb": "118146", "bsz": "256", "num_updates": "551800", "lr": "0.000452727", "gnorm": "0.685", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "180166"} [2022-08-03 03:31:59,062][train_inner][INFO] - {"epoch": 11, "update": 10.725, "loss": "2.126", "ppl": "4.37", "wps": "361500", "ups": "3.04", "wpb": "118817", "bsz": "256", "num_updates": "552000", "lr": "0.000452525", "gnorm": "0.684", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.4", "wall": "180231"} [2022-08-03 03:33:04,154][train_inner][INFO] - {"epoch": 11, "update": 10.729, "loss": "2.133", "ppl": "4.39", "wps": "363323", "ups": "3.07", "wpb": "118244", "bsz": "256", "num_updates": "552200", "lr": "0.000452323", "gnorm": "0.685", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "180296"} [2022-08-03 03:34:09,145][train_inner][INFO] - {"epoch": 11, "update": 10.733, "loss": "2.133", "ppl": "4.38", "wps": "365786", "ups": "3.08", "wpb": "118862", "bsz": "256", "num_updates": "552400", "lr": "0.000452121", "gnorm": "0.681", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "22.9", "wall": "180361"} [2022-08-03 03:34:20,886][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-03 03:35:14,387][train_inner][INFO] - {"epoch": 11, "update": 10.737, "loss": "2.132", "ppl": "4.38", "wps": "363446", "ups": "3.07", "wpb": "118557", "bsz": "256", "num_updates": "552600", "lr": "0.000451919", "gnorm": "0.689", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "180427"} [2022-08-03 03:36:19,584][train_inner][INFO] - {"epoch": 11, "update": 10.741, "loss": "2.131", "ppl": "4.38", "wps": "361829", "ups": "3.07", "wpb": "117951", "bsz": "256", "num_updates": "552800", "lr": "0.000451717", "gnorm": "0.687", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "180492"} [2022-08-03 03:37:24,661][train_inner][INFO] - {"epoch": 11, "update": 10.745, "loss": "2.14", "ppl": "4.41", "wps": "362496", "ups": "3.07", "wpb": "117948", "bsz": "256", "num_updates": "553000", "lr": "0.000451515", "gnorm": "0.688", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.6", "wall": "180557"} [2022-08-03 03:38:29,311][train_inner][INFO] - {"epoch": 11, "update": 10.748, "loss": "2.13", "ppl": "4.38", "wps": "364474", "ups": "3.09", "wpb": "117815", "bsz": "256", "num_updates": "553200", "lr": "0.000451313", "gnorm": "0.686", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23", "wall": "180622"} [2022-08-03 03:39:33,801][train_inner][INFO] - {"epoch": 11, "update": 10.752, "loss": "2.132", "ppl": "4.38", "wps": "366114", "ups": "3.1", "wpb": "118051", "bsz": "256", "num_updates": "553400", "lr": "0.000451111", "gnorm": "0.687", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.1", "wall": "180686"} [2022-08-03 03:40:39,728][train_inner][INFO] - {"epoch": 11, "update": 10.756, "loss": "2.13", "ppl": "4.38", "wps": "359416", "ups": "3.03", "wpb": "118474", "bsz": "256", "num_updates": "553600", "lr": "0.000450909", "gnorm": "0.684", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "19.9", "wall": "180752"} [2022-08-03 03:41:44,704][train_inner][INFO] - {"epoch": 11, "update": 10.76, "loss": "2.129", "ppl": "4.37", "wps": "365497", "ups": "3.08", "wpb": "118740", "bsz": "256", "num_updates": "553800", "lr": "0.000450707", "gnorm": "0.686", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.5", "wall": "180817"} [2022-08-03 03:42:49,281][train_inner][INFO] - {"epoch": 11, "update": 10.764, "loss": "2.132", "ppl": "4.38", "wps": "364256", "ups": "3.1", "wpb": "117612", "bsz": "256", "num_updates": "554000", "lr": "0.000450505", "gnorm": "0.687", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.9", "wall": "180882"} [2022-08-03 03:43:53,899][train_inner][INFO] - {"epoch": 11, "update": 10.768, "loss": "2.14", "ppl": "4.41", "wps": "365703", "ups": "3.1", "wpb": "118152", "bsz": "256", "num_updates": "554200", "lr": "0.000450303", "gnorm": "0.687", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.3", "wall": "180946"} [2022-08-03 03:44:58,625][train_inner][INFO] - {"epoch": 11, "update": 10.772, "loss": "2.131", "ppl": "4.38", "wps": "364297", "ups": "3.09", "wpb": "117895", "bsz": "256", "num_updates": "554400", "lr": "0.000450101", "gnorm": "0.688", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "25.3", "wall": "181011"} [2022-08-03 03:46:03,197][train_inner][INFO] - {"epoch": 11, "update": 10.776, "loss": "2.132", "ppl": "4.38", "wps": "367664", "ups": "3.1", "wpb": "118702", "bsz": "256", "num_updates": "554600", "lr": "0.000449899", "gnorm": "0.687", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "19.7", "wall": "181076"} [2022-08-03 03:47:07,803][train_inner][INFO] - {"epoch": 11, "update": 10.78, "loss": "2.134", "ppl": "4.39", "wps": "364395", "ups": "3.1", "wpb": "117708", "bsz": "256", "num_updates": "554800", "lr": "0.000449697", "gnorm": "0.69", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "23.4", "wall": "181140"} [2022-08-03 03:48:12,963][train_inner][INFO] - {"epoch": 11, "update": 10.783, "loss": "2.132", "ppl": "4.38", "wps": "365192", "ups": "3.07", "wpb": "118978", "bsz": "256", "num_updates": "555000", "lr": "0.000449495", "gnorm": "0.685", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.1", "wall": "181205"} [2022-08-03 03:48:32,650][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-03 03:49:18,005][train_inner][INFO] - {"epoch": 11, "update": 10.787, "loss": "2.125", "ppl": "4.36", "wps": "365256", "ups": "3.07", "wpb": "118783", "bsz": "256", "num_updates": "555200", "lr": "0.000449293", "gnorm": "0.686", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "181270"} [2022-08-03 03:50:22,555][train_inner][INFO] - {"epoch": 11, "update": 10.791, "loss": "2.125", "ppl": "4.36", "wps": "366407", "ups": "3.1", "wpb": "118258", "bsz": "256", "num_updates": "555400", "lr": "0.000449091", "gnorm": "0.684", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "181335"} [2022-08-03 03:50:40,761][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-03 03:51:27,704][train_inner][INFO] - {"epoch": 11, "update": 10.795, "loss": "2.124", "ppl": "4.36", "wps": "364440", "ups": "3.07", "wpb": "118713", "bsz": "256", "num_updates": "555600", "lr": "0.000448889", "gnorm": "0.683", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.3", "wall": "181400"} [2022-08-03 03:52:32,467][train_inner][INFO] - {"epoch": 11, "update": 10.799, "loss": "2.127", "ppl": "4.37", "wps": "365022", "ups": "3.09", "wpb": "118199", "bsz": "256", "num_updates": "555800", "lr": "0.000448687", "gnorm": "0.687", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.1", "wall": "181465"} [2022-08-03 03:53:37,014][train_inner][INFO] - {"epoch": 11, "update": 10.803, "loss": "2.13", "ppl": "4.38", "wps": "364804", "ups": "3.1", "wpb": "117733", "bsz": "256", "num_updates": "556000", "lr": "0.000448485", "gnorm": "0.688", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23.7", "wall": "181529"} [2022-08-03 03:54:41,933][train_inner][INFO] - {"epoch": 11, "update": 10.807, "loss": "2.129", "ppl": "4.37", "wps": "364574", "ups": "3.08", "wpb": "118337", "bsz": "256", "num_updates": "556200", "lr": "0.000448283", "gnorm": "0.689", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.3", "wall": "181594"} [2022-08-03 03:55:47,177][train_inner][INFO] - {"epoch": 11, "update": 10.811, "loss": "2.122", "ppl": "4.35", "wps": "366015", "ups": "3.07", "wpb": "119399", "bsz": "256", "num_updates": "556400", "lr": "0.000448081", "gnorm": "0.682", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.8", "wall": "181659"} [2022-08-03 03:56:51,633][train_inner][INFO] - {"epoch": 11, "update": 10.815, "loss": "2.133", "ppl": "4.38", "wps": "366320", "ups": "3.1", "wpb": "118057", "bsz": "256", "num_updates": "556600", "lr": "0.000447879", "gnorm": "0.691", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "25.7", "wall": "181724"} [2022-08-03 03:56:59,116][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 [2022-08-03 03:57:04,510][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 [2022-08-03 03:57:57,059][train_inner][INFO] - {"epoch": 11, "update": 10.818, "loss": "2.136", "ppl": "4.4", "wps": "359016", "ups": "3.06", "wpb": "117442", "bsz": "256", "num_updates": "556800", "lr": "0.000447677", "gnorm": "0.706", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "23.3", "wall": "181789"} [2022-08-03 03:59:01,937][train_inner][INFO] - {"epoch": 11, "update": 10.822, "loss": "2.124", "ppl": "4.36", "wps": "365912", "ups": "3.08", "wpb": "118697", "bsz": "256", "num_updates": "557000", "lr": "0.000447475", "gnorm": "0.693", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "24.7", "wall": "181854"} [2022-08-03 04:00:06,568][train_inner][INFO] - {"epoch": 11, "update": 10.826, "loss": "2.139", "ppl": "4.4", "wps": "364635", "ups": "3.09", "wpb": "117833", "bsz": "256", "num_updates": "557200", "lr": "0.000447273", "gnorm": "0.705", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "22.1", "wall": "181919"} [2022-08-03 04:01:11,182][train_inner][INFO] - {"epoch": 11, "update": 10.83, "loss": "2.133", "ppl": "4.39", "wps": "365551", "ups": "3.1", "wpb": "118096", "bsz": "256", "num_updates": "557400", "lr": "0.000447071", "gnorm": "0.689", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.7", "wall": "181984"} [2022-08-03 04:02:16,326][train_inner][INFO] - {"epoch": 11, "update": 10.834, "loss": "2.122", "ppl": "4.35", "wps": "364767", "ups": "3.07", "wpb": "118810", "bsz": "256", "num_updates": "557600", "lr": "0.000446869", "gnorm": "0.684", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.1", "wall": "182049"} [2022-08-03 04:03:21,032][train_inner][INFO] - {"epoch": 11, "update": 10.838, "loss": "2.124", "ppl": "4.36", "wps": "366210", "ups": "3.09", "wpb": "118480", "bsz": "256", "num_updates": "557800", "lr": "0.000446667", "gnorm": "0.686", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "22.5", "wall": "182113"} [2022-08-03 04:04:25,526][train_inner][INFO] - {"epoch": 11, "update": 10.842, "loss": "2.129", "ppl": "4.37", "wps": "366593", "ups": "3.1", "wpb": "118212", "bsz": "256", "num_updates": "558000", "lr": "0.000446465", "gnorm": "0.686", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "22.3", "wall": "182178"} [2022-08-03 04:05:29,707][train_inner][INFO] - {"epoch": 11, "update": 10.846, "loss": "2.134", "ppl": "4.39", "wps": "366844", "ups": "3.12", "wpb": "117721", "bsz": "256", "num_updates": "558200", "lr": "0.000446263", "gnorm": "0.704", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.1", "wall": "182242"} [2022-08-03 04:06:34,949][train_inner][INFO] - {"epoch": 11, "update": 10.849, "loss": "2.129", "ppl": "4.37", "wps": "362233", "ups": "3.07", "wpb": "118162", "bsz": "256", "num_updates": "558400", "lr": "0.000446061", "gnorm": "0.69", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.8", "wall": "182307"} [2022-08-03 04:07:39,723][train_inner][INFO] - {"epoch": 11, "update": 10.853, "loss": "2.125", "ppl": "4.36", "wps": "368354", "ups": "3.09", "wpb": "119298", "bsz": "256", "num_updates": "558600", "lr": "0.000445859", "gnorm": "0.687", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.7", "wall": "182372"} [2022-08-03 04:08:26,130][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 [2022-08-03 04:08:44,625][train_inner][INFO] - {"epoch": 11, "update": 10.857, "loss": "2.133", "ppl": "4.39", "wps": "363129", "ups": "3.08", "wpb": "117838", "bsz": "256", "num_updates": "558800", "lr": "0.000445657", "gnorm": "0.731", "clip": "0.5", "loss_scale": "2", "train_wall": "65", "gb_free": "19.9", "wall": "182437"} [2022-08-03 04:09:49,640][train_inner][INFO] - {"epoch": 11, "update": 10.861, "loss": "2.125", "ppl": "4.36", "wps": "363555", "ups": "3.08", "wpb": "118181", "bsz": "256", "num_updates": "559000", "lr": "0.000445455", "gnorm": "0.688", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.3", "wall": "182502"} [2022-08-03 04:10:54,847][train_inner][INFO] - {"epoch": 11, "update": 10.865, "loss": "2.129", "ppl": "4.37", "wps": "362740", "ups": "3.07", "wpb": "118264", "bsz": "256", "num_updates": "559200", "lr": "0.000445253", "gnorm": "0.69", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "24", "wall": "182567"} [2022-08-03 04:11:59,696][train_inner][INFO] - {"epoch": 11, "update": 10.869, "loss": "2.128", "ppl": "4.37", "wps": "363464", "ups": "3.08", "wpb": "117849", "bsz": "256", "num_updates": "559400", "lr": "0.000445051", "gnorm": "0.691", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.6", "wall": "182632"} [2022-08-03 04:13:04,517][train_inner][INFO] - {"epoch": 11, "update": 10.873, "loss": "2.134", "ppl": "4.39", "wps": "363521", "ups": "3.09", "wpb": "117818", "bsz": "256", "num_updates": "559600", "lr": "0.000444848", "gnorm": "0.693", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "19.8", "wall": "182697"} [2022-08-03 04:14:09,446][train_inner][INFO] - {"epoch": 11, "update": 10.877, "loss": "2.124", "ppl": "4.36", "wps": "366868", "ups": "3.08", "wpb": "119099", "bsz": "256", "num_updates": "559800", "lr": "0.000444646", "gnorm": "0.683", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "23.3", "wall": "182762"} [2022-08-03 04:15:14,675][train_inner][INFO] - {"epoch": 11, "update": 10.881, "loss": "2.132", "ppl": "4.38", "wps": "364204", "ups": "3.07", "wpb": "118782", "bsz": "256", "num_updates": "560000", "lr": "0.000444444", "gnorm": "0.685", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.4", "wall": "182827"} [2022-08-03 04:16:19,685][train_inner][INFO] - {"epoch": 11, "update": 10.884, "loss": "2.124", "ppl": "4.36", "wps": "365236", "ups": "3.08", "wpb": "118718", "bsz": "256", "num_updates": "560200", "lr": "0.000444242", "gnorm": "0.685", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.5", "wall": "182892"} [2022-08-03 04:17:24,354][train_inner][INFO] - {"epoch": 11, "update": 10.888, "loss": "2.122", "ppl": "4.35", "wps": "366942", "ups": "3.09", "wpb": "118647", "bsz": "256", "num_updates": "560400", "lr": "0.00044404", "gnorm": "0.686", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.5", "wall": "182957"} [2022-08-03 04:18:29,301][train_inner][INFO] - {"epoch": 11, "update": 10.892, "loss": "2.129", "ppl": "4.37", "wps": "363130", "ups": "3.08", "wpb": "117920", "bsz": "256", "num_updates": "560600", "lr": "0.000443838", "gnorm": "0.69", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21", "wall": "183022"} [2022-08-03 04:19:34,185][train_inner][INFO] - {"epoch": 11, "update": 10.896, "loss": "2.121", "ppl": "4.35", "wps": "366124", "ups": "3.08", "wpb": "118776", "bsz": "256", "num_updates": "560800", "lr": "0.000443636", "gnorm": "0.685", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.4", "wall": "183087"} [2022-08-03 04:20:38,998][train_inner][INFO] - {"epoch": 11, "update": 10.9, "loss": "2.124", "ppl": "4.36", "wps": "365611", "ups": "3.09", "wpb": "118480", "bsz": "256", "num_updates": "561000", "lr": "0.000443434", "gnorm": "0.688", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "24.2", "wall": "183151"} [2022-08-03 04:21:43,648][train_inner][INFO] - {"epoch": 11, "update": 10.904, "loss": "2.131", "ppl": "4.38", "wps": "366272", "ups": "3.09", "wpb": "118395", "bsz": "256", "num_updates": "561200", "lr": "0.000443232", "gnorm": "0.691", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.8", "wall": "183216"} [2022-08-03 04:22:48,499][train_inner][INFO] - {"epoch": 11, "update": 10.908, "loss": "2.126", "ppl": "4.36", "wps": "364283", "ups": "3.08", "wpb": "118119", "bsz": "256", "num_updates": "561400", "lr": "0.00044303", "gnorm": "0.689", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "23.1", "wall": "183281"} [2022-08-03 04:23:53,339][train_inner][INFO] - {"epoch": 11, "update": 10.912, "loss": "2.128", "ppl": "4.37", "wps": "364680", "ups": "3.08", "wpb": "118227", "bsz": "256", "num_updates": "561600", "lr": "0.000442828", "gnorm": "0.688", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.3", "wall": "183346"} [2022-08-03 04:24:57,982][train_inner][INFO] - {"epoch": 11, "update": 10.916, "loss": "2.126", "ppl": "4.37", "wps": "367978", "ups": "3.09", "wpb": "118935", "bsz": "256", "num_updates": "561800", "lr": "0.000442626", "gnorm": "0.686", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "27.2", "wall": "183410"} [2022-08-03 04:26:02,727][train_inner][INFO] - {"epoch": 11, "update": 10.919, "loss": "2.129", "ppl": "4.37", "wps": "363109", "ups": "3.09", "wpb": "117544", "bsz": "256", "num_updates": "562000", "lr": "0.000442424", "gnorm": "0.691", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22.7", "wall": "183475"} [2022-08-03 04:27:07,768][train_inner][INFO] - {"epoch": 11, "update": 10.923, "loss": "2.126", "ppl": "4.36", "wps": "364285", "ups": "3.08", "wpb": "118466", "bsz": "256", "num_updates": "562200", "lr": "0.000442222", "gnorm": "0.698", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20", "wall": "183540"} [2022-08-03 04:28:12,970][train_inner][INFO] - {"epoch": 11, "update": 10.927, "loss": "2.125", "ppl": "4.36", "wps": "362734", "ups": "3.07", "wpb": "118254", "bsz": "256", "num_updates": "562400", "lr": "0.00044202", "gnorm": "0.688", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.3", "wall": "183605"} [2022-08-03 04:29:17,969][train_inner][INFO] - {"epoch": 11, "update": 10.931, "loss": "2.13", "ppl": "4.38", "wps": "363972", "ups": "3.08", "wpb": "118286", "bsz": "256", "num_updates": "562600", "lr": "0.000441818", "gnorm": "0.688", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "26.6", "wall": "183670"} [2022-08-03 04:30:22,591][train_inner][INFO] - {"epoch": 11, "update": 10.935, "loss": "2.132", "ppl": "4.38", "wps": "366142", "ups": "3.09", "wpb": "118302", "bsz": "256", "num_updates": "562800", "lr": "0.000441616", "gnorm": "0.689", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.7", "wall": "183735"} [2022-08-03 04:31:26,948][train_inner][INFO] - {"epoch": 11, "update": 10.939, "loss": "2.127", "ppl": "4.37", "wps": "366248", "ups": "3.11", "wpb": "117853", "bsz": "256", "num_updates": "563000", "lr": "0.000441414", "gnorm": "0.691", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.1", "wall": "183799"} [2022-08-03 04:32:32,191][train_inner][INFO] - {"epoch": 11, "update": 10.943, "loss": "2.13", "ppl": "4.38", "wps": "361313", "ups": "3.07", "wpb": "117862", "bsz": "256", "num_updates": "563200", "lr": "0.000441212", "gnorm": "0.689", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.8", "wall": "183865"} [2022-08-03 04:33:37,194][train_inner][INFO] - {"epoch": 11, "update": 10.947, "loss": "2.127", "ppl": "4.37", "wps": "363434", "ups": "3.08", "wpb": "118120", "bsz": "256", "num_updates": "563400", "lr": "0.00044101", "gnorm": "0.692", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22", "wall": "183930"} [2022-08-03 04:34:42,145][train_inner][INFO] - {"epoch": 11, "update": 10.951, "loss": "2.129", "ppl": "4.37", "wps": "363632", "ups": "3.08", "wpb": "118090", "bsz": "256", "num_updates": "563600", "lr": "0.000440808", "gnorm": "0.691", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "183994"} [2022-08-03 04:35:46,995][train_inner][INFO] - {"epoch": 11, "update": 10.954, "loss": "2.133", "ppl": "4.39", "wps": "362773", "ups": "3.08", "wpb": "117627", "bsz": "256", "num_updates": "563800", "lr": "0.000440606", "gnorm": "0.69", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.1", "wall": "184059"} [2022-08-03 04:36:52,320][train_inner][INFO] - {"epoch": 11, "update": 10.958, "loss": "2.129", "ppl": "4.37", "wps": "362706", "ups": "3.06", "wpb": "118466", "bsz": "256", "num_updates": "564000", "lr": "0.000440404", "gnorm": "0.689", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.7", "wall": "184125"} [2022-08-03 04:37:57,073][train_inner][INFO] - {"epoch": 11, "update": 10.962, "loss": "2.127", "ppl": "4.37", "wps": "364452", "ups": "3.09", "wpb": "117995", "bsz": "256", "num_updates": "564200", "lr": "0.000440202", "gnorm": "0.693", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.8", "wall": "184189"} [2022-08-03 04:39:02,167][train_inner][INFO] - {"epoch": 11, "update": 10.966, "loss": "2.119", "ppl": "4.34", "wps": "364081", "ups": "3.07", "wpb": "118496", "bsz": "256", "num_updates": "564400", "lr": "0.00044", "gnorm": "0.688", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "184254"} [2022-08-03 04:40:07,204][train_inner][INFO] - {"epoch": 11, "update": 10.97, "loss": "2.129", "ppl": "4.38", "wps": "363384", "ups": "3.08", "wpb": "118166", "bsz": "256", "num_updates": "564600", "lr": "0.000439798", "gnorm": "0.694", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "184320"} [2022-08-03 04:41:12,129][train_inner][INFO] - {"epoch": 11, "update": 10.974, "loss": "2.122", "ppl": "4.35", "wps": "365572", "ups": "3.08", "wpb": "118672", "bsz": "256", "num_updates": "564800", "lr": "0.000439596", "gnorm": "0.69", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.2", "wall": "184384"} [2022-08-03 04:42:16,750][train_inner][INFO] - {"epoch": 11, "update": 10.978, "loss": "2.135", "ppl": "4.39", "wps": "366096", "ups": "3.1", "wpb": "118285", "bsz": "256", "num_updates": "565000", "lr": "0.000439394", "gnorm": "0.701", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.8", "wall": "184449"} [2022-08-03 04:43:21,778][train_inner][INFO] - {"epoch": 11, "update": 10.982, "loss": "2.128", "ppl": "4.37", "wps": "365461", "ups": "3.08", "wpb": "118825", "bsz": "256", "num_updates": "565200", "lr": "0.000439192", "gnorm": "0.689", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.3", "wall": "184514"} [2022-08-03 04:44:26,114][train_inner][INFO] - {"epoch": 11, "update": 10.985, "loss": "2.131", "ppl": "4.38", "wps": "368505", "ups": "3.11", "wpb": "118539", "bsz": "256", "num_updates": "565400", "lr": "0.00043899", "gnorm": "0.689", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "184578"} [2022-08-03 04:45:30,942][train_inner][INFO] - {"epoch": 11, "update": 10.989, "loss": "2.125", "ppl": "4.36", "wps": "366150", "ups": "3.09", "wpb": "118682", "bsz": "256", "num_updates": "565600", "lr": "0.000438788", "gnorm": "0.689", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.4", "wall": "184643"} [2022-08-03 04:46:34,975][train_inner][INFO] - {"epoch": 11, "update": 10.993, "loss": "2.133", "ppl": "4.39", "wps": "367087", "ups": "3.12", "wpb": "117528", "bsz": "256", "num_updates": "565800", "lr": "0.000438586", "gnorm": "0.692", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.5", "wall": "184707"} [2022-08-03 04:47:39,541][train_inner][INFO] - {"epoch": 11, "update": 10.997, "loss": "2.13", "ppl": "4.38", "wps": "366251", "ups": "3.1", "wpb": "118235", "bsz": "256", "num_updates": "566000", "lr": "0.000438384", "gnorm": "0.688", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.9", "wall": "184772"} [2022-08-03 04:48:27,274][fairseq_cli.train][INFO] - end of epoch 11 (average epoch stats below) [2022-08-03 04:48:27,275][train][INFO] - {"epoch": 11, "train_loss": "2.138", "train_ppl": "4.4", "train_wps": "363026", "train_ups": "3.07", "train_wpb": "118298", "train_bsz": "256", "train_num_updates": "566148", "train_lr": "0.000438234", "train_gnorm": "0.685", "train_clip": "0", "train_loss_scale": "16", "train_train_wall": "16606", "train_gb_free": "24.9", "train_wall": "184820"} [2022-08-03 04:48:27,385][fairseq.data.iterators][INFO] - grouped total_num_itrs = 51488 [2022-08-03 04:48:27,388][fairseq.trainer][INFO] - begin training epoch 12 [2022-08-03 04:48:27,388][fairseq_cli.train][INFO] - Start iterating over samples [2022-08-03 04:48:57,497][train_inner][INFO] - {"epoch": 12, "update": 11.001, "loss": "2.122", "ppl": "4.35", "wps": "302727", "ups": "2.57", "wpb": "117995", "bsz": "255.4", "num_updates": "566200", "lr": "0.000438182", "gnorm": "0.69", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "184850"} [2022-08-03 04:50:02,266][train_inner][INFO] - {"epoch": 12, "update": 11.005, "loss": "2.121", "ppl": "4.35", "wps": "365302", "ups": "3.09", "wpb": "118300", "bsz": "256", "num_updates": "566400", "lr": "0.00043798", "gnorm": "0.688", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.5", "wall": "184915"} [2022-08-03 04:51:07,076][train_inner][INFO] - {"epoch": 12, "update": 11.009, "loss": "2.123", "ppl": "4.36", "wps": "364022", "ups": "3.09", "wpb": "117960", "bsz": "256", "num_updates": "566600", "lr": "0.000437778", "gnorm": "0.69", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.7", "wall": "184979"} [2022-08-03 04:52:11,837][train_inner][INFO] - {"epoch": 12, "update": 11.013, "loss": "2.123", "ppl": "4.36", "wps": "365374", "ups": "3.09", "wpb": "118308", "bsz": "256", "num_updates": "566800", "lr": "0.000437576", "gnorm": "0.691", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.6", "wall": "185044"} [2022-08-03 04:53:16,485][train_inner][INFO] - {"epoch": 12, "update": 11.017, "loss": "2.117", "ppl": "4.34", "wps": "366503", "ups": "3.09", "wpb": "118466", "bsz": "256", "num_updates": "567000", "lr": "0.000437374", "gnorm": "0.688", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "23.2", "wall": "185109"} [2022-08-03 04:54:21,470][train_inner][INFO] - {"epoch": 12, "update": 11.02, "loss": "2.124", "ppl": "4.36", "wps": "363847", "ups": "3.08", "wpb": "118221", "bsz": "256", "num_updates": "567200", "lr": "0.000437172", "gnorm": "0.692", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "21.7", "wall": "185174"} [2022-08-03 04:54:59,633][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-03 04:55:26,531][train_inner][INFO] - {"epoch": 12, "update": 11.024, "loss": "2.121", "ppl": "4.35", "wps": "362485", "ups": "3.07", "wpb": "117916", "bsz": "256", "num_updates": "567400", "lr": "0.00043697", "gnorm": "0.693", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.5", "wall": "185239"} [2022-08-03 04:56:31,252][train_inner][INFO] - {"epoch": 12, "update": 11.028, "loss": "2.122", "ppl": "4.35", "wps": "364839", "ups": "3.09", "wpb": "118063", "bsz": "256", "num_updates": "567600", "lr": "0.000436768", "gnorm": "0.691", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "185304"} [2022-08-03 04:57:35,799][train_inner][INFO] - {"epoch": 12, "update": 11.032, "loss": "2.124", "ppl": "4.36", "wps": "364526", "ups": "3.1", "wpb": "117645", "bsz": "256", "num_updates": "567800", "lr": "0.000436566", "gnorm": "0.694", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.3", "wall": "185368"} [2022-08-03 04:58:40,698][train_inner][INFO] - {"epoch": 12, "update": 11.036, "loss": "2.125", "ppl": "4.36", "wps": "364198", "ups": "3.08", "wpb": "118179", "bsz": "256", "num_updates": "568000", "lr": "0.000436364", "gnorm": "0.692", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.1", "wall": "185433"} [2022-08-03 04:59:45,572][train_inner][INFO] - {"epoch": 12, "update": 11.04, "loss": "2.124", "ppl": "4.36", "wps": "364173", "ups": "3.08", "wpb": "118124", "bsz": "256", "num_updates": "568200", "lr": "0.000436162", "gnorm": "0.691", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.1", "wall": "185498"} [2022-08-03 05:00:50,444][train_inner][INFO] - {"epoch": 12, "update": 11.044, "loss": "2.126", "ppl": "4.37", "wps": "364403", "ups": "3.08", "wpb": "118196", "bsz": "256", "num_updates": "568400", "lr": "0.00043596", "gnorm": "0.695", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.4", "wall": "185563"} [2022-08-03 05:01:55,268][train_inner][INFO] - {"epoch": 12, "update": 11.048, "loss": "2.123", "ppl": "4.35", "wps": "366074", "ups": "3.09", "wpb": "118651", "bsz": "256", "num_updates": "568600", "lr": "0.000435758", "gnorm": "0.69", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.1", "wall": "185628"} [2022-08-03 05:03:00,427][train_inner][INFO] - {"epoch": 12, "update": 11.052, "loss": "2.12", "ppl": "4.35", "wps": "361981", "ups": "3.07", "wpb": "117929", "bsz": "256", "num_updates": "568800", "lr": "0.000435556", "gnorm": "0.693", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.6", "wall": "185693"} [2022-08-03 05:04:05,635][train_inner][INFO] - {"epoch": 12, "update": 11.055, "loss": "2.113", "ppl": "4.33", "wps": "362087", "ups": "3.07", "wpb": "118054", "bsz": "256", "num_updates": "569000", "lr": "0.000435354", "gnorm": "0.691", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.1", "wall": "185758"} [2022-08-03 05:05:11,712][train_inner][INFO] - {"epoch": 12, "update": 11.059, "loss": "2.121", "ppl": "4.35", "wps": "358335", "ups": "3.03", "wpb": "118386", "bsz": "256", "num_updates": "569200", "lr": "0.000435152", "gnorm": "0.69", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "28.2", "wall": "185824"} [2022-08-03 05:06:16,243][train_inner][INFO] - {"epoch": 12, "update": 11.063, "loss": "2.125", "ppl": "4.36", "wps": "367455", "ups": "3.1", "wpb": "118559", "bsz": "256", "num_updates": "569400", "lr": "0.000434949", "gnorm": "0.689", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "19.9", "wall": "185889"} [2022-08-03 05:07:21,086][train_inner][INFO] - {"epoch": 12, "update": 11.067, "loss": "2.118", "ppl": "4.34", "wps": "365976", "ups": "3.08", "wpb": "118653", "bsz": "256", "num_updates": "569600", "lr": "0.000434747", "gnorm": "0.688", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "19.8", "wall": "185953"} [2022-08-03 05:08:25,833][train_inner][INFO] - {"epoch": 12, "update": 11.071, "loss": "2.123", "ppl": "4.35", "wps": "366418", "ups": "3.09", "wpb": "118620", "bsz": "256", "num_updates": "569800", "lr": "0.000434545", "gnorm": "0.69", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "19.7", "wall": "186018"} [2022-08-03 05:09:31,073][train_inner][INFO] - {"epoch": 12, "update": 11.075, "loss": "2.121", "ppl": "4.35", "wps": "364234", "ups": "3.07", "wpb": "118810", "bsz": "256", "num_updates": "570000", "lr": "0.000434343", "gnorm": "0.689", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.1", "wall": "186083"} [2022-08-03 05:10:35,997][train_inner][INFO] - {"epoch": 12, "update": 11.079, "loss": "2.122", "ppl": "4.35", "wps": "364692", "ups": "3.08", "wpb": "118385", "bsz": "256", "num_updates": "570200", "lr": "0.000434141", "gnorm": "0.69", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "25.4", "wall": "186148"} [2022-08-03 05:11:41,355][train_inner][INFO] - {"epoch": 12, "update": 11.083, "loss": "2.117", "ppl": "4.34", "wps": "362993", "ups": "3.06", "wpb": "118622", "bsz": "256", "num_updates": "570400", "lr": "0.000433939", "gnorm": "0.689", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.9", "wall": "186214"} [2022-08-03 05:12:46,068][train_inner][INFO] - {"epoch": 12, "update": 11.086, "loss": "2.124", "ppl": "4.36", "wps": "364966", "ups": "3.09", "wpb": "118088", "bsz": "256", "num_updates": "570600", "lr": "0.000433737", "gnorm": "0.693", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20.7", "wall": "186278"} [2022-08-03 05:13:51,278][train_inner][INFO] - {"epoch": 12, "update": 11.09, "loss": "2.119", "ppl": "4.35", "wps": "363290", "ups": "3.07", "wpb": "118450", "bsz": "256", "num_updates": "570800", "lr": "0.000433535", "gnorm": "0.692", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "28.2", "wall": "186344"} [2022-08-03 05:14:56,104][train_inner][INFO] - {"epoch": 12, "update": 11.094, "loss": "2.122", "ppl": "4.35", "wps": "361918", "ups": "3.09", "wpb": "117306", "bsz": "256", "num_updates": "571000", "lr": "0.000433333", "gnorm": "0.693", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20.1", "wall": "186408"} [2022-08-03 05:16:01,138][train_inner][INFO] - {"epoch": 12, "update": 11.098, "loss": "2.122", "ppl": "4.35", "wps": "365023", "ups": "3.08", "wpb": "118692", "bsz": "256", "num_updates": "571200", "lr": "0.000433131", "gnorm": "0.694", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "21.6", "wall": "186473"} [2022-08-03 05:17:05,705][train_inner][INFO] - {"epoch": 12, "update": 11.102, "loss": "2.124", "ppl": "4.36", "wps": "367003", "ups": "3.1", "wpb": "118480", "bsz": "256", "num_updates": "571400", "lr": "0.000432929", "gnorm": "0.691", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "23.6", "wall": "186538"} [2022-08-03 05:17:11,556][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 [2022-08-03 05:18:11,068][train_inner][INFO] - {"epoch": 12, "update": 11.106, "loss": "2.117", "ppl": "4.34", "wps": "362666", "ups": "3.06", "wpb": "118522", "bsz": "256", "num_updates": "571600", "lr": "0.000432727", "gnorm": "0.691", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "21.2", "wall": "186603"} [2022-08-03 05:18:41,876][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-03 05:19:16,341][train_inner][INFO] - {"epoch": 12, "update": 11.11, "loss": "2.119", "ppl": "4.35", "wps": "363387", "ups": "3.06", "wpb": "118596", "bsz": "256", "num_updates": "571800", "lr": "0.000432525", "gnorm": "0.692", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "186669"} [2022-08-03 05:20:21,643][train_inner][INFO] - {"epoch": 12, "update": 11.114, "loss": "2.118", "ppl": "4.34", "wps": "363815", "ups": "3.06", "wpb": "118790", "bsz": "256", "num_updates": "572000", "lr": "0.000432323", "gnorm": "0.69", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.7", "wall": "186734"} [2022-08-03 05:21:26,767][train_inner][INFO] - {"epoch": 12, "update": 11.118, "loss": "2.117", "ppl": "4.34", "wps": "362103", "ups": "3.07", "wpb": "117906", "bsz": "256", "num_updates": "572200", "lr": "0.000432121", "gnorm": "0.692", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22", "wall": "186799"} [2022-08-03 05:22:06,342][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-03 05:22:32,040][train_inner][INFO] - {"epoch": 12, "update": 11.122, "loss": "2.115", "ppl": "4.33", "wps": "363693", "ups": "3.06", "wpb": "118694", "bsz": "256", "num_updates": "572400", "lr": "0.000431919", "gnorm": "0.694", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "186864"} [2022-08-03 05:23:36,852][train_inner][INFO] - {"epoch": 12, "update": 11.125, "loss": "2.121", "ppl": "4.35", "wps": "364844", "ups": "3.09", "wpb": "118231", "bsz": "256", "num_updates": "572600", "lr": "0.000431717", "gnorm": "0.692", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.7", "wall": "186929"} [2022-08-03 05:24:41,937][train_inner][INFO] - {"epoch": 12, "update": 11.129, "loss": "2.131", "ppl": "4.38", "wps": "364950", "ups": "3.07", "wpb": "118762", "bsz": "256", "num_updates": "572800", "lr": "0.000431515", "gnorm": "0.693", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.5", "wall": "186994"} [2022-08-03 05:25:46,864][train_inner][INFO] - {"epoch": 12, "update": 11.133, "loss": "2.121", "ppl": "4.35", "wps": "365578", "ups": "3.08", "wpb": "118677", "bsz": "256", "num_updates": "573000", "lr": "0.000431313", "gnorm": "0.694", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "187059"} [2022-08-03 05:26:51,816][train_inner][INFO] - {"epoch": 12, "update": 11.137, "loss": "2.117", "ppl": "4.34", "wps": "365679", "ups": "3.08", "wpb": "118756", "bsz": "256", "num_updates": "573200", "lr": "0.000431111", "gnorm": "0.69", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "187124"} [2022-08-03 05:27:56,911][train_inner][INFO] - {"epoch": 12, "update": 11.141, "loss": "2.121", "ppl": "4.35", "wps": "364087", "ups": "3.07", "wpb": "118499", "bsz": "256", "num_updates": "573400", "lr": "0.000430909", "gnorm": "0.694", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.7", "wall": "187189"} [2022-08-03 05:29:01,581][train_inner][INFO] - {"epoch": 12, "update": 11.145, "loss": "2.114", "ppl": "4.33", "wps": "364774", "ups": "3.09", "wpb": "117949", "bsz": "256", "num_updates": "573600", "lr": "0.000430707", "gnorm": "0.694", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "24", "wall": "187254"} [2022-08-03 05:30:06,563][train_inner][INFO] - {"epoch": 12, "update": 11.149, "loss": "2.122", "ppl": "4.35", "wps": "365126", "ups": "3.08", "wpb": "118630", "bsz": "256", "num_updates": "573800", "lr": "0.000430505", "gnorm": "0.697", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22", "wall": "187319"} [2022-08-03 05:31:11,527][train_inner][INFO] - {"epoch": 12, "update": 11.153, "loss": "2.117", "ppl": "4.34", "wps": "364299", "ups": "3.08", "wpb": "118330", "bsz": "256", "num_updates": "574000", "lr": "0.000430303", "gnorm": "0.69", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24", "wall": "187384"} [2022-08-03 05:32:16,720][train_inner][INFO] - {"epoch": 12, "update": 11.156, "loss": "2.114", "ppl": "4.33", "wps": "362662", "ups": "3.07", "wpb": "118213", "bsz": "256", "num_updates": "574200", "lr": "0.000430101", "gnorm": "0.69", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "187449"} [2022-08-03 05:33:21,534][train_inner][INFO] - {"epoch": 12, "update": 11.16, "loss": "2.115", "ppl": "4.33", "wps": "365086", "ups": "3.09", "wpb": "118311", "bsz": "256", "num_updates": "574400", "lr": "0.000429899", "gnorm": "0.693", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.5", "wall": "187514"} [2022-08-03 05:34:26,703][train_inner][INFO] - {"epoch": 12, "update": 11.164, "loss": "2.118", "ppl": "4.34", "wps": "363755", "ups": "3.07", "wpb": "118526", "bsz": "256", "num_updates": "574600", "lr": "0.000429697", "gnorm": "0.691", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.3", "wall": "187579"} [2022-08-03 05:35:07,718][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-03 05:35:18,690][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 [2022-08-03 05:35:32,346][train_inner][INFO] - {"epoch": 12, "update": 11.168, "loss": "2.12", "ppl": "4.35", "wps": "359688", "ups": "3.05", "wpb": "118053", "bsz": "256", "num_updates": "574800", "lr": "0.000429495", "gnorm": "0.691", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.1", "wall": "187645"} [2022-08-03 05:36:38,042][train_inner][INFO] - {"epoch": 12, "update": 11.172, "loss": "2.125", "ppl": "4.36", "wps": "358623", "ups": "3.04", "wpb": "117799", "bsz": "256", "num_updates": "575000", "lr": "0.000429293", "gnorm": "0.697", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21", "wall": "187710"} [2022-08-03 05:37:43,027][train_inner][INFO] - {"epoch": 12, "update": 11.176, "loss": "2.113", "ppl": "4.33", "wps": "366063", "ups": "3.08", "wpb": "118941", "bsz": "256", "num_updates": "575200", "lr": "0.000429091", "gnorm": "0.691", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23", "wall": "187775"} [2022-08-03 05:38:47,758][train_inner][INFO] - {"epoch": 12, "update": 11.18, "loss": "2.114", "ppl": "4.33", "wps": "366298", "ups": "3.09", "wpb": "118553", "bsz": "256", "num_updates": "575400", "lr": "0.000428889", "gnorm": "0.69", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21", "wall": "187840"} [2022-08-03 05:39:52,526][train_inner][INFO] - {"epoch": 12, "update": 11.184, "loss": "2.113", "ppl": "4.33", "wps": "366255", "ups": "3.09", "wpb": "118607", "bsz": "256", "num_updates": "575600", "lr": "0.000428687", "gnorm": "0.69", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.2", "wall": "187905"} [2022-08-03 05:40:57,556][train_inner][INFO] - {"epoch": 12, "update": 11.188, "loss": "2.116", "ppl": "4.34", "wps": "363211", "ups": "3.08", "wpb": "118097", "bsz": "256", "num_updates": "575800", "lr": "0.000428485", "gnorm": "0.693", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.6", "wall": "187970"} [2022-08-03 05:42:02,706][train_inner][INFO] - {"epoch": 12, "update": 11.191, "loss": "2.115", "ppl": "4.33", "wps": "364116", "ups": "3.07", "wpb": "118608", "bsz": "256", "num_updates": "576000", "lr": "0.000428283", "gnorm": "0.693", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23", "wall": "188035"} [2022-08-03 05:43:07,305][train_inner][INFO] - {"epoch": 12, "update": 11.195, "loss": "2.118", "ppl": "4.34", "wps": "367372", "ups": "3.1", "wpb": "118659", "bsz": "256", "num_updates": "576200", "lr": "0.000428081", "gnorm": "0.692", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22", "wall": "188100"} [2022-08-03 05:44:12,196][train_inner][INFO] - {"epoch": 12, "update": 11.199, "loss": "2.112", "ppl": "4.32", "wps": "365251", "ups": "3.08", "wpb": "118506", "bsz": "256", "num_updates": "576400", "lr": "0.000427879", "gnorm": "0.691", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "24.4", "wall": "188165"} [2022-08-03 05:45:17,482][train_inner][INFO] - {"epoch": 12, "update": 11.203, "loss": "2.122", "ppl": "4.35", "wps": "362925", "ups": "3.06", "wpb": "118466", "bsz": "256", "num_updates": "576600", "lr": "0.000427677", "gnorm": "0.695", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "25.8", "wall": "188230"} [2022-08-03 05:46:21,919][train_inner][INFO] - {"epoch": 12, "update": 11.207, "loss": "2.119", "ppl": "4.34", "wps": "368321", "ups": "3.1", "wpb": "118666", "bsz": "256", "num_updates": "576800", "lr": "0.000427475", "gnorm": "0.692", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.7", "wall": "188294"} [2022-08-03 05:47:27,062][train_inner][INFO] - {"epoch": 12, "update": 11.211, "loss": "2.119", "ppl": "4.34", "wps": "362372", "ups": "3.07", "wpb": "118029", "bsz": "256", "num_updates": "577000", "lr": "0.000427273", "gnorm": "0.696", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "188359"} [2022-08-03 05:48:05,347][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 [2022-08-03 05:48:32,318][train_inner][INFO] - {"epoch": 12, "update": 11.215, "loss": "2.119", "ppl": "4.34", "wps": "364553", "ups": "3.06", "wpb": "118944", "bsz": "256", "num_updates": "577200", "lr": "0.000427071", "gnorm": "0.697", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.8", "wall": "188425"} [2022-08-03 05:49:37,149][train_inner][INFO] - {"epoch": 12, "update": 11.219, "loss": "2.126", "ppl": "4.36", "wps": "362841", "ups": "3.08", "wpb": "117615", "bsz": "256", "num_updates": "577400", "lr": "0.000426869", "gnorm": "0.702", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.2", "wall": "188489"} [2022-08-03 05:50:41,677][train_inner][INFO] - {"epoch": 12, "update": 11.223, "loss": "2.115", "ppl": "4.33", "wps": "367387", "ups": "3.1", "wpb": "118532", "bsz": "256", "num_updates": "577600", "lr": "0.000426667", "gnorm": "0.694", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.2", "wall": "188554"} [2022-08-03 05:51:46,427][train_inner][INFO] - {"epoch": 12, "update": 11.226, "loss": "2.119", "ppl": "4.34", "wps": "365516", "ups": "3.09", "wpb": "118335", "bsz": "256", "num_updates": "577800", "lr": "0.000426465", "gnorm": "0.694", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21", "wall": "188619"} [2022-08-03 05:52:51,301][train_inner][INFO] - {"epoch": 12, "update": 11.23, "loss": "2.124", "ppl": "4.36", "wps": "364106", "ups": "3.08", "wpb": "118103", "bsz": "256", "num_updates": "578000", "lr": "0.000426263", "gnorm": "0.695", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.4", "wall": "188684"} [2022-08-03 05:53:56,239][train_inner][INFO] - {"epoch": 12, "update": 11.234, "loss": "2.121", "ppl": "4.35", "wps": "362058", "ups": "3.08", "wpb": "117554", "bsz": "256", "num_updates": "578200", "lr": "0.000426061", "gnorm": "0.701", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.6", "wall": "188749"} [2022-08-03 05:55:00,831][train_inner][INFO] - {"epoch": 12, "update": 11.238, "loss": "2.115", "ppl": "4.33", "wps": "368550", "ups": "3.1", "wpb": "119026", "bsz": "256", "num_updates": "578400", "lr": "0.000425859", "gnorm": "0.693", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.7", "wall": "188813"} [2022-08-03 05:56:05,775][train_inner][INFO] - {"epoch": 12, "update": 11.242, "loss": "2.123", "ppl": "4.36", "wps": "363244", "ups": "3.08", "wpb": "117951", "bsz": "256", "num_updates": "578600", "lr": "0.000425657", "gnorm": "0.695", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.2", "wall": "188878"} [2022-08-03 05:57:10,578][train_inner][INFO] - {"epoch": 12, "update": 11.246, "loss": "2.116", "ppl": "4.33", "wps": "365869", "ups": "3.09", "wpb": "118545", "bsz": "256", "num_updates": "578800", "lr": "0.000425455", "gnorm": "0.692", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21", "wall": "188943"} [2022-08-03 05:58:15,356][train_inner][INFO] - {"epoch": 12, "update": 11.25, "loss": "2.115", "ppl": "4.33", "wps": "365556", "ups": "3.09", "wpb": "118398", "bsz": "256", "num_updates": "579000", "lr": "0.000425253", "gnorm": "0.696", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22", "wall": "189008"} [2022-08-03 05:59:20,333][train_inner][INFO] - {"epoch": 12, "update": 11.254, "loss": "2.119", "ppl": "4.34", "wps": "362785", "ups": "3.08", "wpb": "117862", "bsz": "256", "num_updates": "579200", "lr": "0.000425051", "gnorm": "0.697", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "189073"} [2022-08-03 06:00:25,400][train_inner][INFO] - {"epoch": 12, "update": 11.258, "loss": "2.114", "ppl": "4.33", "wps": "365133", "ups": "3.07", "wpb": "118789", "bsz": "256", "num_updates": "579400", "lr": "0.000424848", "gnorm": "0.694", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.7", "wall": "189138"} [2022-08-03 06:01:30,086][train_inner][INFO] - {"epoch": 12, "update": 11.261, "loss": "2.12", "ppl": "4.35", "wps": "364848", "ups": "3.09", "wpb": "118001", "bsz": "256", "num_updates": "579600", "lr": "0.000424646", "gnorm": "0.699", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.8", "wall": "189202"} [2022-08-03 06:02:34,847][train_inner][INFO] - {"epoch": 12, "update": 11.265, "loss": "2.118", "ppl": "4.34", "wps": "366129", "ups": "3.09", "wpb": "118552", "bsz": "256", "num_updates": "579800", "lr": "0.000424444", "gnorm": "0.696", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "27", "wall": "189267"} [2022-08-03 06:03:39,599][train_inner][INFO] - {"epoch": 12, "update": 11.269, "loss": "2.114", "ppl": "4.33", "wps": "364616", "ups": "3.09", "wpb": "118047", "bsz": "255.9", "num_updates": "580000", "lr": "0.000424242", "gnorm": "0.695", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.2", "wall": "189332"} [2022-08-03 06:04:44,252][train_inner][INFO] - {"epoch": 12, "update": 11.273, "loss": "2.119", "ppl": "4.34", "wps": "365978", "ups": "3.09", "wpb": "118306", "bsz": "256", "num_updates": "580200", "lr": "0.00042404", "gnorm": "0.701", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.8", "wall": "189397"} [2022-08-03 06:05:49,459][train_inner][INFO] - {"epoch": 12, "update": 11.277, "loss": "2.119", "ppl": "4.34", "wps": "360703", "ups": "3.07", "wpb": "117601", "bsz": "256", "num_updates": "580400", "lr": "0.000423838", "gnorm": "0.698", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.3", "wall": "189462"} [2022-08-03 06:06:54,140][train_inner][INFO] - {"epoch": 12, "update": 11.281, "loss": "2.118", "ppl": "4.34", "wps": "364271", "ups": "3.09", "wpb": "117805", "bsz": "256", "num_updates": "580600", "lr": "0.000423636", "gnorm": "0.697", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20", "wall": "189526"} [2022-08-03 06:07:59,268][train_inner][INFO] - {"epoch": 12, "update": 11.285, "loss": "2.116", "ppl": "4.33", "wps": "363094", "ups": "3.07", "wpb": "118236", "bsz": "256", "num_updates": "580800", "lr": "0.000423434", "gnorm": "0.695", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.3", "wall": "189592"} [2022-08-03 06:09:04,395][train_inner][INFO] - {"epoch": 12, "update": 11.289, "loss": "2.112", "ppl": "4.32", "wps": "364243", "ups": "3.07", "wpb": "118609", "bsz": "256", "num_updates": "581000", "lr": "0.000423232", "gnorm": "0.694", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.2", "wall": "189657"} [2022-08-03 06:10:09,126][train_inner][INFO] - {"epoch": 12, "update": 11.292, "loss": "2.109", "ppl": "4.31", "wps": "368112", "ups": "3.09", "wpb": "119139", "bsz": "256", "num_updates": "581200", "lr": "0.00042303", "gnorm": "0.693", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.8", "wall": "189721"} [2022-08-03 06:11:14,257][train_inner][INFO] - {"epoch": 12, "update": 11.296, "loss": "2.114", "ppl": "4.33", "wps": "360916", "ups": "3.07", "wpb": "117533", "bsz": "256", "num_updates": "581400", "lr": "0.000422828", "gnorm": "0.699", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.5", "wall": "189787"} [2022-08-03 06:12:19,043][train_inner][INFO] - {"epoch": 12, "update": 11.3, "loss": "2.113", "ppl": "4.33", "wps": "365508", "ups": "3.09", "wpb": "118397", "bsz": "256", "num_updates": "581600", "lr": "0.000422626", "gnorm": "0.694", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "189851"} [2022-08-03 06:13:24,315][train_inner][INFO] - {"epoch": 12, "update": 11.304, "loss": "2.119", "ppl": "4.34", "wps": "361781", "ups": "3.06", "wpb": "118069", "bsz": "256", "num_updates": "581800", "lr": "0.000422424", "gnorm": "0.698", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "189917"} [2022-08-03 06:14:28,952][train_inner][INFO] - {"epoch": 12, "update": 11.308, "loss": "2.114", "ppl": "4.33", "wps": "366169", "ups": "3.09", "wpb": "118338", "bsz": "256", "num_updates": "582000", "lr": "0.000422222", "gnorm": "0.697", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.1", "wall": "189981"} [2022-08-03 06:15:34,016][train_inner][INFO] - {"epoch": 12, "update": 11.312, "loss": "2.109", "ppl": "4.31", "wps": "363878", "ups": "3.07", "wpb": "118376", "bsz": "256", "num_updates": "582200", "lr": "0.00042202", "gnorm": "0.694", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "190046"} [2022-08-03 06:16:38,911][train_inner][INFO] - {"epoch": 12, "update": 11.316, "loss": "2.116", "ppl": "4.33", "wps": "364079", "ups": "3.08", "wpb": "118132", "bsz": "256", "num_updates": "582400", "lr": "0.000421818", "gnorm": "0.694", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.7", "wall": "190111"} [2022-08-03 06:17:43,489][train_inner][INFO] - {"epoch": 12, "update": 11.32, "loss": "2.11", "ppl": "4.32", "wps": "366178", "ups": "3.1", "wpb": "118233", "bsz": "256", "num_updates": "582600", "lr": "0.000421616", "gnorm": "0.696", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.5", "wall": "190176"} [2022-08-03 06:18:48,064][train_inner][INFO] - {"epoch": 12, "update": 11.324, "loss": "2.117", "ppl": "4.34", "wps": "365868", "ups": "3.1", "wpb": "118128", "bsz": "256", "num_updates": "582800", "lr": "0.000421414", "gnorm": "0.699", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.4", "wall": "190240"} [2022-08-03 06:19:53,037][train_inner][INFO] - {"epoch": 12, "update": 11.327, "loss": "2.112", "ppl": "4.32", "wps": "364275", "ups": "3.08", "wpb": "118338", "bsz": "256", "num_updates": "583000", "lr": "0.000421212", "gnorm": "0.701", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.4", "wall": "190305"} [2022-08-03 06:20:57,819][train_inner][INFO] - {"epoch": 12, "update": 11.331, "loss": "2.118", "ppl": "4.34", "wps": "364968", "ups": "3.09", "wpb": "118215", "bsz": "256", "num_updates": "583200", "lr": "0.00042101", "gnorm": "0.698", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "25.4", "wall": "190370"} [2022-08-03 06:22:03,189][train_inner][INFO] - {"epoch": 12, "update": 11.335, "loss": "2.112", "ppl": "4.32", "wps": "362745", "ups": "3.06", "wpb": "118561", "bsz": "256", "num_updates": "583400", "lr": "0.000420808", "gnorm": "0.695", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.5", "wall": "190436"} [2022-08-03 06:23:08,225][train_inner][INFO] - {"epoch": 12, "update": 11.339, "loss": "2.117", "ppl": "4.34", "wps": "363823", "ups": "3.08", "wpb": "118306", "bsz": "256", "num_updates": "583600", "lr": "0.000420606", "gnorm": "0.695", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "24.6", "wall": "190501"} [2022-08-03 06:24:12,829][train_inner][INFO] - {"epoch": 12, "update": 11.343, "loss": "2.114", "ppl": "4.33", "wps": "366719", "ups": "3.1", "wpb": "118456", "bsz": "256", "num_updates": "583800", "lr": "0.000420404", "gnorm": "0.695", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "21.8", "wall": "190565"} [2022-08-03 06:25:17,601][train_inner][INFO] - {"epoch": 12, "update": 11.347, "loss": "2.115", "ppl": "4.33", "wps": "363947", "ups": "3.09", "wpb": "117865", "bsz": "256", "num_updates": "584000", "lr": "0.000420202", "gnorm": "0.7", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "24.7", "wall": "190630"} [2022-08-03 06:26:22,444][train_inner][INFO] - {"epoch": 12, "update": 11.351, "loss": "2.115", "ppl": "4.33", "wps": "363970", "ups": "3.08", "wpb": "118004", "bsz": "256", "num_updates": "584200", "lr": "0.00042", "gnorm": "0.695", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "21.5", "wall": "190695"} [2022-08-03 06:26:43,178][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-03 06:27:27,638][train_inner][INFO] - {"epoch": 12, "update": 11.355, "loss": "2.116", "ppl": "4.33", "wps": "362795", "ups": "3.07", "wpb": "118258", "bsz": "256", "num_updates": "584400", "lr": "0.000419798", "gnorm": "0.698", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "25.6", "wall": "190760"} [2022-08-03 06:28:32,477][train_inner][INFO] - {"epoch": 12, "update": 11.359, "loss": "2.113", "ppl": "4.33", "wps": "365970", "ups": "3.08", "wpb": "118644", "bsz": "256", "num_updates": "584600", "lr": "0.000419596", "gnorm": "0.694", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.1", "wall": "190825"} [2022-08-03 06:29:37,443][train_inner][INFO] - {"epoch": 12, "update": 11.362, "loss": "2.112", "ppl": "4.32", "wps": "362450", "ups": "3.08", "wpb": "117733", "bsz": "256", "num_updates": "584800", "lr": "0.000419394", "gnorm": "0.697", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "190890"} [2022-08-03 06:30:42,722][train_inner][INFO] - {"epoch": 12, "update": 11.366, "loss": "2.112", "ppl": "4.32", "wps": "362929", "ups": "3.06", "wpb": "118457", "bsz": "256", "num_updates": "585000", "lr": "0.000419192", "gnorm": "0.694", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.3", "wall": "190955"} [2022-08-03 06:31:47,303][train_inner][INFO] - {"epoch": 12, "update": 11.37, "loss": "2.116", "ppl": "4.33", "wps": "366281", "ups": "3.1", "wpb": "118272", "bsz": "256", "num_updates": "585200", "lr": "0.00041899", "gnorm": "0.698", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.9", "wall": "191020"} [2022-08-03 06:32:52,100][train_inner][INFO] - {"epoch": 12, "update": 11.374, "loss": "2.113", "ppl": "4.33", "wps": "364777", "ups": "3.09", "wpb": "118182", "bsz": "256", "num_updates": "585400", "lr": "0.000418788", "gnorm": "0.696", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.8", "wall": "191084"} [2022-08-03 06:33:56,734][train_inner][INFO] - {"epoch": 12, "update": 11.378, "loss": "2.114", "ppl": "4.33", "wps": "365288", "ups": "3.09", "wpb": "118048", "bsz": "256", "num_updates": "585600", "lr": "0.000418586", "gnorm": "0.696", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.7", "wall": "191149"} [2022-08-03 06:35:01,285][train_inner][INFO] - {"epoch": 12, "update": 11.382, "loss": "2.108", "ppl": "4.31", "wps": "368241", "ups": "3.1", "wpb": "118851", "bsz": "256", "num_updates": "585800", "lr": "0.000418384", "gnorm": "0.694", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "191214"} [2022-08-03 06:36:05,831][train_inner][INFO] - {"epoch": 12, "update": 11.386, "loss": "2.116", "ppl": "4.34", "wps": "364629", "ups": "3.1", "wpb": "117674", "bsz": "256", "num_updates": "586000", "lr": "0.000418182", "gnorm": "0.705", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.3", "wall": "191278"} [2022-08-03 06:37:10,929][train_inner][INFO] - {"epoch": 12, "update": 11.39, "loss": "2.107", "ppl": "4.31", "wps": "363735", "ups": "3.07", "wpb": "118389", "bsz": "256", "num_updates": "586200", "lr": "0.00041798", "gnorm": "0.695", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "191343"} [2022-08-03 06:38:16,932][train_inner][INFO] - {"epoch": 12, "update": 11.393, "loss": "2.108", "ppl": "4.31", "wps": "358691", "ups": "3.03", "wpb": "118372", "bsz": "256", "num_updates": "586400", "lr": "0.000417778", "gnorm": "0.696", "clip": "0", "loss_scale": "32", "train_wall": "66", "gb_free": "22.5", "wall": "191409"} [2022-08-03 06:39:21,622][train_inner][INFO] - {"epoch": 12, "update": 11.397, "loss": "2.114", "ppl": "4.33", "wps": "364190", "ups": "3.09", "wpb": "117795", "bsz": "256", "num_updates": "586600", "lr": "0.000417576", "gnorm": "0.696", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20.3", "wall": "191474"} [2022-08-03 06:40:26,388][train_inner][INFO] - {"epoch": 12, "update": 11.401, "loss": "2.108", "ppl": "4.31", "wps": "365081", "ups": "3.09", "wpb": "118223", "bsz": "256", "num_updates": "586800", "lr": "0.000417374", "gnorm": "0.698", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20.3", "wall": "191539"} [2022-08-03 06:41:31,233][train_inner][INFO] - {"epoch": 12, "update": 11.405, "loss": "2.111", "ppl": "4.32", "wps": "364892", "ups": "3.08", "wpb": "118305", "bsz": "256", "num_updates": "587000", "lr": "0.000417172", "gnorm": "0.696", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20.8", "wall": "191604"} [2022-08-03 06:42:35,920][train_inner][INFO] - {"epoch": 12, "update": 11.409, "loss": "2.117", "ppl": "4.34", "wps": "366084", "ups": "3.09", "wpb": "118403", "bsz": "256", "num_updates": "587200", "lr": "0.00041697", "gnorm": "0.696", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "21.7", "wall": "191668"} [2022-08-03 06:43:40,468][train_inner][INFO] - {"epoch": 12, "update": 11.413, "loss": "2.112", "ppl": "4.32", "wps": "364782", "ups": "3.1", "wpb": "117729", "bsz": "256", "num_updates": "587400", "lr": "0.000416768", "gnorm": "0.699", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "23.1", "wall": "191733"} [2022-08-03 06:44:45,341][train_inner][INFO] - {"epoch": 12, "update": 11.417, "loss": "2.113", "ppl": "4.33", "wps": "366743", "ups": "3.08", "wpb": "118957", "bsz": "256", "num_updates": "587600", "lr": "0.000416566", "gnorm": "0.694", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.3", "wall": "191798"} [2022-08-03 06:45:43,228][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-03 06:45:51,030][train_inner][INFO] - {"epoch": 12, "update": 11.421, "loss": "2.112", "ppl": "4.32", "wps": "361053", "ups": "3.04", "wpb": "118584", "bsz": "256", "num_updates": "587800", "lr": "0.000416364", "gnorm": "0.7", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "191863"} [2022-08-03 06:45:54,825][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-03 06:46:55,988][train_inner][INFO] - {"epoch": 12, "update": 11.425, "loss": "2.119", "ppl": "4.34", "wps": "363257", "ups": "3.08", "wpb": "117982", "bsz": "256", "num_updates": "588000", "lr": "0.000416162", "gnorm": "0.719", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21", "wall": "191928"} [2022-08-03 06:48:00,653][train_inner][INFO] - {"epoch": 12, "update": 11.428, "loss": "2.116", "ppl": "4.34", "wps": "365578", "ups": "3.09", "wpb": "118198", "bsz": "256", "num_updates": "588200", "lr": "0.00041596", "gnorm": "0.714", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20", "wall": "191993"} [2022-08-03 06:49:06,693][train_inner][INFO] - {"epoch": 12, "update": 11.432, "loss": "2.108", "ppl": "4.31", "wps": "358253", "ups": "3.03", "wpb": "118294", "bsz": "256", "num_updates": "588400", "lr": "0.000415758", "gnorm": "0.695", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "25.6", "wall": "192059"} [2022-08-03 06:50:11,151][train_inner][INFO] - {"epoch": 12, "update": 11.436, "loss": "2.112", "ppl": "4.32", "wps": "367061", "ups": "3.1", "wpb": "118298", "bsz": "256", "num_updates": "588600", "lr": "0.000415556", "gnorm": "0.694", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23.9", "wall": "192123"} [2022-08-03 06:51:15,963][train_inner][INFO] - {"epoch": 12, "update": 11.44, "loss": "2.11", "ppl": "4.32", "wps": "364005", "ups": "3.09", "wpb": "117957", "bsz": "256", "num_updates": "588800", "lr": "0.000415354", "gnorm": "0.696", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23.9", "wall": "192188"} [2022-08-03 06:52:20,879][train_inner][INFO] - {"epoch": 12, "update": 11.444, "loss": "2.11", "ppl": "4.32", "wps": "364674", "ups": "3.08", "wpb": "118365", "bsz": "256", "num_updates": "589000", "lr": "0.000415152", "gnorm": "0.696", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.4", "wall": "192253"} [2022-08-03 06:53:25,701][train_inner][INFO] - {"epoch": 12, "update": 11.448, "loss": "2.115", "ppl": "4.33", "wps": "365049", "ups": "3.09", "wpb": "118313", "bsz": "256", "num_updates": "589200", "lr": "0.000414949", "gnorm": "0.724", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.9", "wall": "192318"} [2022-08-03 06:54:30,700][train_inner][INFO] - {"epoch": 12, "update": 11.452, "loss": "2.11", "ppl": "4.32", "wps": "364890", "ups": "3.08", "wpb": "118587", "bsz": "256", "num_updates": "589400", "lr": "0.000414747", "gnorm": "0.698", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.9", "wall": "192383"} [2022-08-03 06:55:35,856][train_inner][INFO] - {"epoch": 12, "update": 11.456, "loss": "2.114", "ppl": "4.33", "wps": "362815", "ups": "3.07", "wpb": "118196", "bsz": "256", "num_updates": "589600", "lr": "0.000414545", "gnorm": "0.7", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.1", "wall": "192448"} [2022-08-03 06:56:40,509][train_inner][INFO] - {"epoch": 12, "update": 11.46, "loss": "2.106", "ppl": "4.3", "wps": "366599", "ups": "3.09", "wpb": "118508", "bsz": "256", "num_updates": "589800", "lr": "0.000414343", "gnorm": "0.7", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.2", "wall": "192513"} [2022-08-03 06:57:45,162][train_inner][INFO] - {"epoch": 12, "update": 11.463, "loss": "2.113", "ppl": "4.33", "wps": "364741", "ups": "3.09", "wpb": "117906", "bsz": "256", "num_updates": "590000", "lr": "0.000414141", "gnorm": "0.701", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.1", "wall": "192577"} [2022-08-03 06:58:50,221][train_inner][INFO] - {"epoch": 12, "update": 11.467, "loss": "2.108", "ppl": "4.31", "wps": "365536", "ups": "3.07", "wpb": "118904", "bsz": "256", "num_updates": "590200", "lr": "0.000413939", "gnorm": "0.696", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.7", "wall": "192643"} [2022-08-03 06:59:55,086][train_inner][INFO] - {"epoch": 12, "update": 11.471, "loss": "2.107", "ppl": "4.31", "wps": "363860", "ups": "3.08", "wpb": "118008", "bsz": "256", "num_updates": "590400", "lr": "0.000413737", "gnorm": "0.699", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.5", "wall": "192707"} [2022-08-03 07:00:59,729][train_inner][INFO] - {"epoch": 12, "update": 11.475, "loss": "2.109", "ppl": "4.31", "wps": "367273", "ups": "3.09", "wpb": "118707", "bsz": "256", "num_updates": "590600", "lr": "0.000413535", "gnorm": "0.698", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.3", "wall": "192772"} [2022-08-03 07:02:04,570][train_inner][INFO] - {"epoch": 12, "update": 11.479, "loss": "2.107", "ppl": "4.31", "wps": "365471", "ups": "3.08", "wpb": "118486", "bsz": "256", "num_updates": "590800", "lr": "0.000413333", "gnorm": "0.698", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.1", "wall": "192837"} [2022-08-03 07:03:09,573][train_inner][INFO] - {"epoch": 12, "update": 11.483, "loss": "2.109", "ppl": "4.31", "wps": "363775", "ups": "3.08", "wpb": "118230", "bsz": "256", "num_updates": "591000", "lr": "0.000413131", "gnorm": "0.698", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "28.2", "wall": "192902"} [2022-08-03 07:03:53,723][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-03 07:04:14,529][train_inner][INFO] - {"epoch": 12, "update": 11.487, "loss": "2.112", "ppl": "4.32", "wps": "363415", "ups": "3.08", "wpb": "118029", "bsz": "256", "num_updates": "591200", "lr": "0.000412929", "gnorm": "0.699", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.7", "wall": "192967"} [2022-08-03 07:05:19,213][train_inner][INFO] - {"epoch": 12, "update": 11.491, "loss": "2.113", "ppl": "4.32", "wps": "364367", "ups": "3.09", "wpb": "117842", "bsz": "256", "num_updates": "591400", "lr": "0.000412727", "gnorm": "0.701", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.6", "wall": "193032"} [2022-08-03 07:06:24,068][train_inner][INFO] - {"epoch": 12, "update": 11.495, "loss": "2.113", "ppl": "4.33", "wps": "363587", "ups": "3.08", "wpb": "117900", "bsz": "256", "num_updates": "591600", "lr": "0.000412525", "gnorm": "0.701", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.7", "wall": "193096"} [2022-08-03 07:07:28,902][train_inner][INFO] - {"epoch": 12, "update": 11.498, "loss": "2.112", "ppl": "4.32", "wps": "365892", "ups": "3.08", "wpb": "118611", "bsz": "256", "num_updates": "591800", "lr": "0.000412323", "gnorm": "0.7", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21", "wall": "193161"} [2022-08-03 07:08:33,683][train_inner][INFO] - {"epoch": 12, "update": 11.502, "loss": "2.105", "ppl": "4.3", "wps": "366998", "ups": "3.09", "wpb": "118871", "bsz": "256", "num_updates": "592000", "lr": "0.000412121", "gnorm": "0.697", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.3", "wall": "193226"} [2022-08-03 07:09:39,503][train_inner][INFO] - {"epoch": 12, "update": 11.506, "loss": "2.11", "ppl": "4.32", "wps": "358607", "ups": "3.04", "wpb": "118015", "bsz": "256", "num_updates": "592200", "lr": "0.000411919", "gnorm": "0.698", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "193292"} [2022-08-03 07:10:44,329][train_inner][INFO] - {"epoch": 12, "update": 11.51, "loss": "2.112", "ppl": "4.32", "wps": "364826", "ups": "3.09", "wpb": "118250", "bsz": "256", "num_updates": "592400", "lr": "0.000411717", "gnorm": "0.7", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.1", "wall": "193357"} [2022-08-03 07:11:49,310][train_inner][INFO] - {"epoch": 12, "update": 11.514, "loss": "2.101", "ppl": "4.29", "wps": "364728", "ups": "3.08", "wpb": "118499", "bsz": "256", "num_updates": "592600", "lr": "0.000411515", "gnorm": "0.698", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.2", "wall": "193422"} [2022-08-03 07:12:54,128][train_inner][INFO] - {"epoch": 12, "update": 11.518, "loss": "2.106", "ppl": "4.3", "wps": "362816", "ups": "3.09", "wpb": "117584", "bsz": "256", "num_updates": "592800", "lr": "0.000411313", "gnorm": "0.703", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.4", "wall": "193486"} [2022-08-03 07:13:58,820][train_inner][INFO] - {"epoch": 12, "update": 11.522, "loss": "2.102", "ppl": "4.29", "wps": "363375", "ups": "3.09", "wpb": "117535", "bsz": "256", "num_updates": "593000", "lr": "0.000411111", "gnorm": "0.699", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "26.6", "wall": "193551"} [2022-08-03 07:15:03,977][train_inner][INFO] - {"epoch": 12, "update": 11.526, "loss": "2.112", "ppl": "4.32", "wps": "364267", "ups": "3.07", "wpb": "118671", "bsz": "256", "num_updates": "593200", "lr": "0.000410909", "gnorm": "0.698", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "193616"} [2022-08-03 07:16:08,604][train_inner][INFO] - {"epoch": 12, "update": 11.53, "loss": "2.112", "ppl": "4.32", "wps": "366449", "ups": "3.09", "wpb": "118410", "bsz": "256", "num_updates": "593400", "lr": "0.000410707", "gnorm": "0.699", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.7", "wall": "193681"} [2022-08-03 07:17:13,715][train_inner][INFO] - {"epoch": 12, "update": 11.533, "loss": "2.107", "ppl": "4.31", "wps": "364705", "ups": "3.07", "wpb": "118730", "bsz": "256", "num_updates": "593600", "lr": "0.000410505", "gnorm": "0.698", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.9", "wall": "193746"} [2022-08-03 07:18:18,545][train_inner][INFO] - {"epoch": 12, "update": 11.537, "loss": "2.106", "ppl": "4.3", "wps": "364976", "ups": "3.09", "wpb": "118306", "bsz": "256", "num_updates": "593800", "lr": "0.000410303", "gnorm": "0.698", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "193811"} [2022-08-03 07:19:23,022][train_inner][INFO] - {"epoch": 12, "update": 11.541, "loss": "2.107", "ppl": "4.31", "wps": "367425", "ups": "3.1", "wpb": "118450", "bsz": "256", "num_updates": "594000", "lr": "0.000410101", "gnorm": "0.698", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.1", "wall": "193875"} [2022-08-03 07:20:28,038][train_inner][INFO] - {"epoch": 12, "update": 11.545, "loss": "2.107", "ppl": "4.31", "wps": "366616", "ups": "3.08", "wpb": "119178", "bsz": "256", "num_updates": "594200", "lr": "0.000409899", "gnorm": "0.694", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.3", "wall": "193940"} [2022-08-03 07:21:32,902][train_inner][INFO] - {"epoch": 12, "update": 11.549, "loss": "2.103", "ppl": "4.3", "wps": "365855", "ups": "3.08", "wpb": "118652", "bsz": "256", "num_updates": "594400", "lr": "0.000409697", "gnorm": "0.702", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.2", "wall": "194005"} [2022-08-03 07:22:38,407][train_inner][INFO] - {"epoch": 12, "update": 11.553, "loss": "2.107", "ppl": "4.31", "wps": "363335", "ups": "3.05", "wpb": "119000", "bsz": "256", "num_updates": "594600", "lr": "0.000409495", "gnorm": "0.699", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "194071"} [2022-08-03 07:23:43,444][train_inner][INFO] - {"epoch": 12, "update": 11.557, "loss": "2.106", "ppl": "4.31", "wps": "363227", "ups": "3.08", "wpb": "118114", "bsz": "256", "num_updates": "594800", "lr": "0.000409293", "gnorm": "0.701", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.8", "wall": "194136"} [2022-08-03 07:24:48,558][train_inner][INFO] - {"epoch": 12, "update": 11.561, "loss": "2.107", "ppl": "4.31", "wps": "364421", "ups": "3.07", "wpb": "118643", "bsz": "256", "num_updates": "595000", "lr": "0.000409091", "gnorm": "0.698", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "194201"} [2022-08-03 07:25:53,461][train_inner][INFO] - {"epoch": 12, "update": 11.564, "loss": "2.105", "ppl": "4.3", "wps": "364136", "ups": "3.08", "wpb": "118166", "bsz": "256", "num_updates": "595200", "lr": "0.000408889", "gnorm": "0.699", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.3", "wall": "194266"} [2022-08-03 07:26:59,463][train_inner][INFO] - {"epoch": 12, "update": 11.568, "loss": "2.106", "ppl": "4.31", "wps": "356299", "ups": "3.03", "wpb": "117580", "bsz": "256", "num_updates": "595400", "lr": "0.000408687", "gnorm": "0.701", "clip": "0", "loss_scale": "32", "train_wall": "66", "gb_free": "20", "wall": "194332"} [2022-08-03 07:28:04,619][train_inner][INFO] - {"epoch": 12, "update": 11.572, "loss": "2.113", "ppl": "4.33", "wps": "363722", "ups": "3.07", "wpb": "118491", "bsz": "256", "num_updates": "595600", "lr": "0.000408485", "gnorm": "0.702", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "19.8", "wall": "194397"} [2022-08-03 07:29:08,926][train_inner][INFO] - {"epoch": 12, "update": 11.576, "loss": "2.111", "ppl": "4.32", "wps": "365864", "ups": "3.11", "wpb": "117636", "bsz": "256", "num_updates": "595800", "lr": "0.000408283", "gnorm": "0.705", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "25.2", "wall": "194461"} [2022-08-03 07:30:14,131][train_inner][INFO] - {"epoch": 12, "update": 11.58, "loss": "2.108", "ppl": "4.31", "wps": "363205", "ups": "3.07", "wpb": "118413", "bsz": "256", "num_updates": "596000", "lr": "0.000408081", "gnorm": "0.698", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "22.7", "wall": "194526"} [2022-08-03 07:31:19,250][train_inner][INFO] - {"epoch": 12, "update": 11.584, "loss": "2.103", "ppl": "4.3", "wps": "365333", "ups": "3.07", "wpb": "118949", "bsz": "256", "num_updates": "596200", "lr": "0.000407879", "gnorm": "0.696", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "23.6", "wall": "194592"} [2022-08-03 07:32:24,204][train_inner][INFO] - {"epoch": 12, "update": 11.588, "loss": "2.105", "ppl": "4.3", "wps": "364417", "ups": "3.08", "wpb": "118350", "bsz": "256", "num_updates": "596400", "lr": "0.000407677", "gnorm": "0.699", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "25.2", "wall": "194657"} [2022-08-03 07:33:28,880][train_inner][INFO] - {"epoch": 12, "update": 11.592, "loss": "2.108", "ppl": "4.31", "wps": "365603", "ups": "3.09", "wpb": "118226", "bsz": "256", "num_updates": "596600", "lr": "0.000407475", "gnorm": "0.702", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "22.1", "wall": "194721"} [2022-08-03 07:34:27,559][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-03 07:34:34,056][train_inner][INFO] - {"epoch": 12, "update": 11.596, "loss": "2.109", "ppl": "4.31", "wps": "362992", "ups": "3.07", "wpb": "118291", "bsz": "256", "num_updates": "596800", "lr": "0.000407273", "gnorm": "0.704", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21", "wall": "194786"} [2022-08-03 07:34:55,182][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-03 07:35:02,452][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 [2022-08-03 07:35:23,676][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 [2022-08-03 07:35:39,288][train_inner][INFO] - {"epoch": 12, "update": 11.599, "loss": "2.111", "ppl": "4.32", "wps": "363176", "ups": "3.07", "wpb": "118452", "bsz": "256", "num_updates": "597000", "lr": "0.000407071", "gnorm": "0.726", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20", "wall": "194852"} [2022-08-03 07:36:44,094][train_inner][INFO] - {"epoch": 12, "update": 11.603, "loss": "2.11", "ppl": "4.32", "wps": "364752", "ups": "3.09", "wpb": "118190", "bsz": "256", "num_updates": "597200", "lr": "0.000406869", "gnorm": "0.738", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.9", "wall": "194916"} [2022-08-03 07:37:49,065][train_inner][INFO] - {"epoch": 12, "update": 11.607, "loss": "2.103", "ppl": "4.3", "wps": "364566", "ups": "3.08", "wpb": "118428", "bsz": "256", "num_updates": "597400", "lr": "0.000406667", "gnorm": "0.703", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "25.8", "wall": "194981"} [2022-08-03 07:38:54,157][train_inner][INFO] - {"epoch": 12, "update": 11.611, "loss": "2.107", "ppl": "4.31", "wps": "363067", "ups": "3.07", "wpb": "118164", "bsz": "256", "num_updates": "597600", "lr": "0.000406465", "gnorm": "0.782", "clip": "0.5", "loss_scale": "2", "train_wall": "65", "gb_free": "21.6", "wall": "195046"} [2022-08-03 07:39:21,618][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 [2022-08-03 07:39:58,786][train_inner][INFO] - {"epoch": 12, "update": 11.615, "loss": "2.111", "ppl": "4.32", "wps": "365477", "ups": "3.09", "wpb": "118099", "bsz": "256", "num_updates": "597800", "lr": "0.000406263", "gnorm": "0.768", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "21.5", "wall": "195111"} [2022-08-03 07:41:03,387][train_inner][INFO] - {"epoch": 12, "update": 11.619, "loss": "2.113", "ppl": "4.33", "wps": "366388", "ups": "3.1", "wpb": "118344", "bsz": "256", "num_updates": "598000", "lr": "0.000406061", "gnorm": "0.764", "clip": "0.5", "loss_scale": "1", "train_wall": "64", "gb_free": "20.4", "wall": "195176"} [2022-08-03 07:42:08,478][train_inner][INFO] - {"epoch": 12, "update": 11.623, "loss": "2.109", "ppl": "4.31", "wps": "362714", "ups": "3.07", "wpb": "118046", "bsz": "256", "num_updates": "598200", "lr": "0.000405859", "gnorm": "0.709", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "23.5", "wall": "195241"} [2022-08-03 07:43:13,243][train_inner][INFO] - {"epoch": 12, "update": 11.627, "loss": "2.105", "ppl": "4.3", "wps": "365332", "ups": "3.09", "wpb": "118302", "bsz": "256", "num_updates": "598400", "lr": "0.000405657", "gnorm": "0.706", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "21", "wall": "195306"} [2022-08-03 07:44:18,280][train_inner][INFO] - {"epoch": 12, "update": 11.631, "loss": "2.102", "ppl": "4.29", "wps": "363847", "ups": "3.08", "wpb": "118317", "bsz": "256", "num_updates": "598600", "lr": "0.000405455", "gnorm": "0.7", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "24.4", "wall": "195371"} [2022-08-03 07:45:22,951][train_inner][INFO] - {"epoch": 12, "update": 11.634, "loss": "2.103", "ppl": "4.3", "wps": "365960", "ups": "3.09", "wpb": "118332", "bsz": "256", "num_updates": "598800", "lr": "0.000405253", "gnorm": "0.697", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "19.9", "wall": "195435"} [2022-08-03 07:46:28,127][train_inner][INFO] - {"epoch": 12, "update": 11.638, "loss": "2.108", "ppl": "4.31", "wps": "363120", "ups": "3.07", "wpb": "118333", "bsz": "256", "num_updates": "599000", "lr": "0.000405051", "gnorm": "0.702", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "19.9", "wall": "195500"} [2022-08-03 07:47:32,872][train_inner][INFO] - {"epoch": 12, "update": 11.642, "loss": "2.108", "ppl": "4.31", "wps": "365081", "ups": "3.09", "wpb": "118185", "bsz": "256", "num_updates": "599200", "lr": "0.000404848", "gnorm": "0.749", "clip": "0.5", "loss_scale": "1", "train_wall": "64", "gb_free": "22.8", "wall": "195565"} [2022-08-03 07:48:38,076][train_inner][INFO] - {"epoch": 12, "update": 11.646, "loss": "2.104", "ppl": "4.3", "wps": "361028", "ups": "3.07", "wpb": "117699", "bsz": "256", "num_updates": "599400", "lr": "0.000404646", "gnorm": "0.704", "clip": "0", "loss_scale": "1", "train_wall": "65", "gb_free": "28.2", "wall": "195630"} [2022-08-03 07:49:42,788][train_inner][INFO] - {"epoch": 12, "update": 11.65, "loss": "2.1", "ppl": "4.29", "wps": "364375", "ups": "3.09", "wpb": "117896", "bsz": "256", "num_updates": "599600", "lr": "0.000404444", "gnorm": "0.704", "clip": "0", "loss_scale": "1", "train_wall": "64", "gb_free": "24", "wall": "195695"} [2022-08-03 07:50:47,454][train_inner][INFO] - {"epoch": 12, "update": 11.654, "loss": "2.109", "ppl": "4.32", "wps": "363617", "ups": "3.09", "wpb": "117566", "bsz": "256", "num_updates": "599800", "lr": "0.000404242", "gnorm": "0.704", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "22.7", "wall": "195760"} [2022-08-03 07:51:51,991][train_inner][INFO] - {"epoch": 12, "update": 11.658, "loss": "2.108", "ppl": "4.31", "wps": "364789", "ups": "3.1", "wpb": "117710", "bsz": "256", "num_updates": "600000", "lr": "0.00040404", "gnorm": "0.705", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.8", "wall": "195824"} [2022-08-03 07:51:51,992][fairseq_cli.train][INFO] - begin validation on "valid" subset [2022-08-03 07:52:14,880][valid][INFO] - {"epoch": 12, "valid_loss": "2.011", "valid_ppl": "4.03", "valid_wps": "1.60884e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "600000", "valid_best_loss": "2.011"} [2022-08-03 07:52:14,885][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 12 @ 600000 updates [2022-08-03 07:52:14,886][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_12_600000.pt [2022-08-03 07:52:24,542][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_12_600000.pt [2022-08-03 07:52:54,886][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/adan2/checkpoint_12_600000.pt (epoch 12 @ 600000 updates, score 2.011) (writing took 40.00082699768245 seconds) [2022-08-03 07:53:59,519][train_inner][INFO] - {"epoch": 12, "update": 11.662, "loss": "2.106", "ppl": "4.31", "wps": "186006", "ups": "1.57", "wpb": "118605", "bsz": "256", "num_updates": "600200", "lr": "0.000403838", "gnorm": "0.698", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.5", "wall": "195952"} [2022-08-03 07:55:04,871][train_inner][INFO] - {"epoch": 12, "update": 11.666, "loss": "2.105", "ppl": "4.3", "wps": "362757", "ups": "3.06", "wpb": "118533", "bsz": "256", "num_updates": "600400", "lr": "0.000403636", "gnorm": "0.702", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.1", "wall": "196017"} [2022-08-03 07:56:10,122][train_inner][INFO] - {"epoch": 12, "update": 11.669, "loss": "2.103", "ppl": "4.3", "wps": "363375", "ups": "3.07", "wpb": "118551", "bsz": "256", "num_updates": "600600", "lr": "0.000403434", "gnorm": "0.7", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.7", "wall": "196082"} [2022-08-03 07:57:15,186][train_inner][INFO] - {"epoch": 12, "update": 11.673, "loss": "2.101", "ppl": "4.29", "wps": "363594", "ups": "3.07", "wpb": "118283", "bsz": "256", "num_updates": "600800", "lr": "0.000403232", "gnorm": "0.712", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "23.1", "wall": "196148"} [2022-08-03 07:58:19,540][train_inner][INFO] - {"epoch": 12, "update": 11.677, "loss": "2.113", "ppl": "4.32", "wps": "364386", "ups": "3.11", "wpb": "117246", "bsz": "256", "num_updates": "601000", "lr": "0.00040303", "gnorm": "0.704", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.7", "wall": "196212"} [2022-08-03 07:59:24,492][train_inner][INFO] - {"epoch": 12, "update": 11.681, "loss": "2.1", "ppl": "4.29", "wps": "363586", "ups": "3.08", "wpb": "118077", "bsz": "256", "num_updates": "601200", "lr": "0.000402828", "gnorm": "0.702", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "23.8", "wall": "196277"} [2022-08-03 08:00:29,591][train_inner][INFO] - {"epoch": 12, "update": 11.685, "loss": "2.109", "ppl": "4.31", "wps": "362580", "ups": "3.07", "wpb": "118017", "bsz": "256", "num_updates": "601400", "lr": "0.000402626", "gnorm": "0.709", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.4", "wall": "196342"} [2022-08-03 08:01:34,677][train_inner][INFO] - {"epoch": 12, "update": 11.689, "loss": "2.105", "ppl": "4.3", "wps": "363649", "ups": "3.07", "wpb": "118340", "bsz": "256", "num_updates": "601600", "lr": "0.000402424", "gnorm": "0.703", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20", "wall": "196407"} [2022-08-03 08:02:40,396][train_inner][INFO] - {"epoch": 12, "update": 11.693, "loss": "2.108", "ppl": "4.31", "wps": "359059", "ups": "3.04", "wpb": "117984", "bsz": "256", "num_updates": "601800", "lr": "0.000402222", "gnorm": "0.703", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20", "wall": "196473"} [2022-08-03 08:03:45,674][train_inner][INFO] - {"epoch": 12, "update": 11.697, "loss": "2.103", "ppl": "4.3", "wps": "363145", "ups": "3.06", "wpb": "118525", "bsz": "256", "num_updates": "602000", "lr": "0.00040202", "gnorm": "0.7", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.3", "wall": "196538"} [2022-08-03 08:04:50,490][train_inner][INFO] - {"epoch": 12, "update": 11.701, "loss": "2.109", "ppl": "4.31", "wps": "364067", "ups": "3.09", "wpb": "117984", "bsz": "256", "num_updates": "602200", "lr": "0.000401818", "gnorm": "0.705", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.9", "wall": "196603"} [2022-08-03 08:05:55,648][train_inner][INFO] - {"epoch": 12, "update": 11.704, "loss": "2.103", "ppl": "4.3", "wps": "363155", "ups": "3.07", "wpb": "118311", "bsz": "256", "num_updates": "602400", "lr": "0.000401616", "gnorm": "0.703", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.3", "wall": "196668"} [2022-08-03 08:06:46,020][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 [2022-08-03 08:07:01,422][train_inner][INFO] - {"epoch": 12, "update": 11.708, "loss": "2.098", "ppl": "4.28", "wps": "361199", "ups": "3.04", "wpb": "118786", "bsz": "256", "num_updates": "602600", "lr": "0.000401414", "gnorm": "0.7", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.7", "wall": "196734"} [2022-08-03 08:08:05,817][train_inner][INFO] - {"epoch": 12, "update": 11.712, "loss": "2.107", "ppl": "4.31", "wps": "365537", "ups": "3.11", "wpb": "117693", "bsz": "256", "num_updates": "602800", "lr": "0.000401212", "gnorm": "0.706", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "19.9", "wall": "196798"} [2022-08-03 08:09:10,667][train_inner][INFO] - {"epoch": 12, "update": 11.716, "loss": "2.103", "ppl": "4.3", "wps": "365250", "ups": "3.08", "wpb": "118430", "bsz": "256", "num_updates": "603000", "lr": "0.00040101", "gnorm": "0.702", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "24.8", "wall": "196863"} [2022-08-03 08:10:15,693][train_inner][INFO] - {"epoch": 12, "update": 11.72, "loss": "2.099", "ppl": "4.28", "wps": "364758", "ups": "3.08", "wpb": "118593", "bsz": "256", "num_updates": "603200", "lr": "0.000400808", "gnorm": "0.698", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.9", "wall": "196928"} [2022-08-03 08:11:20,585][train_inner][INFO] - {"epoch": 12, "update": 11.724, "loss": "2.099", "ppl": "4.28", "wps": "364294", "ups": "3.08", "wpb": "118196", "bsz": "256", "num_updates": "603400", "lr": "0.000400606", "gnorm": "0.701", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21", "wall": "196993"} [2022-08-03 08:12:25,359][train_inner][INFO] - {"epoch": 12, "update": 11.728, "loss": "2.101", "ppl": "4.29", "wps": "364875", "ups": "3.09", "wpb": "118171", "bsz": "256", "num_updates": "603600", "lr": "0.000400404", "gnorm": "0.703", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "27.2", "wall": "197058"} [2022-08-03 08:13:30,198][train_inner][INFO] - {"epoch": 12, "update": 11.732, "loss": "2.1", "ppl": "4.29", "wps": "365659", "ups": "3.08", "wpb": "118543", "bsz": "256", "num_updates": "603800", "lr": "0.000400202", "gnorm": "0.7", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.1", "wall": "197123"} [2022-08-03 08:14:34,759][train_inner][INFO] - {"epoch": 12, "update": 11.735, "loss": "2.102", "ppl": "4.29", "wps": "367598", "ups": "3.1", "wpb": "118661", "bsz": "256", "num_updates": "604000", "lr": "0.0004", "gnorm": "0.7", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.2", "wall": "197187"} [2022-08-03 08:15:39,795][train_inner][INFO] - {"epoch": 12, "update": 11.739, "loss": "2.102", "ppl": "4.29", "wps": "364439", "ups": "3.08", "wpb": "118508", "bsz": "256", "num_updates": "604200", "lr": "0.000399798", "gnorm": "0.701", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.6", "wall": "197252"} [2022-08-03 08:16:44,393][train_inner][INFO] - {"epoch": 12, "update": 11.743, "loss": "2.101", "ppl": "4.29", "wps": "366537", "ups": "3.1", "wpb": "118386", "bsz": "256", "num_updates": "604400", "lr": "0.000399596", "gnorm": "0.704", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "23.4", "wall": "197317"} [2022-08-03 08:17:49,063][train_inner][INFO] - {"epoch": 12, "update": 11.747, "loss": "2.102", "ppl": "4.29", "wps": "367349", "ups": "3.09", "wpb": "118780", "bsz": "256", "num_updates": "604600", "lr": "0.000399394", "gnorm": "0.703", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "26", "wall": "197381"} [2022-08-03 08:18:53,549][train_inner][INFO] - {"epoch": 12, "update": 11.751, "loss": "2.104", "ppl": "4.3", "wps": "364281", "ups": "3.1", "wpb": "117454", "bsz": "256", "num_updates": "604800", "lr": "0.000399192", "gnorm": "0.707", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.9", "wall": "197446"} [2022-08-03 08:19:58,277][train_inner][INFO] - {"epoch": 12, "update": 11.755, "loss": "2.101", "ppl": "4.29", "wps": "364370", "ups": "3.09", "wpb": "117922", "bsz": "256", "num_updates": "605000", "lr": "0.00039899", "gnorm": "0.705", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.8", "wall": "197511"} [2022-08-03 08:21:03,131][train_inner][INFO] - {"epoch": 12, "update": 11.759, "loss": "2.099", "ppl": "4.28", "wps": "366015", "ups": "3.08", "wpb": "118686", "bsz": "256", "num_updates": "605200", "lr": "0.000398788", "gnorm": "0.702", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.4", "wall": "197575"} [2022-08-03 08:22:08,157][train_inner][INFO] - {"epoch": 12, "update": 11.763, "loss": "2.096", "ppl": "4.28", "wps": "366046", "ups": "3.08", "wpb": "119009", "bsz": "256", "num_updates": "605400", "lr": "0.000398586", "gnorm": "0.698", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.2", "wall": "197640"} [2022-08-03 08:23:12,989][train_inner][INFO] - {"epoch": 12, "update": 11.767, "loss": "2.105", "ppl": "4.3", "wps": "365608", "ups": "3.08", "wpb": "118515", "bsz": "256", "num_updates": "605600", "lr": "0.000398384", "gnorm": "0.703", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.3", "wall": "197705"} [2022-08-03 08:24:17,852][train_inner][INFO] - {"epoch": 12, "update": 11.77, "loss": "2.105", "ppl": "4.3", "wps": "364631", "ups": "3.08", "wpb": "118253", "bsz": "256", "num_updates": "605800", "lr": "0.000398182", "gnorm": "0.703", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.5", "wall": "197770"} [2022-08-03 08:25:22,076][train_inner][INFO] - {"epoch": 12, "update": 11.774, "loss": "2.098", "ppl": "4.28", "wps": "369134", "ups": "3.11", "wpb": "118533", "bsz": "256", "num_updates": "606000", "lr": "0.00039798", "gnorm": "0.701", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22.7", "wall": "197834"} [2022-08-03 08:26:26,659][train_inner][INFO] - {"epoch": 12, "update": 11.778, "loss": "2.099", "ppl": "4.29", "wps": "369667", "ups": "3.1", "wpb": "119370", "bsz": "256", "num_updates": "606200", "lr": "0.000397778", "gnorm": "0.701", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.9", "wall": "197899"} [2022-08-03 08:27:31,780][train_inner][INFO] - {"epoch": 12, "update": 11.782, "loss": "2.104", "ppl": "4.3", "wps": "361571", "ups": "3.07", "wpb": "117729", "bsz": "256", "num_updates": "606400", "lr": "0.000397576", "gnorm": "0.707", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "26", "wall": "197964"} [2022-08-03 08:28:36,549][train_inner][INFO] - {"epoch": 12, "update": 11.786, "loss": "2.102", "ppl": "4.29", "wps": "365200", "ups": "3.09", "wpb": "118265", "bsz": "256", "num_updates": "606600", "lr": "0.000397374", "gnorm": "0.706", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.9", "wall": "198029"} [2022-08-03 08:29:41,501][train_inner][INFO] - {"epoch": 12, "update": 11.79, "loss": "2.099", "ppl": "4.28", "wps": "363955", "ups": "3.08", "wpb": "118197", "bsz": "256", "num_updates": "606800", "lr": "0.000397172", "gnorm": "0.706", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.4", "wall": "198094"} [2022-08-03 08:30:46,487][train_inner][INFO] - {"epoch": 12, "update": 11.794, "loss": "2.104", "ppl": "4.3", "wps": "363848", "ups": "3.08", "wpb": "118224", "bsz": "256", "num_updates": "607000", "lr": "0.00039697", "gnorm": "0.704", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.3", "wall": "198159"} [2022-08-03 08:31:51,189][train_inner][INFO] - {"epoch": 12, "update": 11.798, "loss": "2.102", "ppl": "4.29", "wps": "367138", "ups": "3.09", "wpb": "118770", "bsz": "256", "num_updates": "607200", "lr": "0.000396768", "gnorm": "0.706", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.9", "wall": "198224"} [2022-08-03 08:32:56,062][train_inner][INFO] - {"epoch": 12, "update": 11.802, "loss": "2.098", "ppl": "4.28", "wps": "365879", "ups": "3.08", "wpb": "118677", "bsz": "256", "num_updates": "607400", "lr": "0.000396566", "gnorm": "0.704", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.2", "wall": "198288"} [2022-08-03 08:34:01,137][train_inner][INFO] - {"epoch": 12, "update": 11.805, "loss": "2.1", "ppl": "4.29", "wps": "363116", "ups": "3.07", "wpb": "118147", "bsz": "256", "num_updates": "607600", "lr": "0.000396364", "gnorm": "0.705", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.4", "wall": "198353"} [2022-08-03 08:35:06,765][train_inner][INFO] - {"epoch": 12, "update": 11.809, "loss": "2.097", "ppl": "4.28", "wps": "359663", "ups": "3.05", "wpb": "118012", "bsz": "256", "num_updates": "607800", "lr": "0.000396162", "gnorm": "0.703", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "198419"} [2022-08-03 08:36:11,365][train_inner][INFO] - {"epoch": 12, "update": 11.813, "loss": "2.104", "ppl": "4.3", "wps": "368225", "ups": "3.1", "wpb": "118933", "bsz": "256", "num_updates": "608000", "lr": "0.00039596", "gnorm": "0.702", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "28.2", "wall": "198484"} [2022-08-03 08:37:15,716][train_inner][INFO] - {"epoch": 12, "update": 11.817, "loss": "2.104", "ppl": "4.3", "wps": "366250", "ups": "3.11", "wpb": "117841", "bsz": "256", "num_updates": "608200", "lr": "0.000395758", "gnorm": "0.709", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.9", "wall": "198548"} [2022-08-03 08:38:21,432][train_inner][INFO] - {"epoch": 12, "update": 11.821, "loss": "2.104", "ppl": "4.3", "wps": "359074", "ups": "3.04", "wpb": "117983", "bsz": "256", "num_updates": "608400", "lr": "0.000395556", "gnorm": "0.706", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "28.2", "wall": "198614"} [2022-08-03 08:39:26,123][train_inner][INFO] - {"epoch": 12, "update": 11.825, "loss": "2.102", "ppl": "4.29", "wps": "366104", "ups": "3.09", "wpb": "118416", "bsz": "256", "num_updates": "608600", "lr": "0.000395354", "gnorm": "0.706", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "26.5", "wall": "198678"} [2022-08-03 08:40:31,287][train_inner][INFO] - {"epoch": 12, "update": 11.829, "loss": "2.097", "ppl": "4.28", "wps": "362166", "ups": "3.07", "wpb": "118001", "bsz": "256", "num_updates": "608800", "lr": "0.000395152", "gnorm": "0.706", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "198744"} [2022-08-03 08:41:36,110][train_inner][INFO] - {"epoch": 12, "update": 11.833, "loss": "2.102", "ppl": "4.29", "wps": "363936", "ups": "3.09", "wpb": "117954", "bsz": "256", "num_updates": "609000", "lr": "0.000394949", "gnorm": "0.706", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20", "wall": "198808"} [2022-08-03 08:42:40,878][train_inner][INFO] - {"epoch": 12, "update": 11.836, "loss": "2.108", "ppl": "4.31", "wps": "364485", "ups": "3.09", "wpb": "118033", "bsz": "256", "num_updates": "609200", "lr": "0.000394747", "gnorm": "0.708", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.4", "wall": "198873"} [2022-08-03 08:43:45,426][train_inner][INFO] - {"epoch": 12, "update": 11.84, "loss": "2.101", "ppl": "4.29", "wps": "365911", "ups": "3.1", "wpb": "118093", "bsz": "256", "num_updates": "609400", "lr": "0.000394545", "gnorm": "0.707", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.3", "wall": "198938"} [2022-08-03 08:44:50,264][train_inner][INFO] - {"epoch": 12, "update": 11.844, "loss": "2.103", "ppl": "4.3", "wps": "363701", "ups": "3.08", "wpb": "117906", "bsz": "256", "num_updates": "609600", "lr": "0.000394343", "gnorm": "0.707", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.3", "wall": "199003"} [2022-08-03 08:45:55,115][train_inner][INFO] - {"epoch": 12, "update": 11.848, "loss": "2.105", "ppl": "4.3", "wps": "365082", "ups": "3.08", "wpb": "118378", "bsz": "256", "num_updates": "609800", "lr": "0.000394141", "gnorm": "0.708", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.4", "wall": "199067"} [2022-08-03 08:47:01,515][train_inner][INFO] - {"epoch": 12, "update": 11.852, "loss": "2.096", "ppl": "4.28", "wps": "357762", "ups": "3.01", "wpb": "118776", "bsz": "256", "num_updates": "610000", "lr": "0.000393939", "gnorm": "0.703", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "20.6", "wall": "199134"} [2022-08-03 08:48:07,265][train_inner][INFO] - {"epoch": 12, "update": 11.856, "loss": "2.108", "ppl": "4.31", "wps": "359388", "ups": "3.04", "wpb": "118147", "bsz": "256", "num_updates": "610200", "lr": "0.000393737", "gnorm": "0.71", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "199200"} [2022-08-03 08:49:12,371][train_inner][INFO] - {"epoch": 12, "update": 11.86, "loss": "2.098", "ppl": "4.28", "wps": "363771", "ups": "3.07", "wpb": "118416", "bsz": "256", "num_updates": "610400", "lr": "0.000393535", "gnorm": "0.704", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.4", "wall": "199265"} [2022-08-03 08:50:17,041][train_inner][INFO] - {"epoch": 12, "update": 11.864, "loss": "2.104", "ppl": "4.3", "wps": "363727", "ups": "3.09", "wpb": "117611", "bsz": "256", "num_updates": "610600", "lr": "0.000393333", "gnorm": "0.705", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.1", "wall": "199329"} [2022-08-03 08:51:21,836][train_inner][INFO] - {"epoch": 12, "update": 11.868, "loss": "2.102", "ppl": "4.29", "wps": "364295", "ups": "3.09", "wpb": "118020", "bsz": "256", "num_updates": "610800", "lr": "0.000393131", "gnorm": "0.705", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "19.7", "wall": "199394"} [2022-08-03 08:51:25,907][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-03 08:52:26,686][train_inner][INFO] - {"epoch": 12, "update": 11.871, "loss": "2.102", "ppl": "4.29", "wps": "363714", "ups": "3.08", "wpb": "117932", "bsz": "256", "num_updates": "611000", "lr": "0.000392929", "gnorm": "0.706", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "199459"} [2022-08-03 08:53:31,795][train_inner][INFO] - {"epoch": 12, "update": 11.875, "loss": "2.096", "ppl": "4.28", "wps": "364580", "ups": "3.07", "wpb": "118688", "bsz": "256", "num_updates": "611200", "lr": "0.000392727", "gnorm": "0.704", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.9", "wall": "199524"} [2022-08-03 08:54:36,678][train_inner][INFO] - {"epoch": 12, "update": 11.879, "loss": "2.094", "ppl": "4.27", "wps": "364862", "ups": "3.08", "wpb": "118364", "bsz": "256", "num_updates": "611400", "lr": "0.000392525", "gnorm": "0.706", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.2", "wall": "199589"} [2022-08-03 08:55:41,710][train_inner][INFO] - {"epoch": 12, "update": 11.883, "loss": "2.095", "ppl": "4.27", "wps": "364430", "ups": "3.08", "wpb": "118498", "bsz": "256", "num_updates": "611600", "lr": "0.000392323", "gnorm": "0.706", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "199654"} [2022-08-03 08:56:46,579][train_inner][INFO] - {"epoch": 12, "update": 11.887, "loss": "2.104", "ppl": "4.3", "wps": "365727", "ups": "3.08", "wpb": "118619", "bsz": "256", "num_updates": "611800", "lr": "0.000392121", "gnorm": "0.706", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "199719"} [2022-08-03 08:57:51,395][train_inner][INFO] - {"epoch": 12, "update": 11.891, "loss": "2.096", "ppl": "4.27", "wps": "364644", "ups": "3.09", "wpb": "118172", "bsz": "256", "num_updates": "612000", "lr": "0.000391919", "gnorm": "0.704", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.4", "wall": "199784"} [2022-08-03 08:58:07,933][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-03 08:58:56,041][train_inner][INFO] - {"epoch": 12, "update": 11.895, "loss": "2.096", "ppl": "4.28", "wps": "365998", "ups": "3.09", "wpb": "118300", "bsz": "256", "num_updates": "612200", "lr": "0.000391717", "gnorm": "0.704", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.7", "wall": "199848"} [2022-08-03 09:00:01,133][train_inner][INFO] - {"epoch": 12, "update": 11.899, "loss": "2.096", "ppl": "4.27", "wps": "365465", "ups": "3.07", "wpb": "118944", "bsz": "256", "num_updates": "612400", "lr": "0.000391515", "gnorm": "0.703", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "199913"} [2022-08-03 09:01:06,127][train_inner][INFO] - {"epoch": 12, "update": 11.903, "loss": "2.102", "ppl": "4.29", "wps": "364974", "ups": "3.08", "wpb": "118601", "bsz": "256", "num_updates": "612600", "lr": "0.000391313", "gnorm": "0.703", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22", "wall": "199978"} [2022-08-03 09:02:11,195][train_inner][INFO] - {"epoch": 12, "update": 11.906, "loss": "2.103", "ppl": "4.3", "wps": "362016", "ups": "3.07", "wpb": "117777", "bsz": "255.9", "num_updates": "612800", "lr": "0.000391111", "gnorm": "0.708", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.3", "wall": "200044"} [2022-08-03 09:03:16,131][train_inner][INFO] - {"epoch": 12, "update": 11.91, "loss": "2.099", "ppl": "4.28", "wps": "362135", "ups": "3.08", "wpb": "117577", "bsz": "256", "num_updates": "613000", "lr": "0.000390909", "gnorm": "0.708", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.9", "wall": "200108"} [2022-08-03 09:04:20,863][train_inner][INFO] - {"epoch": 12, "update": 11.914, "loss": "2.096", "ppl": "4.27", "wps": "365070", "ups": "3.09", "wpb": "118157", "bsz": "256", "num_updates": "613200", "lr": "0.000390707", "gnorm": "0.706", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "25.4", "wall": "200173"} [2022-08-03 09:05:26,099][train_inner][INFO] - {"epoch": 12, "update": 11.918, "loss": "2.098", "ppl": "4.28", "wps": "363954", "ups": "3.07", "wpb": "118713", "bsz": "256", "num_updates": "613400", "lr": "0.000390505", "gnorm": "0.706", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.5", "wall": "200238"} [2022-08-03 09:06:31,812][train_inner][INFO] - {"epoch": 12, "update": 11.922, "loss": "2.095", "ppl": "4.27", "wps": "361267", "ups": "3.04", "wpb": "118698", "bsz": "256", "num_updates": "613600", "lr": "0.000390303", "gnorm": "0.707", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22", "wall": "200304"} [2022-08-03 09:07:36,722][train_inner][INFO] - {"epoch": 12, "update": 11.926, "loss": "2.096", "ppl": "4.28", "wps": "365864", "ups": "3.08", "wpb": "118738", "bsz": "256", "num_updates": "613800", "lr": "0.000390101", "gnorm": "0.705", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "200369"} [2022-08-03 09:08:41,521][train_inner][INFO] - {"epoch": 12, "update": 11.93, "loss": "2.1", "ppl": "4.29", "wps": "365018", "ups": "3.09", "wpb": "118253", "bsz": "256", "num_updates": "614000", "lr": "0.000389899", "gnorm": "0.708", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23.9", "wall": "200434"} [2022-08-03 09:09:47,372][train_inner][INFO] - {"epoch": 12, "update": 11.934, "loss": "2.104", "ppl": "4.3", "wps": "357542", "ups": "3.04", "wpb": "117722", "bsz": "256", "num_updates": "614200", "lr": "0.000389697", "gnorm": "0.711", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "28.2", "wall": "200500"} [2022-08-03 09:10:52,382][train_inner][INFO] - {"epoch": 12, "update": 11.938, "loss": "2.097", "ppl": "4.28", "wps": "365720", "ups": "3.08", "wpb": "118876", "bsz": "256", "num_updates": "614400", "lr": "0.000389495", "gnorm": "0.705", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21", "wall": "200565"} [2022-08-03 09:11:57,234][train_inner][INFO] - {"epoch": 12, "update": 11.941, "loss": "2.094", "ppl": "4.27", "wps": "364967", "ups": "3.08", "wpb": "118342", "bsz": "256", "num_updates": "614600", "lr": "0.000389293", "gnorm": "0.706", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.4", "wall": "200630"} [2022-08-03 09:13:02,155][train_inner][INFO] - {"epoch": 12, "update": 11.945, "loss": "2.096", "ppl": "4.28", "wps": "363746", "ups": "3.08", "wpb": "118072", "bsz": "256", "num_updates": "614800", "lr": "0.000389091", "gnorm": "0.707", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "200694"} [2022-08-03 09:14:06,851][train_inner][INFO] - {"epoch": 12, "update": 11.949, "loss": "2.099", "ppl": "4.28", "wps": "363786", "ups": "3.09", "wpb": "117677", "bsz": "256", "num_updates": "615000", "lr": "0.000388889", "gnorm": "0.709", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20", "wall": "200759"} [2022-08-03 09:15:11,583][train_inner][INFO] - {"epoch": 12, "update": 11.953, "loss": "2.097", "ppl": "4.28", "wps": "367473", "ups": "3.09", "wpb": "118934", "bsz": "256", "num_updates": "615200", "lr": "0.000388687", "gnorm": "0.704", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.6", "wall": "200824"} [2022-08-03 09:16:16,753][train_inner][INFO] - {"epoch": 12, "update": 11.957, "loss": "2.094", "ppl": "4.27", "wps": "364461", "ups": "3.07", "wpb": "118759", "bsz": "256", "num_updates": "615400", "lr": "0.000388485", "gnorm": "0.708", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "200889"} [2022-08-03 09:17:21,378][train_inner][INFO] - {"epoch": 12, "update": 11.961, "loss": "2.1", "ppl": "4.29", "wps": "366056", "ups": "3.09", "wpb": "118279", "bsz": "256", "num_updates": "615600", "lr": "0.000388283", "gnorm": "0.71", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20", "wall": "200954"} [2022-08-03 09:18:26,057][train_inner][INFO] - {"epoch": 12, "update": 11.965, "loss": "2.097", "ppl": "4.28", "wps": "366737", "ups": "3.09", "wpb": "118599", "bsz": "256", "num_updates": "615800", "lr": "0.000388081", "gnorm": "0.706", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.5", "wall": "201018"} [2022-08-03 09:19:30,636][train_inner][INFO] - {"epoch": 12, "update": 11.969, "loss": "2.096", "ppl": "4.28", "wps": "364236", "ups": "3.1", "wpb": "117608", "bsz": "256", "num_updates": "616000", "lr": "0.000387879", "gnorm": "0.711", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.2", "wall": "201083"} [2022-08-03 09:20:35,732][train_inner][INFO] - {"epoch": 12, "update": 11.972, "loss": "2.09", "ppl": "4.26", "wps": "364482", "ups": "3.07", "wpb": "118630", "bsz": "256", "num_updates": "616200", "lr": "0.000387677", "gnorm": "0.705", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "21", "wall": "201148"} [2022-08-03 09:21:21,381][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-03 09:21:40,585][train_inner][INFO] - {"epoch": 12, "update": 11.976, "loss": "2.1", "ppl": "4.29", "wps": "367046", "ups": "3.08", "wpb": "119018", "bsz": "256", "num_updates": "616400", "lr": "0.000387475", "gnorm": "0.707", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.9", "wall": "201213"} [2022-08-03 09:22:45,493][train_inner][INFO] - {"epoch": 12, "update": 11.98, "loss": "2.09", "ppl": "4.26", "wps": "364906", "ups": "3.08", "wpb": "118427", "bsz": "256", "num_updates": "616600", "lr": "0.000387273", "gnorm": "0.705", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "201278"} [2022-08-03 09:23:50,442][train_inner][INFO] - {"epoch": 12, "update": 11.984, "loss": "2.1", "ppl": "4.29", "wps": "363395", "ups": "3.08", "wpb": "118008", "bsz": "256", "num_updates": "616800", "lr": "0.000387071", "gnorm": "0.711", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.4", "wall": "201343"} [2022-08-03 09:24:55,633][train_inner][INFO] - {"epoch": 12, "update": 11.988, "loss": "2.094", "ppl": "4.27", "wps": "364328", "ups": "3.07", "wpb": "118753", "bsz": "256", "num_updates": "617000", "lr": "0.000386869", "gnorm": "0.706", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.4", "wall": "201408"} [2022-08-03 09:26:00,200][train_inner][INFO] - {"epoch": 12, "update": 11.992, "loss": "2.096", "ppl": "4.28", "wps": "365401", "ups": "3.1", "wpb": "117963", "bsz": "256", "num_updates": "617200", "lr": "0.000386667", "gnorm": "0.709", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.4", "wall": "201473"} [2022-08-03 09:27:05,161][train_inner][INFO] - {"epoch": 12, "update": 11.996, "loss": "2.098", "ppl": "4.28", "wps": "363846", "ups": "3.08", "wpb": "118176", "bsz": "256", "num_updates": "617400", "lr": "0.000386465", "gnorm": "0.708", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.9", "wall": "201537"} [2022-08-03 09:28:11,227][train_inner][INFO] - {"epoch": 12, "update": 12.0, "loss": "2.095", "ppl": "4.27", "wps": "356118", "ups": "3.03", "wpb": "117634", "bsz": "256", "num_updates": "617600", "lr": "0.000386263", "gnorm": "0.708", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "20.2", "wall": "201604"} [2022-08-03 09:28:13,814][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-03 09:28:16,407][fairseq_cli.train][INFO] - end of epoch 12 (average epoch stats below) [2022-08-03 09:28:16,408][train][INFO] - {"epoch": 12, "train_loss": "2.109", "train_ppl": "4.32", "train_wps": "362641", "train_ups": "3.07", "train_wpb": "118298", "train_bsz": "256", "train_num_updates": "617615", "train_lr": "0.000386247", "train_gnorm": "0.701", "train_clip": "0", "train_loss_scale": "8", "train_train_wall": "16623", "train_gb_free": "21.5", "train_wall": "201609"} [2022-08-03 09:28:16,492][fairseq.data.iterators][INFO] - grouped total_num_itrs = 51488 [2022-08-03 09:28:16,495][fairseq.trainer][INFO] - begin training epoch 13 [2022-08-03 09:28:16,496][fairseq_cli.train][INFO] - Start iterating over samples [2022-08-03 09:29:30,283][train_inner][INFO] - {"epoch": 13, "update": 12.004, "loss": "2.092", "ppl": "4.26", "wps": "299694", "ups": "2.53", "wpb": "118463", "bsz": "255.4", "num_updates": "617800", "lr": "0.000386061", "gnorm": "0.707", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21", "wall": "201683"} [2022-08-03 09:30:35,322][train_inner][INFO] - {"epoch": 13, "update": 12.007, "loss": "2.09", "ppl": "4.26", "wps": "365054", "ups": "3.08", "wpb": "118711", "bsz": "255.9", "num_updates": "618000", "lr": "0.000385859", "gnorm": "0.703", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "201748"} [2022-08-03 09:31:40,096][train_inner][INFO] - {"epoch": 13, "update": 12.011, "loss": "2.093", "ppl": "4.27", "wps": "363585", "ups": "3.09", "wpb": "117751", "bsz": "256", "num_updates": "618200", "lr": "0.000385657", "gnorm": "0.711", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23.6", "wall": "201812"} [2022-08-03 09:32:45,079][train_inner][INFO] - {"epoch": 13, "update": 12.015, "loss": "2.097", "ppl": "4.28", "wps": "362484", "ups": "3.08", "wpb": "117775", "bsz": "256", "num_updates": "618400", "lr": "0.000385455", "gnorm": "0.711", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.8", "wall": "201877"} [2022-08-03 09:33:49,439][train_inner][INFO] - {"epoch": 13, "update": 12.019, "loss": "2.095", "ppl": "4.27", "wps": "366837", "ups": "3.11", "wpb": "118036", "bsz": "256", "num_updates": "618600", "lr": "0.000385253", "gnorm": "0.711", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.8", "wall": "201942"} [2022-08-03 09:34:55,226][train_inner][INFO] - {"epoch": 13, "update": 12.023, "loss": "2.092", "ppl": "4.26", "wps": "359071", "ups": "3.04", "wpb": "118105", "bsz": "256", "num_updates": "618800", "lr": "0.000385051", "gnorm": "0.708", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "202008"} [2022-08-03 09:36:00,068][train_inner][INFO] - {"epoch": 13, "update": 12.027, "loss": "2.09", "ppl": "4.26", "wps": "364874", "ups": "3.08", "wpb": "118295", "bsz": "256", "num_updates": "619000", "lr": "0.000384848", "gnorm": "0.71", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "24.3", "wall": "202072"} [2022-08-03 09:37:05,182][train_inner][INFO] - {"epoch": 13, "update": 12.031, "loss": "2.093", "ppl": "4.27", "wps": "362746", "ups": "3.07", "wpb": "118098", "bsz": "256", "num_updates": "619200", "lr": "0.000384646", "gnorm": "0.71", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "202138"} [2022-08-03 09:38:10,209][train_inner][INFO] - {"epoch": 13, "update": 12.035, "loss": "2.092", "ppl": "4.26", "wps": "363889", "ups": "3.08", "wpb": "118311", "bsz": "256", "num_updates": "619400", "lr": "0.000384444", "gnorm": "0.705", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.1", "wall": "202203"} [2022-08-03 09:39:14,985][train_inner][INFO] - {"epoch": 13, "update": 12.039, "loss": "2.094", "ppl": "4.27", "wps": "365142", "ups": "3.09", "wpb": "118260", "bsz": "256", "num_updates": "619600", "lr": "0.000384242", "gnorm": "0.709", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.2", "wall": "202267"} [2022-08-03 09:40:19,754][train_inner][INFO] - {"epoch": 13, "update": 12.042, "loss": "2.092", "ppl": "4.26", "wps": "363381", "ups": "3.09", "wpb": "117678", "bsz": "256", "num_updates": "619800", "lr": "0.00038404", "gnorm": "0.71", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.9", "wall": "202332"} [2022-08-03 09:41:25,441][train_inner][INFO] - {"epoch": 13, "update": 12.046, "loss": "2.093", "ppl": "4.27", "wps": "361702", "ups": "3.04", "wpb": "118795", "bsz": "256", "num_updates": "620000", "lr": "0.000383838", "gnorm": "0.708", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.9", "wall": "202398"} [2022-08-03 09:42:30,203][train_inner][INFO] - {"epoch": 13, "update": 12.05, "loss": "2.09", "ppl": "4.26", "wps": "366484", "ups": "3.09", "wpb": "118670", "bsz": "256", "num_updates": "620200", "lr": "0.000383636", "gnorm": "0.708", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22", "wall": "202463"} [2022-08-03 09:43:34,922][train_inner][INFO] - {"epoch": 13, "update": 12.054, "loss": "2.086", "ppl": "4.25", "wps": "367970", "ups": "3.09", "wpb": "119071", "bsz": "256", "num_updates": "620400", "lr": "0.000383434", "gnorm": "0.703", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.1", "wall": "202527"} [2022-08-03 09:44:39,560][train_inner][INFO] - {"epoch": 13, "update": 12.058, "loss": "2.093", "ppl": "4.27", "wps": "366328", "ups": "3.09", "wpb": "118392", "bsz": "256", "num_updates": "620600", "lr": "0.000383232", "gnorm": "0.71", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.9", "wall": "202592"} [2022-08-03 09:45:44,612][train_inner][INFO] - {"epoch": 13, "update": 12.062, "loss": "2.095", "ppl": "4.27", "wps": "364468", "ups": "3.07", "wpb": "118545", "bsz": "256", "num_updates": "620800", "lr": "0.00038303", "gnorm": "0.708", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21", "wall": "202657"} [2022-08-03 09:46:49,524][train_inner][INFO] - {"epoch": 13, "update": 12.066, "loss": "2.094", "ppl": "4.27", "wps": "363160", "ups": "3.08", "wpb": "117865", "bsz": "256", "num_updates": "621000", "lr": "0.000382828", "gnorm": "0.709", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "202722"} [2022-08-03 09:47:54,584][train_inner][INFO] - {"epoch": 13, "update": 12.07, "loss": "2.082", "ppl": "4.23", "wps": "366239", "ups": "3.07", "wpb": "119136", "bsz": "256", "num_updates": "621200", "lr": "0.000382626", "gnorm": "0.708", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.9", "wall": "202787"} [2022-08-03 09:48:59,608][train_inner][INFO] - {"epoch": 13, "update": 12.074, "loss": "2.088", "ppl": "4.25", "wps": "363886", "ups": "3.08", "wpb": "118304", "bsz": "256", "num_updates": "621400", "lr": "0.000382424", "gnorm": "0.709", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24", "wall": "202852"} [2022-08-03 09:50:04,386][train_inner][INFO] - {"epoch": 13, "update": 12.077, "loss": "2.09", "ppl": "4.26", "wps": "367525", "ups": "3.09", "wpb": "119037", "bsz": "256", "num_updates": "621600", "lr": "0.000382222", "gnorm": "0.706", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.8", "wall": "202917"} [2022-08-03 09:51:09,432][train_inner][INFO] - {"epoch": 13, "update": 12.081, "loss": "2.088", "ppl": "4.25", "wps": "364056", "ups": "3.07", "wpb": "118399", "bsz": "256", "num_updates": "621800", "lr": "0.00038202", "gnorm": "0.707", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "22.9", "wall": "202982"} [2022-08-03 09:51:29,902][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-03 09:52:14,859][train_inner][INFO] - {"epoch": 13, "update": 12.085, "loss": "2.093", "ppl": "4.27", "wps": "362982", "ups": "3.06", "wpb": "118743", "bsz": "256", "num_updates": "622000", "lr": "0.000381818", "gnorm": "0.717", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.7", "wall": "203047"} [2022-08-03 09:53:19,592][train_inner][INFO] - {"epoch": 13, "update": 12.089, "loss": "2.093", "ppl": "4.27", "wps": "364441", "ups": "3.09", "wpb": "117955", "bsz": "256", "num_updates": "622200", "lr": "0.000381616", "gnorm": "0.71", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "28.2", "wall": "203112"} [2022-08-03 09:54:24,295][train_inner][INFO] - {"epoch": 13, "update": 12.093, "loss": "2.096", "ppl": "4.27", "wps": "365918", "ups": "3.09", "wpb": "118378", "bsz": "256", "num_updates": "622400", "lr": "0.000381414", "gnorm": "0.708", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.7", "wall": "203177"} [2022-08-03 09:55:29,256][train_inner][INFO] - {"epoch": 13, "update": 12.097, "loss": "2.09", "ppl": "4.26", "wps": "364998", "ups": "3.08", "wpb": "118551", "bsz": "256", "num_updates": "622600", "lr": "0.000381212", "gnorm": "0.707", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.3", "wall": "203242"} [2022-08-03 09:56:34,266][train_inner][INFO] - {"epoch": 13, "update": 12.101, "loss": "2.09", "ppl": "4.26", "wps": "363231", "ups": "3.08", "wpb": "118067", "bsz": "256", "num_updates": "622800", "lr": "0.00038101", "gnorm": "0.709", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.9", "wall": "203307"} [2022-08-03 09:57:39,190][train_inner][INFO] - {"epoch": 13, "update": 12.105, "loss": "2.097", "ppl": "4.28", "wps": "365578", "ups": "3.08", "wpb": "118672", "bsz": "256", "num_updates": "623000", "lr": "0.000380808", "gnorm": "0.708", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.5", "wall": "203372"} [2022-08-03 09:58:44,437][train_inner][INFO] - {"epoch": 13, "update": 12.108, "loss": "2.098", "ppl": "4.28", "wps": "361713", "ups": "3.07", "wpb": "118002", "bsz": "256", "num_updates": "623200", "lr": "0.000380606", "gnorm": "0.712", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22", "wall": "203437"} [2022-08-03 09:59:48,835][train_inner][INFO] - {"epoch": 13, "update": 12.112, "loss": "2.093", "ppl": "4.27", "wps": "364369", "ups": "3.11", "wpb": "117322", "bsz": "256", "num_updates": "623400", "lr": "0.000380404", "gnorm": "0.713", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.9", "wall": "203501"} [2022-08-03 10:00:53,638][train_inner][INFO] - {"epoch": 13, "update": 12.116, "loss": "2.093", "ppl": "4.27", "wps": "363404", "ups": "3.09", "wpb": "117746", "bsz": "256", "num_updates": "623600", "lr": "0.000380202", "gnorm": "0.711", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.8", "wall": "203566"} [2022-08-03 10:01:58,440][train_inner][INFO] - {"epoch": 13, "update": 12.12, "loss": "2.088", "ppl": "4.25", "wps": "366559", "ups": "3.09", "wpb": "118767", "bsz": "256", "num_updates": "623800", "lr": "0.00038", "gnorm": "0.71", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21", "wall": "203631"} [2022-08-03 10:03:03,608][train_inner][INFO] - {"epoch": 13, "update": 12.124, "loss": "2.088", "ppl": "4.25", "wps": "365206", "ups": "3.07", "wpb": "118998", "bsz": "256", "num_updates": "624000", "lr": "0.000379798", "gnorm": "0.708", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "23.2", "wall": "203696"} [2022-08-03 10:03:48,389][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-03 10:04:08,906][train_inner][INFO] - {"epoch": 13, "update": 12.128, "loss": "2.086", "ppl": "4.25", "wps": "363279", "ups": "3.06", "wpb": "118604", "bsz": "256", "num_updates": "624200", "lr": "0.000379596", "gnorm": "0.708", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.5", "wall": "203761"} [2022-08-03 10:05:13,499][train_inner][INFO] - {"epoch": 13, "update": 12.132, "loss": "2.092", "ppl": "4.26", "wps": "365163", "ups": "3.1", "wpb": "117935", "bsz": "256", "num_updates": "624400", "lr": "0.000379394", "gnorm": "0.715", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.7", "wall": "203826"} [2022-08-03 10:06:18,624][train_inner][INFO] - {"epoch": 13, "update": 12.136, "loss": "2.095", "ppl": "4.27", "wps": "362003", "ups": "3.07", "wpb": "117875", "bsz": "256", "num_updates": "624600", "lr": "0.000379192", "gnorm": "0.712", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.6", "wall": "203891"} [2022-08-03 10:06:48,476][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-03 10:07:23,448][train_inner][INFO] - {"epoch": 13, "update": 12.14, "loss": "2.096", "ppl": "4.28", "wps": "363184", "ups": "3.09", "wpb": "117714", "bsz": "256", "num_updates": "624800", "lr": "0.00037899", "gnorm": "0.724", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.9", "wall": "203956"} [2022-08-03 10:08:28,348][train_inner][INFO] - {"epoch": 13, "update": 12.143, "loss": "2.095", "ppl": "4.27", "wps": "363746", "ups": "3.08", "wpb": "118035", "bsz": "256", "num_updates": "625000", "lr": "0.000378788", "gnorm": "0.722", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.2", "wall": "204021"} [2022-08-03 10:09:33,573][train_inner][INFO] - {"epoch": 13, "update": 12.147, "loss": "2.088", "ppl": "4.25", "wps": "363962", "ups": "3.07", "wpb": "118694", "bsz": "256", "num_updates": "625200", "lr": "0.000378586", "gnorm": "0.708", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "28.2", "wall": "204086"} [2022-08-03 10:10:38,418][train_inner][INFO] - {"epoch": 13, "update": 12.151, "loss": "2.089", "ppl": "4.25", "wps": "366469", "ups": "3.08", "wpb": "118817", "bsz": "256", "num_updates": "625400", "lr": "0.000378384", "gnorm": "0.708", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.9", "wall": "204151"} [2022-08-03 10:11:43,407][train_inner][INFO] - {"epoch": 13, "update": 12.155, "loss": "2.088", "ppl": "4.25", "wps": "364801", "ups": "3.08", "wpb": "118538", "bsz": "256", "num_updates": "625600", "lr": "0.000378182", "gnorm": "0.713", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.8", "wall": "204216"} [2022-08-03 10:12:48,240][train_inner][INFO] - {"epoch": 13, "update": 12.159, "loss": "2.092", "ppl": "4.26", "wps": "365574", "ups": "3.08", "wpb": "118505", "bsz": "256", "num_updates": "625800", "lr": "0.00037798", "gnorm": "0.713", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.1", "wall": "204281"} [2022-08-03 10:13:53,092][train_inner][INFO] - {"epoch": 13, "update": 12.163, "loss": "2.088", "ppl": "4.25", "wps": "365273", "ups": "3.08", "wpb": "118442", "bsz": "256", "num_updates": "626000", "lr": "0.000377778", "gnorm": "0.71", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.3", "wall": "204345"} [2022-08-03 10:14:58,002][train_inner][INFO] - {"epoch": 13, "update": 12.167, "loss": "2.088", "ppl": "4.25", "wps": "364504", "ups": "3.08", "wpb": "118298", "bsz": "256", "num_updates": "626200", "lr": "0.000377576", "gnorm": "0.713", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "204410"} [2022-08-03 10:16:02,334][train_inner][INFO] - {"epoch": 13, "update": 12.171, "loss": "2.096", "ppl": "4.27", "wps": "365665", "ups": "3.11", "wpb": "117617", "bsz": "256", "num_updates": "626400", "lr": "0.000377374", "gnorm": "0.721", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.1", "wall": "204475"} [2022-08-03 10:17:07,187][train_inner][INFO] - {"epoch": 13, "update": 12.175, "loss": "2.092", "ppl": "4.26", "wps": "363541", "ups": "3.08", "wpb": "117883", "bsz": "256", "num_updates": "626600", "lr": "0.000377172", "gnorm": "0.712", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.5", "wall": "204540"} [2022-08-03 10:18:12,273][train_inner][INFO] - {"epoch": 13, "update": 12.178, "loss": "2.094", "ppl": "4.27", "wps": "362559", "ups": "3.07", "wpb": "117985", "bsz": "256", "num_updates": "626800", "lr": "0.00037697", "gnorm": "0.715", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.1", "wall": "204605"} [2022-08-03 10:19:17,538][train_inner][INFO] - {"epoch": 13, "update": 12.182, "loss": "2.086", "ppl": "4.25", "wps": "362576", "ups": "3.06", "wpb": "118316", "bsz": "256", "num_updates": "627000", "lr": "0.000376768", "gnorm": "0.712", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "204670"} [2022-08-03 10:20:22,651][train_inner][INFO] - {"epoch": 13, "update": 12.186, "loss": "2.086", "ppl": "4.25", "wps": "363057", "ups": "3.07", "wpb": "118196", "bsz": "256", "num_updates": "627200", "lr": "0.000376566", "gnorm": "0.714", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.9", "wall": "204735"} [2022-08-03 10:21:27,529][train_inner][INFO] - {"epoch": 13, "update": 12.19, "loss": "2.086", "ppl": "4.25", "wps": "364355", "ups": "3.08", "wpb": "118191", "bsz": "256", "num_updates": "627400", "lr": "0.000376364", "gnorm": "0.712", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.6", "wall": "204800"} [2022-08-03 10:22:31,965][train_inner][INFO] - {"epoch": 13, "update": 12.194, "loss": "2.09", "ppl": "4.26", "wps": "364896", "ups": "3.1", "wpb": "117561", "bsz": "256", "num_updates": "627600", "lr": "0.000376162", "gnorm": "0.712", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.7", "wall": "204864"} [2022-08-03 10:23:36,884][train_inner][INFO] - {"epoch": 13, "update": 12.198, "loss": "2.091", "ppl": "4.26", "wps": "365437", "ups": "3.08", "wpb": "118617", "bsz": "256", "num_updates": "627800", "lr": "0.00037596", "gnorm": "0.712", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "204929"} [2022-08-03 10:24:41,885][train_inner][INFO] - {"epoch": 13, "update": 12.202, "loss": "2.089", "ppl": "4.26", "wps": "362104", "ups": "3.08", "wpb": "117683", "bsz": "256", "num_updates": "628000", "lr": "0.000375758", "gnorm": "0.714", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.9", "wall": "204994"} [2022-08-03 10:25:06,187][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-03 10:25:47,126][train_inner][INFO] - {"epoch": 13, "update": 12.206, "loss": "2.09", "ppl": "4.26", "wps": "363467", "ups": "3.07", "wpb": "118563", "bsz": "256", "num_updates": "628200", "lr": "0.000375556", "gnorm": "0.729", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.8", "wall": "205059"} [2022-08-03 10:26:52,176][train_inner][INFO] - {"epoch": 13, "update": 12.21, "loss": "2.091", "ppl": "4.26", "wps": "362082", "ups": "3.07", "wpb": "117766", "bsz": "256", "num_updates": "628400", "lr": "0.000375354", "gnorm": "0.715", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.4", "wall": "205124"} [2022-08-03 10:27:57,141][train_inner][INFO] - {"epoch": 13, "update": 12.213, "loss": "2.086", "ppl": "4.25", "wps": "364525", "ups": "3.08", "wpb": "118404", "bsz": "256", "num_updates": "628600", "lr": "0.000375152", "gnorm": "0.713", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "27.3", "wall": "205189"} [2022-08-03 10:29:01,846][train_inner][INFO] - {"epoch": 13, "update": 12.217, "loss": "2.083", "ppl": "4.24", "wps": "366521", "ups": "3.09", "wpb": "118577", "bsz": "256", "num_updates": "628800", "lr": "0.000374949", "gnorm": "0.711", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.6", "wall": "205254"} [2022-08-03 10:30:06,752][train_inner][INFO] - {"epoch": 13, "update": 12.221, "loss": "2.09", "ppl": "4.26", "wps": "364280", "ups": "3.08", "wpb": "118218", "bsz": "256", "num_updates": "629000", "lr": "0.000374747", "gnorm": "0.712", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.8", "wall": "205319"} [2022-08-03 10:31:11,605][train_inner][INFO] - {"epoch": 13, "update": 12.225, "loss": "2.092", "ppl": "4.26", "wps": "364404", "ups": "3.08", "wpb": "118162", "bsz": "256", "num_updates": "629200", "lr": "0.000374545", "gnorm": "0.712", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.2", "wall": "205384"} [2022-08-03 10:32:16,158][train_inner][INFO] - {"epoch": 13, "update": 12.229, "loss": "2.085", "ppl": "4.24", "wps": "366174", "ups": "3.1", "wpb": "118186", "bsz": "256", "num_updates": "629400", "lr": "0.000374343", "gnorm": "0.712", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.2", "wall": "205448"} [2022-08-03 10:33:20,826][train_inner][INFO] - {"epoch": 13, "update": 12.233, "loss": "2.089", "ppl": "4.25", "wps": "365661", "ups": "3.09", "wpb": "118232", "bsz": "256", "num_updates": "629600", "lr": "0.000374141", "gnorm": "0.712", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.7", "wall": "205513"} [2022-08-03 10:34:25,295][train_inner][INFO] - {"epoch": 13, "update": 12.237, "loss": "2.089", "ppl": "4.25", "wps": "365332", "ups": "3.1", "wpb": "117762", "bsz": "256", "num_updates": "629800", "lr": "0.000373939", "gnorm": "0.715", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.2", "wall": "205578"} [2022-08-03 10:35:30,487][train_inner][INFO] - {"epoch": 13, "update": 12.241, "loss": "2.088", "ppl": "4.25", "wps": "362544", "ups": "3.07", "wpb": "118173", "bsz": "256", "num_updates": "630000", "lr": "0.000373737", "gnorm": "0.714", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.1", "wall": "205643"} [2022-08-03 10:36:35,327][train_inner][INFO] - {"epoch": 13, "update": 12.245, "loss": "2.09", "ppl": "4.26", "wps": "364775", "ups": "3.08", "wpb": "118259", "bsz": "256", "num_updates": "630200", "lr": "0.000373535", "gnorm": "0.713", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.9", "wall": "205708"} [2022-08-03 10:37:39,931][train_inner][INFO] - {"epoch": 13, "update": 12.248, "loss": "2.086", "ppl": "4.25", "wps": "367468", "ups": "3.1", "wpb": "118697", "bsz": "256", "num_updates": "630400", "lr": "0.000373333", "gnorm": "0.711", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.5", "wall": "205772"} [2022-08-03 10:38:45,730][train_inner][INFO] - {"epoch": 13, "update": 12.252, "loss": "2.086", "ppl": "4.25", "wps": "360409", "ups": "3.04", "wpb": "118571", "bsz": "256", "num_updates": "630600", "lr": "0.000373131", "gnorm": "0.709", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21", "wall": "205838"} [2022-08-03 10:39:50,720][train_inner][INFO] - {"epoch": 13, "update": 12.256, "loss": "2.085", "ppl": "4.24", "wps": "365050", "ups": "3.08", "wpb": "118621", "bsz": "256", "num_updates": "630800", "lr": "0.000372929", "gnorm": "0.713", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "205903"} [2022-08-03 10:40:55,624][train_inner][INFO] - {"epoch": 13, "update": 12.26, "loss": "2.088", "ppl": "4.25", "wps": "364948", "ups": "3.08", "wpb": "118432", "bsz": "256", "num_updates": "631000", "lr": "0.000372727", "gnorm": "0.713", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.5", "wall": "205968"} [2022-08-03 10:42:00,425][train_inner][INFO] - {"epoch": 13, "update": 12.264, "loss": "2.09", "ppl": "4.26", "wps": "364630", "ups": "3.09", "wpb": "118140", "bsz": "256", "num_updates": "631200", "lr": "0.000372525", "gnorm": "0.715", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.5", "wall": "206033"} [2022-08-03 10:43:05,263][train_inner][INFO] - {"epoch": 13, "update": 12.268, "loss": "2.082", "ppl": "4.23", "wps": "366024", "ups": "3.08", "wpb": "118659", "bsz": "256", "num_updates": "631400", "lr": "0.000372323", "gnorm": "0.711", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.6", "wall": "206098"} [2022-08-03 10:44:11,186][train_inner][INFO] - {"epoch": 13, "update": 12.272, "loss": "2.094", "ppl": "4.27", "wps": "357360", "ups": "3.03", "wpb": "117790", "bsz": "256", "num_updates": "631600", "lr": "0.000372121", "gnorm": "0.717", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "24.6", "wall": "206164"} [2022-08-03 10:45:15,683][train_inner][INFO] - {"epoch": 13, "update": 12.276, "loss": "2.087", "ppl": "4.25", "wps": "367680", "ups": "3.1", "wpb": "118569", "bsz": "256", "num_updates": "631800", "lr": "0.000371919", "gnorm": "0.72", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.1", "wall": "206228"} [2022-08-03 10:46:20,600][train_inner][INFO] - {"epoch": 13, "update": 12.279, "loss": "2.085", "ppl": "4.24", "wps": "364806", "ups": "3.08", "wpb": "118409", "bsz": "256", "num_updates": "632000", "lr": "0.000371717", "gnorm": "0.715", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.3", "wall": "206293"} [2022-08-03 10:47:25,339][train_inner][INFO] - {"epoch": 13, "update": 12.283, "loss": "2.091", "ppl": "4.26", "wps": "366259", "ups": "3.09", "wpb": "118554", "bsz": "256", "num_updates": "632200", "lr": "0.000371515", "gnorm": "0.715", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20.3", "wall": "206358"} [2022-08-03 10:48:30,206][train_inner][INFO] - {"epoch": 13, "update": 12.287, "loss": "2.089", "ppl": "4.26", "wps": "364424", "ups": "3.08", "wpb": "118192", "bsz": "256", "num_updates": "632400", "lr": "0.000371313", "gnorm": "0.713", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "24.3", "wall": "206423"} [2022-08-03 10:48:50,066][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-03 10:49:35,496][train_inner][INFO] - {"epoch": 13, "update": 12.291, "loss": "2.09", "ppl": "4.26", "wps": "362543", "ups": "3.06", "wpb": "118350", "bsz": "256", "num_updates": "632600", "lr": "0.000371111", "gnorm": "0.714", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.5", "wall": "206488"} [2022-08-03 10:50:40,069][train_inner][INFO] - {"epoch": 13, "update": 12.295, "loss": "2.094", "ppl": "4.27", "wps": "365000", "ups": "3.1", "wpb": "117845", "bsz": "256", "num_updates": "632800", "lr": "0.000370909", "gnorm": "0.722", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.5", "wall": "206552"} [2022-08-03 10:51:44,983][train_inner][INFO] - {"epoch": 13, "update": 12.299, "loss": "2.084", "ppl": "4.24", "wps": "366424", "ups": "3.08", "wpb": "118929", "bsz": "256", "num_updates": "633000", "lr": "0.000370707", "gnorm": "0.716", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.8", "wall": "206617"} [2022-08-03 10:52:49,868][train_inner][INFO] - {"epoch": 13, "update": 12.303, "loss": "2.088", "ppl": "4.25", "wps": "364874", "ups": "3.08", "wpb": "118372", "bsz": "256", "num_updates": "633200", "lr": "0.000370505", "gnorm": "0.715", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.3", "wall": "206682"} [2022-08-03 10:53:54,774][train_inner][INFO] - {"epoch": 13, "update": 12.307, "loss": "2.087", "ppl": "4.25", "wps": "366270", "ups": "3.08", "wpb": "118864", "bsz": "256", "num_updates": "633400", "lr": "0.000370303", "gnorm": "0.71", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "27.5", "wall": "206747"} [2022-08-03 10:54:59,625][train_inner][INFO] - {"epoch": 13, "update": 12.311, "loss": "2.083", "ppl": "4.24", "wps": "364209", "ups": "3.08", "wpb": "118095", "bsz": "256", "num_updates": "633600", "lr": "0.000370101", "gnorm": "0.714", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.5", "wall": "206812"} [2022-08-03 10:55:16,125][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-03 10:56:04,823][train_inner][INFO] - {"epoch": 13, "update": 12.314, "loss": "2.092", "ppl": "4.26", "wps": "362096", "ups": "3.07", "wpb": "118038", "bsz": "256", "num_updates": "633800", "lr": "0.000369899", "gnorm": "0.72", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.2", "wall": "206877"} [2022-08-03 10:57:09,768][train_inner][INFO] - {"epoch": 13, "update": 12.318, "loss": "2.083", "ppl": "4.24", "wps": "364225", "ups": "3.08", "wpb": "118272", "bsz": "256", "num_updates": "634000", "lr": "0.000369697", "gnorm": "0.715", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "206942"} [2022-08-03 10:58:14,557][train_inner][INFO] - {"epoch": 13, "update": 12.322, "loss": "2.085", "ppl": "4.24", "wps": "364303", "ups": "3.09", "wpb": "118012", "bsz": "256", "num_updates": "634200", "lr": "0.000369495", "gnorm": "0.722", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.3", "wall": "207007"} [2022-08-03 10:59:19,938][train_inner][INFO] - {"epoch": 13, "update": 12.326, "loss": "2.086", "ppl": "4.25", "wps": "362323", "ups": "3.06", "wpb": "118444", "bsz": "256", "num_updates": "634400", "lr": "0.000369293", "gnorm": "0.714", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.5", "wall": "207072"} [2022-08-03 11:00:24,498][train_inner][INFO] - {"epoch": 13, "update": 12.33, "loss": "2.088", "ppl": "4.25", "wps": "364955", "ups": "3.1", "wpb": "117805", "bsz": "256", "num_updates": "634600", "lr": "0.000369091", "gnorm": "0.718", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.2", "wall": "207137"} [2022-08-03 11:01:29,284][train_inner][INFO] - {"epoch": 13, "update": 12.334, "loss": "2.088", "ppl": "4.25", "wps": "364996", "ups": "3.09", "wpb": "118231", "bsz": "256", "num_updates": "634800", "lr": "0.000368889", "gnorm": "0.713", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.8", "wall": "207202"} [2022-08-03 11:02:24,784][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 [2022-08-03 11:02:30,546][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 [2022-08-03 11:02:34,715][train_inner][INFO] - {"epoch": 13, "update": 12.338, "loss": "2.083", "ppl": "4.24", "wps": "361886", "ups": "3.06", "wpb": "118390", "bsz": "256", "num_updates": "635000", "lr": "0.000368687", "gnorm": "0.722", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "22.4", "wall": "207267"} [2022-08-03 11:03:39,827][train_inner][INFO] - {"epoch": 13, "update": 12.342, "loss": "2.092", "ppl": "4.26", "wps": "363276", "ups": "3.07", "wpb": "118267", "bsz": "256", "num_updates": "635200", "lr": "0.000368485", "gnorm": "0.716", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.8", "wall": "207332"} [2022-08-03 11:04:44,811][train_inner][INFO] - {"epoch": 13, "update": 12.346, "loss": "2.078", "ppl": "4.22", "wps": "365300", "ups": "3.08", "wpb": "118692", "bsz": "256", "num_updates": "635400", "lr": "0.000368283", "gnorm": "0.715", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.4", "wall": "207397"} [2022-08-03 11:05:50,070][train_inner][INFO] - {"epoch": 13, "update": 12.349, "loss": "2.083", "ppl": "4.24", "wps": "363660", "ups": "3.06", "wpb": "118660", "bsz": "256", "num_updates": "635600", "lr": "0.000368081", "gnorm": "0.712", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "22.9", "wall": "207462"} [2022-08-03 11:06:55,079][train_inner][INFO] - {"epoch": 13, "update": 12.353, "loss": "2.088", "ppl": "4.25", "wps": "364926", "ups": "3.08", "wpb": "118615", "bsz": "256", "num_updates": "635800", "lr": "0.000367879", "gnorm": "0.714", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.1", "wall": "207527"} [2022-08-03 11:07:59,747][train_inner][INFO] - {"epoch": 13, "update": 12.357, "loss": "2.088", "ppl": "4.25", "wps": "363864", "ups": "3.09", "wpb": "117651", "bsz": "256", "num_updates": "636000", "lr": "0.000367677", "gnorm": "0.717", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "22.5", "wall": "207592"} [2022-08-03 11:09:04,364][train_inner][INFO] - {"epoch": 13, "update": 12.361, "loss": "2.085", "ppl": "4.24", "wps": "366241", "ups": "3.1", "wpb": "118324", "bsz": "256", "num_updates": "636200", "lr": "0.000367475", "gnorm": "0.715", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "22.9", "wall": "207657"} [2022-08-03 11:10:09,080][train_inner][INFO] - {"epoch": 13, "update": 12.365, "loss": "2.078", "ppl": "4.22", "wps": "366153", "ups": "3.09", "wpb": "118478", "bsz": "256", "num_updates": "636400", "lr": "0.000367273", "gnorm": "0.713", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.7", "wall": "207721"} [2022-08-03 11:11:13,938][train_inner][INFO] - {"epoch": 13, "update": 12.369, "loss": "2.084", "ppl": "4.24", "wps": "364709", "ups": "3.08", "wpb": "118270", "bsz": "256", "num_updates": "636600", "lr": "0.000367071", "gnorm": "0.716", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.6", "wall": "207786"} [2022-08-03 11:12:18,651][train_inner][INFO] - {"epoch": 13, "update": 12.373, "loss": "2.085", "ppl": "4.24", "wps": "366229", "ups": "3.09", "wpb": "118498", "bsz": "256", "num_updates": "636800", "lr": "0.000366869", "gnorm": "0.712", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "21.7", "wall": "207851"} [2022-08-03 11:13:23,496][train_inner][INFO] - {"epoch": 13, "update": 12.377, "loss": "2.084", "ppl": "4.24", "wps": "362947", "ups": "3.08", "wpb": "117674", "bsz": "256", "num_updates": "637000", "lr": "0.000366667", "gnorm": "0.718", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "26.2", "wall": "207916"} [2022-08-03 11:14:28,261][train_inner][INFO] - {"epoch": 13, "update": 12.381, "loss": "2.087", "ppl": "4.25", "wps": "364311", "ups": "3.09", "wpb": "117971", "bsz": "256", "num_updates": "637200", "lr": "0.000366465", "gnorm": "0.718", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "25.7", "wall": "207981"} [2022-08-03 11:15:32,901][train_inner][INFO] - {"epoch": 13, "update": 12.384, "loss": "2.08", "ppl": "4.23", "wps": "365980", "ups": "3.09", "wpb": "118283", "bsz": "256", "num_updates": "637400", "lr": "0.000366263", "gnorm": "0.716", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.4", "wall": "208045"} [2022-08-03 11:16:37,878][train_inner][INFO] - {"epoch": 13, "update": 12.388, "loss": "2.085", "ppl": "4.24", "wps": "363932", "ups": "3.08", "wpb": "118235", "bsz": "256", "num_updates": "637600", "lr": "0.000366061", "gnorm": "0.715", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.2", "wall": "208110"} [2022-08-03 11:17:42,743][train_inner][INFO] - {"epoch": 13, "update": 12.392, "loss": "2.079", "ppl": "4.22", "wps": "365150", "ups": "3.08", "wpb": "118426", "bsz": "256", "num_updates": "637800", "lr": "0.000365859", "gnorm": "0.712", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "24.9", "wall": "208175"} [2022-08-03 11:18:47,878][train_inner][INFO] - {"epoch": 13, "update": 12.396, "loss": "2.076", "ppl": "4.22", "wps": "365320", "ups": "3.07", "wpb": "118974", "bsz": "256", "num_updates": "638000", "lr": "0.000365657", "gnorm": "0.712", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.8", "wall": "208240"} [2022-08-03 11:19:52,831][train_inner][INFO] - {"epoch": 13, "update": 12.4, "loss": "2.083", "ppl": "4.24", "wps": "366061", "ups": "3.08", "wpb": "118881", "bsz": "256", "num_updates": "638200", "lr": "0.000365455", "gnorm": "0.716", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "24.4", "wall": "208305"} [2022-08-03 11:20:58,635][train_inner][INFO] - {"epoch": 13, "update": 12.404, "loss": "2.088", "ppl": "4.25", "wps": "358710", "ups": "3.04", "wpb": "118021", "bsz": "256", "num_updates": "638400", "lr": "0.000365253", "gnorm": "0.716", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.3", "wall": "208371"} [2022-08-03 11:22:03,587][train_inner][INFO] - {"epoch": 13, "update": 12.408, "loss": "2.085", "ppl": "4.24", "wps": "365259", "ups": "3.08", "wpb": "118620", "bsz": "256", "num_updates": "638600", "lr": "0.000365051", "gnorm": "0.717", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.1", "wall": "208436"} [2022-08-03 11:23:08,536][train_inner][INFO] - {"epoch": 13, "update": 12.412, "loss": "2.084", "ppl": "4.24", "wps": "364149", "ups": "3.08", "wpb": "118253", "bsz": "256", "num_updates": "638800", "lr": "0.000364848", "gnorm": "0.716", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.8", "wall": "208501"} [2022-08-03 11:24:13,333][train_inner][INFO] - {"epoch": 13, "update": 12.415, "loss": "2.083", "ppl": "4.24", "wps": "364303", "ups": "3.09", "wpb": "118028", "bsz": "256", "num_updates": "639000", "lr": "0.000364646", "gnorm": "0.716", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "21.7", "wall": "208566"} [2022-08-03 11:25:18,533][train_inner][INFO] - {"epoch": 13, "update": 12.419, "loss": "2.081", "ppl": "4.23", "wps": "364823", "ups": "3.07", "wpb": "118931", "bsz": "256", "num_updates": "639200", "lr": "0.000364444", "gnorm": "0.713", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "208631"} [2022-08-03 11:26:23,214][train_inner][INFO] - {"epoch": 13, "update": 12.423, "loss": "2.084", "ppl": "4.24", "wps": "364291", "ups": "3.09", "wpb": "117811", "bsz": "256", "num_updates": "639400", "lr": "0.000364242", "gnorm": "0.72", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20", "wall": "208696"} [2022-08-03 11:26:39,799][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 [2022-08-03 11:27:28,411][train_inner][INFO] - {"epoch": 13, "update": 12.427, "loss": "2.085", "ppl": "4.24", "wps": "364450", "ups": "3.07", "wpb": "118804", "bsz": "256", "num_updates": "639600", "lr": "0.00036404", "gnorm": "0.718", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.3", "wall": "208761"} [2022-08-03 11:28:32,970][train_inner][INFO] - {"epoch": 13, "update": 12.431, "loss": "2.082", "ppl": "4.24", "wps": "365724", "ups": "3.1", "wpb": "118054", "bsz": "256", "num_updates": "639800", "lr": "0.000363838", "gnorm": "0.716", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22.6", "wall": "208825"} [2022-08-03 11:29:37,635][train_inner][INFO] - {"epoch": 13, "update": 12.435, "loss": "2.084", "ppl": "4.24", "wps": "366958", "ups": "3.09", "wpb": "118643", "bsz": "256", "num_updates": "640000", "lr": "0.000363636", "gnorm": "0.716", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "23.2", "wall": "208890"} [2022-08-03 11:30:42,678][train_inner][INFO] - {"epoch": 13, "update": 12.439, "loss": "2.082", "ppl": "4.23", "wps": "361278", "ups": "3.07", "wpb": "117491", "bsz": "256", "num_updates": "640200", "lr": "0.000363434", "gnorm": "0.727", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.8", "wall": "208955"} [2022-08-03 11:31:47,360][train_inner][INFO] - {"epoch": 13, "update": 12.443, "loss": "2.087", "ppl": "4.25", "wps": "365097", "ups": "3.09", "wpb": "118074", "bsz": "256", "num_updates": "640400", "lr": "0.000363232", "gnorm": "0.716", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "26.6", "wall": "209020"} [2022-08-03 11:32:52,286][train_inner][INFO] - {"epoch": 13, "update": 12.447, "loss": "2.088", "ppl": "4.25", "wps": "362506", "ups": "3.08", "wpb": "117679", "bsz": "256", "num_updates": "640600", "lr": "0.00036303", "gnorm": "0.722", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.4", "wall": "209085"} [2022-08-03 11:33:58,196][train_inner][INFO] - {"epoch": 13, "update": 12.45, "loss": "2.084", "ppl": "4.24", "wps": "359139", "ups": "3.03", "wpb": "118352", "bsz": "256", "num_updates": "640800", "lr": "0.000362828", "gnorm": "0.717", "clip": "0", "loss_scale": "4", "train_wall": "66", "gb_free": "20.2", "wall": "209151"} [2022-08-03 11:35:03,005][train_inner][INFO] - {"epoch": 13, "update": 12.454, "loss": "2.079", "ppl": "4.22", "wps": "366461", "ups": "3.09", "wpb": "118749", "bsz": "256", "num_updates": "641000", "lr": "0.000362626", "gnorm": "0.716", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.8", "wall": "209215"} [2022-08-03 11:36:08,005][train_inner][INFO] - {"epoch": 13, "update": 12.458, "loss": "2.088", "ppl": "4.25", "wps": "363188", "ups": "3.08", "wpb": "118035", "bsz": "256", "num_updates": "641200", "lr": "0.000362424", "gnorm": "0.719", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.6", "wall": "209280"} [2022-08-03 11:37:14,251][train_inner][INFO] - {"epoch": 13, "update": 12.462, "loss": "2.083", "ppl": "4.24", "wps": "356935", "ups": "3.02", "wpb": "118225", "bsz": "256", "num_updates": "641400", "lr": "0.000362222", "gnorm": "0.722", "clip": "0", "loss_scale": "4", "train_wall": "66", "gb_free": "20.1", "wall": "209347"} [2022-08-03 11:38:19,130][train_inner][INFO] - {"epoch": 13, "update": 12.466, "loss": "2.084", "ppl": "4.24", "wps": "363794", "ups": "3.08", "wpb": "118012", "bsz": "256", "num_updates": "641600", "lr": "0.00036202", "gnorm": "0.719", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "209411"} [2022-08-03 11:39:23,739][train_inner][INFO] - {"epoch": 13, "update": 12.47, "loss": "2.083", "ppl": "4.24", "wps": "364909", "ups": "3.1", "wpb": "117879", "bsz": "256", "num_updates": "641800", "lr": "0.000361818", "gnorm": "0.72", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.7", "wall": "209476"} [2022-08-03 11:40:28,788][train_inner][INFO] - {"epoch": 13, "update": 12.474, "loss": "2.086", "ppl": "4.25", "wps": "362638", "ups": "3.07", "wpb": "117945", "bsz": "256", "num_updates": "642000", "lr": "0.000361616", "gnorm": "0.72", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "209541"} [2022-08-03 11:41:33,823][train_inner][INFO] - {"epoch": 13, "update": 12.478, "loss": "2.078", "ppl": "4.22", "wps": "362921", "ups": "3.08", "wpb": "118010", "bsz": "256", "num_updates": "642200", "lr": "0.000361414", "gnorm": "0.718", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.8", "wall": "209606"} [2022-08-03 11:42:38,891][train_inner][INFO] - {"epoch": 13, "update": 12.482, "loss": "2.08", "ppl": "4.23", "wps": "362871", "ups": "3.07", "wpb": "118054", "bsz": "256", "num_updates": "642400", "lr": "0.000361212", "gnorm": "0.72", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "209671"} [2022-08-03 11:43:43,518][train_inner][INFO] - {"epoch": 13, "update": 12.485, "loss": "2.079", "ppl": "4.23", "wps": "366183", "ups": "3.09", "wpb": "118316", "bsz": "256", "num_updates": "642600", "lr": "0.00036101", "gnorm": "0.72", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.7", "wall": "209736"} [2022-08-03 11:44:48,427][train_inner][INFO] - {"epoch": 13, "update": 12.489, "loss": "2.077", "ppl": "4.22", "wps": "365779", "ups": "3.08", "wpb": "118706", "bsz": "256", "num_updates": "642800", "lr": "0.000360808", "gnorm": "0.716", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "209801"} [2022-08-03 11:45:53,440][train_inner][INFO] - {"epoch": 13, "update": 12.493, "loss": "2.077", "ppl": "4.22", "wps": "363416", "ups": "3.08", "wpb": "118132", "bsz": "256", "num_updates": "643000", "lr": "0.000360606", "gnorm": "0.717", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.2", "wall": "209866"} [2022-08-03 11:46:58,341][train_inner][INFO] - {"epoch": 13, "update": 12.497, "loss": "2.08", "ppl": "4.23", "wps": "366599", "ups": "3.08", "wpb": "118962", "bsz": "256", "num_updates": "643200", "lr": "0.000360404", "gnorm": "0.716", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "209931"} [2022-08-03 11:48:03,290][train_inner][INFO] - {"epoch": 13, "update": 12.501, "loss": "2.083", "ppl": "4.24", "wps": "363586", "ups": "3.08", "wpb": "118070", "bsz": "256", "num_updates": "643400", "lr": "0.000360202", "gnorm": "0.721", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "28.2", "wall": "209996"} [2022-08-03 11:49:08,264][train_inner][INFO] - {"epoch": 13, "update": 12.505, "loss": "2.08", "ppl": "4.23", "wps": "363992", "ups": "3.08", "wpb": "118249", "bsz": "256", "num_updates": "643600", "lr": "0.00036", "gnorm": "0.718", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "210061"} [2022-08-03 11:50:13,197][train_inner][INFO] - {"epoch": 13, "update": 12.509, "loss": "2.077", "ppl": "4.22", "wps": "363669", "ups": "3.08", "wpb": "118068", "bsz": "256", "num_updates": "643800", "lr": "0.000359798", "gnorm": "0.722", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.3", "wall": "210126"} [2022-08-03 11:51:18,432][train_inner][INFO] - {"epoch": 13, "update": 12.513, "loss": "2.078", "ppl": "4.22", "wps": "363826", "ups": "3.07", "wpb": "118669", "bsz": "256", "num_updates": "644000", "lr": "0.000359596", "gnorm": "0.717", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.4", "wall": "210191"} [2022-08-03 11:52:22,967][train_inner][INFO] - {"epoch": 13, "update": 12.517, "loss": "2.082", "ppl": "4.23", "wps": "368378", "ups": "3.1", "wpb": "118865", "bsz": "256", "num_updates": "644200", "lr": "0.000359394", "gnorm": "0.713", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.5", "wall": "210255"} [2022-08-03 11:53:27,960][train_inner][INFO] - {"epoch": 13, "update": 12.52, "loss": "2.083", "ppl": "4.24", "wps": "364246", "ups": "3.08", "wpb": "118365", "bsz": "256", "num_updates": "644400", "lr": "0.000359192", "gnorm": "0.716", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.4", "wall": "210320"} [2022-08-03 11:54:32,881][train_inner][INFO] - {"epoch": 13, "update": 12.524, "loss": "2.081", "ppl": "4.23", "wps": "363846", "ups": "3.08", "wpb": "118105", "bsz": "256", "num_updates": "644600", "lr": "0.00035899", "gnorm": "0.719", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.5", "wall": "210385"} [2022-08-03 11:55:37,565][train_inner][INFO] - {"epoch": 13, "update": 12.528, "loss": "2.079", "ppl": "4.22", "wps": "365990", "ups": "3.09", "wpb": "118367", "bsz": "256", "num_updates": "644800", "lr": "0.000358788", "gnorm": "0.719", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "28.2", "wall": "210450"} [2022-08-03 11:56:43,368][train_inner][INFO] - {"epoch": 13, "update": 12.532, "loss": "2.08", "ppl": "4.23", "wps": "359234", "ups": "3.04", "wpb": "118191", "bsz": "256", "num_updates": "645000", "lr": "0.000358586", "gnorm": "0.721", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "210516"} [2022-08-03 11:57:48,157][train_inner][INFO] - {"epoch": 13, "update": 12.536, "loss": "2.08", "ppl": "4.23", "wps": "364923", "ups": "3.09", "wpb": "118213", "bsz": "256", "num_updates": "645200", "lr": "0.000358384", "gnorm": "0.718", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20", "wall": "210580"} [2022-08-03 11:58:53,298][train_inner][INFO] - {"epoch": 13, "update": 12.54, "loss": "2.078", "ppl": "4.22", "wps": "362277", "ups": "3.07", "wpb": "117995", "bsz": "256", "num_updates": "645400", "lr": "0.000358182", "gnorm": "0.719", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.9", "wall": "210646"} [2022-08-03 11:59:57,927][train_inner][INFO] - {"epoch": 13, "update": 12.544, "loss": "2.088", "ppl": "4.25", "wps": "364425", "ups": "3.09", "wpb": "117759", "bsz": "256", "num_updates": "645600", "lr": "0.00035798", "gnorm": "0.723", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "19.9", "wall": "210710"} [2022-08-03 12:01:02,730][train_inner][INFO] - {"epoch": 13, "update": 12.548, "loss": "2.078", "ppl": "4.22", "wps": "364581", "ups": "3.09", "wpb": "118128", "bsz": "256", "num_updates": "645800", "lr": "0.000357778", "gnorm": "0.724", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "20.3", "wall": "210775"} [2022-08-03 12:01:16,701][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-03 12:02:08,320][train_inner][INFO] - {"epoch": 13, "update": 12.551, "loss": "2.072", "ppl": "4.21", "wps": "361554", "ups": "3.05", "wpb": "118571", "bsz": "256", "num_updates": "646000", "lr": "0.000357576", "gnorm": "0.715", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.4", "wall": "210841"} [2022-08-03 12:03:12,628][train_inner][INFO] - {"epoch": 13, "update": 12.555, "loss": "2.08", "ppl": "4.23", "wps": "365165", "ups": "3.11", "wpb": "117413", "bsz": "256", "num_updates": "646200", "lr": "0.000357374", "gnorm": "0.718", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.8", "wall": "210905"} [2022-08-03 12:04:17,560][train_inner][INFO] - {"epoch": 13, "update": 12.559, "loss": "2.074", "ppl": "4.21", "wps": "366516", "ups": "3.08", "wpb": "118992", "bsz": "256", "num_updates": "646400", "lr": "0.000357172", "gnorm": "0.717", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.1", "wall": "210970"} [2022-08-03 12:05:22,391][train_inner][INFO] - {"epoch": 13, "update": 12.563, "loss": "2.077", "ppl": "4.22", "wps": "365951", "ups": "3.08", "wpb": "118623", "bsz": "256", "num_updates": "646600", "lr": "0.00035697", "gnorm": "0.715", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.2", "wall": "211035"} [2022-08-03 12:06:28,277][train_inner][INFO] - {"epoch": 13, "update": 12.567, "loss": "2.076", "ppl": "4.22", "wps": "361051", "ups": "3.04", "wpb": "118939", "bsz": "256", "num_updates": "646800", "lr": "0.000356768", "gnorm": "0.723", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "21.9", "wall": "211101"} [2022-08-03 12:07:32,917][train_inner][INFO] - {"epoch": 13, "update": 12.571, "loss": "2.078", "ppl": "4.22", "wps": "364995", "ups": "3.09", "wpb": "117965", "bsz": "256", "num_updates": "647000", "lr": "0.000356566", "gnorm": "0.724", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.4", "wall": "211165"} [2022-08-03 12:08:37,679][train_inner][INFO] - {"epoch": 13, "update": 12.575, "loss": "2.079", "ppl": "4.23", "wps": "365743", "ups": "3.09", "wpb": "118429", "bsz": "256", "num_updates": "647200", "lr": "0.000356364", "gnorm": "0.723", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.9", "wall": "211230"} [2022-08-03 12:09:42,817][train_inner][INFO] - {"epoch": 13, "update": 12.579, "loss": "2.077", "ppl": "4.22", "wps": "362886", "ups": "3.07", "wpb": "118186", "bsz": "256", "num_updates": "647400", "lr": "0.000356162", "gnorm": "0.719", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "211295"} [2022-08-03 12:10:47,592][train_inner][INFO] - {"epoch": 13, "update": 12.583, "loss": "2.078", "ppl": "4.22", "wps": "365307", "ups": "3.09", "wpb": "118313", "bsz": "256", "num_updates": "647600", "lr": "0.00035596", "gnorm": "0.72", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.9", "wall": "211360"} [2022-08-03 12:11:52,323][train_inner][INFO] - {"epoch": 13, "update": 12.586, "loss": "2.075", "ppl": "4.21", "wps": "366672", "ups": "3.09", "wpb": "118673", "bsz": "256", "num_updates": "647800", "lr": "0.000355758", "gnorm": "0.72", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.7", "wall": "211425"} [2022-08-03 12:12:48,313][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-03 12:12:57,423][train_inner][INFO] - {"epoch": 13, "update": 12.59, "loss": "2.077", "ppl": "4.22", "wps": "364297", "ups": "3.07", "wpb": "118576", "bsz": "256", "num_updates": "648000", "lr": "0.000355556", "gnorm": "0.721", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.2", "wall": "211490"} [2022-08-03 12:14:02,109][train_inner][INFO] - {"epoch": 13, "update": 12.594, "loss": "2.083", "ppl": "4.24", "wps": "366801", "ups": "3.09", "wpb": "118634", "bsz": "256", "num_updates": "648200", "lr": "0.000355354", "gnorm": "0.721", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.9", "wall": "211554"} [2022-08-03 12:15:06,957][train_inner][INFO] - {"epoch": 13, "update": 12.598, "loss": "2.079", "ppl": "4.23", "wps": "364998", "ups": "3.08", "wpb": "118344", "bsz": "256", "num_updates": "648400", "lr": "0.000355152", "gnorm": "0.72", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.3", "wall": "211619"} [2022-08-03 12:16:11,966][train_inner][INFO] - {"epoch": 13, "update": 12.602, "loss": "2.076", "ppl": "4.22", "wps": "365052", "ups": "3.08", "wpb": "118657", "bsz": "256", "num_updates": "648600", "lr": "0.000354949", "gnorm": "0.718", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "211684"} [2022-08-03 12:17:16,787][train_inner][INFO] - {"epoch": 13, "update": 12.606, "loss": "2.078", "ppl": "4.22", "wps": "365975", "ups": "3.09", "wpb": "118612", "bsz": "256", "num_updates": "648800", "lr": "0.000354747", "gnorm": "0.72", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.2", "wall": "211749"} [2022-08-03 12:18:21,500][train_inner][INFO] - {"epoch": 13, "update": 12.61, "loss": "2.077", "ppl": "4.22", "wps": "365810", "ups": "3.09", "wpb": "118362", "bsz": "256", "num_updates": "649000", "lr": "0.000354545", "gnorm": "0.72", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.9", "wall": "211814"} [2022-08-03 12:19:26,350][train_inner][INFO] - {"epoch": 13, "update": 12.614, "loss": "2.075", "ppl": "4.21", "wps": "365619", "ups": "3.08", "wpb": "118550", "bsz": "256", "num_updates": "649200", "lr": "0.000354343", "gnorm": "0.719", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21", "wall": "211879"} [2022-08-03 12:19:54,009][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-03 12:20:31,361][train_inner][INFO] - {"epoch": 13, "update": 12.618, "loss": "2.073", "ppl": "4.21", "wps": "362429", "ups": "3.08", "wpb": "117807", "bsz": "256", "num_updates": "649400", "lr": "0.000354141", "gnorm": "0.725", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "211944"} [2022-08-03 12:21:36,312][train_inner][INFO] - {"epoch": 13, "update": 12.621, "loss": "2.077", "ppl": "4.22", "wps": "363714", "ups": "3.08", "wpb": "118118", "bsz": "256", "num_updates": "649600", "lr": "0.000353939", "gnorm": "0.72", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "212009"} [2022-08-03 12:22:41,555][train_inner][INFO] - {"epoch": 13, "update": 12.625, "loss": "2.078", "ppl": "4.22", "wps": "363703", "ups": "3.07", "wpb": "118641", "bsz": "256", "num_updates": "649800", "lr": "0.000353737", "gnorm": "0.718", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "212074"} [2022-08-03 12:23:46,466][train_inner][INFO] - {"epoch": 13, "update": 12.629, "loss": "2.08", "ppl": "4.23", "wps": "364621", "ups": "3.08", "wpb": "118339", "bsz": "256", "num_updates": "650000", "lr": "0.000353535", "gnorm": "0.718", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.8", "wall": "212139"} [2022-08-03 12:23:46,468][fairseq_cli.train][INFO] - begin validation on "valid" subset [2022-08-03 12:24:09,314][valid][INFO] - {"epoch": 13, "valid_loss": "1.988", "valid_ppl": "3.97", "valid_wps": "1.6055e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "650000", "valid_best_loss": "1.988"} [2022-08-03 12:24:09,317][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 13 @ 650000 updates [2022-08-03 12:24:09,317][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_13_650000.pt [2022-08-03 12:24:19,540][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_13_650000.pt [2022-08-03 12:24:48,816][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/adan2/checkpoint_13_650000.pt (epoch 13 @ 650000 updates, score 1.988) (writing took 39.49901276547462 seconds) [2022-08-03 12:25:53,963][train_inner][INFO] - {"epoch": 13, "update": 12.633, "loss": "2.082", "ppl": "4.24", "wps": "185264", "ups": "1.57", "wpb": "118102", "bsz": "256", "num_updates": "650200", "lr": "0.000353333", "gnorm": "0.725", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "212266"} [2022-08-03 12:26:58,713][train_inner][INFO] - {"epoch": 13, "update": 12.637, "loss": "2.072", "ppl": "4.21", "wps": "367694", "ups": "3.09", "wpb": "119039", "bsz": "256", "num_updates": "650400", "lr": "0.000353131", "gnorm": "0.719", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20", "wall": "212331"} [2022-08-03 12:28:03,660][train_inner][INFO] - {"epoch": 13, "update": 12.641, "loss": "2.075", "ppl": "4.21", "wps": "365588", "ups": "3.08", "wpb": "118717", "bsz": "256", "num_updates": "650600", "lr": "0.000352929", "gnorm": "0.718", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "212396"} [2022-08-03 12:29:08,532][train_inner][INFO] - {"epoch": 13, "update": 12.645, "loss": "2.078", "ppl": "4.22", "wps": "363475", "ups": "3.08", "wpb": "117895", "bsz": "256", "num_updates": "650800", "lr": "0.000352727", "gnorm": "0.721", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "212461"} [2022-08-03 12:30:13,480][train_inner][INFO] - {"epoch": 13, "update": 12.649, "loss": "2.075", "ppl": "4.21", "wps": "364948", "ups": "3.08", "wpb": "118512", "bsz": "256", "num_updates": "651000", "lr": "0.000352525", "gnorm": "0.732", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "28.2", "wall": "212526"} [2022-08-03 12:31:18,884][train_inner][INFO] - {"epoch": 13, "update": 12.653, "loss": "2.079", "ppl": "4.23", "wps": "361710", "ups": "3.06", "wpb": "118284", "bsz": "256", "num_updates": "651200", "lr": "0.000352323", "gnorm": "0.719", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.8", "wall": "212591"} [2022-08-03 12:32:23,959][train_inner][INFO] - {"epoch": 13, "update": 12.656, "loss": "2.075", "ppl": "4.21", "wps": "362887", "ups": "3.07", "wpb": "118072", "bsz": "256", "num_updates": "651400", "lr": "0.000352121", "gnorm": "0.722", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.6", "wall": "212656"} [2022-08-03 12:33:28,761][train_inner][INFO] - {"epoch": 13, "update": 12.66, "loss": "2.08", "ppl": "4.23", "wps": "364229", "ups": "3.09", "wpb": "118013", "bsz": "256", "num_updates": "651600", "lr": "0.000351919", "gnorm": "0.722", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.5", "wall": "212721"} [2022-08-03 12:34:33,446][train_inner][INFO] - {"epoch": 13, "update": 12.664, "loss": "2.074", "ppl": "4.21", "wps": "365622", "ups": "3.09", "wpb": "118250", "bsz": "256", "num_updates": "651800", "lr": "0.000351717", "gnorm": "0.719", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "212786"} [2022-08-03 12:35:38,166][train_inner][INFO] - {"epoch": 13, "update": 12.668, "loss": "2.075", "ppl": "4.21", "wps": "366478", "ups": "3.09", "wpb": "118590", "bsz": "256", "num_updates": "652000", "lr": "0.000351515", "gnorm": "0.72", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.7", "wall": "212850"} [2022-08-03 12:36:43,074][train_inner][INFO] - {"epoch": 13, "update": 12.672, "loss": "2.077", "ppl": "4.22", "wps": "362253", "ups": "3.08", "wpb": "117565", "bsz": "256", "num_updates": "652200", "lr": "0.000351313", "gnorm": "0.724", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "212915"} [2022-08-03 12:37:47,629][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-03 12:37:48,576][train_inner][INFO] - {"epoch": 13, "update": 12.676, "loss": "2.073", "ppl": "4.21", "wps": "361675", "ups": "3.05", "wpb": "118449", "bsz": "256", "num_updates": "652400", "lr": "0.000351111", "gnorm": "0.723", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.4", "wall": "212981"} [2022-08-03 12:38:53,461][train_inner][INFO] - {"epoch": 13, "update": 12.68, "loss": "2.081", "ppl": "4.23", "wps": "364840", "ups": "3.08", "wpb": "118363", "bsz": "256", "num_updates": "652600", "lr": "0.000350909", "gnorm": "0.732", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "213046"} [2022-08-03 12:39:58,186][train_inner][INFO] - {"epoch": 13, "update": 12.684, "loss": "2.074", "ppl": "4.21", "wps": "365206", "ups": "3.09", "wpb": "118188", "bsz": "256", "num_updates": "652800", "lr": "0.000350707", "gnorm": "0.72", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "25.2", "wall": "213111"} [2022-08-03 12:41:03,005][train_inner][INFO] - {"epoch": 13, "update": 12.688, "loss": "2.081", "ppl": "4.23", "wps": "365387", "ups": "3.09", "wpb": "118419", "bsz": "256", "num_updates": "653000", "lr": "0.000350505", "gnorm": "0.72", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.4", "wall": "213175"} [2022-08-03 12:42:07,670][train_inner][INFO] - {"epoch": 13, "update": 12.691, "loss": "2.072", "ppl": "4.2", "wps": "365444", "ups": "3.09", "wpb": "118155", "bsz": "255.9", "num_updates": "653200", "lr": "0.000350303", "gnorm": "0.722", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "26.5", "wall": "213240"} [2022-08-03 12:43:12,771][train_inner][INFO] - {"epoch": 13, "update": 12.695, "loss": "2.076", "ppl": "4.22", "wps": "363635", "ups": "3.07", "wpb": "118363", "bsz": "256", "num_updates": "653400", "lr": "0.000350101", "gnorm": "0.721", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.1", "wall": "213305"} [2022-08-03 12:44:17,825][train_inner][INFO] - {"epoch": 13, "update": 12.699, "loss": "2.075", "ppl": "4.21", "wps": "363212", "ups": "3.07", "wpb": "118140", "bsz": "256", "num_updates": "653600", "lr": "0.000349899", "gnorm": "0.724", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "213370"} [2022-08-03 12:45:23,060][train_inner][INFO] - {"epoch": 13, "update": 12.703, "loss": "2.081", "ppl": "4.23", "wps": "361324", "ups": "3.07", "wpb": "117854", "bsz": "256", "num_updates": "653800", "lr": "0.000349697", "gnorm": "0.722", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.6", "wall": "213435"} [2022-08-03 12:46:28,247][train_inner][INFO] - {"epoch": 13, "update": 12.707, "loss": "2.076", "ppl": "4.22", "wps": "363217", "ups": "3.07", "wpb": "118383", "bsz": "256", "num_updates": "654000", "lr": "0.000349495", "gnorm": "0.723", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.5", "wall": "213501"} [2022-08-03 12:47:33,029][train_inner][INFO] - {"epoch": 13, "update": 12.711, "loss": "2.081", "ppl": "4.23", "wps": "362748", "ups": "3.09", "wpb": "117496", "bsz": "256", "num_updates": "654200", "lr": "0.000349293", "gnorm": "0.727", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "24.9", "wall": "213565"} [2022-08-03 12:48:37,774][train_inner][INFO] - {"epoch": 13, "update": 12.715, "loss": "2.078", "ppl": "4.22", "wps": "364092", "ups": "3.09", "wpb": "117863", "bsz": "256", "num_updates": "654400", "lr": "0.000349091", "gnorm": "0.724", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.3", "wall": "213630"} [2022-08-03 12:49:42,835][train_inner][INFO] - {"epoch": 13, "update": 12.719, "loss": "2.073", "ppl": "4.21", "wps": "364237", "ups": "3.07", "wpb": "118488", "bsz": "256", "num_updates": "654600", "lr": "0.000348889", "gnorm": "0.719", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.3", "wall": "213695"} [2022-08-03 12:50:47,982][train_inner][INFO] - {"epoch": 13, "update": 12.722, "loss": "2.069", "ppl": "4.2", "wps": "364174", "ups": "3.07", "wpb": "118621", "bsz": "256", "num_updates": "654800", "lr": "0.000348687", "gnorm": "0.724", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.4", "wall": "213760"} [2022-08-03 12:51:52,867][train_inner][INFO] - {"epoch": 13, "update": 12.726, "loss": "2.076", "ppl": "4.22", "wps": "363890", "ups": "3.08", "wpb": "118054", "bsz": "256", "num_updates": "655000", "lr": "0.000348485", "gnorm": "0.723", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.9", "wall": "213825"} [2022-08-03 12:52:57,792][train_inner][INFO] - {"epoch": 13, "update": 12.73, "loss": "2.072", "ppl": "4.2", "wps": "365338", "ups": "3.08", "wpb": "118595", "bsz": "256", "num_updates": "655200", "lr": "0.000348283", "gnorm": "0.721", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "213890"} [2022-08-03 12:54:02,971][train_inner][INFO] - {"epoch": 13, "update": 12.734, "loss": "2.067", "ppl": "4.19", "wps": "364365", "ups": "3.07", "wpb": "118743", "bsz": "256", "num_updates": "655400", "lr": "0.000348081", "gnorm": "0.719", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "28.2", "wall": "213955"} [2022-08-03 12:55:07,765][train_inner][INFO] - {"epoch": 13, "update": 12.738, "loss": "2.072", "ppl": "4.2", "wps": "365756", "ups": "3.09", "wpb": "118493", "bsz": "256", "num_updates": "655600", "lr": "0.000347879", "gnorm": "0.718", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.8", "wall": "214020"} [2022-08-03 12:56:12,586][train_inner][INFO] - {"epoch": 13, "update": 12.742, "loss": "2.079", "ppl": "4.22", "wps": "365218", "ups": "3.09", "wpb": "118367", "bsz": "256", "num_updates": "655800", "lr": "0.000347677", "gnorm": "0.724", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "214085"} [2022-08-03 12:57:17,545][train_inner][INFO] - {"epoch": 13, "update": 12.746, "loss": "2.07", "ppl": "4.2", "wps": "365993", "ups": "3.08", "wpb": "118871", "bsz": "256", "num_updates": "656000", "lr": "0.000347475", "gnorm": "0.721", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.9", "wall": "214150"} [2022-08-03 12:58:22,172][train_inner][INFO] - {"epoch": 13, "update": 12.75, "loss": "2.078", "ppl": "4.22", "wps": "366893", "ups": "3.09", "wpb": "118553", "bsz": "256", "num_updates": "656200", "lr": "0.000347273", "gnorm": "0.723", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.4", "wall": "214214"} [2022-08-03 12:59:27,065][train_inner][INFO] - {"epoch": 13, "update": 12.754, "loss": "2.076", "ppl": "4.22", "wps": "365309", "ups": "3.08", "wpb": "118530", "bsz": "256", "num_updates": "656400", "lr": "0.000347071", "gnorm": "0.721", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.1", "wall": "214279"} [2022-08-03 13:00:16,152][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-03 13:00:32,562][train_inner][INFO] - {"epoch": 13, "update": 12.757, "loss": "2.074", "ppl": "4.21", "wps": "361632", "ups": "3.05", "wpb": "118426", "bsz": "256", "num_updates": "656600", "lr": "0.000346869", "gnorm": "0.724", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.5", "wall": "214345"} [2022-08-03 13:01:37,386][train_inner][INFO] - {"epoch": 13, "update": 12.761, "loss": "2.065", "ppl": "4.18", "wps": "368951", "ups": "3.09", "wpb": "119585", "bsz": "256", "num_updates": "656800", "lr": "0.000346667", "gnorm": "0.718", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "214410"} [2022-08-03 13:02:42,689][train_inner][INFO] - {"epoch": 13, "update": 12.765, "loss": "2.064", "ppl": "4.18", "wps": "363015", "ups": "3.06", "wpb": "118527", "bsz": "256", "num_updates": "657000", "lr": "0.000346465", "gnorm": "0.72", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.8", "wall": "214475"} [2022-08-03 13:03:47,388][train_inner][INFO] - {"epoch": 13, "update": 12.769, "loss": "2.068", "ppl": "4.19", "wps": "367470", "ups": "3.09", "wpb": "118873", "bsz": "256", "num_updates": "657200", "lr": "0.000346263", "gnorm": "0.722", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.4", "wall": "214540"} [2022-08-03 13:04:52,430][train_inner][INFO] - {"epoch": 13, "update": 12.773, "loss": "2.072", "ppl": "4.2", "wps": "365175", "ups": "3.07", "wpb": "118757", "bsz": "256", "num_updates": "657400", "lr": "0.000346061", "gnorm": "0.722", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.3", "wall": "214605"} [2022-08-03 13:05:57,179][train_inner][INFO] - {"epoch": 13, "update": 12.777, "loss": "2.074", "ppl": "4.21", "wps": "365097", "ups": "3.09", "wpb": "118197", "bsz": "256", "num_updates": "657600", "lr": "0.000345859", "gnorm": "0.722", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.9", "wall": "214670"} [2022-08-03 13:07:01,891][train_inner][INFO] - {"epoch": 13, "update": 12.781, "loss": "2.074", "ppl": "4.21", "wps": "364881", "ups": "3.09", "wpb": "118060", "bsz": "256", "num_updates": "657800", "lr": "0.000345657", "gnorm": "0.724", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "27.3", "wall": "214734"} [2022-08-03 13:08:06,764][train_inner][INFO] - {"epoch": 13, "update": 12.785, "loss": "2.074", "ppl": "4.21", "wps": "365716", "ups": "3.08", "wpb": "118624", "bsz": "256", "num_updates": "658000", "lr": "0.000345455", "gnorm": "0.72", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.6", "wall": "214799"} [2022-08-03 13:09:11,827][train_inner][INFO] - {"epoch": 13, "update": 12.789, "loss": "2.073", "ppl": "4.21", "wps": "363370", "ups": "3.07", "wpb": "118207", "bsz": "256", "num_updates": "658200", "lr": "0.000345253", "gnorm": "0.727", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.4", "wall": "214864"} [2022-08-03 13:10:16,676][train_inner][INFO] - {"epoch": 13, "update": 12.792, "loss": "2.075", "ppl": "4.21", "wps": "363421", "ups": "3.08", "wpb": "117835", "bsz": "256", "num_updates": "658400", "lr": "0.000345051", "gnorm": "0.728", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.2", "wall": "214929"} [2022-08-03 13:11:21,809][train_inner][INFO] - {"epoch": 13, "update": 12.796, "loss": "2.073", "ppl": "4.21", "wps": "363527", "ups": "3.07", "wpb": "118387", "bsz": "256", "num_updates": "658600", "lr": "0.000344848", "gnorm": "0.723", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "21.7", "wall": "214994"} [2022-08-03 13:11:38,594][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-03 13:12:27,037][train_inner][INFO] - {"epoch": 13, "update": 12.8, "loss": "2.075", "ppl": "4.21", "wps": "361605", "ups": "3.07", "wpb": "117931", "bsz": "256", "num_updates": "658800", "lr": "0.000344646", "gnorm": "0.726", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.8", "wall": "215059"} [2022-08-03 13:13:31,919][train_inner][INFO] - {"epoch": 13, "update": 12.804, "loss": "2.071", "ppl": "4.2", "wps": "363734", "ups": "3.08", "wpb": "117998", "bsz": "256", "num_updates": "659000", "lr": "0.000344444", "gnorm": "0.724", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "215124"} [2022-08-03 13:14:36,767][train_inner][INFO] - {"epoch": 13, "update": 12.808, "loss": "2.075", "ppl": "4.21", "wps": "363983", "ups": "3.08", "wpb": "118016", "bsz": "256", "num_updates": "659200", "lr": "0.000344242", "gnorm": "0.724", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.4", "wall": "215189"} [2022-08-03 13:15:41,735][train_inner][INFO] - {"epoch": 13, "update": 12.812, "loss": "2.068", "ppl": "4.19", "wps": "363625", "ups": "3.08", "wpb": "118118", "bsz": "256", "num_updates": "659400", "lr": "0.00034404", "gnorm": "0.723", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.7", "wall": "215254"} [2022-08-03 13:16:46,804][train_inner][INFO] - {"epoch": 13, "update": 12.816, "loss": "2.073", "ppl": "4.21", "wps": "363176", "ups": "3.07", "wpb": "118156", "bsz": "256", "num_updates": "659600", "lr": "0.000343838", "gnorm": "0.723", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.6", "wall": "215319"} [2022-08-03 13:17:51,892][train_inner][INFO] - {"epoch": 13, "update": 12.82, "loss": "2.071", "ppl": "4.2", "wps": "362936", "ups": "3.07", "wpb": "118111", "bsz": "256", "num_updates": "659800", "lr": "0.000343636", "gnorm": "0.725", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.9", "wall": "215384"} [2022-08-03 13:18:56,773][train_inner][INFO] - {"epoch": 13, "update": 12.823, "loss": "2.07", "ppl": "4.2", "wps": "364899", "ups": "3.08", "wpb": "118373", "bsz": "256", "num_updates": "660000", "lr": "0.000343434", "gnorm": "0.725", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "215449"} [2022-08-03 13:20:01,591][train_inner][INFO] - {"epoch": 13, "update": 12.827, "loss": "2.073", "ppl": "4.21", "wps": "367119", "ups": "3.09", "wpb": "118979", "bsz": "256", "num_updates": "660200", "lr": "0.000343232", "gnorm": "0.721", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.3", "wall": "215514"} [2022-08-03 13:21:06,847][train_inner][INFO] - {"epoch": 13, "update": 12.831, "loss": "2.071", "ppl": "4.2", "wps": "362184", "ups": "3.06", "wpb": "118171", "bsz": "256", "num_updates": "660400", "lr": "0.00034303", "gnorm": "0.724", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.2", "wall": "215579"} [2022-08-03 13:22:11,868][train_inner][INFO] - {"epoch": 13, "update": 12.835, "loss": "2.071", "ppl": "4.2", "wps": "364216", "ups": "3.08", "wpb": "118407", "bsz": "256", "num_updates": "660600", "lr": "0.000342828", "gnorm": "0.726", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.1", "wall": "215644"} [2022-08-03 13:23:17,626][train_inner][INFO] - {"epoch": 13, "update": 12.839, "loss": "2.079", "ppl": "4.22", "wps": "358524", "ups": "3.04", "wpb": "117877", "bsz": "256", "num_updates": "660800", "lr": "0.000342626", "gnorm": "0.727", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "20.2", "wall": "215710"} [2022-08-03 13:23:24,104][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-03 13:24:23,022][train_inner][INFO] - {"epoch": 13, "update": 12.843, "loss": "2.072", "ppl": "4.21", "wps": "360728", "ups": "3.06", "wpb": "117949", "bsz": "256", "num_updates": "661000", "lr": "0.000342424", "gnorm": "0.726", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.3", "wall": "215775"} [2022-08-03 13:25:28,085][train_inner][INFO] - {"epoch": 13, "update": 12.847, "loss": "2.066", "ppl": "4.19", "wps": "364993", "ups": "3.07", "wpb": "118736", "bsz": "256", "num_updates": "661200", "lr": "0.000342222", "gnorm": "0.721", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "215840"} [2022-08-03 13:26:32,679][train_inner][INFO] - {"epoch": 13, "update": 12.851, "loss": "2.075", "ppl": "4.21", "wps": "364353", "ups": "3.1", "wpb": "117674", "bsz": "256", "num_updates": "661400", "lr": "0.00034202", "gnorm": "0.729", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.8", "wall": "215905"} [2022-08-03 13:27:37,574][train_inner][INFO] - {"epoch": 13, "update": 12.855, "loss": "2.074", "ppl": "4.21", "wps": "365146", "ups": "3.08", "wpb": "118479", "bsz": "256", "num_updates": "661600", "lr": "0.000341818", "gnorm": "0.724", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.5", "wall": "215970"} [2022-08-03 13:28:42,376][train_inner][INFO] - {"epoch": 13, "update": 12.858, "loss": "2.075", "ppl": "4.21", "wps": "363036", "ups": "3.09", "wpb": "117626", "bsz": "256", "num_updates": "661800", "lr": "0.000341616", "gnorm": "0.728", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.8", "wall": "216035"} [2022-08-03 13:29:47,618][train_inner][INFO] - {"epoch": 13, "update": 12.862, "loss": "2.073", "ppl": "4.21", "wps": "363242", "ups": "3.07", "wpb": "118491", "bsz": "256", "num_updates": "662000", "lr": "0.000341414", "gnorm": "0.731", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.9", "wall": "216100"} [2022-08-03 13:30:52,344][train_inner][INFO] - {"epoch": 13, "update": 12.866, "loss": "2.07", "ppl": "4.2", "wps": "366107", "ups": "3.09", "wpb": "118482", "bsz": "256", "num_updates": "662200", "lr": "0.000341212", "gnorm": "0.723", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "216165"} [2022-08-03 13:31:57,138][train_inner][INFO] - {"epoch": 13, "update": 12.87, "loss": "2.079", "ppl": "4.22", "wps": "364518", "ups": "3.09", "wpb": "118091", "bsz": "256", "num_updates": "662400", "lr": "0.00034101", "gnorm": "0.726", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.8", "wall": "216229"} [2022-08-03 13:32:50,368][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-03 13:33:02,410][train_inner][INFO] - {"epoch": 13, "update": 12.874, "loss": "2.076", "ppl": "4.22", "wps": "363602", "ups": "3.06", "wpb": "118664", "bsz": "256", "num_updates": "662600", "lr": "0.000340808", "gnorm": "0.725", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.5", "wall": "216295"} [2022-08-03 13:34:07,506][train_inner][INFO] - {"epoch": 13, "update": 12.878, "loss": "2.064", "ppl": "4.18", "wps": "366050", "ups": "3.07", "wpb": "119142", "bsz": "256", "num_updates": "662800", "lr": "0.000340606", "gnorm": "0.723", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.3", "wall": "216360"} [2022-08-03 13:35:12,437][train_inner][INFO] - {"epoch": 13, "update": 12.882, "loss": "2.076", "ppl": "4.22", "wps": "363863", "ups": "3.08", "wpb": "118127", "bsz": "256", "num_updates": "663000", "lr": "0.000340404", "gnorm": "0.727", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "26.4", "wall": "216425"} [2022-08-03 13:36:03,656][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 [2022-08-03 13:36:17,971][train_inner][INFO] - {"epoch": 13, "update": 12.886, "loss": "2.066", "ppl": "4.19", "wps": "362282", "ups": "3.05", "wpb": "118707", "bsz": "256", "num_updates": "663200", "lr": "0.000340202", "gnorm": "0.722", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.8", "wall": "216490"} [2022-08-03 13:37:22,757][train_inner][INFO] - {"epoch": 13, "update": 12.89, "loss": "2.07", "ppl": "4.2", "wps": "365529", "ups": "3.09", "wpb": "118407", "bsz": "256", "num_updates": "663400", "lr": "0.00034", "gnorm": "0.723", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.4", "wall": "216555"} [2022-08-03 13:38:27,566][train_inner][INFO] - {"epoch": 13, "update": 12.893, "loss": "2.072", "ppl": "4.21", "wps": "364585", "ups": "3.09", "wpb": "118140", "bsz": "256", "num_updates": "663600", "lr": "0.000339798", "gnorm": "0.727", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.3", "wall": "216620"} [2022-08-03 13:39:32,504][train_inner][INFO] - {"epoch": 13, "update": 12.897, "loss": "2.065", "ppl": "4.18", "wps": "365727", "ups": "3.08", "wpb": "118746", "bsz": "256", "num_updates": "663800", "lr": "0.000339596", "gnorm": "0.722", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20", "wall": "216685"} [2022-08-03 13:40:37,378][train_inner][INFO] - {"epoch": 13, "update": 12.901, "loss": "2.07", "ppl": "4.2", "wps": "361406", "ups": "3.08", "wpb": "117227", "bsz": "256", "num_updates": "664000", "lr": "0.000339394", "gnorm": "0.731", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.9", "wall": "216750"} [2022-08-03 13:41:42,127][train_inner][INFO] - {"epoch": 13, "update": 12.905, "loss": "2.068", "ppl": "4.19", "wps": "364500", "ups": "3.09", "wpb": "118003", "bsz": "256", "num_updates": "664200", "lr": "0.000339192", "gnorm": "0.727", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "22.6", "wall": "216814"} [2022-08-03 13:42:46,781][train_inner][INFO] - {"epoch": 13, "update": 12.909, "loss": "2.074", "ppl": "4.21", "wps": "365577", "ups": "3.09", "wpb": "118178", "bsz": "256", "num_updates": "664400", "lr": "0.00033899", "gnorm": "0.725", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.9", "wall": "216879"} [2022-08-03 13:43:51,510][train_inner][INFO] - {"epoch": 13, "update": 12.913, "loss": "2.07", "ppl": "4.2", "wps": "364001", "ups": "3.09", "wpb": "117805", "bsz": "256", "num_updates": "664600", "lr": "0.000338788", "gnorm": "0.726", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.8", "wall": "216944"} [2022-08-03 13:44:56,561][train_inner][INFO] - {"epoch": 13, "update": 12.917, "loss": "2.074", "ppl": "4.21", "wps": "362154", "ups": "3.07", "wpb": "117790", "bsz": "256", "num_updates": "664800", "lr": "0.000338586", "gnorm": "0.729", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.1", "wall": "217009"} [2022-08-03 13:46:01,475][train_inner][INFO] - {"epoch": 13, "update": 12.921, "loss": "2.061", "ppl": "4.17", "wps": "365404", "ups": "3.08", "wpb": "118597", "bsz": "256", "num_updates": "665000", "lr": "0.000338384", "gnorm": "0.726", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.9", "wall": "217074"} [2022-08-03 13:47:06,188][train_inner][INFO] - {"epoch": 13, "update": 12.925, "loss": "2.069", "ppl": "4.2", "wps": "365254", "ups": "3.09", "wpb": "118182", "bsz": "256", "num_updates": "665200", "lr": "0.000338182", "gnorm": "0.725", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "23.8", "wall": "217139"} [2022-08-03 13:48:11,271][train_inner][INFO] - {"epoch": 13, "update": 12.928, "loss": "2.063", "ppl": "4.18", "wps": "363337", "ups": "3.07", "wpb": "118234", "bsz": "256", "num_updates": "665400", "lr": "0.00033798", "gnorm": "0.724", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.6", "wall": "217204"} [2022-08-03 13:49:16,357][train_inner][INFO] - {"epoch": 13, "update": 12.932, "loss": "2.072", "ppl": "4.21", "wps": "364604", "ups": "3.07", "wpb": "118652", "bsz": "256", "num_updates": "665600", "lr": "0.000337778", "gnorm": "0.727", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "217269"} [2022-08-03 13:50:21,567][train_inner][INFO] - {"epoch": 13, "update": 12.936, "loss": "2.066", "ppl": "4.19", "wps": "365038", "ups": "3.07", "wpb": "119018", "bsz": "256", "num_updates": "665800", "lr": "0.000337576", "gnorm": "0.722", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.3", "wall": "217334"} [2022-08-03 13:51:26,681][train_inner][INFO] - {"epoch": 13, "update": 12.94, "loss": "2.067", "ppl": "4.19", "wps": "363909", "ups": "3.07", "wpb": "118477", "bsz": "256", "num_updates": "666000", "lr": "0.000337374", "gnorm": "0.725", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.1", "wall": "217399"} [2022-08-03 13:52:31,236][train_inner][INFO] - {"epoch": 13, "update": 12.944, "loss": "2.069", "ppl": "4.2", "wps": "366097", "ups": "3.1", "wpb": "118164", "bsz": "256", "num_updates": "666200", "lr": "0.000337172", "gnorm": "0.728", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.2", "wall": "217464"} [2022-08-03 13:53:36,117][train_inner][INFO] - {"epoch": 13, "update": 12.948, "loss": "2.068", "ppl": "4.19", "wps": "364649", "ups": "3.08", "wpb": "118293", "bsz": "256", "num_updates": "666400", "lr": "0.00033697", "gnorm": "0.725", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.5", "wall": "217528"} [2022-08-03 13:54:41,026][train_inner][INFO] - {"epoch": 13, "update": 12.952, "loss": "2.074", "ppl": "4.21", "wps": "364008", "ups": "3.08", "wpb": "118134", "bsz": "256", "num_updates": "666600", "lr": "0.000336768", "gnorm": "0.729", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.9", "wall": "217593"} [2022-08-03 13:55:45,819][train_inner][INFO] - {"epoch": 13, "update": 12.956, "loss": "2.065", "ppl": "4.18", "wps": "367155", "ups": "3.09", "wpb": "118944", "bsz": "256", "num_updates": "666800", "lr": "0.000336566", "gnorm": "0.728", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.4", "wall": "217658"} [2022-08-03 13:56:50,706][train_inner][INFO] - {"epoch": 13, "update": 12.96, "loss": "2.073", "ppl": "4.21", "wps": "363241", "ups": "3.08", "wpb": "117846", "bsz": "256", "num_updates": "667000", "lr": "0.000336364", "gnorm": "0.73", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.1", "wall": "217723"} [2022-08-03 13:57:55,854][train_inner][INFO] - {"epoch": 13, "update": 12.963, "loss": "2.064", "ppl": "4.18", "wps": "364109", "ups": "3.07", "wpb": "118604", "bsz": "256", "num_updates": "667200", "lr": "0.000336162", "gnorm": "0.726", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22", "wall": "217788"} [2022-08-03 13:59:00,338][train_inner][INFO] - {"epoch": 13, "update": 12.967, "loss": "2.064", "ppl": "4.18", "wps": "366941", "ups": "3.1", "wpb": "118305", "bsz": "256", "num_updates": "667400", "lr": "0.00033596", "gnorm": "0.727", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.7", "wall": "217853"} [2022-08-03 14:00:04,771][train_inner][INFO] - {"epoch": 13, "update": 12.971, "loss": "2.071", "ppl": "4.2", "wps": "366986", "ups": "3.1", "wpb": "118228", "bsz": "256", "num_updates": "667600", "lr": "0.000335758", "gnorm": "0.733", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.9", "wall": "217917"} [2022-08-03 14:01:09,453][train_inner][INFO] - {"epoch": 13, "update": 12.975, "loss": "2.07", "ppl": "4.2", "wps": "365516", "ups": "3.09", "wpb": "118211", "bsz": "256", "num_updates": "667800", "lr": "0.000335556", "gnorm": "0.73", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.9", "wall": "217982"} [2022-08-03 14:02:14,266][train_inner][INFO] - {"epoch": 13, "update": 12.979, "loss": "2.066", "ppl": "4.19", "wps": "362640", "ups": "3.09", "wpb": "117517", "bsz": "256", "num_updates": "668000", "lr": "0.000335354", "gnorm": "0.731", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "218047"} [2022-08-03 14:03:18,879][train_inner][INFO] - {"epoch": 13, "update": 12.983, "loss": "2.068", "ppl": "4.19", "wps": "366945", "ups": "3.1", "wpb": "118544", "bsz": "256", "num_updates": "668200", "lr": "0.000335152", "gnorm": "0.725", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "26.4", "wall": "218111"} [2022-08-03 14:04:23,606][train_inner][INFO] - {"epoch": 13, "update": 12.987, "loss": "2.067", "ppl": "4.19", "wps": "364768", "ups": "3.09", "wpb": "118049", "bsz": "256", "num_updates": "668400", "lr": "0.000334949", "gnorm": "0.728", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.6", "wall": "218176"} [2022-08-03 14:05:28,124][train_inner][INFO] - {"epoch": 13, "update": 12.991, "loss": "2.064", "ppl": "4.18", "wps": "366065", "ups": "3.1", "wpb": "118088", "bsz": "256", "num_updates": "668600", "lr": "0.000334747", "gnorm": "0.727", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.9", "wall": "218240"} [2022-08-03 14:06:33,166][train_inner][INFO] - {"epoch": 13, "update": 12.994, "loss": "2.067", "ppl": "4.19", "wps": "362114", "ups": "3.07", "wpb": "117761", "bsz": "256", "num_updates": "668800", "lr": "0.000334545", "gnorm": "0.73", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.6", "wall": "218305"} [2022-08-03 14:07:38,126][train_inner][INFO] - {"epoch": 13, "update": 12.998, "loss": "2.074", "ppl": "4.21", "wps": "364253", "ups": "3.08", "wpb": "118308", "bsz": "256", "num_updates": "669000", "lr": "0.000334343", "gnorm": "0.73", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "218370"} [2022-08-03 14:08:05,744][fairseq_cli.train][INFO] - end of epoch 13 (average epoch stats below) [2022-08-03 14:08:05,744][train][INFO] - {"epoch": 13, "train_loss": "2.081", "train_ppl": "4.23", "train_wps": "362660", "train_ups": "3.07", "train_wpb": "118298", "train_bsz": "256", "train_num_updates": "669085", "train_lr": "0.000334258", "train_gnorm": "0.719", "train_clip": "0", "train_loss_scale": "16", "train_train_wall": "16618", "train_gb_free": "22.3", "train_wall": "218398"} [2022-08-03 14:08:05,855][fairseq.data.iterators][INFO] - grouped total_num_itrs = 51488 [2022-08-03 14:08:05,858][fairseq.trainer][INFO] - begin training epoch 14 [2022-08-03 14:08:05,858][fairseq_cli.train][INFO] - Start iterating over samples [2022-08-03 14:08:56,438][train_inner][INFO] - {"epoch": 14, "update": 13.002, "loss": "2.064", "ppl": "4.18", "wps": "301922", "ups": "2.55", "wpb": "118219", "bsz": "255.4", "num_updates": "669200", "lr": "0.000334141", "gnorm": "0.727", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.3", "wall": "218449"} [2022-08-03 14:09:38,379][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-03 14:10:01,025][train_inner][INFO] - {"epoch": 14, "update": 13.006, "loss": "2.063", "ppl": "4.18", "wps": "364966", "ups": "3.1", "wpb": "117859", "bsz": "256", "num_updates": "669400", "lr": "0.000333939", "gnorm": "0.728", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "218513"} [2022-08-03 14:11:05,909][train_inner][INFO] - {"epoch": 14, "update": 13.01, "loss": "2.056", "ppl": "4.16", "wps": "365424", "ups": "3.08", "wpb": "118549", "bsz": "256", "num_updates": "669600", "lr": "0.000333737", "gnorm": "0.726", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "218578"} [2022-08-03 14:12:10,335][train_inner][INFO] - {"epoch": 14, "update": 13.014, "loss": "2.057", "ppl": "4.16", "wps": "368979", "ups": "3.1", "wpb": "118858", "bsz": "256", "num_updates": "669800", "lr": "0.000333535", "gnorm": "0.726", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.2", "wall": "218643"} [2022-08-03 14:13:15,030][train_inner][INFO] - {"epoch": 14, "update": 13.018, "loss": "2.06", "ppl": "4.17", "wps": "365555", "ups": "3.09", "wpb": "118246", "bsz": "256", "num_updates": "670000", "lr": "0.000333333", "gnorm": "0.729", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "218707"} [2022-08-03 14:14:19,521][train_inner][INFO] - {"epoch": 14, "update": 13.022, "loss": "2.057", "ppl": "4.16", "wps": "367650", "ups": "3.1", "wpb": "118548", "bsz": "256", "num_updates": "670200", "lr": "0.000333131", "gnorm": "0.728", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.6", "wall": "218772"} [2022-08-03 14:15:24,368][train_inner][INFO] - {"epoch": 14, "update": 13.026, "loss": "2.063", "ppl": "4.18", "wps": "363998", "ups": "3.08", "wpb": "118019", "bsz": "256", "num_updates": "670400", "lr": "0.000332929", "gnorm": "0.731", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.4", "wall": "218837"} [2022-08-03 14:16:28,926][train_inner][INFO] - {"epoch": 14, "update": 13.029, "loss": "2.061", "ppl": "4.17", "wps": "365625", "ups": "3.1", "wpb": "118018", "bsz": "256", "num_updates": "670600", "lr": "0.000332727", "gnorm": "0.731", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24", "wall": "218901"} [2022-08-03 14:17:33,480][train_inner][INFO] - {"epoch": 14, "update": 13.033, "loss": "2.063", "ppl": "4.18", "wps": "366807", "ups": "3.1", "wpb": "118394", "bsz": "256", "num_updates": "670800", "lr": "0.000332525", "gnorm": "0.729", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.6", "wall": "218966"} [2022-08-03 14:18:38,655][train_inner][INFO] - {"epoch": 14, "update": 13.037, "loss": "2.065", "ppl": "4.18", "wps": "365440", "ups": "3.07", "wpb": "119086", "bsz": "256", "num_updates": "671000", "lr": "0.000332323", "gnorm": "0.726", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.6", "wall": "219031"} [2022-08-03 14:19:43,396][train_inner][INFO] - {"epoch": 14, "update": 13.041, "loss": "2.064", "ppl": "4.18", "wps": "366150", "ups": "3.09", "wpb": "118523", "bsz": "256", "num_updates": "671200", "lr": "0.000332121", "gnorm": "0.729", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.1", "wall": "219096"} [2022-08-03 14:20:47,834][train_inner][INFO] - {"epoch": 14, "update": 13.045, "loss": "2.069", "ppl": "4.2", "wps": "366376", "ups": "3.1", "wpb": "118040", "bsz": "256", "num_updates": "671400", "lr": "0.000331919", "gnorm": "0.727", "clip": "0", "loss_scale": "32", "train_wall": "64", "gb_free": "19.9", "wall": "219160"} [2022-08-03 14:20:59,885][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-03 14:21:53,087][train_inner][INFO] - {"epoch": 14, "update": 13.049, "loss": "2.062", "ppl": "4.18", "wps": "362894", "ups": "3.07", "wpb": "118398", "bsz": "256", "num_updates": "671600", "lr": "0.000331717", "gnorm": "0.729", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.2", "wall": "219225"} [2022-08-03 14:22:58,349][train_inner][INFO] - {"epoch": 14, "update": 13.053, "loss": "2.06", "ppl": "4.17", "wps": "363308", "ups": "3.06", "wpb": "118550", "bsz": "256", "num_updates": "671800", "lr": "0.000331515", "gnorm": "0.726", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.5", "wall": "219291"} [2022-08-03 14:24:03,290][train_inner][INFO] - {"epoch": 14, "update": 13.057, "loss": "2.069", "ppl": "4.19", "wps": "364502", "ups": "3.08", "wpb": "118355", "bsz": "256", "num_updates": "672000", "lr": "0.000331313", "gnorm": "0.727", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.5", "wall": "219356"} [2022-08-03 14:25:08,041][train_inner][INFO] - {"epoch": 14, "update": 13.061, "loss": "2.065", "ppl": "4.18", "wps": "366450", "ups": "3.09", "wpb": "118637", "bsz": "256", "num_updates": "672200", "lr": "0.000331111", "gnorm": "0.728", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "219420"} [2022-08-03 14:26:12,947][train_inner][INFO] - {"epoch": 14, "update": 13.064, "loss": "2.06", "ppl": "4.17", "wps": "364349", "ups": "3.08", "wpb": "118241", "bsz": "256", "num_updates": "672400", "lr": "0.000330909", "gnorm": "0.726", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "219485"} [2022-08-03 14:27:18,031][train_inner][INFO] - {"epoch": 14, "update": 13.068, "loss": "2.061", "ppl": "4.17", "wps": "363648", "ups": "3.07", "wpb": "118337", "bsz": "256", "num_updates": "672600", "lr": "0.000330707", "gnorm": "0.726", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "219550"} [2022-08-03 14:28:22,632][train_inner][INFO] - {"epoch": 14, "update": 13.072, "loss": "2.057", "ppl": "4.16", "wps": "367318", "ups": "3.1", "wpb": "118643", "bsz": "256", "num_updates": "672800", "lr": "0.000330505", "gnorm": "0.726", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.2", "wall": "219615"} [2022-08-03 14:29:27,739][train_inner][INFO] - {"epoch": 14, "update": 13.076, "loss": "2.059", "ppl": "4.17", "wps": "362926", "ups": "3.07", "wpb": "118143", "bsz": "256", "num_updates": "673000", "lr": "0.000330303", "gnorm": "0.728", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "25.7", "wall": "219680"} [2022-08-03 14:30:32,675][train_inner][INFO] - {"epoch": 14, "update": 13.08, "loss": "2.062", "ppl": "4.18", "wps": "364375", "ups": "3.08", "wpb": "118304", "bsz": "256", "num_updates": "673200", "lr": "0.000330101", "gnorm": "0.729", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.5", "wall": "219745"} [2022-08-03 14:31:37,749][train_inner][INFO] - {"epoch": 14, "update": 13.084, "loss": "2.059", "ppl": "4.17", "wps": "363615", "ups": "3.07", "wpb": "118308", "bsz": "256", "num_updates": "673400", "lr": "0.000329899", "gnorm": "0.729", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.6", "wall": "219810"} [2022-08-03 14:32:28,413][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-03 14:32:42,519][train_inner][INFO] - {"epoch": 14, "update": 13.088, "loss": "2.062", "ppl": "4.18", "wps": "364391", "ups": "3.09", "wpb": "118006", "bsz": "256", "num_updates": "673600", "lr": "0.000329697", "gnorm": "0.73", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "219875"} [2022-08-03 14:33:47,379][train_inner][INFO] - {"epoch": 14, "update": 13.092, "loss": "2.062", "ppl": "4.18", "wps": "365028", "ups": "3.08", "wpb": "118377", "bsz": "256", "num_updates": "673800", "lr": "0.000329495", "gnorm": "0.73", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.5", "wall": "219940"} [2022-08-03 14:34:52,321][train_inner][INFO] - {"epoch": 14, "update": 13.096, "loss": "2.061", "ppl": "4.17", "wps": "365350", "ups": "3.08", "wpb": "118631", "bsz": "256", "num_updates": "674000", "lr": "0.000329293", "gnorm": "0.729", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "220005"} [2022-08-03 14:35:57,347][train_inner][INFO] - {"epoch": 14, "update": 13.099, "loss": "2.062", "ppl": "4.18", "wps": "365151", "ups": "3.08", "wpb": "118720", "bsz": "256", "num_updates": "674200", "lr": "0.000329091", "gnorm": "0.73", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.4", "wall": "220070"} [2022-08-03 14:37:02,851][train_inner][INFO] - {"epoch": 14, "update": 13.103, "loss": "2.07", "ppl": "4.2", "wps": "361856", "ups": "3.05", "wpb": "118514", "bsz": "256", "num_updates": "674400", "lr": "0.000328889", "gnorm": "0.731", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.8", "wall": "220135"} [2022-08-03 14:38:07,608][train_inner][INFO] - {"epoch": 14, "update": 13.107, "loss": "2.061", "ppl": "4.17", "wps": "363648", "ups": "3.09", "wpb": "117741", "bsz": "256", "num_updates": "674600", "lr": "0.000328687", "gnorm": "0.732", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.2", "wall": "220200"} [2022-08-03 14:39:12,784][train_inner][INFO] - {"epoch": 14, "update": 13.111, "loss": "2.063", "ppl": "4.18", "wps": "363232", "ups": "3.07", "wpb": "118370", "bsz": "256", "num_updates": "674800", "lr": "0.000328485", "gnorm": "0.728", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "220265"} [2022-08-03 14:40:17,961][train_inner][INFO] - {"epoch": 14, "update": 13.115, "loss": "2.062", "ppl": "4.18", "wps": "362070", "ups": "3.07", "wpb": "117991", "bsz": "256", "num_updates": "675000", "lr": "0.000328283", "gnorm": "0.73", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "220330"} [2022-08-03 14:41:22,835][train_inner][INFO] - {"epoch": 14, "update": 13.119, "loss": "2.069", "ppl": "4.2", "wps": "360922", "ups": "3.08", "wpb": "117071", "bsz": "256", "num_updates": "675200", "lr": "0.000328081", "gnorm": "0.736", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21", "wall": "220395"} [2022-08-03 14:42:27,563][train_inner][INFO] - {"epoch": 14, "update": 13.123, "loss": "2.065", "ppl": "4.18", "wps": "363787", "ups": "3.09", "wpb": "117734", "bsz": "256", "num_updates": "675400", "lr": "0.000327879", "gnorm": "0.735", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.1", "wall": "220460"} [2022-08-03 14:43:32,537][train_inner][INFO] - {"epoch": 14, "update": 13.127, "loss": "2.06", "ppl": "4.17", "wps": "363754", "ups": "3.08", "wpb": "118171", "bsz": "256", "num_updates": "675600", "lr": "0.000327677", "gnorm": "0.731", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.9", "wall": "220525"} [2022-08-03 14:43:40,185][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-03 14:44:37,718][train_inner][INFO] - {"epoch": 14, "update": 13.13, "loss": "2.062", "ppl": "4.17", "wps": "363537", "ups": "3.07", "wpb": "118475", "bsz": "256", "num_updates": "675800", "lr": "0.000327475", "gnorm": "0.728", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.2", "wall": "220590"} [2022-08-03 14:45:43,596][train_inner][INFO] - {"epoch": 14, "update": 13.134, "loss": "2.062", "ppl": "4.18", "wps": "358268", "ups": "3.04", "wpb": "118010", "bsz": "256", "num_updates": "676000", "lr": "0.000327273", "gnorm": "0.743", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "20.4", "wall": "220656"} [2022-08-03 14:46:48,437][train_inner][INFO] - {"epoch": 14, "update": 13.138, "loss": "2.062", "ppl": "4.18", "wps": "364921", "ups": "3.08", "wpb": "118307", "bsz": "256", "num_updates": "676200", "lr": "0.000327071", "gnorm": "0.728", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.8", "wall": "220721"} [2022-08-03 14:47:53,431][train_inner][INFO] - {"epoch": 14, "update": 13.142, "loss": "2.056", "ppl": "4.16", "wps": "364677", "ups": "3.08", "wpb": "118508", "bsz": "256", "num_updates": "676400", "lr": "0.000326869", "gnorm": "0.729", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "220786"} [2022-08-03 14:48:59,492][train_inner][INFO] - {"epoch": 14, "update": 13.146, "loss": "2.06", "ppl": "4.17", "wps": "358118", "ups": "3.03", "wpb": "118286", "bsz": "256", "num_updates": "676600", "lr": "0.000326667", "gnorm": "0.731", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "21.5", "wall": "220852"} [2022-08-03 14:50:04,659][train_inner][INFO] - {"epoch": 14, "update": 13.15, "loss": "2.062", "ppl": "4.18", "wps": "364248", "ups": "3.07", "wpb": "118682", "bsz": "256", "num_updates": "676800", "lr": "0.000326465", "gnorm": "0.725", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.8", "wall": "220917"} [2022-08-03 14:51:09,509][train_inner][INFO] - {"epoch": 14, "update": 13.154, "loss": "2.055", "ppl": "4.16", "wps": "366406", "ups": "3.08", "wpb": "118806", "bsz": "256", "num_updates": "677000", "lr": "0.000326263", "gnorm": "0.73", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.7", "wall": "220982"} [2022-08-03 14:52:15,446][train_inner][INFO] - {"epoch": 14, "update": 13.158, "loss": "2.064", "ppl": "4.18", "wps": "357161", "ups": "3.03", "wpb": "117748", "bsz": "256", "num_updates": "677200", "lr": "0.000326061", "gnorm": "0.732", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "27.9", "wall": "221048"} [2022-08-03 14:53:20,256][train_inner][INFO] - {"epoch": 14, "update": 13.162, "loss": "2.06", "ppl": "4.17", "wps": "364618", "ups": "3.09", "wpb": "118154", "bsz": "256", "num_updates": "677400", "lr": "0.000325859", "gnorm": "0.734", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.9", "wall": "221113"} [2022-08-03 14:54:24,991][train_inner][INFO] - {"epoch": 14, "update": 13.165, "loss": "2.066", "ppl": "4.19", "wps": "362840", "ups": "3.09", "wpb": "117440", "bsz": "256", "num_updates": "677600", "lr": "0.000325657", "gnorm": "0.733", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.1", "wall": "221177"} [2022-08-03 14:54:55,621][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-03 14:55:30,322][train_inner][INFO] - {"epoch": 14, "update": 13.169, "loss": "2.062", "ppl": "4.18", "wps": "362502", "ups": "3.06", "wpb": "118411", "bsz": "256", "num_updates": "677800", "lr": "0.000325455", "gnorm": "0.733", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.4", "wall": "221243"} [2022-08-03 14:56:35,323][train_inner][INFO] - {"epoch": 14, "update": 13.173, "loss": "2.055", "ppl": "4.16", "wps": "364763", "ups": "3.08", "wpb": "118548", "bsz": "256", "num_updates": "678000", "lr": "0.000325253", "gnorm": "0.729", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.4", "wall": "221308"} [2022-08-03 14:57:40,303][train_inner][INFO] - {"epoch": 14, "update": 13.177, "loss": "2.057", "ppl": "4.16", "wps": "364859", "ups": "3.08", "wpb": "118542", "bsz": "256", "num_updates": "678200", "lr": "0.000325051", "gnorm": "0.73", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.4", "wall": "221373"} [2022-08-03 14:58:45,290][train_inner][INFO] - {"epoch": 14, "update": 13.181, "loss": "2.057", "ppl": "4.16", "wps": "363297", "ups": "3.08", "wpb": "118046", "bsz": "256", "num_updates": "678400", "lr": "0.000324848", "gnorm": "0.733", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.9", "wall": "221438"} [2022-08-03 14:59:50,021][train_inner][INFO] - {"epoch": 14, "update": 13.185, "loss": "2.065", "ppl": "4.18", "wps": "364296", "ups": "3.09", "wpb": "117903", "bsz": "256", "num_updates": "678600", "lr": "0.000324646", "gnorm": "0.734", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.1", "wall": "221502"} [2022-08-03 15:00:54,983][train_inner][INFO] - {"epoch": 14, "update": 13.189, "loss": "2.064", "ppl": "4.18", "wps": "362148", "ups": "3.08", "wpb": "117628", "bsz": "256", "num_updates": "678800", "lr": "0.000324444", "gnorm": "0.732", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.9", "wall": "221567"} [2022-08-03 15:02:00,830][train_inner][INFO] - {"epoch": 14, "update": 13.193, "loss": "2.059", "ppl": "4.17", "wps": "359535", "ups": "3.04", "wpb": "118369", "bsz": "256", "num_updates": "679000", "lr": "0.000324242", "gnorm": "0.727", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "221633"} [2022-08-03 15:03:05,847][train_inner][INFO] - {"epoch": 14, "update": 13.197, "loss": "2.057", "ppl": "4.16", "wps": "363052", "ups": "3.08", "wpb": "118022", "bsz": "256", "num_updates": "679200", "lr": "0.00032404", "gnorm": "0.73", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.8", "wall": "221698"} [2022-08-03 15:03:23,404][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-03 15:04:10,785][train_inner][INFO] - {"epoch": 14, "update": 13.2, "loss": "2.061", "ppl": "4.17", "wps": "365820", "ups": "3.08", "wpb": "118776", "bsz": "256", "num_updates": "679400", "lr": "0.000323838", "gnorm": "0.732", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "221763"} [2022-08-03 15:05:15,931][train_inner][INFO] - {"epoch": 14, "update": 13.204, "loss": "2.06", "ppl": "4.17", "wps": "365745", "ups": "3.07", "wpb": "119132", "bsz": "256", "num_updates": "679600", "lr": "0.000323636", "gnorm": "0.739", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.1", "wall": "221828"} [2022-08-03 15:06:21,168][train_inner][INFO] - {"epoch": 14, "update": 13.208, "loss": "2.059", "ppl": "4.17", "wps": "363348", "ups": "3.07", "wpb": "118518", "bsz": "256", "num_updates": "679800", "lr": "0.000323434", "gnorm": "0.729", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "221893"} [2022-08-03 15:07:25,955][train_inner][INFO] - {"epoch": 14, "update": 13.212, "loss": "2.056", "ppl": "4.16", "wps": "363978", "ups": "3.09", "wpb": "117904", "bsz": "256", "num_updates": "680000", "lr": "0.000323232", "gnorm": "0.733", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.6", "wall": "221958"} [2022-08-03 15:08:30,851][train_inner][INFO] - {"epoch": 14, "update": 13.216, "loss": "2.064", "ppl": "4.18", "wps": "362510", "ups": "3.08", "wpb": "117625", "bsz": "256", "num_updates": "680200", "lr": "0.00032303", "gnorm": "0.733", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "222023"} [2022-08-03 15:09:35,909][train_inner][INFO] - {"epoch": 14, "update": 13.22, "loss": "2.068", "ppl": "4.19", "wps": "364030", "ups": "3.07", "wpb": "118414", "bsz": "256", "num_updates": "680400", "lr": "0.000322828", "gnorm": "0.739", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.6", "wall": "222088"} [2022-08-03 15:10:40,733][train_inner][INFO] - {"epoch": 14, "update": 13.224, "loss": "2.057", "ppl": "4.16", "wps": "363970", "ups": "3.09", "wpb": "117968", "bsz": "256", "num_updates": "680600", "lr": "0.000322626", "gnorm": "0.733", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.9", "wall": "222153"} [2022-08-03 15:11:45,163][train_inner][INFO] - {"epoch": 14, "update": 13.228, "loss": "2.063", "ppl": "4.18", "wps": "366576", "ups": "3.1", "wpb": "118091", "bsz": "256", "num_updates": "680800", "lr": "0.000322424", "gnorm": "0.733", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23.4", "wall": "222217"} [2022-08-03 15:12:50,316][train_inner][INFO] - {"epoch": 14, "update": 13.232, "loss": "2.058", "ppl": "4.16", "wps": "363233", "ups": "3.07", "wpb": "118328", "bsz": "256", "num_updates": "681000", "lr": "0.000322222", "gnorm": "0.734", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "222283"} [2022-08-03 15:13:56,095][train_inner][INFO] - {"epoch": 14, "update": 13.235, "loss": "2.057", "ppl": "4.16", "wps": "360758", "ups": "3.04", "wpb": "118649", "bsz": "256", "num_updates": "681200", "lr": "0.00032202", "gnorm": "0.73", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.6", "wall": "222348"} [2022-08-03 15:15:01,131][train_inner][INFO] - {"epoch": 14, "update": 13.239, "loss": "2.059", "ppl": "4.17", "wps": "364274", "ups": "3.08", "wpb": "118453", "bsz": "256", "num_updates": "681400", "lr": "0.000321818", "gnorm": "0.733", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.7", "wall": "222413"} [2022-08-03 15:16:06,042][train_inner][INFO] - {"epoch": 14, "update": 13.243, "loss": "2.058", "ppl": "4.16", "wps": "364949", "ups": "3.08", "wpb": "118444", "bsz": "256", "num_updates": "681600", "lr": "0.000321616", "gnorm": "0.73", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "25.5", "wall": "222478"} [2022-08-03 15:17:10,553][train_inner][INFO] - {"epoch": 14, "update": 13.247, "loss": "2.057", "ppl": "4.16", "wps": "369586", "ups": "3.1", "wpb": "119210", "bsz": "256", "num_updates": "681800", "lr": "0.000321414", "gnorm": "0.731", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.1", "wall": "222543"} [2022-08-03 15:18:15,330][train_inner][INFO] - {"epoch": 14, "update": 13.251, "loss": "2.061", "ppl": "4.17", "wps": "363906", "ups": "3.09", "wpb": "117862", "bsz": "256", "num_updates": "682000", "lr": "0.000321212", "gnorm": "0.736", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.9", "wall": "222608"} [2022-08-03 15:19:19,932][train_inner][INFO] - {"epoch": 14, "update": 13.255, "loss": "2.057", "ppl": "4.16", "wps": "367105", "ups": "3.1", "wpb": "118577", "bsz": "256", "num_updates": "682200", "lr": "0.00032101", "gnorm": "0.732", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.8", "wall": "222672"} [2022-08-03 15:20:24,611][train_inner][INFO] - {"epoch": 14, "update": 13.259, "loss": "2.055", "ppl": "4.16", "wps": "367293", "ups": "3.09", "wpb": "118778", "bsz": "256", "num_updates": "682400", "lr": "0.000320808", "gnorm": "0.732", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "222737"} [2022-08-03 15:21:29,504][train_inner][INFO] - {"epoch": 14, "update": 13.263, "loss": "2.061", "ppl": "4.17", "wps": "363431", "ups": "3.08", "wpb": "117920", "bsz": "256", "num_updates": "682600", "lr": "0.000320606", "gnorm": "0.732", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.4", "wall": "222802"} [2022-08-03 15:22:34,312][train_inner][INFO] - {"epoch": 14, "update": 13.266, "loss": "2.062", "ppl": "4.18", "wps": "365696", "ups": "3.09", "wpb": "118497", "bsz": "256", "num_updates": "682800", "lr": "0.000320404", "gnorm": "0.733", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.3", "wall": "222867"} [2022-08-03 15:23:39,267][train_inner][INFO] - {"epoch": 14, "update": 13.27, "loss": "2.067", "ppl": "4.19", "wps": "362369", "ups": "3.08", "wpb": "117687", "bsz": "256", "num_updates": "683000", "lr": "0.000320202", "gnorm": "0.738", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.4", "wall": "222932"} [2022-08-03 15:24:44,035][train_inner][INFO] - {"epoch": 14, "update": 13.274, "loss": "2.064", "ppl": "4.18", "wps": "364318", "ups": "3.09", "wpb": "117979", "bsz": "256", "num_updates": "683200", "lr": "0.00032", "gnorm": "0.735", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23", "wall": "222996"} [2022-08-03 15:25:49,286][train_inner][INFO] - {"epoch": 14, "update": 13.278, "loss": "2.059", "ppl": "4.17", "wps": "362179", "ups": "3.07", "wpb": "118160", "bsz": "256", "num_updates": "683400", "lr": "0.000319798", "gnorm": "0.732", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "21.3", "wall": "223062"} [2022-08-03 15:26:01,943][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-03 15:26:54,800][train_inner][INFO] - {"epoch": 14, "update": 13.282, "loss": "2.057", "ppl": "4.16", "wps": "360899", "ups": "3.05", "wpb": "118218", "bsz": "256", "num_updates": "683600", "lr": "0.000319596", "gnorm": "0.733", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "223127"} [2022-08-03 15:27:59,317][train_inner][INFO] - {"epoch": 14, "update": 13.286, "loss": "2.059", "ppl": "4.17", "wps": "366798", "ups": "3.1", "wpb": "118323", "bsz": "256", "num_updates": "683800", "lr": "0.000319394", "gnorm": "0.737", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.1", "wall": "223192"} [2022-08-03 15:29:04,568][train_inner][INFO] - {"epoch": 14, "update": 13.29, "loss": "2.061", "ppl": "4.17", "wps": "362639", "ups": "3.07", "wpb": "118312", "bsz": "256", "num_updates": "684000", "lr": "0.000319192", "gnorm": "0.735", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "223257"} [2022-08-03 15:30:09,572][train_inner][INFO] - {"epoch": 14, "update": 13.294, "loss": "2.057", "ppl": "4.16", "wps": "363272", "ups": "3.08", "wpb": "118070", "bsz": "256", "num_updates": "684200", "lr": "0.00031899", "gnorm": "0.735", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "223322"} [2022-08-03 15:31:14,126][train_inner][INFO] - {"epoch": 14, "update": 13.298, "loss": "2.059", "ppl": "4.17", "wps": "366062", "ups": "3.1", "wpb": "118151", "bsz": "256", "num_updates": "684400", "lr": "0.000318788", "gnorm": "0.736", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.3", "wall": "223386"} [2022-08-03 15:32:19,368][train_inner][INFO] - {"epoch": 14, "update": 13.301, "loss": "2.055", "ppl": "4.16", "wps": "364916", "ups": "3.07", "wpb": "119038", "bsz": "256", "num_updates": "684600", "lr": "0.000318586", "gnorm": "0.726", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.7", "wall": "223452"} [2022-08-03 15:33:24,534][train_inner][INFO] - {"epoch": 14, "update": 13.305, "loss": "2.055", "ppl": "4.16", "wps": "363174", "ups": "3.07", "wpb": "118330", "bsz": "255.9", "num_updates": "684800", "lr": "0.000318384", "gnorm": "0.734", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.4", "wall": "223517"} [2022-08-03 15:34:29,809][train_inner][INFO] - {"epoch": 14, "update": 13.309, "loss": "2.059", "ppl": "4.17", "wps": "362375", "ups": "3.06", "wpb": "118269", "bsz": "256", "num_updates": "685000", "lr": "0.000318182", "gnorm": "0.734", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.5", "wall": "223582"} [2022-08-03 15:35:34,207][train_inner][INFO] - {"epoch": 14, "update": 13.313, "loss": "2.062", "ppl": "4.18", "wps": "366107", "ups": "3.11", "wpb": "117882", "bsz": "256", "num_updates": "685200", "lr": "0.00031798", "gnorm": "0.735", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.5", "wall": "223647"} [2022-08-03 15:36:39,050][train_inner][INFO] - {"epoch": 14, "update": 13.317, "loss": "2.056", "ppl": "4.16", "wps": "365584", "ups": "3.08", "wpb": "118525", "bsz": "256", "num_updates": "685400", "lr": "0.000317778", "gnorm": "0.733", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "223711"} [2022-08-03 15:37:41,582][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-03 15:37:44,256][train_inner][INFO] - {"epoch": 14, "update": 13.321, "loss": "2.059", "ppl": "4.17", "wps": "360752", "ups": "3.07", "wpb": "117614", "bsz": "256", "num_updates": "685600", "lr": "0.000317576", "gnorm": "0.737", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24", "wall": "223777"} [2022-08-03 15:38:49,059][train_inner][INFO] - {"epoch": 14, "update": 13.325, "loss": "2.054", "ppl": "4.15", "wps": "364730", "ups": "3.09", "wpb": "118178", "bsz": "256", "num_updates": "685800", "lr": "0.000317374", "gnorm": "0.733", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "223841"} [2022-08-03 15:39:54,951][train_inner][INFO] - {"epoch": 14, "update": 13.329, "loss": "2.052", "ppl": "4.15", "wps": "358572", "ups": "3.04", "wpb": "118132", "bsz": "256", "num_updates": "686000", "lr": "0.000317172", "gnorm": "0.734", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "26.4", "wall": "223907"} [2022-08-03 15:40:59,796][train_inner][INFO] - {"epoch": 14, "update": 13.333, "loss": "2.054", "ppl": "4.15", "wps": "366639", "ups": "3.08", "wpb": "118872", "bsz": "256", "num_updates": "686200", "lr": "0.00031697", "gnorm": "0.735", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.7", "wall": "223972"} [2022-08-03 15:42:04,654][train_inner][INFO] - {"epoch": 14, "update": 13.336, "loss": "2.06", "ppl": "4.17", "wps": "365386", "ups": "3.08", "wpb": "118489", "bsz": "256", "num_updates": "686400", "lr": "0.000316768", "gnorm": "0.733", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.7", "wall": "224037"} [2022-08-03 15:43:09,378][train_inner][INFO] - {"epoch": 14, "update": 13.34, "loss": "2.055", "ppl": "4.15", "wps": "365732", "ups": "3.09", "wpb": "118356", "bsz": "256", "num_updates": "686600", "lr": "0.000316566", "gnorm": "0.734", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.1", "wall": "224102"} [2022-08-03 15:44:14,325][train_inner][INFO] - {"epoch": 14, "update": 13.344, "loss": "2.054", "ppl": "4.15", "wps": "364851", "ups": "3.08", "wpb": "118478", "bsz": "256", "num_updates": "686800", "lr": "0.000316364", "gnorm": "0.735", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.2", "wall": "224167"} [2022-08-03 15:44:42,829][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-03 15:45:19,623][train_inner][INFO] - {"epoch": 14, "update": 13.348, "loss": "2.062", "ppl": "4.18", "wps": "361554", "ups": "3.06", "wpb": "118042", "bsz": "256", "num_updates": "687000", "lr": "0.000316162", "gnorm": "0.737", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.8", "wall": "224232"} [2022-08-03 15:46:24,624][train_inner][INFO] - {"epoch": 14, "update": 13.352, "loss": "2.05", "ppl": "4.14", "wps": "366167", "ups": "3.08", "wpb": "119005", "bsz": "256", "num_updates": "687200", "lr": "0.00031596", "gnorm": "0.731", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.6", "wall": "224297"} [2022-08-03 15:47:29,277][train_inner][INFO] - {"epoch": 14, "update": 13.356, "loss": "2.056", "ppl": "4.16", "wps": "363271", "ups": "3.09", "wpb": "117431", "bsz": "256", "num_updates": "687400", "lr": "0.000315758", "gnorm": "0.75", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23", "wall": "224362"} [2022-08-03 15:48:34,465][train_inner][INFO] - {"epoch": 14, "update": 13.36, "loss": "2.054", "ppl": "4.15", "wps": "364244", "ups": "3.07", "wpb": "118719", "bsz": "256", "num_updates": "687600", "lr": "0.000315556", "gnorm": "0.732", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.8", "wall": "224427"} [2022-08-03 15:49:38,976][train_inner][INFO] - {"epoch": 14, "update": 13.364, "loss": "2.059", "ppl": "4.17", "wps": "366694", "ups": "3.1", "wpb": "118278", "bsz": "256", "num_updates": "687800", "lr": "0.000315354", "gnorm": "0.737", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20", "wall": "224491"} [2022-08-03 15:50:43,852][train_inner][INFO] - {"epoch": 14, "update": 13.368, "loss": "2.059", "ppl": "4.17", "wps": "364826", "ups": "3.08", "wpb": "118339", "bsz": "256", "num_updates": "688000", "lr": "0.000315152", "gnorm": "0.733", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "224556"} [2022-08-03 15:51:48,685][train_inner][INFO] - {"epoch": 14, "update": 13.371, "loss": "2.058", "ppl": "4.16", "wps": "364855", "ups": "3.08", "wpb": "118271", "bsz": "256", "num_updates": "688200", "lr": "0.000314949", "gnorm": "0.735", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.9", "wall": "224621"} [2022-08-03 15:52:53,868][train_inner][INFO] - {"epoch": 14, "update": 13.375, "loss": "2.059", "ppl": "4.17", "wps": "360792", "ups": "3.07", "wpb": "117587", "bsz": "256", "num_updates": "688400", "lr": "0.000314747", "gnorm": "0.735", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.8", "wall": "224686"} [2022-08-03 15:53:58,650][train_inner][INFO] - {"epoch": 14, "update": 13.379, "loss": "2.053", "ppl": "4.15", "wps": "365839", "ups": "3.09", "wpb": "118498", "bsz": "256", "num_updates": "688600", "lr": "0.000314545", "gnorm": "0.732", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.7", "wall": "224751"} [2022-08-03 15:55:03,767][train_inner][INFO] - {"epoch": 14, "update": 13.383, "loss": "2.053", "ppl": "4.15", "wps": "364145", "ups": "3.07", "wpb": "118558", "bsz": "256", "num_updates": "688800", "lr": "0.000314343", "gnorm": "0.734", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "224816"} [2022-08-03 15:56:08,682][train_inner][INFO] - {"epoch": 14, "update": 13.387, "loss": "2.061", "ppl": "4.17", "wps": "362033", "ups": "3.08", "wpb": "117506", "bsz": "256", "num_updates": "689000", "lr": "0.000314141", "gnorm": "0.742", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "224881"} [2022-08-03 15:57:13,483][train_inner][INFO] - {"epoch": 14, "update": 13.391, "loss": "2.056", "ppl": "4.16", "wps": "364472", "ups": "3.09", "wpb": "118089", "bsz": "256", "num_updates": "689200", "lr": "0.000313939", "gnorm": "0.737", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "224946"} [2022-08-03 15:58:18,477][train_inner][INFO] - {"epoch": 14, "update": 13.395, "loss": "2.052", "ppl": "4.15", "wps": "363479", "ups": "3.08", "wpb": "118118", "bsz": "256", "num_updates": "689400", "lr": "0.000313737", "gnorm": "0.736", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "225011"} [2022-08-03 15:59:23,202][train_inner][INFO] - {"epoch": 14, "update": 13.399, "loss": "2.057", "ppl": "4.16", "wps": "364476", "ups": "3.09", "wpb": "117952", "bsz": "256", "num_updates": "689600", "lr": "0.000313535", "gnorm": "0.75", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.2", "wall": "225076"} [2022-08-03 16:00:28,269][train_inner][INFO] - {"epoch": 14, "update": 13.403, "loss": "2.061", "ppl": "4.17", "wps": "363338", "ups": "3.07", "wpb": "118205", "bsz": "256", "num_updates": "689800", "lr": "0.000313333", "gnorm": "0.737", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.9", "wall": "225141"} [2022-08-03 16:01:33,004][train_inner][INFO] - {"epoch": 14, "update": 13.406, "loss": "2.051", "ppl": "4.14", "wps": "364729", "ups": "3.09", "wpb": "118050", "bsz": "256", "num_updates": "690000", "lr": "0.000313131", "gnorm": "0.738", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.7", "wall": "225205"} [2022-08-03 16:02:38,125][train_inner][INFO] - {"epoch": 14, "update": 13.41, "loss": "2.057", "ppl": "4.16", "wps": "364152", "ups": "3.07", "wpb": "118569", "bsz": "256", "num_updates": "690200", "lr": "0.000312929", "gnorm": "0.736", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.3", "wall": "225270"} [2022-08-03 16:03:42,883][train_inner][INFO] - {"epoch": 14, "update": 13.414, "loss": "2.055", "ppl": "4.16", "wps": "364111", "ups": "3.09", "wpb": "117893", "bsz": "256", "num_updates": "690400", "lr": "0.000312727", "gnorm": "0.738", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.4", "wall": "225335"} [2022-08-03 16:04:47,915][train_inner][INFO] - {"epoch": 14, "update": 13.418, "loss": "2.051", "ppl": "4.14", "wps": "363969", "ups": "3.08", "wpb": "118346", "bsz": "256", "num_updates": "690600", "lr": "0.000312525", "gnorm": "0.732", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "27.2", "wall": "225400"} [2022-08-03 16:05:53,934][train_inner][INFO] - {"epoch": 14, "update": 13.422, "loss": "2.054", "ppl": "4.15", "wps": "358754", "ups": "3.03", "wpb": "118421", "bsz": "256", "num_updates": "690800", "lr": "0.000312323", "gnorm": "0.734", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "20.9", "wall": "225466"} [2022-08-03 16:06:55,739][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-03 16:06:59,174][train_inner][INFO] - {"epoch": 14, "update": 13.426, "loss": "2.048", "ppl": "4.14", "wps": "363329", "ups": "3.07", "wpb": "118516", "bsz": "256", "num_updates": "691000", "lr": "0.000312121", "gnorm": "0.734", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "225531"} [2022-08-03 16:08:04,057][train_inner][INFO] - {"epoch": 14, "update": 13.43, "loss": "2.048", "ppl": "4.14", "wps": "363615", "ups": "3.08", "wpb": "117962", "bsz": "256", "num_updates": "691200", "lr": "0.000311919", "gnorm": "0.743", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.4", "wall": "225596"} [2022-08-03 16:09:08,774][train_inner][INFO] - {"epoch": 14, "update": 13.434, "loss": "2.055", "ppl": "4.16", "wps": "365733", "ups": "3.09", "wpb": "118344", "bsz": "256", "num_updates": "691400", "lr": "0.000311717", "gnorm": "0.738", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.6", "wall": "225661"} [2022-08-03 16:09:41,757][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-03 16:10:13,822][train_inner][INFO] - {"epoch": 14, "update": 13.438, "loss": "2.058", "ppl": "4.16", "wps": "363022", "ups": "3.07", "wpb": "118066", "bsz": "256", "num_updates": "691600", "lr": "0.000311515", "gnorm": "0.738", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "225726"} [2022-08-03 16:11:19,077][train_inner][INFO] - {"epoch": 14, "update": 13.441, "loss": "2.056", "ppl": "4.16", "wps": "362073", "ups": "3.06", "wpb": "118135", "bsz": "256", "num_updates": "691800", "lr": "0.000311313", "gnorm": "0.737", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "225791"} [2022-08-03 16:12:23,790][train_inner][INFO] - {"epoch": 14, "update": 13.445, "loss": "2.055", "ppl": "4.16", "wps": "366544", "ups": "3.09", "wpb": "118599", "bsz": "256", "num_updates": "692000", "lr": "0.000311111", "gnorm": "0.736", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.4", "wall": "225856"} [2022-08-03 16:13:28,831][train_inner][INFO] - {"epoch": 14, "update": 13.449, "loss": "2.054", "ppl": "4.15", "wps": "364087", "ups": "3.08", "wpb": "118400", "bsz": "256", "num_updates": "692200", "lr": "0.000310909", "gnorm": "0.735", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.6", "wall": "225921"} [2022-08-03 16:14:34,068][train_inner][INFO] - {"epoch": 14, "update": 13.453, "loss": "2.059", "ppl": "4.17", "wps": "363089", "ups": "3.07", "wpb": "118434", "bsz": "256", "num_updates": "692400", "lr": "0.000310707", "gnorm": "0.738", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "225986"} [2022-08-03 16:15:39,044][train_inner][INFO] - {"epoch": 14, "update": 13.457, "loss": "2.055", "ppl": "4.16", "wps": "362350", "ups": "3.08", "wpb": "117717", "bsz": "256", "num_updates": "692600", "lr": "0.000310505", "gnorm": "0.739", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22", "wall": "226051"} [2022-08-03 16:16:44,151][train_inner][INFO] - {"epoch": 14, "update": 13.461, "loss": "2.049", "ppl": "4.14", "wps": "363686", "ups": "3.07", "wpb": "118391", "bsz": "256", "num_updates": "692800", "lr": "0.000310303", "gnorm": "0.737", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.9", "wall": "226116"} [2022-08-03 16:17:49,098][train_inner][INFO] - {"epoch": 14, "update": 13.465, "loss": "2.055", "ppl": "4.16", "wps": "363901", "ups": "3.08", "wpb": "118170", "bsz": "256", "num_updates": "693000", "lr": "0.000310101", "gnorm": "0.739", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.7", "wall": "226181"} [2022-08-03 16:18:54,324][train_inner][INFO] - {"epoch": 14, "update": 13.469, "loss": "2.054", "ppl": "4.15", "wps": "364850", "ups": "3.07", "wpb": "118987", "bsz": "256", "num_updates": "693200", "lr": "0.000309899", "gnorm": "0.736", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "226247"} [2022-08-03 16:19:59,251][train_inner][INFO] - {"epoch": 14, "update": 13.472, "loss": "2.055", "ppl": "4.16", "wps": "364852", "ups": "3.08", "wpb": "118442", "bsz": "256", "num_updates": "693400", "lr": "0.000309697", "gnorm": "0.738", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.3", "wall": "226312"} [2022-08-03 16:21:04,111][train_inner][INFO] - {"epoch": 14, "update": 13.476, "loss": "2.053", "ppl": "4.15", "wps": "364820", "ups": "3.08", "wpb": "118308", "bsz": "256", "num_updates": "693600", "lr": "0.000309495", "gnorm": "0.739", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.5", "wall": "226376"} [2022-08-03 16:22:08,894][train_inner][INFO] - {"epoch": 14, "update": 13.48, "loss": "2.052", "ppl": "4.15", "wps": "365366", "ups": "3.09", "wpb": "118346", "bsz": "256", "num_updates": "693800", "lr": "0.000309293", "gnorm": "0.738", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.9", "wall": "226441"} [2022-08-03 16:23:13,975][train_inner][INFO] - {"epoch": 14, "update": 13.484, "loss": "2.052", "ppl": "4.15", "wps": "363494", "ups": "3.07", "wpb": "118282", "bsz": "256", "num_updates": "694000", "lr": "0.000309091", "gnorm": "0.738", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.9", "wall": "226506"} [2022-08-03 16:24:19,131][train_inner][INFO] - {"epoch": 14, "update": 13.488, "loss": "2.052", "ppl": "4.15", "wps": "363016", "ups": "3.07", "wpb": "118260", "bsz": "256", "num_updates": "694200", "lr": "0.000308889", "gnorm": "0.739", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.8", "wall": "226571"} [2022-08-03 16:25:25,014][train_inner][INFO] - {"epoch": 14, "update": 13.492, "loss": "2.05", "ppl": "4.14", "wps": "359106", "ups": "3.04", "wpb": "118294", "bsz": "256", "num_updates": "694400", "lr": "0.000308687", "gnorm": "0.738", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "20.2", "wall": "226637"} [2022-08-03 16:26:29,637][train_inner][INFO] - {"epoch": 14, "update": 13.496, "loss": "2.053", "ppl": "4.15", "wps": "365388", "ups": "3.09", "wpb": "118061", "bsz": "256", "num_updates": "694600", "lr": "0.000308485", "gnorm": "0.737", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "226702"} [2022-08-03 16:27:34,504][train_inner][INFO] - {"epoch": 14, "update": 13.5, "loss": "2.05", "ppl": "4.14", "wps": "365240", "ups": "3.08", "wpb": "118457", "bsz": "256", "num_updates": "694800", "lr": "0.000308283", "gnorm": "0.737", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.3", "wall": "226767"} [2022-08-03 16:28:39,630][train_inner][INFO] - {"epoch": 14, "update": 13.504, "loss": "2.047", "ppl": "4.13", "wps": "364493", "ups": "3.07", "wpb": "118690", "bsz": "256", "num_updates": "695000", "lr": "0.000308081", "gnorm": "0.737", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "226832"} [2022-08-03 16:29:44,608][train_inner][INFO] - {"epoch": 14, "update": 13.507, "loss": "2.05", "ppl": "4.14", "wps": "365170", "ups": "3.08", "wpb": "118638", "bsz": "256", "num_updates": "695200", "lr": "0.000307879", "gnorm": "0.735", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.7", "wall": "226897"} [2022-08-03 16:30:49,371][train_inner][INFO] - {"epoch": 14, "update": 13.511, "loss": "2.044", "ppl": "4.12", "wps": "364789", "ups": "3.09", "wpb": "118121", "bsz": "256", "num_updates": "695400", "lr": "0.000307677", "gnorm": "0.736", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "25.2", "wall": "226962"} [2022-08-03 16:31:54,531][train_inner][INFO] - {"epoch": 14, "update": 13.515, "loss": "2.053", "ppl": "4.15", "wps": "363378", "ups": "3.07", "wpb": "118387", "bsz": "256", "num_updates": "695600", "lr": "0.000307475", "gnorm": "0.74", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "22.5", "wall": "227027"} [2022-08-03 16:32:02,719][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-03 16:32:59,785][train_inner][INFO] - {"epoch": 14, "update": 13.519, "loss": "2.048", "ppl": "4.13", "wps": "363795", "ups": "3.06", "wpb": "118694", "bsz": "256", "num_updates": "695800", "lr": "0.000307273", "gnorm": "0.736", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.6", "wall": "227092"} [2022-08-03 16:34:04,331][train_inner][INFO] - {"epoch": 14, "update": 13.523, "loss": "2.05", "ppl": "4.14", "wps": "366389", "ups": "3.1", "wpb": "118243", "bsz": "256", "num_updates": "696000", "lr": "0.000307071", "gnorm": "0.737", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.8", "wall": "227157"} [2022-08-03 16:35:10,199][train_inner][INFO] - {"epoch": 14, "update": 13.527, "loss": "2.05", "ppl": "4.14", "wps": "360142", "ups": "3.04", "wpb": "118607", "bsz": "256", "num_updates": "696200", "lr": "0.000306869", "gnorm": "0.739", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.1", "wall": "227223"} [2022-08-03 16:36:15,194][train_inner][INFO] - {"epoch": 14, "update": 13.531, "loss": "2.06", "ppl": "4.17", "wps": "365708", "ups": "3.08", "wpb": "118845", "bsz": "256", "num_updates": "696400", "lr": "0.000306667", "gnorm": "0.739", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.4", "wall": "227288"} [2022-08-03 16:37:20,142][train_inner][INFO] - {"epoch": 14, "update": 13.535, "loss": "2.049", "ppl": "4.14", "wps": "363907", "ups": "3.08", "wpb": "118174", "bsz": "256", "num_updates": "696600", "lr": "0.000306465", "gnorm": "0.737", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21", "wall": "227352"} [2022-08-03 16:38:26,203][train_inner][INFO] - {"epoch": 14, "update": 13.539, "loss": "2.05", "ppl": "4.14", "wps": "358358", "ups": "3.03", "wpb": "118365", "bsz": "256", "num_updates": "696800", "lr": "0.000306263", "gnorm": "0.743", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "20", "wall": "227419"} [2022-08-03 16:39:31,085][train_inner][INFO] - {"epoch": 14, "update": 13.542, "loss": "2.05", "ppl": "4.14", "wps": "366338", "ups": "3.08", "wpb": "118842", "bsz": "256", "num_updates": "697000", "lr": "0.000306061", "gnorm": "0.738", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.2", "wall": "227483"} [2022-08-03 16:40:36,862][train_inner][INFO] - {"epoch": 14, "update": 13.546, "loss": "2.049", "ppl": "4.14", "wps": "358359", "ups": "3.04", "wpb": "117858", "bsz": "256", "num_updates": "697200", "lr": "0.000305859", "gnorm": "0.738", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "227549"} [2022-08-03 16:41:42,013][train_inner][INFO] - {"epoch": 14, "update": 13.55, "loss": "2.053", "ppl": "4.15", "wps": "362521", "ups": "3.07", "wpb": "118091", "bsz": "256", "num_updates": "697400", "lr": "0.000305657", "gnorm": "0.74", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "25", "wall": "227614"} [2022-08-03 16:42:46,690][train_inner][INFO] - {"epoch": 14, "update": 13.554, "loss": "2.047", "ppl": "4.13", "wps": "366138", "ups": "3.09", "wpb": "118401", "bsz": "256", "num_updates": "697600", "lr": "0.000305455", "gnorm": "0.741", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24", "wall": "227679"} [2022-08-03 16:43:13,282][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-03 16:43:52,199][train_inner][INFO] - {"epoch": 14, "update": 13.558, "loss": "2.041", "ppl": "4.11", "wps": "361612", "ups": "3.05", "wpb": "118442", "bsz": "256", "num_updates": "697800", "lr": "0.000305253", "gnorm": "0.738", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "227745"} [2022-08-03 16:44:56,965][train_inner][INFO] - {"epoch": 14, "update": 13.562, "loss": "2.048", "ppl": "4.14", "wps": "364910", "ups": "3.09", "wpb": "118168", "bsz": "256", "num_updates": "698000", "lr": "0.000305051", "gnorm": "0.739", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.6", "wall": "227809"} [2022-08-03 16:46:01,539][train_inner][INFO] - {"epoch": 14, "update": 13.566, "loss": "2.048", "ppl": "4.14", "wps": "366225", "ups": "3.1", "wpb": "118241", "bsz": "256", "num_updates": "698200", "lr": "0.000304848", "gnorm": "0.737", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.8", "wall": "227874"} [2022-08-03 16:47:06,334][train_inner][INFO] - {"epoch": 14, "update": 13.57, "loss": "2.051", "ppl": "4.15", "wps": "365374", "ups": "3.09", "wpb": "118371", "bsz": "256", "num_updates": "698400", "lr": "0.000304646", "gnorm": "0.736", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.7", "wall": "227939"} [2022-08-03 16:48:11,356][train_inner][INFO] - {"epoch": 14, "update": 13.573, "loss": "2.051", "ppl": "4.14", "wps": "364632", "ups": "3.08", "wpb": "118543", "bsz": "256", "num_updates": "698600", "lr": "0.000304444", "gnorm": "0.738", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "228004"} [2022-08-03 16:49:15,799][train_inner][INFO] - {"epoch": 14, "update": 13.577, "loss": "2.044", "ppl": "4.12", "wps": "366205", "ups": "3.1", "wpb": "117995", "bsz": "256", "num_updates": "698800", "lr": "0.000304242", "gnorm": "0.741", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.5", "wall": "228068"} [2022-08-03 16:50:21,019][train_inner][INFO] - {"epoch": 14, "update": 13.581, "loss": "2.046", "ppl": "4.13", "wps": "362803", "ups": "3.07", "wpb": "118310", "bsz": "256", "num_updates": "699000", "lr": "0.00030404", "gnorm": "0.74", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.5", "wall": "228133"} [2022-08-03 16:51:27,306][train_inner][INFO] - {"epoch": 14, "update": 13.585, "loss": "2.051", "ppl": "4.15", "wps": "358163", "ups": "3.02", "wpb": "118705", "bsz": "256", "num_updates": "699200", "lr": "0.000303838", "gnorm": "0.742", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "21.7", "wall": "228200"} [2022-08-03 16:52:31,760][train_inner][INFO] - {"epoch": 14, "update": 13.589, "loss": "2.054", "ppl": "4.15", "wps": "364812", "ups": "3.1", "wpb": "117567", "bsz": "256", "num_updates": "699400", "lr": "0.000303636", "gnorm": "0.742", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.7", "wall": "228264"} [2022-08-03 16:53:37,985][train_inner][INFO] - {"epoch": 14, "update": 13.593, "loss": "2.05", "ppl": "4.14", "wps": "358029", "ups": "3.02", "wpb": "118550", "bsz": "256", "num_updates": "699600", "lr": "0.000303434", "gnorm": "0.737", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "21", "wall": "228330"} [2022-08-03 16:54:38,880][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-03 16:54:43,444][train_inner][INFO] - {"epoch": 14, "update": 13.597, "loss": "2.052", "ppl": "4.15", "wps": "361968", "ups": "3.06", "wpb": "118469", "bsz": "256", "num_updates": "699800", "lr": "0.000303232", "gnorm": "0.744", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.8", "wall": "228396"} [2022-08-03 16:55:48,040][train_inner][INFO] - {"epoch": 14, "update": 13.601, "loss": "2.053", "ppl": "4.15", "wps": "364054", "ups": "3.1", "wpb": "117580", "bsz": "256", "num_updates": "700000", "lr": "0.00030303", "gnorm": "0.746", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.9", "wall": "228460"} [2022-08-03 16:55:48,041][fairseq_cli.train][INFO] - begin validation on "valid" subset [2022-08-03 16:56:10,827][valid][INFO] - {"epoch": 14, "valid_loss": "1.958", "valid_ppl": "3.88", "valid_wps": "1.57266e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "700000", "valid_best_loss": "1.958"} [2022-08-03 16:56:10,831][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 14 @ 700000 updates [2022-08-03 16:56:10,831][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_14_700000.pt [2022-08-03 16:56:23,650][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_14_700000.pt [2022-08-03 16:56:53,272][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/adan2/checkpoint_14_700000.pt (epoch 14 @ 700000 updates, score 1.958) (writing took 42.4408537838608 seconds) [2022-08-03 16:57:58,336][train_inner][INFO] - {"epoch": 14, "update": 13.605, "loss": "2.055", "ppl": "4.16", "wps": "180829", "ups": "1.53", "wpb": "117805", "bsz": "256", "num_updates": "700200", "lr": "0.000302828", "gnorm": "0.745", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.7", "wall": "228591"} [2022-08-03 16:58:48,529][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-03 16:59:04,451][train_inner][INFO] - {"epoch": 14, "update": 13.608, "loss": "2.051", "ppl": "4.14", "wps": "356976", "ups": "3.03", "wpb": "118006", "bsz": "256", "num_updates": "700400", "lr": "0.000302626", "gnorm": "0.744", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "20", "wall": "228657"} [2022-08-03 17:00:10,512][train_inner][INFO] - {"epoch": 14, "update": 13.612, "loss": "2.052", "ppl": "4.15", "wps": "359539", "ups": "3.03", "wpb": "118757", "bsz": "256", "num_updates": "700600", "lr": "0.000302424", "gnorm": "0.747", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "23.9", "wall": "228723"} [2022-08-03 17:01:16,728][train_inner][INFO] - {"epoch": 14, "update": 13.616, "loss": "2.043", "ppl": "4.12", "wps": "356934", "ups": "3.02", "wpb": "118171", "bsz": "256", "num_updates": "700800", "lr": "0.000302222", "gnorm": "0.75", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "20.2", "wall": "228789"} [2022-08-03 17:02:22,632][train_inner][INFO] - {"epoch": 14, "update": 13.62, "loss": "2.06", "ppl": "4.17", "wps": "356767", "ups": "3.03", "wpb": "117560", "bsz": "256", "num_updates": "701000", "lr": "0.00030202", "gnorm": "0.744", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "19.9", "wall": "228855"} [2022-08-03 17:03:28,717][train_inner][INFO] - {"epoch": 14, "update": 13.624, "loss": "2.045", "ppl": "4.13", "wps": "359954", "ups": "3.03", "wpb": "118937", "bsz": "256", "num_updates": "701200", "lr": "0.000301818", "gnorm": "0.737", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "24.1", "wall": "228921"} [2022-08-03 17:04:33,317][train_inner][INFO] - {"epoch": 14, "update": 13.628, "loss": "2.048", "ppl": "4.14", "wps": "365777", "ups": "3.1", "wpb": "118144", "bsz": "256", "num_updates": "701400", "lr": "0.000301616", "gnorm": "0.742", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20", "wall": "228986"} [2022-08-03 17:05:38,225][train_inner][INFO] - {"epoch": 14, "update": 13.632, "loss": "2.054", "ppl": "4.15", "wps": "365108", "ups": "3.08", "wpb": "118490", "bsz": "256", "num_updates": "701600", "lr": "0.000301414", "gnorm": "0.74", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "28.2", "wall": "229051"} [2022-08-03 17:06:43,298][train_inner][INFO] - {"epoch": 14, "update": 13.636, "loss": "2.044", "ppl": "4.12", "wps": "363069", "ups": "3.07", "wpb": "118129", "bsz": "256", "num_updates": "701800", "lr": "0.000301212", "gnorm": "0.742", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.2", "wall": "229116"} [2022-08-03 17:07:48,094][train_inner][INFO] - {"epoch": 14, "update": 13.64, "loss": "2.052", "ppl": "4.15", "wps": "366622", "ups": "3.09", "wpb": "118776", "bsz": "256", "num_updates": "702000", "lr": "0.00030101", "gnorm": "0.741", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21", "wall": "229180"} [2022-08-03 17:08:52,895][train_inner][INFO] - {"epoch": 14, "update": 13.643, "loss": "2.043", "ppl": "4.12", "wps": "365110", "ups": "3.09", "wpb": "118296", "bsz": "256", "num_updates": "702200", "lr": "0.000300808", "gnorm": "0.739", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.9", "wall": "229245"} [2022-08-03 17:09:58,006][train_inner][INFO] - {"epoch": 14, "update": 13.647, "loss": "2.049", "ppl": "4.14", "wps": "363423", "ups": "3.07", "wpb": "118313", "bsz": "256", "num_updates": "702400", "lr": "0.000300606", "gnorm": "0.749", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.9", "wall": "229310"} [2022-08-03 17:11:03,121][train_inner][INFO] - {"epoch": 14, "update": 13.651, "loss": "2.046", "ppl": "4.13", "wps": "365096", "ups": "3.07", "wpb": "118864", "bsz": "256", "num_updates": "702600", "lr": "0.000300404", "gnorm": "0.779", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.3", "wall": "229375"} [2022-08-03 17:12:08,156][train_inner][INFO] - {"epoch": 14, "update": 13.655, "loss": "2.047", "ppl": "4.13", "wps": "364247", "ups": "3.08", "wpb": "118441", "bsz": "256", "num_updates": "702800", "lr": "0.000300202", "gnorm": "0.755", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "229440"} [2022-08-03 17:13:12,953][train_inner][INFO] - {"epoch": 14, "update": 13.659, "loss": "2.048", "ppl": "4.14", "wps": "366438", "ups": "3.09", "wpb": "118720", "bsz": "256", "num_updates": "703000", "lr": "0.0003", "gnorm": "0.751", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.2", "wall": "229505"} [2022-08-03 17:14:17,689][train_inner][INFO] - {"epoch": 14, "update": 13.663, "loss": "2.045", "ppl": "4.13", "wps": "367058", "ups": "3.09", "wpb": "118808", "bsz": "256", "num_updates": "703200", "lr": "0.000299798", "gnorm": "0.738", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.6", "wall": "229570"} [2022-08-03 17:15:23,779][train_inner][INFO] - {"epoch": 14, "update": 13.667, "loss": "2.046", "ppl": "4.13", "wps": "355390", "ups": "3.03", "wpb": "117436", "bsz": "256", "num_updates": "703400", "lr": "0.000299596", "gnorm": "0.747", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "24.3", "wall": "229636"} [2022-08-03 17:16:28,836][train_inner][INFO] - {"epoch": 14, "update": 13.671, "loss": "2.05", "ppl": "4.14", "wps": "363062", "ups": "3.07", "wpb": "118096", "bsz": "255.9", "num_updates": "703600", "lr": "0.000299394", "gnorm": "0.744", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.5", "wall": "229701"} [2022-08-03 17:17:33,938][train_inner][INFO] - {"epoch": 14, "update": 13.675, "loss": "2.046", "ppl": "4.13", "wps": "364100", "ups": "3.07", "wpb": "118517", "bsz": "256", "num_updates": "703800", "lr": "0.000299192", "gnorm": "0.74", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.9", "wall": "229766"} [2022-08-03 17:18:40,072][train_inner][INFO] - {"epoch": 14, "update": 13.678, "loss": "2.053", "ppl": "4.15", "wps": "357706", "ups": "3.02", "wpb": "118281", "bsz": "256", "num_updates": "704000", "lr": "0.00029899", "gnorm": "0.743", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "21.6", "wall": "229832"} [2022-08-03 17:19:45,187][train_inner][INFO] - {"epoch": 14, "update": 13.682, "loss": "2.044", "ppl": "4.12", "wps": "363765", "ups": "3.07", "wpb": "118431", "bsz": "256", "num_updates": "704200", "lr": "0.000298788", "gnorm": "0.743", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.5", "wall": "229898"} [2022-08-03 17:20:50,142][train_inner][INFO] - {"epoch": 14, "update": 13.686, "loss": "2.046", "ppl": "4.13", "wps": "364880", "ups": "3.08", "wpb": "118502", "bsz": "256", "num_updates": "704400", "lr": "0.000298586", "gnorm": "0.744", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.3", "wall": "229962"} [2022-08-03 17:21:08,137][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-03 17:21:55,625][train_inner][INFO] - {"epoch": 14, "update": 13.69, "loss": "2.044", "ppl": "4.12", "wps": "359759", "ups": "3.05", "wpb": "117788", "bsz": "256", "num_updates": "704600", "lr": "0.000298384", "gnorm": "0.747", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "230028"} [2022-08-03 17:22:13,120][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-03 17:23:01,938][train_inner][INFO] - {"epoch": 14, "update": 13.694, "loss": "2.048", "ppl": "4.14", "wps": "358397", "ups": "3.02", "wpb": "118831", "bsz": "256", "num_updates": "704800", "lr": "0.000298182", "gnorm": "0.745", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "23.4", "wall": "230094"} [2022-08-03 17:24:06,679][train_inner][INFO] - {"epoch": 14, "update": 13.698, "loss": "2.052", "ppl": "4.15", "wps": "362968", "ups": "3.09", "wpb": "117494", "bsz": "256", "num_updates": "705000", "lr": "0.00029798", "gnorm": "0.755", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.9", "wall": "230159"} [2022-08-03 17:25:11,593][train_inner][INFO] - {"epoch": 14, "update": 13.702, "loss": "2.048", "ppl": "4.14", "wps": "364575", "ups": "3.08", "wpb": "118328", "bsz": "256", "num_updates": "705200", "lr": "0.000297778", "gnorm": "0.741", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "230224"} [2022-08-03 17:26:16,792][train_inner][INFO] - {"epoch": 14, "update": 13.706, "loss": "2.045", "ppl": "4.13", "wps": "363247", "ups": "3.07", "wpb": "118415", "bsz": "256", "num_updates": "705400", "lr": "0.000297576", "gnorm": "0.742", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "230289"} [2022-08-03 17:27:21,778][train_inner][INFO] - {"epoch": 14, "update": 13.71, "loss": "2.048", "ppl": "4.13", "wps": "364195", "ups": "3.08", "wpb": "118337", "bsz": "256", "num_updates": "705600", "lr": "0.000297374", "gnorm": "0.746", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.8", "wall": "230354"} [2022-08-03 17:28:26,812][train_inner][INFO] - {"epoch": 14, "update": 13.713, "loss": "2.047", "ppl": "4.13", "wps": "364195", "ups": "3.08", "wpb": "118423", "bsz": "256", "num_updates": "705800", "lr": "0.000297172", "gnorm": "0.741", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.2", "wall": "230419"} [2022-08-03 17:29:31,812][train_inner][INFO] - {"epoch": 14, "update": 13.717, "loss": "2.047", "ppl": "4.13", "wps": "365342", "ups": "3.08", "wpb": "118735", "bsz": "256", "num_updates": "706000", "lr": "0.00029697", "gnorm": "0.74", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "230484"} [2022-08-03 17:30:36,641][train_inner][INFO] - {"epoch": 14, "update": 13.721, "loss": "2.046", "ppl": "4.13", "wps": "366971", "ups": "3.09", "wpb": "118951", "bsz": "256", "num_updates": "706200", "lr": "0.000296768", "gnorm": "0.742", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20", "wall": "230549"} [2022-08-03 17:31:41,738][train_inner][INFO] - {"epoch": 14, "update": 13.725, "loss": "2.045", "ppl": "4.13", "wps": "363683", "ups": "3.07", "wpb": "118370", "bsz": "256", "num_updates": "706400", "lr": "0.000296566", "gnorm": "0.753", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "230614"} [2022-08-03 17:32:46,415][train_inner][INFO] - {"epoch": 14, "update": 13.729, "loss": "2.046", "ppl": "4.13", "wps": "364708", "ups": "3.09", "wpb": "117940", "bsz": "256", "num_updates": "706600", "lr": "0.000296364", "gnorm": "0.744", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.7", "wall": "230679"} [2022-08-03 17:33:51,275][train_inner][INFO] - {"epoch": 14, "update": 13.733, "loss": "2.041", "ppl": "4.12", "wps": "364813", "ups": "3.08", "wpb": "118306", "bsz": "256", "num_updates": "706800", "lr": "0.000296162", "gnorm": "0.74", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.9", "wall": "230744"} [2022-08-03 17:34:56,273][train_inner][INFO] - {"epoch": 14, "update": 13.737, "loss": "2.047", "ppl": "4.13", "wps": "363672", "ups": "3.08", "wpb": "118189", "bsz": "256", "num_updates": "707000", "lr": "0.00029596", "gnorm": "0.746", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "26.1", "wall": "230809"} [2022-08-03 17:36:01,202][train_inner][INFO] - {"epoch": 14, "update": 13.741, "loss": "2.054", "ppl": "4.15", "wps": "367049", "ups": "3.08", "wpb": "119160", "bsz": "256", "num_updates": "707200", "lr": "0.000295758", "gnorm": "0.74", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "25.6", "wall": "230874"} [2022-08-03 17:37:05,689][train_inner][INFO] - {"epoch": 14, "update": 13.744, "loss": "2.049", "ppl": "4.14", "wps": "365486", "ups": "3.1", "wpb": "117844", "bsz": "256", "num_updates": "707400", "lr": "0.000295556", "gnorm": "0.746", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "27.1", "wall": "230938"} [2022-08-03 17:38:10,411][train_inner][INFO] - {"epoch": 14, "update": 13.748, "loss": "2.041", "ppl": "4.11", "wps": "365769", "ups": "3.09", "wpb": "118364", "bsz": "256", "num_updates": "707600", "lr": "0.000295354", "gnorm": "0.742", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20", "wall": "231003"} [2022-08-03 17:39:15,183][train_inner][INFO] - {"epoch": 14, "update": 13.752, "loss": "2.036", "ppl": "4.1", "wps": "367349", "ups": "3.09", "wpb": "118967", "bsz": "256", "num_updates": "707800", "lr": "0.000295152", "gnorm": "0.739", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.5", "wall": "231068"} [2022-08-03 17:40:19,980][train_inner][INFO] - {"epoch": 14, "update": 13.756, "loss": "2.042", "ppl": "4.12", "wps": "363235", "ups": "3.09", "wpb": "117682", "bsz": "256", "num_updates": "708000", "lr": "0.000294949", "gnorm": "0.745", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.8", "wall": "231132"} [2022-08-03 17:41:24,844][train_inner][INFO] - {"epoch": 14, "update": 13.76, "loss": "2.053", "ppl": "4.15", "wps": "361128", "ups": "3.08", "wpb": "117120", "bsz": "256", "num_updates": "708200", "lr": "0.000294747", "gnorm": "0.749", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "231197"} [2022-08-03 17:42:29,881][train_inner][INFO] - {"epoch": 14, "update": 13.764, "loss": "2.045", "ppl": "4.13", "wps": "365173", "ups": "3.08", "wpb": "118746", "bsz": "256", "num_updates": "708400", "lr": "0.000294545", "gnorm": "0.742", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.6", "wall": "231262"} [2022-08-03 17:43:34,740][train_inner][INFO] - {"epoch": 14, "update": 13.768, "loss": "2.046", "ppl": "4.13", "wps": "366846", "ups": "3.08", "wpb": "118966", "bsz": "256", "num_updates": "708600", "lr": "0.000294343", "gnorm": "0.741", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.5", "wall": "231327"} [2022-08-03 17:44:26,201][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-03 17:44:39,576][train_inner][INFO] - {"epoch": 14, "update": 13.772, "loss": "2.041", "ppl": "4.12", "wps": "366362", "ups": "3.08", "wpb": "118765", "bsz": "256", "num_updates": "708800", "lr": "0.000294141", "gnorm": "0.742", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.3", "wall": "231392"} [2022-08-03 17:45:44,480][train_inner][INFO] - {"epoch": 14, "update": 13.776, "loss": "2.046", "ppl": "4.13", "wps": "363254", "ups": "3.08", "wpb": "117882", "bsz": "256", "num_updates": "709000", "lr": "0.000293939", "gnorm": "0.743", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.5", "wall": "231457"} [2022-08-03 17:46:49,401][train_inner][INFO] - {"epoch": 14, "update": 13.779, "loss": "2.048", "ppl": "4.13", "wps": "362736", "ups": "3.08", "wpb": "117745", "bsz": "256", "num_updates": "709200", "lr": "0.000293737", "gnorm": "0.748", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.7", "wall": "231522"} [2022-08-03 17:47:54,410][train_inner][INFO] - {"epoch": 14, "update": 13.783, "loss": "2.038", "ppl": "4.11", "wps": "364836", "ups": "3.08", "wpb": "118586", "bsz": "256", "num_updates": "709400", "lr": "0.000293535", "gnorm": "0.742", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.1", "wall": "231587"} [2022-08-03 17:48:59,447][train_inner][INFO] - {"epoch": 14, "update": 13.787, "loss": "2.044", "ppl": "4.12", "wps": "364184", "ups": "3.08", "wpb": "118426", "bsz": "256", "num_updates": "709600", "lr": "0.000293333", "gnorm": "0.748", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "25.9", "wall": "231652"} [2022-08-03 17:50:04,161][train_inner][INFO] - {"epoch": 14, "update": 13.791, "loss": "2.041", "ppl": "4.11", "wps": "366921", "ups": "3.09", "wpb": "118722", "bsz": "256", "num_updates": "709800", "lr": "0.000293131", "gnorm": "0.744", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22", "wall": "231716"} [2022-08-03 17:51:09,091][train_inner][INFO] - {"epoch": 14, "update": 13.795, "loss": "2.049", "ppl": "4.14", "wps": "363441", "ups": "3.08", "wpb": "117990", "bsz": "256", "num_updates": "710000", "lr": "0.000292929", "gnorm": "0.745", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.4", "wall": "231781"} [2022-08-03 17:52:14,027][train_inner][INFO] - {"epoch": 14, "update": 13.799, "loss": "2.04", "ppl": "4.11", "wps": "365513", "ups": "3.08", "wpb": "118673", "bsz": "256", "num_updates": "710200", "lr": "0.000292727", "gnorm": "0.738", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.6", "wall": "231846"} [2022-08-03 17:53:19,803][train_inner][INFO] - {"epoch": 14, "update": 13.803, "loss": "2.045", "ppl": "4.13", "wps": "358457", "ups": "3.04", "wpb": "117887", "bsz": "256", "num_updates": "710400", "lr": "0.000292525", "gnorm": "0.745", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.2", "wall": "231912"} [2022-08-03 17:54:24,771][train_inner][INFO] - {"epoch": 14, "update": 13.807, "loss": "2.043", "ppl": "4.12", "wps": "364424", "ups": "3.08", "wpb": "118378", "bsz": "256", "num_updates": "710600", "lr": "0.000292323", "gnorm": "0.742", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.8", "wall": "231977"} [2022-08-03 17:55:29,661][train_inner][INFO] - {"epoch": 14, "update": 13.811, "loss": "2.048", "ppl": "4.14", "wps": "364592", "ups": "3.08", "wpb": "118292", "bsz": "256", "num_updates": "710800", "lr": "0.000292121", "gnorm": "0.747", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.3", "wall": "232042"} [2022-08-03 17:55:52,673][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-03 17:56:34,605][train_inner][INFO] - {"epoch": 14, "update": 13.814, "loss": "2.043", "ppl": "4.12", "wps": "364444", "ups": "3.08", "wpb": "118340", "bsz": "256", "num_updates": "711000", "lr": "0.000291919", "gnorm": "0.748", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "232107"} [2022-08-03 17:57:39,393][train_inner][INFO] - {"epoch": 14, "update": 13.818, "loss": "2.046", "ppl": "4.13", "wps": "366180", "ups": "3.09", "wpb": "118620", "bsz": "256", "num_updates": "711200", "lr": "0.000291717", "gnorm": "0.743", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.6", "wall": "232172"} [2022-08-03 17:58:44,134][train_inner][INFO] - {"epoch": 14, "update": 13.822, "loss": "2.043", "ppl": "4.12", "wps": "364354", "ups": "3.09", "wpb": "117941", "bsz": "256", "num_updates": "711400", "lr": "0.000291515", "gnorm": "0.755", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "232236"} [2022-08-03 17:59:48,900][train_inner][INFO] - {"epoch": 14, "update": 13.826, "loss": "2.042", "ppl": "4.12", "wps": "366129", "ups": "3.09", "wpb": "118562", "bsz": "256", "num_updates": "711600", "lr": "0.000291313", "gnorm": "0.745", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.7", "wall": "232301"} [2022-08-03 18:00:53,686][train_inner][INFO] - {"epoch": 14, "update": 13.83, "loss": "2.04", "ppl": "4.11", "wps": "366635", "ups": "3.09", "wpb": "118761", "bsz": "256", "num_updates": "711800", "lr": "0.000291111", "gnorm": "0.747", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "232366"} [2022-08-03 18:01:58,724][train_inner][INFO] - {"epoch": 14, "update": 13.834, "loss": "2.04", "ppl": "4.11", "wps": "363564", "ups": "3.08", "wpb": "118226", "bsz": "256", "num_updates": "712000", "lr": "0.000290909", "gnorm": "0.747", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "232431"} [2022-08-03 18:03:03,024][train_inner][INFO] - {"epoch": 14, "update": 13.838, "loss": "2.044", "ppl": "4.12", "wps": "366446", "ups": "3.11", "wpb": "117809", "bsz": "256", "num_updates": "712200", "lr": "0.000290707", "gnorm": "0.747", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.8", "wall": "232495"} [2022-08-03 18:04:08,025][train_inner][INFO] - {"epoch": 14, "update": 13.842, "loss": "2.04", "ppl": "4.11", "wps": "363688", "ups": "3.08", "wpb": "118199", "bsz": "256", "num_updates": "712400", "lr": "0.000290505", "gnorm": "0.743", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.2", "wall": "232560"} [2022-08-03 18:05:13,096][train_inner][INFO] - {"epoch": 14, "update": 13.846, "loss": "2.039", "ppl": "4.11", "wps": "362892", "ups": "3.07", "wpb": "118068", "bsz": "256", "num_updates": "712600", "lr": "0.000290303", "gnorm": "0.748", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "232625"} [2022-08-03 18:06:17,733][train_inner][INFO] - {"epoch": 14, "update": 13.849, "loss": "2.044", "ppl": "4.12", "wps": "366137", "ups": "3.09", "wpb": "118327", "bsz": "256", "num_updates": "712800", "lr": "0.000290101", "gnorm": "0.748", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.8", "wall": "232690"} [2022-08-03 18:06:59,922][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-03 18:07:22,704][train_inner][INFO] - {"epoch": 14, "update": 13.853, "loss": "2.048", "ppl": "4.13", "wps": "364413", "ups": "3.08", "wpb": "118380", "bsz": "256", "num_updates": "713000", "lr": "0.000289899", "gnorm": "0.745", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "25.4", "wall": "232755"} [2022-08-03 18:07:36,198][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-03 18:08:27,999][train_inner][INFO] - {"epoch": 14, "update": 13.857, "loss": "2.039", "ppl": "4.11", "wps": "361549", "ups": "3.06", "wpb": "118036", "bsz": "256", "num_updates": "713200", "lr": "0.000289697", "gnorm": "0.757", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.2", "wall": "232820"} [2022-08-03 18:09:33,199][train_inner][INFO] - {"epoch": 14, "update": 13.861, "loss": "2.041", "ppl": "4.11", "wps": "363893", "ups": "3.07", "wpb": "118629", "bsz": "256", "num_updates": "713400", "lr": "0.000289495", "gnorm": "0.747", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.4", "wall": "232886"} [2022-08-03 18:10:39,065][train_inner][INFO] - {"epoch": 14, "update": 13.865, "loss": "2.038", "ppl": "4.11", "wps": "360118", "ups": "3.04", "wpb": "118595", "bsz": "256", "num_updates": "713600", "lr": "0.000289293", "gnorm": "0.749", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "232951"} [2022-08-03 18:11:44,267][train_inner][INFO] - {"epoch": 14, "update": 13.869, "loss": "2.04", "ppl": "4.11", "wps": "362524", "ups": "3.07", "wpb": "118185", "bsz": "256", "num_updates": "713800", "lr": "0.000289091", "gnorm": "0.744", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.7", "wall": "233017"} [2022-08-03 18:12:48,849][train_inner][INFO] - {"epoch": 14, "update": 13.873, "loss": "2.042", "ppl": "4.12", "wps": "365042", "ups": "3.1", "wpb": "117875", "bsz": "256", "num_updates": "714000", "lr": "0.000288889", "gnorm": "0.748", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.7", "wall": "233081"} [2022-08-03 18:13:53,893][train_inner][INFO] - {"epoch": 14, "update": 13.877, "loss": "2.037", "ppl": "4.1", "wps": "364482", "ups": "3.07", "wpb": "118534", "bsz": "256", "num_updates": "714200", "lr": "0.000288687", "gnorm": "0.745", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.3", "wall": "233146"} [2022-08-03 18:14:58,801][train_inner][INFO] - {"epoch": 14, "update": 13.881, "loss": "2.042", "ppl": "4.12", "wps": "365065", "ups": "3.08", "wpb": "118476", "bsz": "256", "num_updates": "714400", "lr": "0.000288485", "gnorm": "0.746", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.1", "wall": "233211"} [2022-08-03 18:16:03,664][train_inner][INFO] - {"epoch": 14, "update": 13.884, "loss": "2.045", "ppl": "4.13", "wps": "364426", "ups": "3.08", "wpb": "118188", "bsz": "256", "num_updates": "714600", "lr": "0.000288283", "gnorm": "0.752", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.1", "wall": "233276"} [2022-08-03 18:17:08,712][train_inner][INFO] - {"epoch": 14, "update": 13.888, "loss": "2.042", "ppl": "4.12", "wps": "363563", "ups": "3.07", "wpb": "118243", "bsz": "256", "num_updates": "714800", "lr": "0.000288081", "gnorm": "0.748", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "26.1", "wall": "233341"} [2022-08-03 18:18:13,866][train_inner][INFO] - {"epoch": 14, "update": 13.892, "loss": "2.04", "ppl": "4.11", "wps": "362671", "ups": "3.07", "wpb": "118146", "bsz": "256", "num_updates": "715000", "lr": "0.000287879", "gnorm": "0.748", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "233406"} [2022-08-03 18:19:18,300][train_inner][INFO] - {"epoch": 14, "update": 13.896, "loss": "2.041", "ppl": "4.11", "wps": "365498", "ups": "3.1", "wpb": "117750", "bsz": "256", "num_updates": "715200", "lr": "0.000287677", "gnorm": "0.748", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "233471"} [2022-08-03 18:20:23,308][train_inner][INFO] - {"epoch": 14, "update": 13.9, "loss": "2.049", "ppl": "4.14", "wps": "364453", "ups": "3.08", "wpb": "118460", "bsz": "256", "num_updates": "715400", "lr": "0.000287475", "gnorm": "0.746", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.9", "wall": "233536"} [2022-08-03 18:21:28,042][train_inner][INFO] - {"epoch": 14, "update": 13.904, "loss": "2.033", "ppl": "4.09", "wps": "365460", "ups": "3.09", "wpb": "118287", "bsz": "256", "num_updates": "715600", "lr": "0.000287273", "gnorm": "0.743", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "28.2", "wall": "233600"} [2022-08-03 18:22:33,160][train_inner][INFO] - {"epoch": 14, "update": 13.908, "loss": "2.043", "ppl": "4.12", "wps": "363976", "ups": "3.07", "wpb": "118506", "bsz": "256", "num_updates": "715800", "lr": "0.000287071", "gnorm": "0.75", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.6", "wall": "233665"} [2022-08-03 18:23:37,858][train_inner][INFO] - {"epoch": 14, "update": 13.912, "loss": "2.038", "ppl": "4.11", "wps": "366807", "ups": "3.09", "wpb": "118656", "bsz": "256", "num_updates": "716000", "lr": "0.000286869", "gnorm": "0.744", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.1", "wall": "233730"} [2022-08-03 18:24:42,352][train_inner][INFO] - {"epoch": 14, "update": 13.915, "loss": "2.042", "ppl": "4.12", "wps": "364902", "ups": "3.1", "wpb": "117668", "bsz": "256", "num_updates": "716200", "lr": "0.000286667", "gnorm": "0.75", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.2", "wall": "233795"} [2022-08-03 18:25:47,134][train_inner][INFO] - {"epoch": 14, "update": 13.919, "loss": "2.044", "ppl": "4.12", "wps": "365896", "ups": "3.09", "wpb": "118516", "bsz": "256", "num_updates": "716400", "lr": "0.000286465", "gnorm": "0.749", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.7", "wall": "233859"} [2022-08-03 18:26:52,017][train_inner][INFO] - {"epoch": 14, "update": 13.923, "loss": "2.041", "ppl": "4.12", "wps": "363711", "ups": "3.08", "wpb": "117990", "bsz": "256", "num_updates": "716600", "lr": "0.000286263", "gnorm": "0.751", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.4", "wall": "233924"} [2022-08-03 18:27:56,904][train_inner][INFO] - {"epoch": 14, "update": 13.927, "loss": "2.041", "ppl": "4.11", "wps": "364473", "ups": "3.08", "wpb": "118247", "bsz": "256", "num_updates": "716800", "lr": "0.000286061", "gnorm": "0.747", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.4", "wall": "233989"} [2022-08-03 18:29:01,800][train_inner][INFO] - {"epoch": 14, "update": 13.931, "loss": "2.037", "ppl": "4.11", "wps": "364436", "ups": "3.08", "wpb": "118250", "bsz": "256", "num_updates": "717000", "lr": "0.000285859", "gnorm": "0.762", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.8", "wall": "234054"} [2022-08-03 18:29:47,875][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-03 18:30:07,103][train_inner][INFO] - {"epoch": 14, "update": 13.935, "loss": "2.039", "ppl": "4.11", "wps": "362312", "ups": "3.06", "wpb": "118299", "bsz": "256", "num_updates": "717200", "lr": "0.000285657", "gnorm": "0.745", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22", "wall": "234119"} [2022-08-03 18:31:12,120][train_inner][INFO] - {"epoch": 14, "update": 13.939, "loss": "2.04", "ppl": "4.11", "wps": "362986", "ups": "3.08", "wpb": "118000", "bsz": "256", "num_updates": "717400", "lr": "0.000285455", "gnorm": "0.75", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "234184"} [2022-08-03 18:32:17,200][train_inner][INFO] - {"epoch": 14, "update": 13.943, "loss": "2.038", "ppl": "4.11", "wps": "363691", "ups": "3.07", "wpb": "118344", "bsz": "256", "num_updates": "717600", "lr": "0.000285253", "gnorm": "0.75", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.9", "wall": "234250"} [2022-08-03 18:33:22,196][train_inner][INFO] - {"epoch": 14, "update": 13.947, "loss": "2.04", "ppl": "4.11", "wps": "366519", "ups": "3.08", "wpb": "119108", "bsz": "256", "num_updates": "717800", "lr": "0.000285051", "gnorm": "0.749", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.7", "wall": "234315"} [2022-08-03 18:34:27,291][train_inner][INFO] - {"epoch": 14, "update": 13.95, "loss": "2.037", "ppl": "4.1", "wps": "364153", "ups": "3.07", "wpb": "118521", "bsz": "256", "num_updates": "718000", "lr": "0.000284848", "gnorm": "0.747", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "234380"} [2022-08-03 18:35:32,298][train_inner][INFO] - {"epoch": 14, "update": 13.954, "loss": "2.037", "ppl": "4.1", "wps": "365149", "ups": "3.08", "wpb": "118684", "bsz": "256", "num_updates": "718200", "lr": "0.000284646", "gnorm": "0.744", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.9", "wall": "234445"} [2022-08-03 18:36:36,943][train_inner][INFO] - {"epoch": 14, "update": 13.958, "loss": "2.039", "ppl": "4.11", "wps": "365785", "ups": "3.09", "wpb": "118231", "bsz": "256", "num_updates": "718400", "lr": "0.000284444", "gnorm": "0.747", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.7", "wall": "234509"} [2022-08-03 18:37:41,737][train_inner][INFO] - {"epoch": 14, "update": 13.962, "loss": "2.034", "ppl": "4.1", "wps": "362798", "ups": "3.09", "wpb": "117533", "bsz": "256", "num_updates": "718600", "lr": "0.000284242", "gnorm": "0.75", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.3", "wall": "234574"} [2022-08-03 18:38:46,249][train_inner][INFO] - {"epoch": 14, "update": 13.966, "loss": "2.031", "ppl": "4.09", "wps": "368112", "ups": "3.1", "wpb": "118736", "bsz": "256", "num_updates": "718800", "lr": "0.00028404", "gnorm": "0.745", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.6", "wall": "234639"} [2022-08-03 18:39:50,854][train_inner][INFO] - {"epoch": 14, "update": 13.97, "loss": "2.042", "ppl": "4.12", "wps": "364383", "ups": "3.1", "wpb": "117704", "bsz": "256", "num_updates": "719000", "lr": "0.000283838", "gnorm": "0.752", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.9", "wall": "234703"} [2022-08-03 18:40:55,779][train_inner][INFO] - {"epoch": 14, "update": 13.974, "loss": "2.039", "ppl": "4.11", "wps": "361781", "ups": "3.08", "wpb": "117440", "bsz": "256", "num_updates": "719200", "lr": "0.000283636", "gnorm": "0.752", "clip": "0", "loss_scale": "32", "train_wall": "65", "gb_free": "21.1", "wall": "234768"} [2022-08-03 18:40:59,315][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-03 18:42:00,801][train_inner][INFO] - {"epoch": 14, "update": 13.978, "loss": "2.04", "ppl": "4.11", "wps": "362482", "ups": "3.08", "wpb": "117846", "bsz": "256", "num_updates": "719400", "lr": "0.000283434", "gnorm": "0.754", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.2", "wall": "234833"} [2022-08-03 18:43:05,637][train_inner][INFO] - {"epoch": 14, "update": 13.982, "loss": "2.038", "ppl": "4.11", "wps": "365223", "ups": "3.08", "wpb": "118395", "bsz": "256", "num_updates": "719600", "lr": "0.000283232", "gnorm": "0.75", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.9", "wall": "234898"} [2022-08-03 18:44:10,590][train_inner][INFO] - {"epoch": 14, "update": 13.985, "loss": "2.04", "ppl": "4.11", "wps": "364793", "ups": "3.08", "wpb": "118471", "bsz": "256", "num_updates": "719800", "lr": "0.00028303", "gnorm": "0.75", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22", "wall": "234963"} [2022-08-03 18:45:15,635][train_inner][INFO] - {"epoch": 14, "update": 13.989, "loss": "2.04", "ppl": "4.11", "wps": "364653", "ups": "3.07", "wpb": "118593", "bsz": "256", "num_updates": "720000", "lr": "0.000282828", "gnorm": "0.758", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "26.3", "wall": "235028"} [2022-08-03 18:46:20,378][train_inner][INFO] - {"epoch": 14, "update": 13.993, "loss": "2.04", "ppl": "4.11", "wps": "366741", "ups": "3.09", "wpb": "118718", "bsz": "256", "num_updates": "720200", "lr": "0.000282626", "gnorm": "0.75", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.3", "wall": "235093"} [2022-08-03 18:47:25,396][train_inner][INFO] - {"epoch": 14, "update": 13.997, "loss": "2.04", "ppl": "4.11", "wps": "365793", "ups": "3.08", "wpb": "118913", "bsz": "256", "num_updates": "720400", "lr": "0.000282424", "gnorm": "0.746", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.7", "wall": "235158"} [2022-08-03 18:48:13,939][fairseq_cli.train][INFO] - end of epoch 14 (average epoch stats below) [2022-08-03 18:48:13,940][train][INFO] - {"epoch": 14, "train_loss": "2.052", "train_ppl": "4.15", "train_wps": "362218", "train_ups": "3.06", "train_wpb": "118298", "train_bsz": "256", "train_num_updates": "720550", "train_lr": "0.000282273", "train_gnorm": "0.739", "train_clip": "0", "train_loss_scale": "16", "train_train_wall": "16635", "train_gb_free": "21.3", "train_wall": "235206"} [2022-08-03 18:48:14,024][fairseq.data.iterators][INFO] - grouped total_num_itrs = 51488 [2022-08-03 18:48:14,027][fairseq.trainer][INFO] - begin training epoch 15 [2022-08-03 18:48:14,027][fairseq_cli.train][INFO] - Start iterating over samples [2022-08-03 18:48:45,602][train_inner][INFO] - {"epoch": 15, "update": 14.001, "loss": "2.035", "ppl": "4.1", "wps": "295738", "ups": "2.49", "wpb": "118598", "bsz": "255.4", "num_updates": "720600", "lr": "0.000282222", "gnorm": "0.748", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.9", "wall": "235238"} [2022-08-03 18:49:50,467][train_inner][INFO] - {"epoch": 15, "update": 14.005, "loss": "2.037", "ppl": "4.11", "wps": "363811", "ups": "3.08", "wpb": "117992", "bsz": "256", "num_updates": "720800", "lr": "0.00028202", "gnorm": "0.751", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.7", "wall": "235303"} [2022-08-03 18:50:55,213][train_inner][INFO] - {"epoch": 15, "update": 14.009, "loss": "2.037", "ppl": "4.1", "wps": "366214", "ups": "3.09", "wpb": "118554", "bsz": "256", "num_updates": "721000", "lr": "0.000281818", "gnorm": "0.749", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "235368"} [2022-08-03 18:52:00,088][train_inner][INFO] - {"epoch": 15, "update": 14.013, "loss": "2.034", "ppl": "4.1", "wps": "364574", "ups": "3.08", "wpb": "118255", "bsz": "256", "num_updates": "721200", "lr": "0.000281616", "gnorm": "0.749", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.2", "wall": "235432"} [2022-08-03 18:52:23,090][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-03 18:53:04,869][train_inner][INFO] - {"epoch": 15, "update": 14.017, "loss": "2.034", "ppl": "4.09", "wps": "365333", "ups": "3.09", "wpb": "118333", "bsz": "256", "num_updates": "721400", "lr": "0.000281414", "gnorm": "0.749", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.2", "wall": "235497"} [2022-08-03 18:54:09,725][train_inner][INFO] - {"epoch": 15, "update": 14.02, "loss": "2.035", "ppl": "4.1", "wps": "364455", "ups": "3.08", "wpb": "118184", "bsz": "256", "num_updates": "721600", "lr": "0.000281212", "gnorm": "0.748", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "235562"} [2022-08-03 18:55:14,628][train_inner][INFO] - {"epoch": 15, "update": 14.024, "loss": "2.034", "ppl": "4.1", "wps": "365438", "ups": "3.08", "wpb": "118588", "bsz": "256", "num_updates": "721800", "lr": "0.00028101", "gnorm": "0.749", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "235627"} [2022-08-03 18:56:19,085][train_inner][INFO] - {"epoch": 15, "update": 14.028, "loss": "2.035", "ppl": "4.1", "wps": "367032", "ups": "3.1", "wpb": "118289", "bsz": "256", "num_updates": "722000", "lr": "0.000280808", "gnorm": "0.753", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.6", "wall": "235691"} [2022-08-03 18:57:25,257][train_inner][INFO] - {"epoch": 15, "update": 14.032, "loss": "2.032", "ppl": "4.09", "wps": "355210", "ups": "3.02", "wpb": "117523", "bsz": "256", "num_updates": "722200", "lr": "0.000280606", "gnorm": "0.763", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "23.3", "wall": "235758"} [2022-08-03 18:58:30,349][train_inner][INFO] - {"epoch": 15, "update": 14.036, "loss": "2.034", "ppl": "4.09", "wps": "363030", "ups": "3.07", "wpb": "118149", "bsz": "256", "num_updates": "722400", "lr": "0.000280404", "gnorm": "0.751", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "235823"} [2022-08-03 18:59:35,411][train_inner][INFO] - {"epoch": 15, "update": 14.04, "loss": "2.039", "ppl": "4.11", "wps": "362546", "ups": "3.07", "wpb": "117938", "bsz": "256", "num_updates": "722600", "lr": "0.000280202", "gnorm": "0.754", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "235888"} [2022-08-03 19:00:41,530][train_inner][INFO] - {"epoch": 15, "update": 14.044, "loss": "2.039", "ppl": "4.11", "wps": "355642", "ups": "3.02", "wpb": "117572", "bsz": "256", "num_updates": "722800", "lr": "0.00028", "gnorm": "0.752", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "25.9", "wall": "235954"} [2022-08-03 19:01:46,322][train_inner][INFO] - {"epoch": 15, "update": 14.048, "loss": "2.034", "ppl": "4.09", "wps": "365777", "ups": "3.09", "wpb": "118497", "bsz": "256", "num_updates": "723000", "lr": "0.000279798", "gnorm": "0.752", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20", "wall": "236019"} [2022-08-03 19:02:51,531][train_inner][INFO] - {"epoch": 15, "update": 14.051, "loss": "2.029", "ppl": "4.08", "wps": "363939", "ups": "3.07", "wpb": "118659", "bsz": "256", "num_updates": "723200", "lr": "0.000279596", "gnorm": "0.748", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22", "wall": "236084"} [2022-08-03 19:03:30,856][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-03 19:03:56,500][train_inner][INFO] - {"epoch": 15, "update": 14.055, "loss": "2.035", "ppl": "4.1", "wps": "361525", "ups": "3.08", "wpb": "117437", "bsz": "256", "num_updates": "723400", "lr": "0.000279394", "gnorm": "0.754", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "236149"} [2022-08-03 19:05:01,423][train_inner][INFO] - {"epoch": 15, "update": 14.059, "loss": "2.031", "ppl": "4.09", "wps": "365714", "ups": "3.08", "wpb": "118715", "bsz": "256", "num_updates": "723600", "lr": "0.000279192", "gnorm": "0.749", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.4", "wall": "236214"} [2022-08-03 19:06:06,490][train_inner][INFO] - {"epoch": 15, "update": 14.063, "loss": "2.025", "ppl": "4.07", "wps": "364808", "ups": "3.07", "wpb": "118684", "bsz": "256", "num_updates": "723800", "lr": "0.00027899", "gnorm": "0.75", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.5", "wall": "236279"} [2022-08-03 19:07:11,239][train_inner][INFO] - {"epoch": 15, "update": 14.067, "loss": "2.041", "ppl": "4.11", "wps": "364103", "ups": "3.09", "wpb": "117875", "bsz": "256", "num_updates": "724000", "lr": "0.000278788", "gnorm": "0.756", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.9", "wall": "236344"} [2022-08-03 19:08:16,179][train_inner][INFO] - {"epoch": 15, "update": 14.071, "loss": "2.034", "ppl": "4.1", "wps": "363640", "ups": "3.08", "wpb": "118072", "bsz": "256", "num_updates": "724200", "lr": "0.000278586", "gnorm": "0.752", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "236409"} [2022-08-03 19:09:04,475][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-03 19:09:21,645][train_inner][INFO] - {"epoch": 15, "update": 14.075, "loss": "2.031", "ppl": "4.09", "wps": "361076", "ups": "3.06", "wpb": "118189", "bsz": "256", "num_updates": "724400", "lr": "0.000278384", "gnorm": "0.75", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.7", "wall": "236474"} [2022-08-03 19:10:26,744][train_inner][INFO] - {"epoch": 15, "update": 14.079, "loss": "2.038", "ppl": "4.11", "wps": "363662", "ups": "3.07", "wpb": "118368", "bsz": "256", "num_updates": "724600", "lr": "0.000278182", "gnorm": "0.749", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "236539"} [2022-08-03 19:11:31,689][train_inner][INFO] - {"epoch": 15, "update": 14.083, "loss": "2.029", "ppl": "4.08", "wps": "364607", "ups": "3.08", "wpb": "118397", "bsz": "256", "num_updates": "724800", "lr": "0.00027798", "gnorm": "0.748", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.7", "wall": "236604"} [2022-08-03 19:12:37,523][train_inner][INFO] - {"epoch": 15, "update": 14.086, "loss": "2.036", "ppl": "4.1", "wps": "358113", "ups": "3.04", "wpb": "117878", "bsz": "256", "num_updates": "725000", "lr": "0.000277778", "gnorm": "0.751", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "27.4", "wall": "236670"} [2022-08-03 19:13:42,221][train_inner][INFO] - {"epoch": 15, "update": 14.09, "loss": "2.03", "ppl": "4.09", "wps": "365775", "ups": "3.09", "wpb": "118322", "bsz": "256", "num_updates": "725200", "lr": "0.000277576", "gnorm": "0.752", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.8", "wall": "236735"} [2022-08-03 19:14:47,008][train_inner][INFO] - {"epoch": 15, "update": 14.094, "loss": "2.028", "ppl": "4.08", "wps": "364592", "ups": "3.09", "wpb": "118103", "bsz": "256", "num_updates": "725400", "lr": "0.000277374", "gnorm": "0.757", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.9", "wall": "236799"} [2022-08-03 19:15:52,233][train_inner][INFO] - {"epoch": 15, "update": 14.098, "loss": "2.037", "ppl": "4.1", "wps": "361186", "ups": "3.07", "wpb": "117790", "bsz": "256", "num_updates": "725600", "lr": "0.000277172", "gnorm": "0.751", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "236865"} [2022-08-03 19:16:57,320][train_inner][INFO] - {"epoch": 15, "update": 14.102, "loss": "2.028", "ppl": "4.08", "wps": "363951", "ups": "3.07", "wpb": "118441", "bsz": "256", "num_updates": "725800", "lr": "0.00027697", "gnorm": "0.755", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.1", "wall": "236930"} [2022-08-03 19:18:01,923][train_inner][INFO] - {"epoch": 15, "update": 14.106, "loss": "2.039", "ppl": "4.11", "wps": "367120", "ups": "3.1", "wpb": "118584", "bsz": "256", "num_updates": "726000", "lr": "0.000276768", "gnorm": "0.751", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.4", "wall": "236994"} [2022-08-03 19:19:07,087][train_inner][INFO] - {"epoch": 15, "update": 14.11, "loss": "2.032", "ppl": "4.09", "wps": "362778", "ups": "3.07", "wpb": "118198", "bsz": "256", "num_updates": "726200", "lr": "0.000276566", "gnorm": "0.753", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.6", "wall": "237059"} [2022-08-03 19:20:11,998][train_inner][INFO] - {"epoch": 15, "update": 14.114, "loss": "2.032", "ppl": "4.09", "wps": "365247", "ups": "3.08", "wpb": "118541", "bsz": "256", "num_updates": "726400", "lr": "0.000276364", "gnorm": "0.755", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "237124"} [2022-08-03 19:21:17,082][train_inner][INFO] - {"epoch": 15, "update": 14.118, "loss": "2.032", "ppl": "4.09", "wps": "362224", "ups": "3.07", "wpb": "117873", "bsz": "256", "num_updates": "726600", "lr": "0.000276162", "gnorm": "0.752", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "25.4", "wall": "237189"} [2022-08-03 19:22:22,060][train_inner][INFO] - {"epoch": 15, "update": 14.121, "loss": "2.034", "ppl": "4.1", "wps": "363696", "ups": "3.08", "wpb": "118161", "bsz": "256", "num_updates": "726800", "lr": "0.00027596", "gnorm": "0.752", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.4", "wall": "237254"} [2022-08-03 19:23:26,677][train_inner][INFO] - {"epoch": 15, "update": 14.125, "loss": "2.035", "ppl": "4.1", "wps": "365648", "ups": "3.1", "wpb": "118132", "bsz": "256", "num_updates": "727000", "lr": "0.000275758", "gnorm": "0.753", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20", "wall": "237319"} [2022-08-03 19:24:32,347][train_inner][INFO] - {"epoch": 15, "update": 14.129, "loss": "2.033", "ppl": "4.09", "wps": "360067", "ups": "3.05", "wpb": "118226", "bsz": "256", "num_updates": "727200", "lr": "0.000275556", "gnorm": "0.755", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "26.5", "wall": "237385"} [2022-08-03 19:25:37,187][train_inner][INFO] - {"epoch": 15, "update": 14.133, "loss": "2.031", "ppl": "4.09", "wps": "365046", "ups": "3.08", "wpb": "118347", "bsz": "256", "num_updates": "727400", "lr": "0.000275354", "gnorm": "0.755", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20", "wall": "237450"} [2022-08-03 19:26:42,129][train_inner][INFO] - {"epoch": 15, "update": 14.137, "loss": "2.026", "ppl": "4.07", "wps": "363832", "ups": "3.08", "wpb": "118138", "bsz": "256", "num_updates": "727600", "lr": "0.000275152", "gnorm": "0.755", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "237514"} [2022-08-03 19:27:46,821][train_inner][INFO] - {"epoch": 15, "update": 14.141, "loss": "2.032", "ppl": "4.09", "wps": "365789", "ups": "3.09", "wpb": "118315", "bsz": "256", "num_updates": "727800", "lr": "0.000274949", "gnorm": "0.753", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22", "wall": "237579"} [2022-08-03 19:28:51,643][train_inner][INFO] - {"epoch": 15, "update": 14.145, "loss": "2.025", "ppl": "4.07", "wps": "366460", "ups": "3.09", "wpb": "118772", "bsz": "256", "num_updates": "728000", "lr": "0.000274747", "gnorm": "0.75", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.6", "wall": "237644"} [2022-08-03 19:29:56,657][train_inner][INFO] - {"epoch": 15, "update": 14.149, "loss": "2.035", "ppl": "4.1", "wps": "364572", "ups": "3.08", "wpb": "118510", "bsz": "256", "num_updates": "728200", "lr": "0.000274545", "gnorm": "0.755", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "28.2", "wall": "237709"} [2022-08-03 19:31:01,335][train_inner][INFO] - {"epoch": 15, "update": 14.153, "loss": "2.033", "ppl": "4.09", "wps": "365781", "ups": "3.09", "wpb": "118288", "bsz": "256", "num_updates": "728400", "lr": "0.000274343", "gnorm": "0.753", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.7", "wall": "237774"} [2022-08-03 19:31:26,772][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-03 19:32:06,432][train_inner][INFO] - {"epoch": 15, "update": 14.156, "loss": "2.033", "ppl": "4.09", "wps": "364532", "ups": "3.07", "wpb": "118649", "bsz": "256", "num_updates": "728600", "lr": "0.000274141", "gnorm": "0.754", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "237839"} [2022-08-03 19:33:11,540][train_inner][INFO] - {"epoch": 15, "update": 14.16, "loss": "2.028", "ppl": "4.08", "wps": "362798", "ups": "3.07", "wpb": "118104", "bsz": "256", "num_updates": "728800", "lr": "0.000273939", "gnorm": "0.755", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.6", "wall": "237904"} [2022-08-03 19:34:16,246][train_inner][INFO] - {"epoch": 15, "update": 14.164, "loss": "2.033", "ppl": "4.09", "wps": "364194", "ups": "3.09", "wpb": "117826", "bsz": "256", "num_updates": "729000", "lr": "0.000273737", "gnorm": "0.756", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "27", "wall": "237969"} [2022-08-03 19:35:21,063][train_inner][INFO] - {"epoch": 15, "update": 14.168, "loss": "2.033", "ppl": "4.09", "wps": "365770", "ups": "3.09", "wpb": "118539", "bsz": "256", "num_updates": "729200", "lr": "0.000273535", "gnorm": "0.758", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.6", "wall": "238033"} [2022-08-03 19:36:25,825][train_inner][INFO] - {"epoch": 15, "update": 14.172, "loss": "2.03", "ppl": "4.08", "wps": "365244", "ups": "3.09", "wpb": "118268", "bsz": "256", "num_updates": "729400", "lr": "0.000273333", "gnorm": "0.751", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.9", "wall": "238098"} [2022-08-03 19:37:30,622][train_inner][INFO] - {"epoch": 15, "update": 14.176, "loss": "2.035", "ppl": "4.1", "wps": "367200", "ups": "3.09", "wpb": "118965", "bsz": "256", "num_updates": "729600", "lr": "0.000273131", "gnorm": "0.753", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "238163"} [2022-08-03 19:38:35,448][train_inner][INFO] - {"epoch": 15, "update": 14.18, "loss": "2.028", "ppl": "4.08", "wps": "365363", "ups": "3.09", "wpb": "118423", "bsz": "256", "num_updates": "729800", "lr": "0.000272929", "gnorm": "0.754", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "28.2", "wall": "238228"} [2022-08-03 19:39:40,551][train_inner][INFO] - {"epoch": 15, "update": 14.184, "loss": "2.028", "ppl": "4.08", "wps": "362542", "ups": "3.07", "wpb": "118013", "bsz": "256", "num_updates": "730000", "lr": "0.000272727", "gnorm": "0.755", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.1", "wall": "238293"} [2022-08-03 19:40:45,773][train_inner][INFO] - {"epoch": 15, "update": 14.188, "loss": "2.035", "ppl": "4.1", "wps": "364132", "ups": "3.07", "wpb": "118745", "bsz": "256", "num_updates": "730200", "lr": "0.000272525", "gnorm": "0.753", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.3", "wall": "238358"} [2022-08-03 19:41:50,791][train_inner][INFO] - {"epoch": 15, "update": 14.191, "loss": "2.027", "ppl": "4.08", "wps": "364230", "ups": "3.08", "wpb": "118405", "bsz": "256", "num_updates": "730400", "lr": "0.000272323", "gnorm": "0.755", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.8", "wall": "238423"} [2022-08-03 19:42:36,215][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-03 19:42:55,953][train_inner][INFO] - {"epoch": 15, "update": 14.195, "loss": "2.027", "ppl": "4.07", "wps": "363098", "ups": "3.07", "wpb": "118298", "bsz": "256", "num_updates": "730600", "lr": "0.000272121", "gnorm": "0.76", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.4", "wall": "238488"} [2022-08-03 19:44:00,719][train_inner][INFO] - {"epoch": 15, "update": 14.199, "loss": "2.033", "ppl": "4.09", "wps": "366242", "ups": "3.09", "wpb": "118600", "bsz": "256", "num_updates": "730800", "lr": "0.000271919", "gnorm": "0.76", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.1", "wall": "238553"} [2022-08-03 19:45:05,686][train_inner][INFO] - {"epoch": 15, "update": 14.203, "loss": "2.035", "ppl": "4.1", "wps": "362329", "ups": "3.08", "wpb": "117695", "bsz": "256", "num_updates": "731000", "lr": "0.000271717", "gnorm": "0.756", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.1", "wall": "238618"} [2022-08-03 19:46:10,458][train_inner][INFO] - {"epoch": 15, "update": 14.207, "loss": "2.034", "ppl": "4.1", "wps": "366026", "ups": "3.09", "wpb": "118540", "bsz": "256", "num_updates": "731200", "lr": "0.000271515", "gnorm": "0.76", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "238683"} [2022-08-03 19:47:15,370][train_inner][INFO] - {"epoch": 15, "update": 14.211, "loss": "2.022", "ppl": "4.06", "wps": "366239", "ups": "3.08", "wpb": "118865", "bsz": "256", "num_updates": "731400", "lr": "0.000271313", "gnorm": "0.754", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "238748"} [2022-08-03 19:48:20,075][train_inner][INFO] - {"epoch": 15, "update": 14.215, "loss": "2.032", "ppl": "4.09", "wps": "366626", "ups": "3.09", "wpb": "118610", "bsz": "256", "num_updates": "731600", "lr": "0.000271111", "gnorm": "0.756", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.1", "wall": "238812"} [2022-08-03 19:49:25,114][train_inner][INFO] - {"epoch": 15, "update": 14.219, "loss": "2.033", "ppl": "4.09", "wps": "361932", "ups": "3.08", "wpb": "117697", "bsz": "256", "num_updates": "731800", "lr": "0.000270909", "gnorm": "0.757", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "238877"} [2022-08-03 19:50:30,044][train_inner][INFO] - {"epoch": 15, "update": 14.222, "loss": "2.03", "ppl": "4.08", "wps": "365343", "ups": "3.08", "wpb": "118606", "bsz": "256", "num_updates": "732000", "lr": "0.000270707", "gnorm": "0.753", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "238942"} [2022-08-03 19:51:34,850][train_inner][INFO] - {"epoch": 15, "update": 14.226, "loss": "2.028", "ppl": "4.08", "wps": "363179", "ups": "3.09", "wpb": "117680", "bsz": "256", "num_updates": "732200", "lr": "0.000270505", "gnorm": "0.757", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.9", "wall": "239007"} [2022-08-03 19:52:39,572][train_inner][INFO] - {"epoch": 15, "update": 14.23, "loss": "2.033", "ppl": "4.09", "wps": "367420", "ups": "3.09", "wpb": "118899", "bsz": "256", "num_updates": "732400", "lr": "0.000270303", "gnorm": "0.755", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.1", "wall": "239072"} [2022-08-03 19:53:43,512][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-03 19:53:45,102][train_inner][INFO] - {"epoch": 15, "update": 14.234, "loss": "2.025", "ppl": "4.07", "wps": "360944", "ups": "3.05", "wpb": "118261", "bsz": "256", "num_updates": "732600", "lr": "0.000270101", "gnorm": "0.758", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "26.3", "wall": "239137"} [2022-08-03 19:54:49,952][train_inner][INFO] - {"epoch": 15, "update": 14.238, "loss": "2.034", "ppl": "4.1", "wps": "365426", "ups": "3.08", "wpb": "118488", "bsz": "256", "num_updates": "732800", "lr": "0.000269899", "gnorm": "0.753", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.4", "wall": "239202"} [2022-08-03 19:55:54,956][train_inner][INFO] - {"epoch": 15, "update": 14.242, "loss": "2.03", "ppl": "4.09", "wps": "365015", "ups": "3.08", "wpb": "118636", "bsz": "256", "num_updates": "733000", "lr": "0.000269697", "gnorm": "0.755", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.1", "wall": "239267"} [2022-08-03 19:56:59,190][train_inner][INFO] - {"epoch": 15, "update": 14.246, "loss": "2.029", "ppl": "4.08", "wps": "369026", "ups": "3.11", "wpb": "118518", "bsz": "256", "num_updates": "733200", "lr": "0.000269495", "gnorm": "0.753", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.8", "wall": "239332"} [2022-08-03 19:58:03,658][train_inner][INFO] - {"epoch": 15, "update": 14.25, "loss": "2.027", "ppl": "4.07", "wps": "366856", "ups": "3.1", "wpb": "118251", "bsz": "256", "num_updates": "733400", "lr": "0.000269293", "gnorm": "0.76", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.8", "wall": "239396"} [2022-08-03 19:59:08,678][train_inner][INFO] - {"epoch": 15, "update": 14.254, "loss": "2.026", "ppl": "4.07", "wps": "364041", "ups": "3.08", "wpb": "118349", "bsz": "256", "num_updates": "733600", "lr": "0.000269091", "gnorm": "0.756", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.1", "wall": "239461"} [2022-08-03 20:00:14,146][train_inner][INFO] - {"epoch": 15, "update": 14.257, "loss": "2.028", "ppl": "4.08", "wps": "362082", "ups": "3.05", "wpb": "118521", "bsz": "256", "num_updates": "733800", "lr": "0.000268889", "gnorm": "0.754", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.1", "wall": "239526"} [2022-08-03 20:01:18,522][train_inner][INFO] - {"epoch": 15, "update": 14.261, "loss": "2.027", "ppl": "4.07", "wps": "366924", "ups": "3.11", "wpb": "118103", "bsz": "256", "num_updates": "734000", "lr": "0.000268687", "gnorm": "0.752", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.4", "wall": "239591"} [2022-08-03 20:02:23,171][train_inner][INFO] - {"epoch": 15, "update": 14.265, "loss": "2.029", "ppl": "4.08", "wps": "365946", "ups": "3.09", "wpb": "118289", "bsz": "256", "num_updates": "734200", "lr": "0.000268485", "gnorm": "0.756", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.5", "wall": "239655"} [2022-08-03 20:03:27,827][train_inner][INFO] - {"epoch": 15, "update": 14.269, "loss": "2.024", "ppl": "4.07", "wps": "365715", "ups": "3.09", "wpb": "118226", "bsz": "256", "num_updates": "734400", "lr": "0.000268283", "gnorm": "0.757", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.2", "wall": "239720"} [2022-08-03 20:04:32,637][train_inner][INFO] - {"epoch": 15, "update": 14.273, "loss": "2.027", "ppl": "4.08", "wps": "364717", "ups": "3.09", "wpb": "118184", "bsz": "256", "num_updates": "734600", "lr": "0.000268081", "gnorm": "0.757", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.9", "wall": "239785"} [2022-08-03 20:04:47,289][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-03 20:05:37,839][train_inner][INFO] - {"epoch": 15, "update": 14.277, "loss": "2.027", "ppl": "4.08", "wps": "363188", "ups": "3.07", "wpb": "118402", "bsz": "256", "num_updates": "734800", "lr": "0.000267879", "gnorm": "0.756", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.3", "wall": "239850"} [2022-08-03 20:06:42,615][train_inner][INFO] - {"epoch": 15, "update": 14.281, "loss": "2.028", "ppl": "4.08", "wps": "365994", "ups": "3.09", "wpb": "118537", "bsz": "256", "num_updates": "735000", "lr": "0.000267677", "gnorm": "0.761", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.5", "wall": "239915"} [2022-08-03 20:07:47,679][train_inner][INFO] - {"epoch": 15, "update": 14.285, "loss": "2.034", "ppl": "4.09", "wps": "364478", "ups": "3.07", "wpb": "118570", "bsz": "256", "num_updates": "735200", "lr": "0.000267475", "gnorm": "0.754", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "239980"} [2022-08-03 20:08:53,205][train_inner][INFO] - {"epoch": 15, "update": 14.289, "loss": "2.026", "ppl": "4.07", "wps": "361530", "ups": "3.05", "wpb": "118446", "bsz": "256", "num_updates": "735400", "lr": "0.000267273", "gnorm": "0.755", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.3", "wall": "240046"} [2022-08-03 20:09:58,407][train_inner][INFO] - {"epoch": 15, "update": 14.292, "loss": "2.024", "ppl": "4.07", "wps": "363183", "ups": "3.07", "wpb": "118401", "bsz": "256", "num_updates": "735600", "lr": "0.000267071", "gnorm": "0.756", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.1", "wall": "240111"} [2022-08-03 20:11:03,045][train_inner][INFO] - {"epoch": 15, "update": 14.296, "loss": "2.019", "ppl": "4.05", "wps": "368518", "ups": "3.09", "wpb": "119098", "bsz": "256", "num_updates": "735800", "lr": "0.000266869", "gnorm": "0.751", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.7", "wall": "240175"} [2022-08-03 20:12:08,148][train_inner][INFO] - {"epoch": 15, "update": 14.3, "loss": "2.03", "ppl": "4.08", "wps": "362639", "ups": "3.07", "wpb": "118043", "bsz": "256", "num_updates": "736000", "lr": "0.000266667", "gnorm": "0.759", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.4", "wall": "240240"} [2022-08-03 20:13:14,047][train_inner][INFO] - {"epoch": 15, "update": 14.304, "loss": "2.027", "ppl": "4.08", "wps": "359055", "ups": "3.03", "wpb": "118306", "bsz": "256", "num_updates": "736200", "lr": "0.000266465", "gnorm": "0.76", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "21.3", "wall": "240306"} [2022-08-03 20:14:18,749][train_inner][INFO] - {"epoch": 15, "update": 14.308, "loss": "2.036", "ppl": "4.1", "wps": "364613", "ups": "3.09", "wpb": "117953", "bsz": "256", "num_updates": "736400", "lr": "0.000266263", "gnorm": "0.759", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.7", "wall": "240371"} [2022-08-03 20:15:23,683][train_inner][INFO] - {"epoch": 15, "update": 14.312, "loss": "2.023", "ppl": "4.06", "wps": "365407", "ups": "3.08", "wpb": "118635", "bsz": "256", "num_updates": "736600", "lr": "0.000266061", "gnorm": "0.757", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.2", "wall": "240436"} [2022-08-03 20:15:58,181][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-03 20:16:29,067][train_inner][INFO] - {"epoch": 15, "update": 14.316, "loss": "2.022", "ppl": "4.06", "wps": "363117", "ups": "3.06", "wpb": "118708", "bsz": "256", "num_updates": "736800", "lr": "0.000265859", "gnorm": "0.756", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.5", "wall": "240501"} [2022-08-03 20:17:33,954][train_inner][INFO] - {"epoch": 15, "update": 14.32, "loss": "2.031", "ppl": "4.09", "wps": "366346", "ups": "3.08", "wpb": "118854", "bsz": "256", "num_updates": "737000", "lr": "0.000265657", "gnorm": "0.757", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.6", "wall": "240566"} [2022-08-03 20:17:47,971][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-03 20:18:38,878][train_inner][INFO] - {"epoch": 15, "update": 14.324, "loss": "2.024", "ppl": "4.07", "wps": "364621", "ups": "3.08", "wpb": "118362", "bsz": "256", "num_updates": "737200", "lr": "0.000265455", "gnorm": "0.759", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.9", "wall": "240631"} [2022-08-03 20:19:43,648][train_inner][INFO] - {"epoch": 15, "update": 14.327, "loss": "2.021", "ppl": "4.06", "wps": "366621", "ups": "3.09", "wpb": "118730", "bsz": "256", "num_updates": "737400", "lr": "0.000265253", "gnorm": "0.758", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.8", "wall": "240696"} [2022-08-03 20:20:48,294][train_inner][INFO] - {"epoch": 15, "update": 14.331, "loss": "2.029", "ppl": "4.08", "wps": "363866", "ups": "3.09", "wpb": "117610", "bsz": "256", "num_updates": "737600", "lr": "0.000265051", "gnorm": "0.762", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.1", "wall": "240761"} [2022-08-03 20:21:53,407][train_inner][INFO] - {"epoch": 15, "update": 14.335, "loss": "2.019", "ppl": "4.05", "wps": "366652", "ups": "3.07", "wpb": "119368", "bsz": "256", "num_updates": "737800", "lr": "0.000264848", "gnorm": "0.753", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.1", "wall": "240826"} [2022-08-03 20:22:59,455][train_inner][INFO] - {"epoch": 15, "update": 14.339, "loss": "2.027", "ppl": "4.08", "wps": "356484", "ups": "3.03", "wpb": "117724", "bsz": "256", "num_updates": "738000", "lr": "0.000264646", "gnorm": "0.76", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "24.6", "wall": "240892"} [2022-08-03 20:24:03,833][train_inner][INFO] - {"epoch": 15, "update": 14.343, "loss": "2.03", "ppl": "4.08", "wps": "366391", "ups": "3.11", "wpb": "117935", "bsz": "256", "num_updates": "738200", "lr": "0.000264444", "gnorm": "0.762", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20", "wall": "240956"} [2022-08-03 20:25:09,132][train_inner][INFO] - {"epoch": 15, "update": 14.347, "loss": "2.028", "ppl": "4.08", "wps": "363154", "ups": "3.06", "wpb": "118566", "bsz": "256", "num_updates": "738400", "lr": "0.000264242", "gnorm": "0.759", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "241021"} [2022-08-03 20:26:14,000][train_inner][INFO] - {"epoch": 15, "update": 14.351, "loss": "2.028", "ppl": "4.08", "wps": "364578", "ups": "3.08", "wpb": "118245", "bsz": "256", "num_updates": "738600", "lr": "0.00026404", "gnorm": "0.759", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.8", "wall": "241086"} [2022-08-03 20:27:18,931][train_inner][INFO] - {"epoch": 15, "update": 14.355, "loss": "2.028", "ppl": "4.08", "wps": "363692", "ups": "3.08", "wpb": "118074", "bsz": "256", "num_updates": "738800", "lr": "0.000263838", "gnorm": "0.76", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "241151"} [2022-08-03 20:28:23,888][train_inner][INFO] - {"epoch": 15, "update": 14.359, "loss": "2.027", "ppl": "4.08", "wps": "365925", "ups": "3.08", "wpb": "118844", "bsz": "256", "num_updates": "739000", "lr": "0.000263636", "gnorm": "0.758", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "241216"} [2022-08-03 20:29:28,424][train_inner][INFO] - {"epoch": 15, "update": 14.362, "loss": "2.031", "ppl": "4.09", "wps": "363917", "ups": "3.1", "wpb": "117427", "bsz": "255.9", "num_updates": "739200", "lr": "0.000263434", "gnorm": "0.764", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.7", "wall": "241281"} [2022-08-03 20:30:33,199][train_inner][INFO] - {"epoch": 15, "update": 14.366, "loss": "2.022", "ppl": "4.06", "wps": "363332", "ups": "3.09", "wpb": "117673", "bsz": "256", "num_updates": "739400", "lr": "0.000263232", "gnorm": "0.76", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.1", "wall": "241346"} [2022-08-03 20:31:38,073][train_inner][INFO] - {"epoch": 15, "update": 14.37, "loss": "2.025", "ppl": "4.07", "wps": "364326", "ups": "3.08", "wpb": "118174", "bsz": "256", "num_updates": "739600", "lr": "0.00026303", "gnorm": "0.762", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.3", "wall": "241410"} [2022-08-03 20:32:42,609][train_inner][INFO] - {"epoch": 15, "update": 14.374, "loss": "2.029", "ppl": "4.08", "wps": "365463", "ups": "3.1", "wpb": "117927", "bsz": "256", "num_updates": "739800", "lr": "0.000262828", "gnorm": "0.76", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.7", "wall": "241475"} [2022-08-03 20:33:47,723][train_inner][INFO] - {"epoch": 15, "update": 14.378, "loss": "2.028", "ppl": "4.08", "wps": "362480", "ups": "3.07", "wpb": "118010", "bsz": "256", "num_updates": "740000", "lr": "0.000262626", "gnorm": "0.759", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "241540"} [2022-08-03 20:34:52,242][train_inner][INFO] - {"epoch": 15, "update": 14.382, "loss": "2.021", "ppl": "4.06", "wps": "366059", "ups": "3.1", "wpb": "118086", "bsz": "256", "num_updates": "740200", "lr": "0.000262424", "gnorm": "0.758", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.9", "wall": "241605"} [2022-08-03 20:35:57,227][train_inner][INFO] - {"epoch": 15, "update": 14.386, "loss": "2.022", "ppl": "4.06", "wps": "363844", "ups": "3.08", "wpb": "118221", "bsz": "256", "num_updates": "740400", "lr": "0.000262222", "gnorm": "0.758", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "241670"} [2022-08-03 20:37:02,308][train_inner][INFO] - {"epoch": 15, "update": 14.39, "loss": "2.027", "ppl": "4.08", "wps": "362732", "ups": "3.07", "wpb": "118033", "bsz": "256", "num_updates": "740600", "lr": "0.00026202", "gnorm": "0.764", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "241735"} [2022-08-03 20:38:07,207][train_inner][INFO] - {"epoch": 15, "update": 14.393, "loss": "2.025", "ppl": "4.07", "wps": "365025", "ups": "3.08", "wpb": "118446", "bsz": "256", "num_updates": "740800", "lr": "0.000261818", "gnorm": "0.761", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.9", "wall": "241800"} [2022-08-03 20:39:12,279][train_inner][INFO] - {"epoch": 15, "update": 14.397, "loss": "2.027", "ppl": "4.07", "wps": "363859", "ups": "3.07", "wpb": "118384", "bsz": "256", "num_updates": "741000", "lr": "0.000261616", "gnorm": "0.758", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.3", "wall": "241865"} [2022-08-03 20:40:02,010][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-03 20:40:18,589][train_inner][INFO] - {"epoch": 15, "update": 14.401, "loss": "2.019", "ppl": "4.05", "wps": "357447", "ups": "3.02", "wpb": "118510", "bsz": "256", "num_updates": "741200", "lr": "0.000261414", "gnorm": "0.756", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "21.2", "wall": "241931"} [2022-08-03 20:41:23,442][train_inner][INFO] - {"epoch": 15, "update": 14.405, "loss": "2.026", "ppl": "4.07", "wps": "364549", "ups": "3.08", "wpb": "118209", "bsz": "256", "num_updates": "741400", "lr": "0.000261212", "gnorm": "0.773", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.7", "wall": "241996"} [2022-08-03 20:42:28,468][train_inner][INFO] - {"epoch": 15, "update": 14.409, "loss": "2.021", "ppl": "4.06", "wps": "364204", "ups": "3.08", "wpb": "118413", "bsz": "256", "num_updates": "741600", "lr": "0.00026101", "gnorm": "0.762", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.6", "wall": "242061"} [2022-08-03 20:43:33,366][train_inner][INFO] - {"epoch": 15, "update": 14.413, "loss": "2.021", "ppl": "4.06", "wps": "365488", "ups": "3.08", "wpb": "118596", "bsz": "256", "num_updates": "741800", "lr": "0.000260808", "gnorm": "0.759", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.3", "wall": "242126"} [2022-08-03 20:44:37,969][train_inner][INFO] - {"epoch": 15, "update": 14.417, "loss": "2.023", "ppl": "4.06", "wps": "365073", "ups": "3.1", "wpb": "117923", "bsz": "256", "num_updates": "742000", "lr": "0.000260606", "gnorm": "0.763", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "25.7", "wall": "242190"} [2022-08-03 20:45:43,111][train_inner][INFO] - {"epoch": 15, "update": 14.421, "loss": "2.027", "ppl": "4.08", "wps": "362354", "ups": "3.07", "wpb": "118019", "bsz": "256", "num_updates": "742200", "lr": "0.000260404", "gnorm": "0.76", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "242255"} [2022-08-03 20:46:47,865][train_inner][INFO] - {"epoch": 15, "update": 14.425, "loss": "2.02", "ppl": "4.06", "wps": "364603", "ups": "3.09", "wpb": "118046", "bsz": "256", "num_updates": "742400", "lr": "0.000260202", "gnorm": "0.758", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.9", "wall": "242320"} [2022-08-03 20:47:52,752][train_inner][INFO] - {"epoch": 15, "update": 14.428, "loss": "2.023", "ppl": "4.06", "wps": "363876", "ups": "3.08", "wpb": "118052", "bsz": "256", "num_updates": "742600", "lr": "0.00026", "gnorm": "0.759", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.3", "wall": "242385"} [2022-08-03 20:48:57,738][train_inner][INFO] - {"epoch": 15, "update": 14.432, "loss": "2.02", "ppl": "4.06", "wps": "364178", "ups": "3.08", "wpb": "118332", "bsz": "256", "num_updates": "742800", "lr": "0.000259798", "gnorm": "0.757", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "242450"} [2022-08-03 20:50:02,635][train_inner][INFO] - {"epoch": 15, "update": 14.436, "loss": "2.025", "ppl": "4.07", "wps": "365010", "ups": "3.08", "wpb": "118437", "bsz": "256", "num_updates": "743000", "lr": "0.000259596", "gnorm": "0.764", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.5", "wall": "242515"} [2022-08-03 20:51:07,432][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-03 20:51:08,117][train_inner][INFO] - {"epoch": 15, "update": 14.44, "loss": "2.024", "ppl": "4.07", "wps": "362132", "ups": "3.05", "wpb": "118566", "bsz": "256", "num_updates": "743200", "lr": "0.000259394", "gnorm": "0.76", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "242580"} [2022-08-03 20:52:13,145][train_inner][INFO] - {"epoch": 15, "update": 14.444, "loss": "2.021", "ppl": "4.06", "wps": "363013", "ups": "3.08", "wpb": "118029", "bsz": "256", "num_updates": "743400", "lr": "0.000259192", "gnorm": "0.763", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.8", "wall": "242645"} [2022-08-03 20:53:18,167][train_inner][INFO] - {"epoch": 15, "update": 14.448, "loss": "2.026", "ppl": "4.07", "wps": "364536", "ups": "3.08", "wpb": "118512", "bsz": "256", "num_updates": "743600", "lr": "0.00025899", "gnorm": "0.759", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "242710"} [2022-08-03 20:54:23,197][train_inner][INFO] - {"epoch": 15, "update": 14.452, "loss": "2.023", "ppl": "4.06", "wps": "362445", "ups": "3.08", "wpb": "117848", "bsz": "256", "num_updates": "743800", "lr": "0.000258788", "gnorm": "0.767", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "242776"} [2022-08-03 20:55:28,290][train_inner][INFO] - {"epoch": 15, "update": 14.456, "loss": "2.019", "ppl": "4.05", "wps": "362895", "ups": "3.07", "wpb": "118108", "bsz": "256", "num_updates": "744000", "lr": "0.000258586", "gnorm": "0.763", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "25.8", "wall": "242841"} [2022-08-03 20:56:28,797][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-03 20:56:33,053][train_inner][INFO] - {"epoch": 15, "update": 14.46, "loss": "2.026", "ppl": "4.07", "wps": "364401", "ups": "3.09", "wpb": "117997", "bsz": "256", "num_updates": "744200", "lr": "0.000258384", "gnorm": "0.775", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.1", "wall": "242905"} [2022-08-03 20:57:39,987][train_inner][INFO] - {"epoch": 15, "update": 14.463, "loss": "2.025", "ppl": "4.07", "wps": "354812", "ups": "2.99", "wpb": "118744", "bsz": "256", "num_updates": "744400", "lr": "0.000258182", "gnorm": "0.76", "clip": "0", "loss_scale": "8", "train_wall": "67", "gb_free": "21.1", "wall": "242972"} [2022-08-03 20:58:45,313][train_inner][INFO] - {"epoch": 15, "update": 14.467, "loss": "2.024", "ppl": "4.07", "wps": "362485", "ups": "3.06", "wpb": "118398", "bsz": "256", "num_updates": "744600", "lr": "0.00025798", "gnorm": "0.76", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.2", "wall": "243038"} [2022-08-03 20:59:50,150][train_inner][INFO] - {"epoch": 15, "update": 14.471, "loss": "2.026", "ppl": "4.07", "wps": "364672", "ups": "3.08", "wpb": "118220", "bsz": "256", "num_updates": "744800", "lr": "0.000257778", "gnorm": "0.759", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.7", "wall": "243102"} [2022-08-03 21:00:54,847][train_inner][INFO] - {"epoch": 15, "update": 14.475, "loss": "2.026", "ppl": "4.07", "wps": "367024", "ups": "3.09", "wpb": "118724", "bsz": "256", "num_updates": "745000", "lr": "0.000257576", "gnorm": "0.76", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22.7", "wall": "243167"} [2022-08-03 21:01:59,834][train_inner][INFO] - {"epoch": 15, "update": 14.479, "loss": "2.025", "ppl": "4.07", "wps": "365269", "ups": "3.08", "wpb": "118688", "bsz": "256", "num_updates": "745200", "lr": "0.000257374", "gnorm": "0.764", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "243232"} [2022-08-03 21:03:04,675][train_inner][INFO] - {"epoch": 15, "update": 14.483, "loss": "2.02", "ppl": "4.06", "wps": "365637", "ups": "3.08", "wpb": "118540", "bsz": "256", "num_updates": "745400", "lr": "0.000257172", "gnorm": "0.76", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "28", "wall": "243297"} [2022-08-03 21:04:09,455][train_inner][INFO] - {"epoch": 15, "update": 14.487, "loss": "2.028", "ppl": "4.08", "wps": "366851", "ups": "3.09", "wpb": "118821", "bsz": "256", "num_updates": "745600", "lr": "0.00025697", "gnorm": "0.76", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.4", "wall": "243362"} [2022-08-03 21:05:14,468][train_inner][INFO] - {"epoch": 15, "update": 14.491, "loss": "2.02", "ppl": "4.06", "wps": "362172", "ups": "3.08", "wpb": "117727", "bsz": "256", "num_updates": "745800", "lr": "0.000256768", "gnorm": "0.765", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.5", "wall": "243427"} [2022-08-03 21:06:19,148][train_inner][INFO] - {"epoch": 15, "update": 14.495, "loss": "2.02", "ppl": "4.06", "wps": "366253", "ups": "3.09", "wpb": "118445", "bsz": "256", "num_updates": "746000", "lr": "0.000256566", "gnorm": "0.76", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.4", "wall": "243491"} [2022-08-03 21:07:24,186][train_inner][INFO] - {"epoch": 15, "update": 14.498, "loss": "2.017", "ppl": "4.05", "wps": "363920", "ups": "3.08", "wpb": "118341", "bsz": "256", "num_updates": "746200", "lr": "0.000256364", "gnorm": "0.764", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.5", "wall": "243557"} [2022-08-03 21:08:28,855][train_inner][INFO] - {"epoch": 15, "update": 14.502, "loss": "2.02", "ppl": "4.05", "wps": "368995", "ups": "3.09", "wpb": "119312", "bsz": "256", "num_updates": "746400", "lr": "0.000256162", "gnorm": "0.758", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.4", "wall": "243621"} [2022-08-03 21:09:33,184][train_inner][INFO] - {"epoch": 15, "update": 14.506, "loss": "2.019", "ppl": "4.05", "wps": "370628", "ups": "3.11", "wpb": "119209", "bsz": "256", "num_updates": "746600", "lr": "0.00025596", "gnorm": "0.76", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.1", "wall": "243686"} [2022-08-03 21:10:38,011][train_inner][INFO] - {"epoch": 15, "update": 14.51, "loss": "2.023", "ppl": "4.06", "wps": "363787", "ups": "3.09", "wpb": "117914", "bsz": "255.9", "num_updates": "746800", "lr": "0.000255758", "gnorm": "0.763", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.2", "wall": "243750"} [2022-08-03 21:11:43,213][train_inner][INFO] - {"epoch": 15, "update": 14.514, "loss": "2.024", "ppl": "4.07", "wps": "362710", "ups": "3.07", "wpb": "118245", "bsz": "256", "num_updates": "747000", "lr": "0.000255556", "gnorm": "0.764", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.1", "wall": "243816"} [2022-08-03 21:12:48,195][train_inner][INFO] - {"epoch": 15, "update": 14.518, "loss": "2.023", "ppl": "4.06", "wps": "363119", "ups": "3.08", "wpb": "117980", "bsz": "256", "num_updates": "747200", "lr": "0.000255354", "gnorm": "0.762", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.5", "wall": "243881"} [2022-08-03 21:13:52,536][train_inner][INFO] - {"epoch": 15, "update": 14.522, "loss": "2.024", "ppl": "4.07", "wps": "366872", "ups": "3.11", "wpb": "118022", "bsz": "256", "num_updates": "747400", "lr": "0.000255152", "gnorm": "0.765", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.2", "wall": "243945"} [2022-08-03 21:14:57,318][train_inner][INFO] - {"epoch": 15, "update": 14.526, "loss": "2.022", "ppl": "4.06", "wps": "365896", "ups": "3.09", "wpb": "118515", "bsz": "256", "num_updates": "747600", "lr": "0.000254949", "gnorm": "0.765", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.5", "wall": "244010"} [2022-08-03 21:16:01,774][train_inner][INFO] - {"epoch": 15, "update": 14.529, "loss": "2.016", "ppl": "4.04", "wps": "368738", "ups": "3.1", "wpb": "118837", "bsz": "256", "num_updates": "747800", "lr": "0.000254747", "gnorm": "0.762", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.8", "wall": "244074"} [2022-08-03 21:17:06,671][train_inner][INFO] - {"epoch": 15, "update": 14.533, "loss": "2.02", "ppl": "4.06", "wps": "364780", "ups": "3.08", "wpb": "118363", "bsz": "256", "num_updates": "748000", "lr": "0.000254545", "gnorm": "0.765", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.5", "wall": "244139"} [2022-08-03 21:18:11,589][train_inner][INFO] - {"epoch": 15, "update": 14.537, "loss": "2.02", "ppl": "4.06", "wps": "365138", "ups": "3.08", "wpb": "118518", "bsz": "256", "num_updates": "748200", "lr": "0.000254343", "gnorm": "0.764", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "244204"} [2022-08-03 21:18:39,976][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-03 21:19:16,271][train_inner][INFO] - {"epoch": 15, "update": 14.541, "loss": "2.021", "ppl": "4.06", "wps": "364666", "ups": "3.09", "wpb": "117936", "bsz": "256", "num_updates": "748400", "lr": "0.000254141", "gnorm": "0.769", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.5", "wall": "244269"} [2022-08-03 21:20:01,989][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-03 21:20:21,487][train_inner][INFO] - {"epoch": 15, "update": 14.545, "loss": "2.024", "ppl": "4.07", "wps": "363257", "ups": "3.07", "wpb": "118449", "bsz": "256", "num_updates": "748600", "lr": "0.000253939", "gnorm": "0.775", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "244334"} [2022-08-03 21:21:27,455][train_inner][INFO] - {"epoch": 15, "update": 14.549, "loss": "2.02", "ppl": "4.06", "wps": "358075", "ups": "3.03", "wpb": "118107", "bsz": "256", "num_updates": "748800", "lr": "0.000253737", "gnorm": "0.766", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "23.2", "wall": "244400"} [2022-08-03 21:22:32,111][train_inner][INFO] - {"epoch": 15, "update": 14.553, "loss": "2.022", "ppl": "4.06", "wps": "365796", "ups": "3.09", "wpb": "118253", "bsz": "256", "num_updates": "749000", "lr": "0.000253535", "gnorm": "0.767", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21", "wall": "244464"} [2022-08-03 21:23:36,973][train_inner][INFO] - {"epoch": 15, "update": 14.557, "loss": "2.018", "ppl": "4.05", "wps": "366842", "ups": "3.08", "wpb": "118970", "bsz": "256", "num_updates": "749200", "lr": "0.000253333", "gnorm": "0.761", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.4", "wall": "244529"} [2022-08-03 21:24:42,712][train_inner][INFO] - {"epoch": 15, "update": 14.561, "loss": "2.021", "ppl": "4.06", "wps": "356212", "ups": "3.04", "wpb": "117082", "bsz": "256", "num_updates": "749400", "lr": "0.000253131", "gnorm": "0.77", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.6", "wall": "244595"} [2022-08-03 21:25:44,822][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 [2022-08-03 21:25:47,778][train_inner][INFO] - {"epoch": 15, "update": 14.565, "loss": "2.023", "ppl": "4.06", "wps": "362094", "ups": "3.07", "wpb": "117798", "bsz": "256", "num_updates": "749600", "lr": "0.000252929", "gnorm": "0.767", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.6", "wall": "244660"} [2022-08-03 21:26:52,715][train_inner][INFO] - {"epoch": 15, "update": 14.568, "loss": "2.021", "ppl": "4.06", "wps": "365046", "ups": "3.08", "wpb": "118524", "bsz": "256", "num_updates": "749800", "lr": "0.000252727", "gnorm": "0.763", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "23.9", "wall": "244725"} [2022-08-03 21:27:57,457][train_inner][INFO] - {"epoch": 15, "update": 14.572, "loss": "2.022", "ppl": "4.06", "wps": "363720", "ups": "3.09", "wpb": "117738", "bsz": "256", "num_updates": "750000", "lr": "0.000252525", "gnorm": "0.776", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.8", "wall": "244790"} [2022-08-03 21:27:57,458][fairseq_cli.train][INFO] - begin validation on "valid" subset [2022-08-03 21:28:20,509][valid][INFO] - {"epoch": 15, "valid_loss": "1.931", "valid_ppl": "3.81", "valid_wps": "1.57705e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "750000", "valid_best_loss": "1.931"} [2022-08-03 21:28:20,512][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 15 @ 750000 updates [2022-08-03 21:28:20,513][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_15_750000.pt [2022-08-03 21:28:30,017][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_15_750000.pt [2022-08-03 21:29:00,142][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/adan2/checkpoint_15_750000.pt (epoch 15 @ 750000 updates, score 1.931) (writing took 39.630318000912666 seconds) [2022-08-03 21:30:05,088][train_inner][INFO] - {"epoch": 15, "update": 14.576, "loss": "2.012", "ppl": "4.03", "wps": "186172", "ups": "1.57", "wpb": "118806", "bsz": "256", "num_updates": "750200", "lr": "0.000252323", "gnorm": "0.761", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20", "wall": "244917"} [2022-08-03 21:31:10,146][train_inner][INFO] - {"epoch": 15, "update": 14.58, "loss": "2.022", "ppl": "4.06", "wps": "364152", "ups": "3.07", "wpb": "118452", "bsz": "256", "num_updates": "750400", "lr": "0.000252121", "gnorm": "0.765", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.5", "wall": "244982"} [2022-08-03 21:32:14,776][train_inner][INFO] - {"epoch": 15, "update": 14.584, "loss": "2.018", "ppl": "4.05", "wps": "366447", "ups": "3.09", "wpb": "118416", "bsz": "256", "num_updates": "750600", "lr": "0.000251919", "gnorm": "0.765", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.8", "wall": "245047"} [2022-08-03 21:33:19,535][train_inner][INFO] - {"epoch": 15, "update": 14.588, "loss": "2.025", "ppl": "4.07", "wps": "363122", "ups": "3.09", "wpb": "117577", "bsz": "256", "num_updates": "750800", "lr": "0.000251717", "gnorm": "0.771", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "19.9", "wall": "245112"} [2022-08-03 21:34:24,634][train_inner][INFO] - {"epoch": 15, "update": 14.592, "loss": "2.017", "ppl": "4.05", "wps": "363405", "ups": "3.07", "wpb": "118283", "bsz": "256", "num_updates": "751000", "lr": "0.000251515", "gnorm": "0.766", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.3", "wall": "245177"} [2022-08-03 21:35:29,749][train_inner][INFO] - {"epoch": 15, "update": 14.596, "loss": "2.027", "ppl": "4.08", "wps": "361707", "ups": "3.07", "wpb": "117762", "bsz": "256", "num_updates": "751200", "lr": "0.000251313", "gnorm": "0.773", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.7", "wall": "245242"} [2022-08-03 21:36:34,612][train_inner][INFO] - {"epoch": 15, "update": 14.599, "loss": "2.015", "ppl": "4.04", "wps": "365988", "ups": "3.08", "wpb": "118694", "bsz": "256", "num_updates": "751400", "lr": "0.000251111", "gnorm": "0.762", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.8", "wall": "245307"} [2022-08-03 21:37:39,563][train_inner][INFO] - {"epoch": 15, "update": 14.603, "loss": "2.015", "ppl": "4.04", "wps": "364336", "ups": "3.08", "wpb": "118316", "bsz": "256", "num_updates": "751600", "lr": "0.000250909", "gnorm": "0.768", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "19.8", "wall": "245372"} [2022-08-03 21:38:44,171][train_inner][INFO] - {"epoch": 15, "update": 14.607, "loss": "2.02", "ppl": "4.06", "wps": "365394", "ups": "3.1", "wpb": "118035", "bsz": "256", "num_updates": "751800", "lr": "0.000250707", "gnorm": "0.767", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.7", "wall": "245436"} [2022-08-03 21:39:49,225][train_inner][INFO] - {"epoch": 15, "update": 14.611, "loss": "2.022", "ppl": "4.06", "wps": "364404", "ups": "3.07", "wpb": "118527", "bsz": "256", "num_updates": "752000", "lr": "0.000250505", "gnorm": "0.764", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.6", "wall": "245502"} [2022-08-03 21:40:55,510][train_inner][INFO] - {"epoch": 15, "update": 14.615, "loss": "2.022", "ppl": "4.06", "wps": "355907", "ups": "3.02", "wpb": "117955", "bsz": "256", "num_updates": "752200", "lr": "0.000250303", "gnorm": "0.772", "clip": "0", "loss_scale": "8", "train_wall": "66", "gb_free": "22.4", "wall": "245568"} [2022-08-03 21:41:59,915][train_inner][INFO] - {"epoch": 15, "update": 14.619, "loss": "2.021", "ppl": "4.06", "wps": "365378", "ups": "3.11", "wpb": "117660", "bsz": "256", "num_updates": "752400", "lr": "0.000250101", "gnorm": "0.772", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.8", "wall": "245632"} [2022-08-03 21:43:04,782][train_inner][INFO] - {"epoch": 15, "update": 14.623, "loss": "2.023", "ppl": "4.06", "wps": "364548", "ups": "3.08", "wpb": "118233", "bsz": "256", "num_updates": "752600", "lr": "0.000249899", "gnorm": "0.768", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.3", "wall": "245697"} [2022-08-03 21:44:09,509][train_inner][INFO] - {"epoch": 15, "update": 14.627, "loss": "2.02", "ppl": "4.05", "wps": "363936", "ups": "3.09", "wpb": "117780", "bsz": "256", "num_updates": "752800", "lr": "0.000249697", "gnorm": "0.766", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20", "wall": "245762"} [2022-08-03 21:45:13,793][train_inner][INFO] - {"epoch": 15, "update": 14.631, "loss": "2.021", "ppl": "4.06", "wps": "367190", "ups": "3.11", "wpb": "118021", "bsz": "256", "num_updates": "753000", "lr": "0.000249495", "gnorm": "0.77", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.9", "wall": "245826"} [2022-08-03 21:46:18,373][train_inner][INFO] - {"epoch": 15, "update": 14.634, "loss": "2.014", "ppl": "4.04", "wps": "366450", "ups": "3.1", "wpb": "118326", "bsz": "256", "num_updates": "753200", "lr": "0.000249293", "gnorm": "0.77", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.7", "wall": "245891"} [2022-08-03 21:47:23,224][train_inner][INFO] - {"epoch": 15, "update": 14.638, "loss": "2.017", "ppl": "4.05", "wps": "364436", "ups": "3.08", "wpb": "118167", "bsz": "256", "num_updates": "753400", "lr": "0.000249091", "gnorm": "0.766", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.9", "wall": "245956"} [2022-08-03 21:48:28,302][train_inner][INFO] - {"epoch": 15, "update": 14.642, "loss": "2.015", "ppl": "4.04", "wps": "365933", "ups": "3.07", "wpb": "119069", "bsz": "256", "num_updates": "753600", "lr": "0.000248889", "gnorm": "0.763", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.1", "wall": "246021"} [2022-08-03 21:49:33,073][train_inner][INFO] - {"epoch": 15, "update": 14.646, "loss": "2.02", "ppl": "4.06", "wps": "365108", "ups": "3.09", "wpb": "118240", "bsz": "256", "num_updates": "753800", "lr": "0.000248687", "gnorm": "0.766", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "27.4", "wall": "246085"} [2022-08-03 21:50:37,481][train_inner][INFO] - {"epoch": 15, "update": 14.65, "loss": "2.019", "ppl": "4.05", "wps": "368606", "ups": "3.11", "wpb": "118704", "bsz": "256", "num_updates": "754000", "lr": "0.000248485", "gnorm": "0.765", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.1", "wall": "246150"} [2022-08-03 21:51:41,912][train_inner][INFO] - {"epoch": 15, "update": 14.654, "loss": "2.023", "ppl": "4.06", "wps": "366492", "ups": "3.1", "wpb": "118064", "bsz": "256", "num_updates": "754200", "lr": "0.000248283", "gnorm": "0.768", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.1", "wall": "246214"} [2022-08-03 21:52:46,909][train_inner][INFO] - {"epoch": 15, "update": 14.658, "loss": "2.025", "ppl": "4.07", "wps": "364586", "ups": "3.08", "wpb": "118484", "bsz": "256", "num_updates": "754400", "lr": "0.000248081", "gnorm": "0.768", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.9", "wall": "246279"} [2022-08-03 21:53:52,142][train_inner][INFO] - {"epoch": 15, "update": 14.662, "loss": "2.013", "ppl": "4.04", "wps": "363461", "ups": "3.07", "wpb": "118546", "bsz": "256", "num_updates": "754600", "lr": "0.000247879", "gnorm": "0.765", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "27.5", "wall": "246344"} [2022-08-03 21:54:57,293][train_inner][INFO] - {"epoch": 15, "update": 14.665, "loss": "2.015", "ppl": "4.04", "wps": "363564", "ups": "3.07", "wpb": "118431", "bsz": "256", "num_updates": "754800", "lr": "0.000247677", "gnorm": "0.768", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "246410"} [2022-08-03 21:56:02,127][train_inner][INFO] - {"epoch": 15, "update": 14.669, "loss": "2.017", "ppl": "4.05", "wps": "365478", "ups": "3.08", "wpb": "118475", "bsz": "256", "num_updates": "755000", "lr": "0.000247475", "gnorm": "0.766", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.9", "wall": "246474"} [2022-08-03 21:57:06,754][train_inner][INFO] - {"epoch": 15, "update": 14.673, "loss": "2.021", "ppl": "4.06", "wps": "366079", "ups": "3.09", "wpb": "118291", "bsz": "256", "num_updates": "755200", "lr": "0.000247273", "gnorm": "0.771", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.6", "wall": "246539"} [2022-08-03 21:58:11,792][train_inner][INFO] - {"epoch": 15, "update": 14.677, "loss": "2.013", "ppl": "4.04", "wps": "365393", "ups": "3.08", "wpb": "118822", "bsz": "256", "num_updates": "755400", "lr": "0.000247071", "gnorm": "0.763", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "246604"} [2022-08-03 21:59:16,392][train_inner][INFO] - {"epoch": 15, "update": 14.681, "loss": "2.02", "ppl": "4.06", "wps": "365588", "ups": "3.1", "wpb": "118082", "bsz": "256", "num_updates": "755600", "lr": "0.000246869", "gnorm": "0.767", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.8", "wall": "246669"} [2022-08-03 22:00:02,029][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-03 22:00:21,496][train_inner][INFO] - {"epoch": 15, "update": 14.685, "loss": "2.021", "ppl": "4.06", "wps": "362941", "ups": "3.07", "wpb": "118143", "bsz": "256", "num_updates": "755800", "lr": "0.000246667", "gnorm": "0.767", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "246734"} [2022-08-03 22:01:26,751][train_inner][INFO] - {"epoch": 15, "update": 14.689, "loss": "2.013", "ppl": "4.04", "wps": "363176", "ups": "3.06", "wpb": "118494", "bsz": "256", "num_updates": "756000", "lr": "0.000246465", "gnorm": "0.765", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.4", "wall": "246799"} [2022-08-03 22:02:31,928][train_inner][INFO] - {"epoch": 15, "update": 14.693, "loss": "2.019", "ppl": "4.05", "wps": "363794", "ups": "3.07", "wpb": "118553", "bsz": "256", "num_updates": "756200", "lr": "0.000246263", "gnorm": "0.772", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "246864"} [2022-08-03 22:03:36,681][train_inner][INFO] - {"epoch": 15, "update": 14.697, "loss": "2.011", "ppl": "4.03", "wps": "367942", "ups": "3.09", "wpb": "119126", "bsz": "256", "num_updates": "756400", "lr": "0.000246061", "gnorm": "0.768", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.4", "wall": "246929"} [2022-08-03 22:04:41,823][train_inner][INFO] - {"epoch": 15, "update": 14.7, "loss": "2.019", "ppl": "4.05", "wps": "364081", "ups": "3.07", "wpb": "118583", "bsz": "256", "num_updates": "756600", "lr": "0.000245859", "gnorm": "0.768", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.8", "wall": "246994"} [2022-08-03 22:05:46,812][train_inner][INFO] - {"epoch": 15, "update": 14.704, "loss": "2.016", "ppl": "4.04", "wps": "365762", "ups": "3.08", "wpb": "118850", "bsz": "256", "num_updates": "756800", "lr": "0.000245657", "gnorm": "0.77", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.3", "wall": "247059"} [2022-08-03 22:06:51,913][train_inner][INFO] - {"epoch": 15, "update": 14.708, "loss": "2.014", "ppl": "4.04", "wps": "364413", "ups": "3.07", "wpb": "118617", "bsz": "256", "num_updates": "757000", "lr": "0.000245455", "gnorm": "0.765", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.9", "wall": "247124"} [2022-08-03 22:07:56,613][train_inner][INFO] - {"epoch": 15, "update": 14.712, "loss": "2.017", "ppl": "4.05", "wps": "364970", "ups": "3.09", "wpb": "118066", "bsz": "256", "num_updates": "757200", "lr": "0.000245253", "gnorm": "0.768", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "247189"} [2022-08-03 22:09:01,271][train_inner][INFO] - {"epoch": 15, "update": 14.716, "loss": "2.019", "ppl": "4.05", "wps": "365785", "ups": "3.09", "wpb": "118252", "bsz": "256", "num_updates": "757400", "lr": "0.000245051", "gnorm": "0.774", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.9", "wall": "247254"} [2022-08-03 22:10:06,142][train_inner][INFO] - {"epoch": 15, "update": 14.72, "loss": "2.007", "ppl": "4.02", "wps": "365210", "ups": "3.08", "wpb": "118455", "bsz": "256", "num_updates": "757600", "lr": "0.000244848", "gnorm": "0.769", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.6", "wall": "247318"} [2022-08-03 22:10:24,599][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-03 22:11:11,332][train_inner][INFO] - {"epoch": 15, "update": 14.724, "loss": "2.017", "ppl": "4.05", "wps": "363358", "ups": "3.07", "wpb": "118436", "bsz": "256", "num_updates": "757800", "lr": "0.000244646", "gnorm": "0.768", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.5", "wall": "247384"} [2022-08-03 22:12:16,229][train_inner][INFO] - {"epoch": 15, "update": 14.728, "loss": "2.017", "ppl": "4.05", "wps": "363539", "ups": "3.08", "wpb": "117961", "bsz": "256", "num_updates": "758000", "lr": "0.000244444", "gnorm": "0.771", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.6", "wall": "247449"} [2022-08-03 22:13:21,125][train_inner][INFO] - {"epoch": 15, "update": 14.732, "loss": "2.017", "ppl": "4.05", "wps": "366290", "ups": "3.08", "wpb": "118853", "bsz": "256", "num_updates": "758200", "lr": "0.000244242", "gnorm": "0.771", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.7", "wall": "247513"} [2022-08-03 22:14:25,899][train_inner][INFO] - {"epoch": 15, "update": 14.735, "loss": "2.024", "ppl": "4.07", "wps": "365312", "ups": "3.09", "wpb": "118312", "bsz": "256", "num_updates": "758400", "lr": "0.00024404", "gnorm": "0.775", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.3", "wall": "247578"} [2022-08-03 22:15:30,881][train_inner][INFO] - {"epoch": 15, "update": 14.739, "loss": "2.013", "ppl": "4.04", "wps": "363805", "ups": "3.08", "wpb": "118201", "bsz": "256", "num_updates": "758600", "lr": "0.000243838", "gnorm": "0.766", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.3", "wall": "247643"} [2022-08-03 22:16:35,501][train_inner][INFO] - {"epoch": 15, "update": 14.743, "loss": "2.019", "ppl": "4.05", "wps": "366801", "ups": "3.1", "wpb": "118512", "bsz": "256", "num_updates": "758800", "lr": "0.000243636", "gnorm": "0.769", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.7", "wall": "247708"} [2022-08-03 22:17:40,382][train_inner][INFO] - {"epoch": 15, "update": 14.747, "loss": "2.016", "ppl": "4.05", "wps": "366106", "ups": "3.08", "wpb": "118765", "bsz": "256", "num_updates": "759000", "lr": "0.000243434", "gnorm": "0.767", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.6", "wall": "247773"} [2022-08-03 22:18:44,892][train_inner][INFO] - {"epoch": 15, "update": 14.751, "loss": "2.018", "ppl": "4.05", "wps": "364156", "ups": "3.1", "wpb": "117456", "bsz": "256", "num_updates": "759200", "lr": "0.000243232", "gnorm": "0.771", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.9", "wall": "247837"} [2022-08-03 22:19:49,778][train_inner][INFO] - {"epoch": 15, "update": 14.755, "loss": "2.015", "ppl": "4.04", "wps": "363510", "ups": "3.08", "wpb": "117933", "bsz": "256", "num_updates": "759400", "lr": "0.00024303", "gnorm": "0.772", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.2", "wall": "247902"} [2022-08-03 22:20:54,838][train_inner][INFO] - {"epoch": 15, "update": 14.759, "loss": "2.013", "ppl": "4.04", "wps": "363849", "ups": "3.07", "wpb": "118358", "bsz": "256", "num_updates": "759600", "lr": "0.000242828", "gnorm": "0.784", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "26.5", "wall": "247967"} [2022-08-03 22:21:59,664][train_inner][INFO] - {"epoch": 15, "update": 14.763, "loss": "2.012", "ppl": "4.03", "wps": "366000", "ups": "3.09", "wpb": "118630", "bsz": "256", "num_updates": "759800", "lr": "0.000242626", "gnorm": "0.769", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.9", "wall": "248032"} [2022-08-03 22:23:04,503][train_inner][INFO] - {"epoch": 15, "update": 14.767, "loss": "2.02", "ppl": "4.06", "wps": "363188", "ups": "3.08", "wpb": "117742", "bsz": "256", "num_updates": "760000", "lr": "0.000242424", "gnorm": "0.773", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.7", "wall": "248097"} [2022-08-03 22:24:09,202][train_inner][INFO] - {"epoch": 15, "update": 14.77, "loss": "2.014", "ppl": "4.04", "wps": "367571", "ups": "3.09", "wpb": "118904", "bsz": "256", "num_updates": "760200", "lr": "0.000242222", "gnorm": "0.769", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20", "wall": "248162"} [2022-08-03 22:25:14,180][train_inner][INFO] - {"epoch": 15, "update": 14.774, "loss": "2.014", "ppl": "4.04", "wps": "365880", "ups": "3.08", "wpb": "118869", "bsz": "256", "num_updates": "760400", "lr": "0.00024202", "gnorm": "0.77", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.9", "wall": "248227"} [2022-08-03 22:26:19,077][train_inner][INFO] - {"epoch": 15, "update": 14.778, "loss": "2.013", "ppl": "4.04", "wps": "363588", "ups": "3.08", "wpb": "117978", "bsz": "256", "num_updates": "760600", "lr": "0.000241818", "gnorm": "0.77", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.4", "wall": "248291"} [2022-08-03 22:27:23,984][train_inner][INFO] - {"epoch": 15, "update": 14.782, "loss": "2.019", "ppl": "4.05", "wps": "363695", "ups": "3.08", "wpb": "118030", "bsz": "256", "num_updates": "760800", "lr": "0.000241616", "gnorm": "0.771", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.8", "wall": "248356"} [2022-08-03 22:28:29,347][train_inner][INFO] - {"epoch": 15, "update": 14.786, "loss": "2.02", "ppl": "4.06", "wps": "361715", "ups": "3.06", "wpb": "118212", "bsz": "256", "num_updates": "761000", "lr": "0.000241414", "gnorm": "0.774", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.7", "wall": "248422"} [2022-08-03 22:29:34,554][train_inner][INFO] - {"epoch": 15, "update": 14.79, "loss": "2.011", "ppl": "4.03", "wps": "364392", "ups": "3.07", "wpb": "118803", "bsz": "256", "num_updates": "761200", "lr": "0.000241212", "gnorm": "0.769", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "248487"} [2022-08-03 22:30:39,295][train_inner][INFO] - {"epoch": 15, "update": 14.794, "loss": "2.014", "ppl": "4.04", "wps": "366897", "ups": "3.09", "wpb": "118766", "bsz": "256", "num_updates": "761400", "lr": "0.00024101", "gnorm": "0.77", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.9", "wall": "248552"} [2022-08-03 22:31:44,075][train_inner][INFO] - {"epoch": 15, "update": 14.798, "loss": "2.01", "ppl": "4.03", "wps": "365296", "ups": "3.09", "wpb": "118317", "bsz": "256", "num_updates": "761600", "lr": "0.000240808", "gnorm": "0.772", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "25.6", "wall": "248616"} [2022-08-03 22:32:36,836][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-03 22:32:49,682][train_inner][INFO] - {"epoch": 15, "update": 14.802, "loss": "2.007", "ppl": "4.02", "wps": "361693", "ups": "3.05", "wpb": "118646", "bsz": "256", "num_updates": "761800", "lr": "0.000240606", "gnorm": "0.772", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "248682"} [2022-08-03 22:33:54,314][train_inner][INFO] - {"epoch": 15, "update": 14.805, "loss": "2.007", "ppl": "4.02", "wps": "364935", "ups": "3.09", "wpb": "117931", "bsz": "256", "num_updates": "762000", "lr": "0.000240404", "gnorm": "0.767", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22", "wall": "248747"} [2022-08-03 22:34:58,703][train_inner][INFO] - {"epoch": 15, "update": 14.809, "loss": "2.022", "ppl": "4.06", "wps": "366890", "ups": "3.11", "wpb": "118117", "bsz": "256", "num_updates": "762200", "lr": "0.000240202", "gnorm": "0.776", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "28", "wall": "248811"} [2022-08-03 22:36:03,483][train_inner][INFO] - {"epoch": 15, "update": 14.813, "loss": "2.014", "ppl": "4.04", "wps": "366610", "ups": "3.09", "wpb": "118744", "bsz": "256", "num_updates": "762400", "lr": "0.00024", "gnorm": "0.771", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.4", "wall": "248876"} [2022-08-03 22:37:08,673][train_inner][INFO] - {"epoch": 15, "update": 14.817, "loss": "2.01", "ppl": "4.03", "wps": "362478", "ups": "3.07", "wpb": "118148", "bsz": "256", "num_updates": "762600", "lr": "0.000239798", "gnorm": "0.772", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.7", "wall": "248941"} [2022-08-03 22:38:13,647][train_inner][INFO] - {"epoch": 15, "update": 14.821, "loss": "2.011", "ppl": "4.03", "wps": "365640", "ups": "3.08", "wpb": "118784", "bsz": "256", "num_updates": "762800", "lr": "0.000239596", "gnorm": "0.767", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "249006"} [2022-08-03 22:39:18,939][train_inner][INFO] - {"epoch": 15, "update": 14.825, "loss": "2.008", "ppl": "4.02", "wps": "361239", "ups": "3.06", "wpb": "117928", "bsz": "256", "num_updates": "763000", "lr": "0.000239394", "gnorm": "0.775", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.2", "wall": "249071"} [2022-08-03 22:40:23,624][train_inner][INFO] - {"epoch": 15, "update": 14.829, "loss": "2.01", "ppl": "4.03", "wps": "366894", "ups": "3.09", "wpb": "118661", "bsz": "256", "num_updates": "763200", "lr": "0.000239192", "gnorm": "0.769", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.9", "wall": "249136"} [2022-08-03 22:41:15,600][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-03 22:41:28,568][train_inner][INFO] - {"epoch": 15, "update": 14.833, "loss": "2.014", "ppl": "4.04", "wps": "364207", "ups": "3.08", "wpb": "118264", "bsz": "256", "num_updates": "763400", "lr": "0.00023899", "gnorm": "0.772", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.1", "wall": "249201"} [2022-08-03 22:42:33,370][train_inner][INFO] - {"epoch": 15, "update": 14.836, "loss": "2.012", "ppl": "4.03", "wps": "363074", "ups": "3.09", "wpb": "117638", "bsz": "256", "num_updates": "763600", "lr": "0.000238788", "gnorm": "0.775", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "26.6", "wall": "249266"} [2022-08-03 22:43:38,921][train_inner][INFO] - {"epoch": 15, "update": 14.84, "loss": "2.011", "ppl": "4.03", "wps": "360176", "ups": "3.05", "wpb": "118048", "bsz": "256", "num_updates": "763800", "lr": "0.000238586", "gnorm": "0.773", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.5", "wall": "249331"} [2022-08-03 22:44:43,696][train_inner][INFO] - {"epoch": 15, "update": 14.844, "loss": "2.01", "ppl": "4.03", "wps": "364197", "ups": "3.09", "wpb": "117952", "bsz": "256", "num_updates": "764000", "lr": "0.000238384", "gnorm": "0.774", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "28.2", "wall": "249396"} [2022-08-03 22:45:48,436][train_inner][INFO] - {"epoch": 15, "update": 14.848, "loss": "2.022", "ppl": "4.06", "wps": "363457", "ups": "3.09", "wpb": "117650", "bsz": "256", "num_updates": "764200", "lr": "0.000238182", "gnorm": "0.776", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.2", "wall": "249461"} [2022-08-03 22:46:53,302][train_inner][INFO] - {"epoch": 15, "update": 14.852, "loss": "2.012", "ppl": "4.03", "wps": "364162", "ups": "3.08", "wpb": "118106", "bsz": "256", "num_updates": "764400", "lr": "0.00023798", "gnorm": "0.775", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.6", "wall": "249526"} [2022-08-03 22:47:57,880][train_inner][INFO] - {"epoch": 15, "update": 14.856, "loss": "2.01", "ppl": "4.03", "wps": "366524", "ups": "3.1", "wpb": "118346", "bsz": "256", "num_updates": "764600", "lr": "0.000237778", "gnorm": "0.775", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.7", "wall": "249590"} [2022-08-03 22:49:02,548][train_inner][INFO] - {"epoch": 15, "update": 14.86, "loss": "2.012", "ppl": "4.03", "wps": "364780", "ups": "3.09", "wpb": "117945", "bsz": "256", "num_updates": "764800", "lr": "0.000237576", "gnorm": "0.773", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.6", "wall": "249655"} [2022-08-03 22:50:07,235][train_inner][INFO] - {"epoch": 15, "update": 14.864, "loss": "2.01", "ppl": "4.03", "wps": "365011", "ups": "3.09", "wpb": "118055", "bsz": "256", "num_updates": "765000", "lr": "0.000237374", "gnorm": "0.78", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.3", "wall": "249720"} [2022-08-03 22:51:12,507][train_inner][INFO] - {"epoch": 15, "update": 14.868, "loss": "2.012", "ppl": "4.03", "wps": "361354", "ups": "3.06", "wpb": "117931", "bsz": "256", "num_updates": "765200", "lr": "0.000237172", "gnorm": "0.774", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.3", "wall": "249785"} [2022-08-03 22:52:17,405][train_inner][INFO] - {"epoch": 15, "update": 14.871, "loss": "2.015", "ppl": "4.04", "wps": "363822", "ups": "3.08", "wpb": "118055", "bsz": "256", "num_updates": "765400", "lr": "0.00023697", "gnorm": "0.777", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "249850"} [2022-08-03 22:53:22,708][train_inner][INFO] - {"epoch": 15, "update": 14.875, "loss": "2.008", "ppl": "4.02", "wps": "362599", "ups": "3.06", "wpb": "118392", "bsz": "256", "num_updates": "765600", "lr": "0.000236768", "gnorm": "0.776", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.7", "wall": "249915"} [2022-08-03 22:54:27,624][train_inner][INFO] - {"epoch": 15, "update": 14.879, "loss": "2.014", "ppl": "4.04", "wps": "364034", "ups": "3.08", "wpb": "118156", "bsz": "256", "num_updates": "765800", "lr": "0.000236566", "gnorm": "0.773", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.7", "wall": "249980"} [2022-08-03 22:55:32,723][train_inner][INFO] - {"epoch": 15, "update": 14.883, "loss": "2.011", "ppl": "4.03", "wps": "363517", "ups": "3.07", "wpb": "118321", "bsz": "256", "num_updates": "766000", "lr": "0.000236364", "gnorm": "0.775", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.3", "wall": "250045"} [2022-08-03 22:56:37,763][train_inner][INFO] - {"epoch": 15, "update": 14.887, "loss": "2.007", "ppl": "4.02", "wps": "364219", "ups": "3.08", "wpb": "118442", "bsz": "256", "num_updates": "766200", "lr": "0.000236162", "gnorm": "0.77", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "250110"} [2022-08-03 22:57:42,816][train_inner][INFO] - {"epoch": 15, "update": 14.891, "loss": "2.007", "ppl": "4.02", "wps": "365598", "ups": "3.07", "wpb": "118916", "bsz": "256", "num_updates": "766400", "lr": "0.00023596", "gnorm": "0.773", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.1", "wall": "250175"} [2022-08-03 22:58:47,382][train_inner][INFO] - {"epoch": 15, "update": 14.895, "loss": "2.011", "ppl": "4.03", "wps": "365186", "ups": "3.1", "wpb": "117891", "bsz": "256", "num_updates": "766600", "lr": "0.000235758", "gnorm": "0.777", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.4", "wall": "250240"} [2022-08-03 22:59:52,297][train_inner][INFO] - {"epoch": 15, "update": 14.899, "loss": "2.01", "ppl": "4.03", "wps": "363957", "ups": "3.08", "wpb": "118130", "bsz": "256", "num_updates": "766800", "lr": "0.000235556", "gnorm": "0.775", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22", "wall": "250305"} [2022-08-03 23:00:57,476][train_inner][INFO] - {"epoch": 15, "update": 14.903, "loss": "2.013", "ppl": "4.04", "wps": "363595", "ups": "3.07", "wpb": "118492", "bsz": "256", "num_updates": "767000", "lr": "0.000235354", "gnorm": "0.774", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.9", "wall": "250370"} [2022-08-03 23:02:02,099][train_inner][INFO] - {"epoch": 15, "update": 14.906, "loss": "2.014", "ppl": "4.04", "wps": "367754", "ups": "3.09", "wpb": "118824", "bsz": "256", "num_updates": "767200", "lr": "0.000235152", "gnorm": "0.773", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "25.5", "wall": "250434"} [2022-08-03 23:03:07,099][train_inner][INFO] - {"epoch": 15, "update": 14.91, "loss": "2.008", "ppl": "4.02", "wps": "361779", "ups": "3.08", "wpb": "117578", "bsz": "256", "num_updates": "767400", "lr": "0.000234949", "gnorm": "0.78", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.7", "wall": "250499"} [2022-08-03 23:03:25,966][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-03 23:04:12,329][train_inner][INFO] - {"epoch": 15, "update": 14.914, "loss": "2.008", "ppl": "4.02", "wps": "362983", "ups": "3.07", "wpb": "118385", "bsz": "256", "num_updates": "767600", "lr": "0.000234747", "gnorm": "0.775", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "250565"} [2022-08-03 23:05:17,194][train_inner][INFO] - {"epoch": 15, "update": 14.918, "loss": "2.008", "ppl": "4.02", "wps": "364108", "ups": "3.08", "wpb": "118088", "bsz": "256", "num_updates": "767800", "lr": "0.000234545", "gnorm": "0.777", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23", "wall": "250630"} [2022-08-03 23:06:23,148][train_inner][INFO] - {"epoch": 15, "update": 14.922, "loss": "2.012", "ppl": "4.03", "wps": "359223", "ups": "3.03", "wpb": "118461", "bsz": "256", "num_updates": "768000", "lr": "0.000234343", "gnorm": "0.775", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "20", "wall": "250695"} [2022-08-03 23:07:27,763][train_inner][INFO] - {"epoch": 15, "update": 14.926, "loss": "2.009", "ppl": "4.03", "wps": "364474", "ups": "3.1", "wpb": "117750", "bsz": "256", "num_updates": "768200", "lr": "0.000234141", "gnorm": "0.775", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "250760"} [2022-08-03 23:08:32,768][train_inner][INFO] - {"epoch": 15, "update": 14.93, "loss": "2.008", "ppl": "4.02", "wps": "363936", "ups": "3.08", "wpb": "118286", "bsz": "256", "num_updates": "768400", "lr": "0.000233939", "gnorm": "0.775", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.4", "wall": "250825"} [2022-08-03 23:09:39,226][train_inner][INFO] - {"epoch": 15, "update": 14.934, "loss": "2.006", "ppl": "4.02", "wps": "356199", "ups": "3.01", "wpb": "118361", "bsz": "256", "num_updates": "768600", "lr": "0.000233737", "gnorm": "0.781", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "19.8", "wall": "250892"} [2022-08-03 23:10:44,213][train_inner][INFO] - {"epoch": 15, "update": 14.938, "loss": "2.007", "ppl": "4.02", "wps": "363858", "ups": "3.08", "wpb": "118228", "bsz": "256", "num_updates": "768800", "lr": "0.000233535", "gnorm": "0.775", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.5", "wall": "250957"} [2022-08-03 23:11:48,964][train_inner][INFO] - {"epoch": 15, "update": 14.941, "loss": "2.013", "ppl": "4.03", "wps": "364358", "ups": "3.09", "wpb": "117960", "bsz": "256", "num_updates": "769000", "lr": "0.000233333", "gnorm": "0.776", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.7", "wall": "251021"} [2022-08-03 23:12:53,687][train_inner][INFO] - {"epoch": 15, "update": 14.945, "loss": "2.009", "ppl": "4.03", "wps": "364038", "ups": "3.09", "wpb": "117808", "bsz": "256", "num_updates": "769200", "lr": "0.000233131", "gnorm": "0.777", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "28.2", "wall": "251086"} [2022-08-03 23:13:58,693][train_inner][INFO] - {"epoch": 15, "update": 14.949, "loss": "2.008", "ppl": "4.02", "wps": "365124", "ups": "3.08", "wpb": "118673", "bsz": "256", "num_updates": "769400", "lr": "0.000232929", "gnorm": "0.771", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "251151"} [2022-08-03 23:14:33,350][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-03 23:15:03,906][train_inner][INFO] - {"epoch": 15, "update": 14.953, "loss": "2.013", "ppl": "4.04", "wps": "361566", "ups": "3.07", "wpb": "117892", "bsz": "256", "num_updates": "769600", "lr": "0.000232727", "gnorm": "0.777", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.9", "wall": "251216"} [2022-08-03 23:16:08,560][train_inner][INFO] - {"epoch": 15, "update": 14.957, "loss": "2.011", "ppl": "4.03", "wps": "364766", "ups": "3.09", "wpb": "117918", "bsz": "256", "num_updates": "769800", "lr": "0.000232525", "gnorm": "0.777", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22.6", "wall": "251281"} [2022-08-03 23:17:13,453][train_inner][INFO] - {"epoch": 15, "update": 14.961, "loss": "2.016", "ppl": "4.04", "wps": "363388", "ups": "3.08", "wpb": "117905", "bsz": "256", "num_updates": "770000", "lr": "0.000232323", "gnorm": "0.778", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.7", "wall": "251346"} [2022-08-03 23:18:18,426][train_inner][INFO] - {"epoch": 15, "update": 14.965, "loss": "2.011", "ppl": "4.03", "wps": "365661", "ups": "3.08", "wpb": "118788", "bsz": "256", "num_updates": "770200", "lr": "0.000232121", "gnorm": "0.773", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.7", "wall": "251411"} [2022-08-03 23:19:23,521][train_inner][INFO] - {"epoch": 15, "update": 14.969, "loss": "2.005", "ppl": "4.01", "wps": "362729", "ups": "3.07", "wpb": "118056", "bsz": "256", "num_updates": "770400", "lr": "0.000231919", "gnorm": "0.779", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "251476"} [2022-08-03 23:20:28,877][train_inner][INFO] - {"epoch": 15, "update": 14.972, "loss": "2.008", "ppl": "4.02", "wps": "362447", "ups": "3.06", "wpb": "118439", "bsz": "256", "num_updates": "770600", "lr": "0.000231717", "gnorm": "0.775", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "251541"} [2022-08-03 23:21:33,325][train_inner][INFO] - {"epoch": 15, "update": 14.976, "loss": "2.01", "ppl": "4.03", "wps": "366064", "ups": "3.1", "wpb": "117959", "bsz": "256", "num_updates": "770800", "lr": "0.000231515", "gnorm": "0.781", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "251606"} [2022-08-03 23:22:38,123][train_inner][INFO] - {"epoch": 15, "update": 14.98, "loss": "2.009", "ppl": "4.02", "wps": "364852", "ups": "3.09", "wpb": "118208", "bsz": "256", "num_updates": "771000", "lr": "0.000231313", "gnorm": "0.777", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.7", "wall": "251670"} [2022-08-03 23:23:43,097][train_inner][INFO] - {"epoch": 15, "update": 14.984, "loss": "2.011", "ppl": "4.03", "wps": "364895", "ups": "3.08", "wpb": "118540", "bsz": "256", "num_updates": "771200", "lr": "0.000231111", "gnorm": "0.779", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.4", "wall": "251735"} [2022-08-03 23:24:47,787][train_inner][INFO] - {"epoch": 15, "update": 14.988, "loss": "2.006", "ppl": "4.02", "wps": "366771", "ups": "3.09", "wpb": "118631", "bsz": "256", "num_updates": "771400", "lr": "0.000230909", "gnorm": "0.775", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21", "wall": "251800"} [2022-08-03 23:25:39,756][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-03 23:25:53,005][train_inner][INFO] - {"epoch": 15, "update": 14.992, "loss": "2.005", "ppl": "4.01", "wps": "363598", "ups": "3.07", "wpb": "118564", "bsz": "256", "num_updates": "771600", "lr": "0.000230707", "gnorm": "0.774", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.3", "wall": "251865"} [2022-08-03 23:26:58,170][train_inner][INFO] - {"epoch": 15, "update": 14.996, "loss": "2.004", "ppl": "4.01", "wps": "365250", "ups": "3.07", "wpb": "119005", "bsz": "256", "num_updates": "771800", "lr": "0.000230505", "gnorm": "0.775", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.6", "wall": "251930"} [2022-08-03 23:28:03,144][train_inner][INFO] - {"epoch": 15, "update": 15.0, "loss": "2.014", "ppl": "4.04", "wps": "363639", "ups": "3.08", "wpb": "118134", "bsz": "256", "num_updates": "772000", "lr": "0.000230303", "gnorm": "0.78", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.7", "wall": "251995"} [2022-08-03 23:28:08,370][fairseq_cli.train][INFO] - begin validation on "valid" subset [2022-08-03 23:28:31,198][valid][INFO] - {"epoch": 15, "valid_loss": "1.918", "valid_ppl": "3.78", "valid_wps": "1.59017e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "772016", "valid_best_loss": "1.918"} [2022-08-03 23:28:31,201][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 15 @ 772016 updates [2022-08-03 23:28:31,202][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_best.pt [2022-08-03 23:28:42,148][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_best.pt [2022-08-03 23:29:03,902][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/adan2/checkpoint_best.pt (epoch 15 @ 772016 updates, score 1.918) (writing took 32.700798738747835 seconds) [2022-08-03 23:29:03,902][fairseq_cli.train][INFO] - end of epoch 15 (average epoch stats below) [2022-08-03 23:29:03,903][train][INFO] - {"epoch": 15, "train_loss": "2.022", "train_ppl": "4.06", "train_wps": "361324", "train_ups": "3.05", "train_wpb": "118298", "train_bsz": "256", "train_num_updates": "772016", "train_lr": "0.000230287", "train_gnorm": "0.764", "train_clip": "0", "train_loss_scale": "16", "train_train_wall": "16621", "train_gb_free": "21.3", "train_wall": "252056"} [2022-08-03 23:29:04,013][fairseq.data.iterators][INFO] - grouped total_num_itrs = 51488 [2022-08-03 23:29:04,016][fairseq.trainer][INFO] - begin training epoch 16 [2022-08-03 23:29:04,017][fairseq_cli.train][INFO] - Start iterating over samples [2022-08-03 23:30:14,218][train_inner][INFO] - {"epoch": 16, "update": 15.004, "loss": "2.006", "ppl": "4.02", "wps": "180758", "ups": "1.53", "wpb": "118463", "bsz": "255.4", "num_updates": "772200", "lr": "0.000230101", "gnorm": "0.775", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24", "wall": "252127"} [2022-08-03 23:31:18,849][train_inner][INFO] - {"epoch": 16, "update": 15.007, "loss": "2.01", "ppl": "4.03", "wps": "365355", "ups": "3.09", "wpb": "118064", "bsz": "256", "num_updates": "772400", "lr": "0.000229899", "gnorm": "0.781", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.7", "wall": "252191"} [2022-08-03 23:32:23,532][train_inner][INFO] - {"epoch": 16, "update": 15.011, "loss": "2.004", "ppl": "4.01", "wps": "365173", "ups": "3.09", "wpb": "118100", "bsz": "256", "num_updates": "772600", "lr": "0.000229697", "gnorm": "0.783", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.9", "wall": "252256"} [2022-08-03 23:33:28,666][train_inner][INFO] - {"epoch": 16, "update": 15.015, "loss": "2.002", "ppl": "4", "wps": "360684", "ups": "3.07", "wpb": "117463", "bsz": "256", "num_updates": "772800", "lr": "0.000229495", "gnorm": "0.784", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.6", "wall": "252321"} [2022-08-03 23:34:33,300][train_inner][INFO] - {"epoch": 16, "update": 15.019, "loss": "2.004", "ppl": "4.01", "wps": "365663", "ups": "3.09", "wpb": "118168", "bsz": "256", "num_updates": "773000", "lr": "0.000229293", "gnorm": "0.777", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "252386"} [2022-08-03 23:35:38,245][train_inner][INFO] - {"epoch": 16, "update": 15.023, "loss": "2.011", "ppl": "4.03", "wps": "364042", "ups": "3.08", "wpb": "118212", "bsz": "256", "num_updates": "773200", "lr": "0.000229091", "gnorm": "0.78", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.3", "wall": "252451"} [2022-08-03 23:36:43,089][train_inner][INFO] - {"epoch": 16, "update": 15.027, "loss": "2.004", "ppl": "4.01", "wps": "364004", "ups": "3.08", "wpb": "118015", "bsz": "256", "num_updates": "773400", "lr": "0.000228889", "gnorm": "0.778", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.8", "wall": "252515"} [2022-08-03 23:37:48,051][train_inner][INFO] - {"epoch": 16, "update": 15.031, "loss": "2.004", "ppl": "4.01", "wps": "363165", "ups": "3.08", "wpb": "117957", "bsz": "256", "num_updates": "773600", "lr": "0.000228687", "gnorm": "0.779", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "25", "wall": "252580"} [2022-08-03 23:37:55,209][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-03 23:38:54,347][train_inner][INFO] - {"epoch": 16, "update": 15.035, "loss": "2.002", "ppl": "4.01", "wps": "355664", "ups": "3.02", "wpb": "117895", "bsz": "256", "num_updates": "773800", "lr": "0.000228485", "gnorm": "0.778", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "20.5", "wall": "252647"} [2022-08-03 23:39:59,488][train_inner][INFO] - {"epoch": 16, "update": 15.039, "loss": "2.008", "ppl": "4.02", "wps": "362897", "ups": "3.07", "wpb": "118197", "bsz": "256", "num_updates": "774000", "lr": "0.000228283", "gnorm": "0.774", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.7", "wall": "252712"} [2022-08-03 23:41:04,087][train_inner][INFO] - {"epoch": 16, "update": 15.042, "loss": "2.003", "ppl": "4.01", "wps": "363039", "ups": "3.1", "wpb": "117258", "bsz": "256", "num_updates": "774200", "lr": "0.000228081", "gnorm": "0.782", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20", "wall": "252776"} [2022-08-03 23:42:09,161][train_inner][INFO] - {"epoch": 16, "update": 15.046, "loss": "2.002", "ppl": "4", "wps": "365230", "ups": "3.07", "wpb": "118833", "bsz": "256", "num_updates": "774400", "lr": "0.000227879", "gnorm": "0.778", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.3", "wall": "252841"} [2022-08-03 23:43:14,111][train_inner][INFO] - {"epoch": 16, "update": 15.05, "loss": "2.002", "ppl": "4.01", "wps": "363415", "ups": "3.08", "wpb": "118018", "bsz": "256", "num_updates": "774600", "lr": "0.000227677", "gnorm": "0.782", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "26", "wall": "252906"} [2022-08-03 23:44:18,896][train_inner][INFO] - {"epoch": 16, "update": 15.054, "loss": "2.004", "ppl": "4.01", "wps": "365631", "ups": "3.09", "wpb": "118435", "bsz": "256", "num_updates": "774800", "lr": "0.000227475", "gnorm": "0.776", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.9", "wall": "252971"} [2022-08-03 23:45:23,583][train_inner][INFO] - {"epoch": 16, "update": 15.058, "loss": "2.008", "ppl": "4.02", "wps": "365349", "ups": "3.09", "wpb": "118164", "bsz": "256", "num_updates": "775000", "lr": "0.000227273", "gnorm": "0.782", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "253036"} [2022-08-03 23:46:22,422][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-03 23:46:28,535][train_inner][INFO] - {"epoch": 16, "update": 15.062, "loss": "2.012", "ppl": "4.03", "wps": "363186", "ups": "3.08", "wpb": "117946", "bsz": "256", "num_updates": "775200", "lr": "0.000227071", "gnorm": "0.783", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.3", "wall": "253101"} [2022-08-03 23:47:33,623][train_inner][INFO] - {"epoch": 16, "update": 15.066, "loss": "2.003", "ppl": "4.01", "wps": "362102", "ups": "3.07", "wpb": "117842", "bsz": "256", "num_updates": "775400", "lr": "0.000226869", "gnorm": "0.78", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.3", "wall": "253166"} [2022-08-03 23:48:38,345][train_inner][INFO] - {"epoch": 16, "update": 15.07, "loss": "2.005", "ppl": "4.01", "wps": "365975", "ups": "3.09", "wpb": "118431", "bsz": "256", "num_updates": "775600", "lr": "0.000226667", "gnorm": "0.779", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23.7", "wall": "253231"} [2022-08-03 23:49:43,192][train_inner][INFO] - {"epoch": 16, "update": 15.074, "loss": "2.002", "ppl": "4.01", "wps": "366176", "ups": "3.08", "wpb": "118726", "bsz": "256", "num_updates": "775800", "lr": "0.000226465", "gnorm": "0.778", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.8", "wall": "253296"} [2022-08-03 23:50:48,053][train_inner][INFO] - {"epoch": 16, "update": 15.077, "loss": "2.007", "ppl": "4.02", "wps": "362026", "ups": "3.08", "wpb": "117406", "bsz": "256", "num_updates": "776000", "lr": "0.000226263", "gnorm": "0.785", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.2", "wall": "253360"} [2022-08-03 23:51:52,754][train_inner][INFO] - {"epoch": 16, "update": 15.081, "loss": "2.004", "ppl": "4.01", "wps": "366527", "ups": "3.09", "wpb": "118570", "bsz": "256", "num_updates": "776200", "lr": "0.000226061", "gnorm": "0.779", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.1", "wall": "253425"} [2022-08-03 23:52:57,973][train_inner][INFO] - {"epoch": 16, "update": 15.085, "loss": "2", "ppl": "4", "wps": "364780", "ups": "3.07", "wpb": "118951", "bsz": "256", "num_updates": "776400", "lr": "0.000225859", "gnorm": "0.776", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "253490"} [2022-08-03 23:54:02,894][train_inner][INFO] - {"epoch": 16, "update": 15.089, "loss": "1.994", "ppl": "3.98", "wps": "364642", "ups": "3.08", "wpb": "118364", "bsz": "256", "num_updates": "776600", "lr": "0.000225657", "gnorm": "0.783", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "253555"} [2022-08-03 23:55:08,021][train_inner][INFO] - {"epoch": 16, "update": 15.093, "loss": "2.005", "ppl": "4.01", "wps": "364087", "ups": "3.07", "wpb": "118557", "bsz": "256", "num_updates": "776800", "lr": "0.000225455", "gnorm": "0.786", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.7", "wall": "253620"} [2022-08-03 23:56:12,869][train_inner][INFO] - {"epoch": 16, "update": 15.097, "loss": "2.003", "ppl": "4.01", "wps": "364049", "ups": "3.08", "wpb": "118039", "bsz": "256", "num_updates": "777000", "lr": "0.000225253", "gnorm": "0.784", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.6", "wall": "253685"} [2022-08-03 23:57:17,719][train_inner][INFO] - {"epoch": 16, "update": 15.101, "loss": "2.008", "ppl": "4.02", "wps": "366323", "ups": "3.08", "wpb": "118777", "bsz": "256", "num_updates": "777200", "lr": "0.000225051", "gnorm": "0.809", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.2", "wall": "253750"} [2022-08-03 23:58:22,421][train_inner][INFO] - {"epoch": 16, "update": 15.105, "loss": "2.009", "ppl": "4.02", "wps": "364279", "ups": "3.09", "wpb": "117847", "bsz": "256", "num_updates": "777400", "lr": "0.000224848", "gnorm": "0.787", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23", "wall": "253815"} [2022-08-03 23:59:27,602][train_inner][INFO] - {"epoch": 16, "update": 15.108, "loss": "1.994", "ppl": "3.98", "wps": "362134", "ups": "3.07", "wpb": "118018", "bsz": "256", "num_updates": "777600", "lr": "0.000224646", "gnorm": "0.779", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "253880"} [2022-08-04 00:00:32,417][train_inner][INFO] - {"epoch": 16, "update": 15.112, "loss": "2.003", "ppl": "4.01", "wps": "365891", "ups": "3.09", "wpb": "118574", "bsz": "256", "num_updates": "777800", "lr": "0.000224444", "gnorm": "0.778", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24.8", "wall": "253945"} [2022-08-04 00:01:37,376][train_inner][INFO] - {"epoch": 16, "update": 15.116, "loss": "1.999", "ppl": "4", "wps": "365070", "ups": "3.08", "wpb": "118572", "bsz": "256", "num_updates": "778000", "lr": "0.000224242", "gnorm": "0.781", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.5", "wall": "254010"} [2022-08-04 00:02:42,571][train_inner][INFO] - {"epoch": 16, "update": 15.12, "loss": "2.008", "ppl": "4.02", "wps": "362471", "ups": "3.07", "wpb": "118155", "bsz": "256", "num_updates": "778200", "lr": "0.00022404", "gnorm": "0.782", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "23.5", "wall": "254075"} [2022-08-04 00:03:47,150][train_inner][INFO] - {"epoch": 16, "update": 15.124, "loss": "2.002", "ppl": "4.01", "wps": "365852", "ups": "3.1", "wpb": "118130", "bsz": "256", "num_updates": "778400", "lr": "0.000223838", "gnorm": "0.782", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.8", "wall": "254139"} [2022-08-04 00:04:51,494][train_inner][INFO] - {"epoch": 16, "update": 15.128, "loss": "1.998", "ppl": "3.99", "wps": "367679", "ups": "3.11", "wpb": "118286", "bsz": "256", "num_updates": "778600", "lr": "0.000223636", "gnorm": "0.781", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "23.4", "wall": "254204"} [2022-08-04 00:05:56,512][train_inner][INFO] - {"epoch": 16, "update": 15.132, "loss": "2.002", "ppl": "4", "wps": "363713", "ups": "3.08", "wpb": "118238", "bsz": "256", "num_updates": "778800", "lr": "0.000223434", "gnorm": "0.784", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "254269"} [2022-08-04 00:07:01,106][train_inner][INFO] - {"epoch": 16, "update": 15.136, "loss": "2.01", "ppl": "4.03", "wps": "365680", "ups": "3.1", "wpb": "118102", "bsz": "256", "num_updates": "779000", "lr": "0.000223232", "gnorm": "0.779", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "19.7", "wall": "254333"} [2022-08-04 00:08:06,342][train_inner][INFO] - {"epoch": 16, "update": 15.14, "loss": "2.005", "ppl": "4.01", "wps": "362639", "ups": "3.07", "wpb": "118284", "bsz": "256", "num_updates": "779200", "lr": "0.00022303", "gnorm": "0.782", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.3", "wall": "254399"} [2022-08-04 00:08:31,239][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-04 00:09:11,434][train_inner][INFO] - {"epoch": 16, "update": 15.143, "loss": "2.005", "ppl": "4.01", "wps": "362027", "ups": "3.07", "wpb": "117822", "bsz": "256", "num_updates": "779400", "lr": "0.000222828", "gnorm": "0.783", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "26.9", "wall": "254464"} [2022-08-04 00:09:34,748][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-04 00:10:16,312][train_inner][INFO] - {"epoch": 16, "update": 15.147, "loss": "2.005", "ppl": "4.02", "wps": "365279", "ups": "3.08", "wpb": "118492", "bsz": "256", "num_updates": "779600", "lr": "0.000222626", "gnorm": "0.78", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.9", "wall": "254529"} [2022-08-04 00:11:21,291][train_inner][INFO] - {"epoch": 16, "update": 15.151, "loss": "1.996", "ppl": "3.99", "wps": "365452", "ups": "3.08", "wpb": "118733", "bsz": "256", "num_updates": "779800", "lr": "0.000222424", "gnorm": "0.781", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "254594"} [2022-08-04 00:12:25,897][train_inner][INFO] - {"epoch": 16, "update": 15.155, "loss": "2.005", "ppl": "4.01", "wps": "365310", "ups": "3.1", "wpb": "118004", "bsz": "256", "num_updates": "780000", "lr": "0.000222222", "gnorm": "0.784", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "28.2", "wall": "254658"} [2022-08-04 00:13:30,837][train_inner][INFO] - {"epoch": 16, "update": 15.159, "loss": "1.997", "ppl": "3.99", "wps": "365251", "ups": "3.08", "wpb": "118595", "bsz": "256", "num_updates": "780200", "lr": "0.00022202", "gnorm": "0.781", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "23.7", "wall": "254723"} [2022-08-04 00:14:35,740][train_inner][INFO] - {"epoch": 16, "update": 15.163, "loss": "2.009", "ppl": "4.03", "wps": "363101", "ups": "3.08", "wpb": "117830", "bsz": "256", "num_updates": "780400", "lr": "0.000221818", "gnorm": "0.781", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "254788"} [2022-08-04 00:15:40,432][train_inner][INFO] - {"epoch": 16, "update": 15.167, "loss": "2.009", "ppl": "4.02", "wps": "365612", "ups": "3.09", "wpb": "118259", "bsz": "256", "num_updates": "780600", "lr": "0.000221616", "gnorm": "0.785", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "24.2", "wall": "254853"} [2022-08-04 00:16:45,782][train_inner][INFO] - {"epoch": 16, "update": 15.171, "loss": "2", "ppl": "4", "wps": "361832", "ups": "3.06", "wpb": "118228", "bsz": "256", "num_updates": "780800", "lr": "0.000221414", "gnorm": "0.785", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.4", "wall": "254918"} [2022-08-04 00:17:50,532][train_inner][INFO] - {"epoch": 16, "update": 15.175, "loss": "1.999", "ppl": "4", "wps": "366181", "ups": "3.09", "wpb": "118549", "bsz": "256", "num_updates": "781000", "lr": "0.000221212", "gnorm": "0.783", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "22", "wall": "254983"} [2022-08-04 00:18:55,187][train_inner][INFO] - {"epoch": 16, "update": 15.178, "loss": "2.003", "ppl": "4.01", "wps": "365219", "ups": "3.09", "wpb": "118064", "bsz": "256", "num_updates": "781200", "lr": "0.00022101", "gnorm": "0.783", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "25.9", "wall": "255048"} [2022-08-04 00:20:00,622][train_inner][INFO] - {"epoch": 16, "update": 15.182, "loss": "1.996", "ppl": "3.99", "wps": "362212", "ups": "3.06", "wpb": "118505", "bsz": "256", "num_updates": "781400", "lr": "0.000220808", "gnorm": "0.78", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.4", "wall": "255113"} [2022-08-04 00:21:05,585][train_inner][INFO] - {"epoch": 16, "update": 15.186, "loss": "1.997", "ppl": "3.99", "wps": "364551", "ups": "3.08", "wpb": "118409", "bsz": "256", "num_updates": "781600", "lr": "0.000220606", "gnorm": "0.784", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.2", "wall": "255178"} [2022-08-04 00:22:10,772][train_inner][INFO] - {"epoch": 16, "update": 15.19, "loss": "1.999", "ppl": "4", "wps": "363767", "ups": "3.07", "wpb": "118564", "bsz": "256", "num_updates": "781800", "lr": "0.000220404", "gnorm": "0.784", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "27.8", "wall": "255243"} [2022-08-04 00:23:15,322][train_inner][INFO] - {"epoch": 16, "update": 15.194, "loss": "1.994", "ppl": "3.98", "wps": "368132", "ups": "3.1", "wpb": "118811", "bsz": "256", "num_updates": "782000", "lr": "0.000220202", "gnorm": "0.782", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20", "wall": "255308"} [2022-08-04 00:24:19,727][train_inner][INFO] - {"epoch": 16, "update": 15.198, "loss": "1.995", "ppl": "3.99", "wps": "366420", "ups": "3.11", "wpb": "117996", "bsz": "256", "num_updates": "782200", "lr": "0.00022", "gnorm": "0.785", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22", "wall": "255372"} [2022-08-04 00:25:24,359][train_inner][INFO] - {"epoch": 16, "update": 15.202, "loss": "2.001", "ppl": "4", "wps": "365165", "ups": "3.09", "wpb": "118005", "bsz": "256", "num_updates": "782400", "lr": "0.000219798", "gnorm": "0.784", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.6", "wall": "255437"} [2022-08-04 00:26:11,398][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-04 00:26:29,612][train_inner][INFO] - {"epoch": 16, "update": 15.206, "loss": "2.006", "ppl": "4.02", "wps": "362826", "ups": "3.07", "wpb": "118375", "bsz": "256", "num_updates": "782600", "lr": "0.000219596", "gnorm": "0.783", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.8", "wall": "255502"} [2022-08-04 00:27:33,957][train_inner][INFO] - {"epoch": 16, "update": 15.21, "loss": "2.001", "ppl": "4", "wps": "365640", "ups": "3.11", "wpb": "117636", "bsz": "256", "num_updates": "782800", "lr": "0.000219394", "gnorm": "0.783", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.4", "wall": "255566"} [2022-08-04 00:28:38,702][train_inner][INFO] - {"epoch": 16, "update": 15.213, "loss": "2.002", "ppl": "4.01", "wps": "362812", "ups": "3.09", "wpb": "117448", "bsz": "256", "num_updates": "783000", "lr": "0.000219192", "gnorm": "0.784", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.7", "wall": "255631"} [2022-08-04 00:29:43,763][train_inner][INFO] - {"epoch": 16, "update": 15.217, "loss": "2.003", "ppl": "4.01", "wps": "363251", "ups": "3.07", "wpb": "118166", "bsz": "256", "num_updates": "783200", "lr": "0.00021899", "gnorm": "0.783", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "255696"} [2022-08-04 00:30:48,389][train_inner][INFO] - {"epoch": 16, "update": 15.221, "loss": "1.998", "ppl": "4", "wps": "365836", "ups": "3.09", "wpb": "118210", "bsz": "256", "num_updates": "783400", "lr": "0.000218788", "gnorm": "0.785", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "26.5", "wall": "255761"} [2022-08-04 00:31:53,385][train_inner][INFO] - {"epoch": 16, "update": 15.225, "loss": "1.998", "ppl": "4", "wps": "363172", "ups": "3.08", "wpb": "118022", "bsz": "256", "num_updates": "783600", "lr": "0.000218586", "gnorm": "0.792", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.9", "wall": "255826"} [2022-08-04 00:32:58,459][train_inner][INFO] - {"epoch": 16, "update": 15.229, "loss": "1.999", "ppl": "4", "wps": "365225", "ups": "3.07", "wpb": "118832", "bsz": "256", "num_updates": "783800", "lr": "0.000218384", "gnorm": "0.786", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.2", "wall": "255891"} [2022-08-04 00:34:02,795][train_inner][INFO] - {"epoch": 16, "update": 15.233, "loss": "1.999", "ppl": "4", "wps": "366704", "ups": "3.11", "wpb": "117959", "bsz": "256", "num_updates": "784000", "lr": "0.000218182", "gnorm": "0.786", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "21.4", "wall": "255955"} [2022-08-04 00:35:07,680][train_inner][INFO] - {"epoch": 16, "update": 15.237, "loss": "1.993", "ppl": "3.98", "wps": "364482", "ups": "3.08", "wpb": "118246", "bsz": "256", "num_updates": "784200", "lr": "0.00021798", "gnorm": "0.785", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "27.1", "wall": "256020"} [2022-08-04 00:36:12,630][train_inner][INFO] - {"epoch": 16, "update": 15.241, "loss": "1.998", "ppl": "3.99", "wps": "363987", "ups": "3.08", "wpb": "118203", "bsz": "256", "num_updates": "784400", "lr": "0.000217778", "gnorm": "0.785", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.9", "wall": "256085"} [2022-08-04 00:37:17,604][train_inner][INFO] - {"epoch": 16, "update": 15.245, "loss": "2", "ppl": "4", "wps": "364344", "ups": "3.08", "wpb": "118362", "bsz": "256", "num_updates": "784600", "lr": "0.000217576", "gnorm": "0.785", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "256150"} [2022-08-04 00:38:22,583][train_inner][INFO] - {"epoch": 16, "update": 15.248, "loss": "2.001", "ppl": "4", "wps": "363822", "ups": "3.08", "wpb": "118202", "bsz": "256", "num_updates": "784800", "lr": "0.000217374", "gnorm": "0.789", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "256215"} [2022-08-04 00:39:27,378][train_inner][INFO] - {"epoch": 16, "update": 15.252, "loss": "2.002", "ppl": "4", "wps": "365453", "ups": "3.09", "wpb": "118396", "bsz": "256", "num_updates": "785000", "lr": "0.000217172", "gnorm": "0.786", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.6", "wall": "256280"} [2022-08-04 00:40:32,703][train_inner][INFO] - {"epoch": 16, "update": 15.256, "loss": "1.998", "ppl": "3.99", "wps": "362543", "ups": "3.06", "wpb": "118415", "bsz": "256", "num_updates": "785200", "lr": "0.00021697", "gnorm": "0.785", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.1", "wall": "256345"} [2022-08-04 00:41:37,434][train_inner][INFO] - {"epoch": 16, "update": 15.26, "loss": "1.995", "ppl": "3.99", "wps": "367067", "ups": "3.09", "wpb": "118801", "bsz": "256", "num_updates": "785400", "lr": "0.000216768", "gnorm": "0.781", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "24", "wall": "256410"} [2022-08-04 00:42:42,354][train_inner][INFO] - {"epoch": 16, "update": 15.264, "loss": "2.006", "ppl": "4.02", "wps": "362626", "ups": "3.08", "wpb": "117706", "bsz": "256", "num_updates": "785600", "lr": "0.000216566", "gnorm": "0.793", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "256475"} [2022-08-04 00:43:46,998][train_inner][INFO] - {"epoch": 16, "update": 15.268, "loss": "1.998", "ppl": "4", "wps": "367128", "ups": "3.09", "wpb": "118661", "bsz": "256", "num_updates": "785800", "lr": "0.000216364", "gnorm": "0.791", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.3", "wall": "256539"} [2022-08-04 00:44:51,718][train_inner][INFO] - {"epoch": 16, "update": 15.272, "loss": "1.997", "ppl": "3.99", "wps": "366085", "ups": "3.09", "wpb": "118464", "bsz": "256", "num_updates": "786000", "lr": "0.000216162", "gnorm": "0.784", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "25.4", "wall": "256604"} [2022-08-04 00:45:56,496][train_inner][INFO] - {"epoch": 16, "update": 15.276, "loss": "1.996", "ppl": "3.99", "wps": "366419", "ups": "3.09", "wpb": "118677", "bsz": "256", "num_updates": "786200", "lr": "0.00021596", "gnorm": "0.786", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "22", "wall": "256669"} [2022-08-04 00:47:01,612][train_inner][INFO] - {"epoch": 16, "update": 15.279, "loss": "1.99", "ppl": "3.97", "wps": "363123", "ups": "3.07", "wpb": "118224", "bsz": "256", "num_updates": "786400", "lr": "0.000215758", "gnorm": "0.786", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.8", "wall": "256734"} [2022-08-04 00:48:06,697][train_inner][INFO] - {"epoch": 16, "update": 15.283, "loss": "1.997", "ppl": "3.99", "wps": "363721", "ups": "3.07", "wpb": "118363", "bsz": "256", "num_updates": "786600", "lr": "0.000215556", "gnorm": "0.786", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22.4", "wall": "256799"} [2022-08-04 00:48:20,067][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-04 00:49:11,695][train_inner][INFO] - {"epoch": 16, "update": 15.287, "loss": "1.995", "ppl": "3.99", "wps": "363613", "ups": "3.08", "wpb": "118168", "bsz": "256", "num_updates": "786800", "lr": "0.000215354", "gnorm": "0.786", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "256864"} [2022-08-04 00:50:16,662][train_inner][INFO] - {"epoch": 16, "update": 15.291, "loss": "2.003", "ppl": "4.01", "wps": "362202", "ups": "3.08", "wpb": "117655", "bsz": "256", "num_updates": "787000", "lr": "0.000215152", "gnorm": "0.789", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.8", "wall": "256929"} [2022-08-04 00:51:21,891][train_inner][INFO] - {"epoch": 16, "update": 15.295, "loss": "1.996", "ppl": "3.99", "wps": "365320", "ups": "3.07", "wpb": "119145", "bsz": "256", "num_updates": "787200", "lr": "0.000214949", "gnorm": "0.784", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.2", "wall": "256994"} [2022-08-04 00:52:26,862][train_inner][INFO] - {"epoch": 16, "update": 15.299, "loss": "2.001", "ppl": "4", "wps": "363925", "ups": "3.08", "wpb": "118222", "bsz": "256", "num_updates": "787400", "lr": "0.000214747", "gnorm": "0.788", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.1", "wall": "257059"} [2022-08-04 00:53:32,495][train_inner][INFO] - {"epoch": 16, "update": 15.303, "loss": "1.992", "ppl": "3.98", "wps": "360030", "ups": "3.05", "wpb": "118147", "bsz": "256", "num_updates": "787600", "lr": "0.000214545", "gnorm": "0.792", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.5", "wall": "257125"} [2022-08-04 00:54:37,487][train_inner][INFO] - {"epoch": 16, "update": 15.307, "loss": "1.993", "ppl": "3.98", "wps": "366072", "ups": "3.08", "wpb": "118957", "bsz": "256", "num_updates": "787800", "lr": "0.000214343", "gnorm": "0.785", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "257190"} [2022-08-04 00:55:42,278][train_inner][INFO] - {"epoch": 16, "update": 15.311, "loss": "1.995", "ppl": "3.99", "wps": "364645", "ups": "3.09", "wpb": "118127", "bsz": "256", "num_updates": "788000", "lr": "0.000214141", "gnorm": "0.786", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.1", "wall": "257255"} [2022-08-04 00:56:48,327][train_inner][INFO] - {"epoch": 16, "update": 15.314, "loss": "1.994", "ppl": "3.98", "wps": "360045", "ups": "3.03", "wpb": "118901", "bsz": "256", "num_updates": "788200", "lr": "0.000213939", "gnorm": "0.785", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "20.5", "wall": "257321"} [2022-08-04 00:57:53,234][train_inner][INFO] - {"epoch": 16, "update": 15.318, "loss": "2", "ppl": "4", "wps": "364694", "ups": "3.08", "wpb": "118354", "bsz": "256", "num_updates": "788400", "lr": "0.000213737", "gnorm": "0.79", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "257386"} [2022-08-04 00:58:59,231][train_inner][INFO] - {"epoch": 16, "update": 15.322, "loss": "1.998", "ppl": "3.99", "wps": "358107", "ups": "3.03", "wpb": "118168", "bsz": "256", "num_updates": "788600", "lr": "0.000213535", "gnorm": "0.791", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "21.4", "wall": "257452"} [2022-08-04 00:59:32,302][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-04 01:00:04,521][train_inner][INFO] - {"epoch": 16, "update": 15.326, "loss": "1.996", "ppl": "3.99", "wps": "362614", "ups": "3.06", "wpb": "118372", "bsz": "256", "num_updates": "788800", "lr": "0.000213333", "gnorm": "0.79", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "22", "wall": "257517"} [2022-08-04 01:01:09,309][train_inner][INFO] - {"epoch": 16, "update": 15.33, "loss": "1.999", "ppl": "4", "wps": "364823", "ups": "3.09", "wpb": "118180", "bsz": "256", "num_updates": "789000", "lr": "0.000213131", "gnorm": "0.789", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.3", "wall": "257582"} [2022-08-04 01:02:13,902][train_inner][INFO] - {"epoch": 16, "update": 15.334, "loss": "2.001", "ppl": "4", "wps": "365127", "ups": "3.1", "wpb": "117921", "bsz": "256", "num_updates": "789200", "lr": "0.000212929", "gnorm": "0.789", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.2", "wall": "257646"} [2022-08-04 01:03:18,868][train_inner][INFO] - {"epoch": 16, "update": 15.338, "loss": "1.994", "ppl": "3.98", "wps": "364635", "ups": "3.08", "wpb": "118442", "bsz": "256", "num_updates": "789400", "lr": "0.000212727", "gnorm": "0.789", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.4", "wall": "257711"} [2022-08-04 01:04:23,480][train_inner][INFO] - {"epoch": 16, "update": 15.342, "loss": "1.991", "ppl": "3.97", "wps": "367427", "ups": "3.1", "wpb": "118699", "bsz": "256", "num_updates": "789600", "lr": "0.000212525", "gnorm": "0.786", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20", "wall": "257776"} [2022-08-04 01:05:28,526][train_inner][INFO] - {"epoch": 16, "update": 15.346, "loss": "1.997", "ppl": "3.99", "wps": "364352", "ups": "3.07", "wpb": "118496", "bsz": "256", "num_updates": "789800", "lr": "0.000212323", "gnorm": "0.786", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.2", "wall": "257841"} [2022-08-04 01:06:33,139][train_inner][INFO] - {"epoch": 16, "update": 15.349, "loss": "2.001", "ppl": "4", "wps": "365375", "ups": "3.1", "wpb": "118039", "bsz": "256", "num_updates": "790000", "lr": "0.000212121", "gnorm": "0.788", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.5", "wall": "257905"} [2022-08-04 01:07:38,118][train_inner][INFO] - {"epoch": 16, "update": 15.353, "loss": "1.991", "ppl": "3.98", "wps": "365466", "ups": "3.08", "wpb": "118736", "bsz": "256", "num_updates": "790200", "lr": "0.000211919", "gnorm": "0.786", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.7", "wall": "257970"} [2022-08-04 01:08:43,152][train_inner][INFO] - {"epoch": 16, "update": 15.357, "loss": "1.994", "ppl": "3.98", "wps": "365269", "ups": "3.08", "wpb": "118772", "bsz": "256", "num_updates": "790400", "lr": "0.000211717", "gnorm": "0.788", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "24.5", "wall": "258035"} [2022-08-04 01:09:48,091][train_inner][INFO] - {"epoch": 16, "update": 15.361, "loss": "1.992", "ppl": "3.98", "wps": "366092", "ups": "3.08", "wpb": "118866", "bsz": "256", "num_updates": "790600", "lr": "0.000211515", "gnorm": "0.785", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "258100"} [2022-08-04 01:10:39,053][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2022-08-04 01:10:53,370][train_inner][INFO] - {"epoch": 16, "update": 15.365, "loss": "1.992", "ppl": "3.98", "wps": "362256", "ups": "3.06", "wpb": "118238", "bsz": "256", "num_updates": "790800", "lr": "0.000211313", "gnorm": "0.79", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.8", "wall": "258166"} [2022-08-04 01:11:58,170][train_inner][INFO] - {"epoch": 16, "update": 15.369, "loss": "1.995", "ppl": "3.99", "wps": "364103", "ups": "3.09", "wpb": "117967", "bsz": "256", "num_updates": "791000", "lr": "0.000211111", "gnorm": "0.789", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.1", "wall": "258230"} [2022-08-04 01:13:03,245][train_inner][INFO] - {"epoch": 16, "update": 15.373, "loss": "1.991", "ppl": "3.97", "wps": "365622", "ups": "3.07", "wpb": "118964", "bsz": "256", "num_updates": "791200", "lr": "0.000210909", "gnorm": "0.786", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "258296"} [2022-08-04 01:14:08,003][train_inner][INFO] - {"epoch": 16, "update": 15.377, "loss": "1.996", "ppl": "3.99", "wps": "365717", "ups": "3.09", "wpb": "118413", "bsz": "256", "num_updates": "791400", "lr": "0.000210707", "gnorm": "0.788", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21", "wall": "258360"} [2022-08-04 01:15:13,477][train_inner][INFO] - {"epoch": 16, "update": 15.381, "loss": "1.992", "ppl": "3.98", "wps": "362162", "ups": "3.05", "wpb": "118559", "bsz": "256", "num_updates": "791600", "lr": "0.000210505", "gnorm": "0.787", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "258426"} [2022-08-04 01:16:19,346][train_inner][INFO] - {"epoch": 16, "update": 15.384, "loss": "1.995", "ppl": "3.98", "wps": "359541", "ups": "3.04", "wpb": "118412", "bsz": "256", "num_updates": "791800", "lr": "0.000210303", "gnorm": "0.786", "clip": "0", "loss_scale": "16", "train_wall": "66", "gb_free": "19.8", "wall": "258492"} [2022-08-04 01:17:24,349][train_inner][INFO] - {"epoch": 16, "update": 15.388, "loss": "1.992", "ppl": "3.98", "wps": "364416", "ups": "3.08", "wpb": "118438", "bsz": "256", "num_updates": "792000", "lr": "0.000210101", "gnorm": "0.79", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.6", "wall": "258557"} [2022-08-04 01:18:30,121][train_inner][INFO] - {"epoch": 16, "update": 15.392, "loss": "1.998", "ppl": "3.99", "wps": "358604", "ups": "3.04", "wpb": "117927", "bsz": "256", "num_updates": "792200", "lr": "0.000209899", "gnorm": "0.791", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.1", "wall": "258622"} [2022-08-04 01:19:35,080][train_inner][INFO] - {"epoch": 16, "update": 15.396, "loss": "1.99", "ppl": "3.97", "wps": "364552", "ups": "3.08", "wpb": "118404", "bsz": "256", "num_updates": "792400", "lr": "0.000209697", "gnorm": "0.788", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.9", "wall": "258687"} [2022-08-04 01:20:15,866][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-04 01:20:40,602][train_inner][INFO] - {"epoch": 16, "update": 15.4, "loss": "1.997", "ppl": "3.99", "wps": "361086", "ups": "3.05", "wpb": "118294", "bsz": "256", "num_updates": "792600", "lr": "0.000209495", "gnorm": "0.787", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.3", "wall": "258753"} [2022-08-04 01:21:45,509][train_inner][INFO] - {"epoch": 16, "update": 15.404, "loss": "1.993", "ppl": "3.98", "wps": "365890", "ups": "3.08", "wpb": "118743", "bsz": "256", "num_updates": "792800", "lr": "0.000209293", "gnorm": "0.784", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "25.8", "wall": "258818"} [2022-08-04 01:22:50,700][train_inner][INFO] - {"epoch": 16, "update": 15.408, "loss": "1.989", "ppl": "3.97", "wps": "362158", "ups": "3.07", "wpb": "118045", "bsz": "256", "num_updates": "793000", "lr": "0.000209091", "gnorm": "0.786", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "22.7", "wall": "258883"} [2022-08-04 01:23:55,554][train_inner][INFO] - {"epoch": 16, "update": 15.412, "loss": "1.993", "ppl": "3.98", "wps": "364008", "ups": "3.08", "wpb": "118036", "bsz": "256", "num_updates": "793200", "lr": "0.000208889", "gnorm": "0.787", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.5", "wall": "258948"} [2022-08-04 01:25:00,379][train_inner][INFO] - {"epoch": 16, "update": 15.415, "loss": "1.991", "ppl": "3.98", "wps": "367052", "ups": "3.09", "wpb": "118969", "bsz": "256", "num_updates": "793400", "lr": "0.000208687", "gnorm": "0.79", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20", "wall": "259013"} [2022-08-04 01:26:05,075][train_inner][INFO] - {"epoch": 16, "update": 15.419, "loss": "1.992", "ppl": "3.98", "wps": "365369", "ups": "3.09", "wpb": "118188", "bsz": "256", "num_updates": "793600", "lr": "0.000208485", "gnorm": "0.787", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.8", "wall": "259077"} [2022-08-04 01:27:09,815][train_inner][INFO] - {"epoch": 16, "update": 15.423, "loss": "1.992", "ppl": "3.98", "wps": "365533", "ups": "3.09", "wpb": "118320", "bsz": "256", "num_updates": "793800", "lr": "0.000208283", "gnorm": "0.794", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23", "wall": "259142"} [2022-08-04 01:28:14,974][train_inner][INFO] - {"epoch": 16, "update": 15.427, "loss": "1.993", "ppl": "3.98", "wps": "363914", "ups": "3.07", "wpb": "118560", "bsz": "256", "num_updates": "794000", "lr": "0.000208081", "gnorm": "0.787", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.1", "wall": "259207"} [2022-08-04 01:29:19,712][train_inner][INFO] - {"epoch": 16, "update": 15.431, "loss": "2.004", "ppl": "4.01", "wps": "364767", "ups": "3.09", "wpb": "118071", "bsz": "256", "num_updates": "794200", "lr": "0.000207879", "gnorm": "0.79", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.1", "wall": "259272"} [2022-08-04 01:30:24,778][train_inner][INFO] - {"epoch": 16, "update": 15.435, "loss": "1.994", "ppl": "3.98", "wps": "364013", "ups": "3.07", "wpb": "118423", "bsz": "256", "num_updates": "794400", "lr": "0.000207677", "gnorm": "0.794", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.6", "wall": "259337"} [2022-08-04 01:31:30,150][train_inner][INFO] - {"epoch": 16, "update": 15.439, "loss": "1.991", "ppl": "3.98", "wps": "362512", "ups": "3.06", "wpb": "118489", "bsz": "256", "num_updates": "794600", "lr": "0.000207475", "gnorm": "0.794", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.9", "wall": "259402"} [2022-08-04 01:32:35,089][train_inner][INFO] - {"epoch": 16, "update": 15.443, "loss": "2.001", "ppl": "4", "wps": "363641", "ups": "3.08", "wpb": "118069", "bsz": "256", "num_updates": "794800", "lr": "0.000207273", "gnorm": "0.801", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "21.1", "wall": "259467"} [2022-08-04 01:33:40,107][train_inner][INFO] - {"epoch": 16, "update": 15.447, "loss": "1.989", "ppl": "3.97", "wps": "365951", "ups": "3.08", "wpb": "118967", "bsz": "256", "num_updates": "795000", "lr": "0.000207071", "gnorm": "0.788", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.5", "wall": "259532"} [2022-08-04 01:34:15,649][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-04 01:34:45,299][train_inner][INFO] - {"epoch": 16, "update": 15.45, "loss": "1.988", "ppl": "3.97", "wps": "363843", "ups": "3.07", "wpb": "118596", "bsz": "256", "num_updates": "795200", "lr": "0.000206869", "gnorm": "0.788", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.3", "wall": "259598"} [2022-08-04 01:35:49,913][train_inner][INFO] - {"epoch": 16, "update": 15.454, "loss": "1.993", "ppl": "3.98", "wps": "366730", "ups": "3.1", "wpb": "118478", "bsz": "256", "num_updates": "795400", "lr": "0.000206667", "gnorm": "0.791", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.4", "wall": "259662"} [2022-08-04 01:36:54,682][train_inner][INFO] - {"epoch": 16, "update": 15.458, "loss": "1.997", "ppl": "3.99", "wps": "366214", "ups": "3.09", "wpb": "118595", "bsz": "256", "num_updates": "795600", "lr": "0.000206465", "gnorm": "0.79", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.8", "wall": "259727"} [2022-08-04 01:37:59,769][train_inner][INFO] - {"epoch": 16, "update": 15.462, "loss": "1.99", "ppl": "3.97", "wps": "365210", "ups": "3.07", "wpb": "118848", "bsz": "256", "num_updates": "795800", "lr": "0.000206263", "gnorm": "0.786", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.3", "wall": "259792"} [2022-08-04 01:39:04,644][train_inner][INFO] - {"epoch": 16, "update": 15.466, "loss": "1.992", "ppl": "3.98", "wps": "363885", "ups": "3.08", "wpb": "118033", "bsz": "256", "num_updates": "796000", "lr": "0.000206061", "gnorm": "0.802", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.8", "wall": "259857"} [2022-08-04 01:40:00,005][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 [2022-08-04 01:40:00,295][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 [2022-08-04 01:40:10,118][train_inner][INFO] - {"epoch": 16, "update": 15.47, "loss": "1.995", "ppl": "3.99", "wps": "363016", "ups": "3.05", "wpb": "118839", "bsz": "256", "num_updates": "796200", "lr": "0.000205859", "gnorm": "0.796", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.7", "wall": "259922"} [2022-08-04 01:41:14,719][train_inner][INFO] - {"epoch": 16, "update": 15.474, "loss": "1.995", "ppl": "3.99", "wps": "365617", "ups": "3.1", "wpb": "118096", "bsz": "256", "num_updates": "796400", "lr": "0.000205657", "gnorm": "0.793", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "24.5", "wall": "259987"} [2022-08-04 01:42:19,678][train_inner][INFO] - {"epoch": 16, "update": 15.478, "loss": "1.989", "ppl": "3.97", "wps": "364556", "ups": "3.08", "wpb": "118403", "bsz": "256", "num_updates": "796600", "lr": "0.000205455", "gnorm": "0.795", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "23", "wall": "260052"} [2022-08-04 01:43:24,831][train_inner][INFO] - {"epoch": 16, "update": 15.482, "loss": "1.988", "ppl": "3.97", "wps": "365817", "ups": "3.07", "wpb": "119169", "bsz": "256", "num_updates": "796800", "lr": "0.000205253", "gnorm": "0.782", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.8", "wall": "260117"} [2022-08-04 01:44:30,225][train_inner][INFO] - {"epoch": 16, "update": 15.485, "loss": "1.989", "ppl": "3.97", "wps": "361456", "ups": "3.06", "wpb": "118184", "bsz": "256", "num_updates": "797000", "lr": "0.000205051", "gnorm": "0.79", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "22.2", "wall": "260183"} [2022-08-04 01:45:35,521][train_inner][INFO] - {"epoch": 16, "update": 15.489, "loss": "1.992", "ppl": "3.98", "wps": "362507", "ups": "3.06", "wpb": "118350", "bsz": "256", "num_updates": "797200", "lr": "0.000204848", "gnorm": "0.804", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.4", "wall": "260248"} [2022-08-04 01:46:40,745][train_inner][INFO] - {"epoch": 16, "update": 15.493, "loss": "1.989", "ppl": "3.97", "wps": "363266", "ups": "3.07", "wpb": "118465", "bsz": "256", "num_updates": "797400", "lr": "0.000204646", "gnorm": "0.79", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.7", "wall": "260313"} [2022-08-04 01:47:45,805][train_inner][INFO] - {"epoch": 16, "update": 15.497, "loss": "1.983", "ppl": "3.95", "wps": "364686", "ups": "3.07", "wpb": "118620", "bsz": "256", "num_updates": "797600", "lr": "0.000204444", "gnorm": "0.79", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "28.2", "wall": "260378"} [2022-08-04 01:48:50,978][train_inner][INFO] - {"epoch": 16, "update": 15.501, "loss": "1.996", "ppl": "3.99", "wps": "364056", "ups": "3.07", "wpb": "118632", "bsz": "256", "num_updates": "797800", "lr": "0.000204242", "gnorm": "0.8", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.9", "wall": "260443"} [2022-08-04 01:49:56,029][train_inner][INFO] - {"epoch": 16, "update": 15.505, "loss": "1.99", "ppl": "3.97", "wps": "362383", "ups": "3.07", "wpb": "117864", "bsz": "256", "num_updates": "798000", "lr": "0.00020404", "gnorm": "0.795", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.1", "wall": "260508"} [2022-08-04 01:51:01,109][train_inner][INFO] - {"epoch": 16, "update": 15.509, "loss": "1.992", "ppl": "3.98", "wps": "364792", "ups": "3.07", "wpb": "118701", "bsz": "256", "num_updates": "798200", "lr": "0.000203838", "gnorm": "0.793", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20.4", "wall": "260573"} [2022-08-04 01:52:06,263][train_inner][INFO] - {"epoch": 16, "update": 15.513, "loss": "1.997", "ppl": "3.99", "wps": "364554", "ups": "3.07", "wpb": "118760", "bsz": "256", "num_updates": "798400", "lr": "0.000203636", "gnorm": "0.794", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.6", "wall": "260639"} [2022-08-04 01:53:10,975][train_inner][INFO] - {"epoch": 16, "update": 15.517, "loss": "1.99", "ppl": "3.97", "wps": "364406", "ups": "3.09", "wpb": "117905", "bsz": "256", "num_updates": "798600", "lr": "0.000203434", "gnorm": "0.8", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "23.6", "wall": "260703"} [2022-08-04 01:54:15,709][train_inner][INFO] - {"epoch": 16, "update": 15.52, "loss": "1.991", "ppl": "3.98", "wps": "363842", "ups": "3.09", "wpb": "117762", "bsz": "256", "num_updates": "798800", "lr": "0.000203232", "gnorm": "0.794", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "24.8", "wall": "260768"} [2022-08-04 01:55:21,258][train_inner][INFO] - {"epoch": 16, "update": 15.524, "loss": "1.989", "ppl": "3.97", "wps": "359673", "ups": "3.05", "wpb": "117880", "bsz": "256", "num_updates": "799000", "lr": "0.00020303", "gnorm": "0.796", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "21.5", "wall": "260834"} [2022-08-04 01:56:26,243][train_inner][INFO] - {"epoch": 16, "update": 15.528, "loss": "1.991", "ppl": "3.98", "wps": "365720", "ups": "3.08", "wpb": "118829", "bsz": "256", "num_updates": "799200", "lr": "0.000202828", "gnorm": "0.792", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "26.6", "wall": "260899"} [2022-08-04 01:56:53,421][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 [2022-08-04 01:57:31,587][train_inner][INFO] - {"epoch": 16, "update": 15.532, "loss": "1.988", "ppl": "3.97", "wps": "361679", "ups": "3.06", "wpb": "118165", "bsz": "256", "num_updates": "799400", "lr": "0.000202626", "gnorm": "0.795", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "22.3", "wall": "260964"} [2022-08-04 01:58:36,220][train_inner][INFO] - {"epoch": 16, "update": 15.536, "loss": "1.994", "ppl": "3.98", "wps": "364436", "ups": "3.09", "wpb": "117773", "bsz": "256", "num_updates": "799600", "lr": "0.000202424", "gnorm": "0.796", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "23.8", "wall": "261029"} [2022-08-04 01:59:41,321][train_inner][INFO] - {"epoch": 16, "update": 15.54, "loss": "1.99", "ppl": "3.97", "wps": "361654", "ups": "3.07", "wpb": "117717", "bsz": "256", "num_updates": "799800", "lr": "0.000202222", "gnorm": "0.797", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "22.5", "wall": "261094"} [2022-08-04 02:00:46,530][train_inner][INFO] - {"epoch": 16, "update": 15.544, "loss": "1.983", "ppl": "3.95", "wps": "362606", "ups": "3.07", "wpb": "118224", "bsz": "256", "num_updates": "800000", "lr": "0.00020202", "gnorm": "0.792", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21.9", "wall": "261159"} [2022-08-04 02:00:46,532][fairseq_cli.train][INFO] - begin validation on "valid" subset [2022-08-04 02:01:09,447][valid][INFO] - {"epoch": 16, "valid_loss": "1.903", "valid_ppl": "3.74", "valid_wps": "1.61134e+06", "valid_wpb": "117287", "valid_bsz": "255.5", "valid_num_updates": "800000", "valid_best_loss": "1.903"} [2022-08-04 02:01:09,450][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 16 @ 800000 updates [2022-08-04 02:01:09,450][fairseq.trainer][INFO] - Saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_16_800000.pt [2022-08-04 02:01:19,982][fairseq.trainer][INFO] - Finished saving checkpoint to /vit-opt/fairseq/multirun/2022-08-01/01-27-25/0/bert/adan2/checkpoint_16_800000.pt [2022-08-04 02:01:46,409][fairseq.checkpoint_utils][INFO] - Saved checkpoint bert/adan2/checkpoint_16_800000.pt (epoch 16 @ 800000 updates, score 1.903) (writing took 36.959332070313394 seconds) [2022-08-04 02:02:51,400][train_inner][INFO] - {"epoch": 16, "update": 15.548, "loss": "1.998", "ppl": "4", "wps": "189015", "ups": "1.6", "wpb": "118010", "bsz": "256", "num_updates": "800200", "lr": "0.000201818", "gnorm": "0.796", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "21", "wall": "261284"} [2022-08-04 02:03:56,529][train_inner][INFO] - {"epoch": 16, "update": 15.552, "loss": "1.993", "ppl": "3.98", "wps": "363662", "ups": "3.07", "wpb": "118423", "bsz": "256", "num_updates": "800400", "lr": "0.000201616", "gnorm": "0.794", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "20", "wall": "261349"} [2022-08-04 02:05:01,407][train_inner][INFO] - {"epoch": 16, "update": 15.555, "loss": "1.991", "ppl": "3.97", "wps": "365895", "ups": "3.08", "wpb": "118692", "bsz": "256", "num_updates": "800600", "lr": "0.000201414", "gnorm": "0.81", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.7", "wall": "261414"} [2022-08-04 02:06:06,090][train_inner][INFO] - {"epoch": 16, "update": 15.559, "loss": "1.996", "ppl": "3.99", "wps": "365049", "ups": "3.09", "wpb": "118061", "bsz": "256", "num_updates": "800800", "lr": "0.000201212", "gnorm": "0.796", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.3", "wall": "261478"} [2022-08-04 02:07:11,096][train_inner][INFO] - {"epoch": 16, "update": 15.563, "loss": "1.996", "ppl": "3.99", "wps": "363513", "ups": "3.08", "wpb": "118150", "bsz": "256", "num_updates": "801000", "lr": "0.00020101", "gnorm": "0.802", "clip": "0", "loss_scale": "2", "train_wall": "65", "gb_free": "19.9", "wall": "261543"} [2022-08-04 02:08:15,723][train_inner][INFO] - {"epoch": 16, "update": 15.567, "loss": "1.989", "ppl": "3.97", "wps": "364406", "ups": "3.09", "wpb": "117750", "bsz": "256", "num_updates": "801200", "lr": "0.000200808", "gnorm": "0.796", "clip": "0", "loss_scale": "2", "train_wall": "64", "gb_free": "20.7", "wall": "261608"} [2022-08-04 02:09:20,735][train_inner][INFO] - {"epoch": 16, "update": 15.571, "loss": "1.992", "ppl": "3.98", "wps": "363858", "ups": "3.08", "wpb": "118272", "bsz": "256", "num_updates": "801400", "lr": "0.000200606", "gnorm": "0.798", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.5", "wall": "261673"} [2022-08-04 02:10:25,619][train_inner][INFO] - {"epoch": 16, "update": 15.575, "loss": "1.992", "ppl": "3.98", "wps": "365766", "ups": "3.08", "wpb": "118660", "bsz": "256", "num_updates": "801600", "lr": "0.000200404", "gnorm": "0.791", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "25", "wall": "261738"} [2022-08-04 02:11:30,554][train_inner][INFO] - {"epoch": 16, "update": 15.579, "loss": "1.992", "ppl": "3.98", "wps": "364894", "ups": "3.08", "wpb": "118471", "bsz": "256", "num_updates": "801800", "lr": "0.000200202", "gnorm": "0.793", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "26", "wall": "261803"} [2022-08-04 02:12:35,315][train_inner][INFO] - {"epoch": 16, "update": 15.583, "loss": "1.993", "ppl": "3.98", "wps": "366675", "ups": "3.09", "wpb": "118730", "bsz": "256", "num_updates": "802000", "lr": "0.0002", "gnorm": "0.797", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20", "wall": "261868"} [2022-08-04 02:13:40,140][train_inner][INFO] - {"epoch": 16, "update": 15.586, "loss": "1.995", "ppl": "3.98", "wps": "364370", "ups": "3.09", "wpb": "118099", "bsz": "256", "num_updates": "802200", "lr": "0.000199798", "gnorm": "0.802", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "20.4", "wall": "261932"} [2022-08-04 02:14:45,004][train_inner][INFO] - {"epoch": 16, "update": 15.59, "loss": "1.988", "ppl": "3.97", "wps": "364174", "ups": "3.08", "wpb": "118108", "bsz": "256", "num_updates": "802400", "lr": "0.000199596", "gnorm": "0.796", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.8", "wall": "261997"} [2022-08-04 02:15:50,023][train_inner][INFO] - {"epoch": 16, "update": 15.594, "loss": "1.983", "ppl": "3.95", "wps": "363235", "ups": "3.08", "wpb": "118083", "bsz": "256", "num_updates": "802600", "lr": "0.000199394", "gnorm": "0.798", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.7", "wall": "262062"} [2022-08-04 02:16:54,812][train_inner][INFO] - {"epoch": 16, "update": 15.598, "loss": "1.983", "ppl": "3.95", "wps": "366229", "ups": "3.09", "wpb": "118638", "bsz": "256", "num_updates": "802800", "lr": "0.000199192", "gnorm": "0.795", "clip": "0", "loss_scale": "4", "train_wall": "64", "gb_free": "24.5", "wall": "262127"} [2022-08-04 02:17:59,993][train_inner][INFO] - {"epoch": 16, "update": 15.602, "loss": "1.994", "ppl": "3.98", "wps": "362967", "ups": "3.07", "wpb": "118290", "bsz": "256", "num_updates": "803000", "lr": "0.00019899", "gnorm": "0.796", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "20.5", "wall": "262192"} [2022-08-04 02:19:05,110][train_inner][INFO] - {"epoch": 16, "update": 15.606, "loss": "1.988", "ppl": "3.97", "wps": "363442", "ups": "3.07", "wpb": "118330", "bsz": "256", "num_updates": "803200", "lr": "0.000198788", "gnorm": "0.796", "clip": "0", "loss_scale": "4", "train_wall": "65", "gb_free": "22.1", "wall": "262257"} [2022-08-04 02:20:09,959][train_inner][INFO] - {"epoch": 16, "update": 15.61, "loss": "1.99", "ppl": "3.97", "wps": "365126", "ups": "3.08", "wpb": "118390", "bsz": "256", "num_updates": "803400", "lr": "0.000198586", "gnorm": "0.795", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23.2", "wall": "262322"} [2022-08-04 02:21:14,654][train_inner][INFO] - {"epoch": 16, "update": 15.614, "loss": "1.982", "ppl": "3.95", "wps": "364938", "ups": "3.09", "wpb": "118046", "bsz": "256", "num_updates": "803600", "lr": "0.000198384", "gnorm": "0.795", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "20.2", "wall": "262387"} [2022-08-04 02:22:19,297][train_inner][INFO] - {"epoch": 16, "update": 15.618, "loss": "1.986", "ppl": "3.96", "wps": "367672", "ups": "3.09", "wpb": "118835", "bsz": "256", "num_updates": "803800", "lr": "0.000198182", "gnorm": "0.791", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.8", "wall": "262452"} [2022-08-04 02:23:23,889][train_inner][INFO] - {"epoch": 16, "update": 15.621, "loss": "1.986", "ppl": "3.96", "wps": "364464", "ups": "3.1", "wpb": "117704", "bsz": "256", "num_updates": "804000", "lr": "0.00019798", "gnorm": "0.803", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "23.3", "wall": "262516"} [2022-08-04 02:24:28,894][train_inner][INFO] - {"epoch": 16, "update": 15.625, "loss": "1.986", "ppl": "3.96", "wps": "364697", "ups": "3.08", "wpb": "118534", "bsz": "256", "num_updates": "804200", "lr": "0.000197778", "gnorm": "0.796", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.2", "wall": "262581"} [2022-08-04 02:25:34,062][train_inner][INFO] - {"epoch": 16, "update": 15.629, "loss": "1.986", "ppl": "3.96", "wps": "363480", "ups": "3.07", "wpb": "118436", "bsz": "256", "num_updates": "804400", "lr": "0.000197576", "gnorm": "0.796", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "24.8", "wall": "262646"} [2022-08-04 02:26:38,723][train_inner][INFO] - {"epoch": 16, "update": 15.633, "loss": "1.979", "ppl": "3.94", "wps": "367340", "ups": "3.09", "wpb": "118760", "bsz": "256", "num_updates": "804600", "lr": "0.000197374", "gnorm": "0.792", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "19.8", "wall": "262711"} [2022-08-04 02:27:43,929][train_inner][INFO] - {"epoch": 16, "update": 15.637, "loss": "1.991", "ppl": "3.97", "wps": "363572", "ups": "3.07", "wpb": "118534", "bsz": "255.9", "num_updates": "804800", "lr": "0.000197172", "gnorm": "0.797", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "19.8", "wall": "262776"} [2022-08-04 02:28:48,841][train_inner][INFO] - {"epoch": 16, "update": 15.641, "loss": "1.982", "ppl": "3.95", "wps": "364812", "ups": "3.08", "wpb": "118400", "bsz": "256", "num_updates": "805000", "lr": "0.00019697", "gnorm": "0.802", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "21.6", "wall": "262841"} [2022-08-04 02:29:53,531][train_inner][INFO] - {"epoch": 16, "update": 15.645, "loss": "1.982", "ppl": "3.95", "wps": "366009", "ups": "3.09", "wpb": "118385", "bsz": "256", "num_updates": "805200", "lr": "0.000196768", "gnorm": "0.795", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "24.2", "wall": "262906"} [2022-08-04 02:30:57,736][train_inner][INFO] - {"epoch": 16, "update": 15.649, "loss": "1.986", "ppl": "3.96", "wps": "368329", "ups": "3.12", "wpb": "118241", "bsz": "256", "num_updates": "805400", "lr": "0.000196566", "gnorm": "0.802", "clip": "0", "loss_scale": "8", "train_wall": "64", "gb_free": "25.9", "wall": "262970"} [2022-08-04 02:32:02,407][train_inner][INFO] - {"epoch": 16, "update": 15.653, "loss": "1.993", "ppl": "3.98", "wps": "366642", "ups": "3.09", "wpb": "118553", "bsz": "256", "num_updates": "805600", "lr": "0.000196364", "gnorm": "0.797", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "21.6", "wall": "263035"} [2022-08-04 02:33:07,113][train_inner][INFO] - {"epoch": 16, "update": 15.656, "loss": "1.988", "ppl": "3.97", "wps": "364001", "ups": "3.09", "wpb": "117764", "bsz": "256", "num_updates": "805800", "lr": "0.000196162", "gnorm": "0.801", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.8", "wall": "263099"} [2022-08-04 02:34:11,647][train_inner][INFO] - {"epoch": 16, "update": 15.66, "loss": "1.991", "ppl": "3.97", "wps": "365784", "ups": "3.1", "wpb": "118025", "bsz": "256", "num_updates": "806000", "lr": "0.00019596", "gnorm": "0.797", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "20.4", "wall": "263164"} [2022-08-04 02:35:16,846][train_inner][INFO] - {"epoch": 16, "update": 15.664, "loss": "1.986", "ppl": "3.96", "wps": "362789", "ups": "3.07", "wpb": "118267", "bsz": "256", "num_updates": "806200", "lr": "0.000195758", "gnorm": "0.801", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "25.4", "wall": "263229"} [2022-08-04 02:36:21,746][train_inner][INFO] - {"epoch": 16, "update": 15.668, "loss": "1.988", "ppl": "3.97", "wps": "364062", "ups": "3.08", "wpb": "118136", "bsz": "256", "num_updates": "806400", "lr": "0.000195556", "gnorm": "0.798", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "19.9", "wall": "263294"} [2022-08-04 02:37:26,859][train_inner][INFO] - {"epoch": 16, "update": 15.672, "loss": "1.983", "ppl": "3.95", "wps": "364622", "ups": "3.07", "wpb": "118707", "bsz": "255.9", "num_updates": "806600", "lr": "0.000195354", "gnorm": "0.796", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20", "wall": "263359"} [2022-08-04 02:38:31,517][train_inner][INFO] - {"epoch": 16, "update": 15.676, "loss": "1.99", "ppl": "3.97", "wps": "366808", "ups": "3.09", "wpb": "118583", "bsz": "256", "num_updates": "806800", "lr": "0.000195152", "gnorm": "0.799", "clip": "0", "loss_scale": "16", "train_wall": "64", "gb_free": "25.3", "wall": "263424"} [2022-08-04 02:39:37,093][train_inner][INFO] - {"epoch": 16, "update": 15.68, "loss": "1.986", "ppl": "3.96", "wps": "360002", "ups": "3.05", "wpb": "118037", "bsz": "256", "num_updates": "807000", "lr": "0.000194949", "gnorm": "0.801", "clip": "0", "loss_scale": "16", "train_wall": "65", "gb_free": "20.2", "wall": "263489"} [2022-08-04 02:40:37,549][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2022-08-04 02:40:42,096][train_inner][INFO] - {"epoch": 16, "update": 15.684, "loss": "1.988", "ppl": "3.97", "wps": "361459", "ups": "3.08", "wpb": "117476", "bsz": "256", "num_updates": "807200", "lr": "0.000194747", "gnorm": "0.803", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20", "wall": "263554"} [2022-08-04 02:41:47,846][train_inner][INFO] - {"epoch": 16, "update": 15.688, "loss": "1.986", "ppl": "3.96", "wps": "360883", "ups": "3.04", "wpb": "118641", "bsz": "256", "num_updates": "807400", "lr": "0.000194545", "gnorm": "0.797", "clip": "0", "loss_scale": "8", "train_wall": "65", "gb_free": "20.4", "wall": "263620"}