# Copyright 2024 ByteDance and/or its affiliates. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== # Protenix Model Inference Test Script # # Purpose: # This script provides usage examples for running inference with various # Protenix model versions and configurations. # # Arguments Summary (for 'protenix pred' or 'runner/inference.py'): # -i, --input (str): [Required] Input JSON file or directory. # -o, --out_dir (str): [Default: ./output] Output directory for results. # -s, --seeds (str): [Default: 101] Inference seeds (e.g., "101,102"). # -c, --cycle (int): [Default: 10] Number of Pairformer cycles. # -p, --step (int): [Default: 200] Number of diffusion steps. # -e, --sample (int): [Default: 5] Samples per seed. # -d, --dtype (str): [Default: bf16] Inference data type (bf16, fp32). # -n, --model_name (str): [Default: protenix_base_default_v1.0.0] Model name. # NOTE: protenix_base_default_v1.0.0 is the RECOMMENDED default. # --use_msa (bool): Whether to use protein MSA features. # --use_default_params: Auto-load recommended defaults for the model. # --trimul_kernel (str): Triangle multiplicative kernel ('cuequivariance', 'torch'). # --triatt_kernel (str): Triangle attention kernel ('triattention', 'cuequivariance', etc.). # --use_template (bool): Enable template features (v1.0.0+ only). # --use_rna_msa (bool): Enable RNA MSA features (v1.0.0+ only). 
#   --use_seeds_in_json:       Prioritize seeds defined in the input JSON.
#   --use_tfg_guidance (bool): Use Training-Free Guidance (TFG) for inference.
#
# Available Models (Ref: configs/configs_model_type.py, docs/supported_models.md):
#   * protenix_base_default_v1.0.0: [DEFAULT] Advanced model supporting Template & RNA MSA (Training Data Cutoff: 2021-09-30).
#   1. protenix_base_20250630_v1.0.0: Latest model for practical scenarios (Training Data Cutoff: 2025-06-30).
#   2. protenix_base_default_v0.5.0: Standard base model (Training Data Cutoff: 2021-09-30).
#   3. protenix_base_constraint_v0.5.0: Base model with constraint support (Training Data Cutoff: 2021-09-30).
#   4. protenix_mini_esm_v0.5.0: Lightweight ESM-only model (no MSA) (Training Data Cutoff: 2021-09-30).
#   5. protenix_mini_ism_v0.5.0: Lightweight ISM-only model (no MSA) (Training Data Cutoff: 2021-09-30).
#   6. protenix_mini_default_v0.5.0: Standard lightweight model (Training Data Cutoff: 2021-09-30).
#   7. protenix_tiny_default_v0.5.0: Ultra-lightweight model (Training Data Cutoff: 2021-09-30).
# ==============================================================================

# ------------------------------------------------------------------------------
# Section 1: Running via Protenix CLI (protenix pred)
# ------------------------------------------------------------------------------

# ##############################################################################
# # !!! IMPORTANT: ENVIRONMENT SETUP !!!
# # ----------------------------------------------------------------------------
# # 1. Ensure environment variables are correctly set:
# #    - PROTENIX_ROOT_DIR: Your data root directory
# #    - CUTLASS_PATH: Path for deepspeed (e.g., /opt/cutlass/)
# #
# #    Uncomment and modify the lines below if needed:
# # # export PROTENIX_ROOT_DIR="/modify/to/your/data_root_dir"
# # # export CUTLASS_PATH=/opt/cutlass/
# #
# # 2. Dependency for Template & RNA MSA:
# #    If using these features, ensure 'kalign' and 'hmmer' are installed:
# #    apt-get update && apt-get install -y kalign hmmer
# ##############################################################################

echo "Starting Section 1: CLI-based inference tests..."

# Example 1.1: v1.0.0 base model, template features on, recommended defaults.
protenix pred \
  -i "examples/input.json" \
  -o "./test_outputs/cmd/output_base_v1" \
  -s 101 \
  -n "protenix_base_default_v1.0.0" \
  --use_template true \
  --use_default_params true

# Example 1.2: same model, but seeds listed in the input JSON take priority
# over the -s value.
protenix pred \
  -i "examples/examples_with_template/example_mgyp004658859411.json" \
  -o "./test_outputs/cmd/output_base_v1" \
  -s 101 \
  -n "protenix_base_default_v1.0.0" \
  --use_template true \
  --use_seeds_in_json true \
  --use_default_params true

# Example 1.3: RNA MSA features (v1.0.0 exclusive). No -s is given, so the
# CLI's default seed applies.
protenix pred \
  -i "examples/examples_with_rna_msa/example_9gmw_2.json" \
  -o "./test_outputs/cmd/output_base_v1" \
  -n "protenix_base_default_v1.0.0" \
  --use_rna_msa true \
  --use_default_params true

# Example 1.4: latest v1.0.0 model (2025-06-30 cutoff) with explicitly reduced
# cycle/step counts instead of --use_default_params.
protenix pred \
  -i "examples/input.json" \
  -o "./test_outputs/cmd/output_base_v1_20250630" \
  -s 101 \
  -n "protenix_base_20250630_v1.0.0" \
  -c 4 \
  -p 20 \
  --use_template true

# Example 1.5: base model v0.5.0 with precomputed MSA.
protenix pred \
  -i "examples/example.json" \
  -o "./test_outputs/cmd/output_base" \
  -s 101 \
  -c 4 \
  -p 20 \
  -n "protenix_base_default_v0.5.0" \
  --use_default_params true

# Example 1.6: mini model, ESM features only.
protenix pred \
  -i "examples/example.json" \
  -o "./test_outputs/cmd/output_mini_esm" \
  -s 102 \
  -n "protenix_mini_esm_v0.5.0" \
  --use_default_params true

# Example 1.7: mini model, ISM features only.
protenix pred \
  -i "examples/example.json" \
  -o "./test_outputs/cmd/output_mini_ism" \
  -s 103 \
  -n "protenix_mini_ism_v0.5.0" \
  --use_default_params true

# Example 1.8: base model with constraint support.
protenix pred \
  -i "examples/example_constraint_msa.json" \
  -o "./test_outputs/cmd/output_constraint" \
  -s 104 \
  -n "protenix_base_constraint_v0.5.0" \
  --use_default_params true

# Example 1.9: tiny default model.
protenix pred \
  -i "examples/example.json" \
  -o "./test_outputs/cmd/output_tiny" \
  -s 106 \
  -n "protenix_tiny_default_v0.5.0" \
  --use_default_params true

# Example 1.10: scaled-up model v1.0.0.
# NOTE(review): "protenix-v2" is not in the Available Models list in this
# header -- confirm the model name is registered before relying on this example.
protenix pred \
  -i "examples/input.json" \
  -o "./test_outputs/cmd/output_w2x" \
  -s 101 \
  -n "protenix-v2" \
  --use_template true \
  --use_default_params true

# ------------------------------------------------------------------------------
# Section 2: Running via Runner Script (runner/inference.py)
#
# IMPORTANT:
#   Direct script execution requires features (MSA, templates, RNA MSA, etc.)
#   to be pre-prepared in the input JSON. This mode is optimized for GPU-only
#   computation.
#   If features are NOT ready, please use the preprocessing command first:
#   Example: protenix prep --input examples/input.json --out_dir ./output
# ------------------------------------------------------------------------------

echo "Starting Section 2: Script-based inference tests..."
# Append the current working directory to PYTHONPATH (presumably so the
# repository's packages resolve when this is launched from the repo root --
# confirm). The ${VAR:+...} form only emits the "existing:" prefix when
# PYTHONPATH is already non-empty, which avoids a stray leading empty entry
# and keeps the line safe under `set -u`. Assignment and export are separate
# statements so a failing $(pwd) is not masked by `export`.
PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}$(pwd)"
export PYTHONPATH

# Every test below follows the same pattern: set the per-test parameters as
# shell variables, then pass them (quoted, so values containing spaces or glob
# characters survive intact) to runner/inference.py.

# Test 2.1: Base v1.0.0 with Template support
# Features: Template enabled, cuequivariance attention, seeds in JSON preferred
N_sample=5
N_step=200
N_cycle=10
seed=103
input_json_path="./examples/examples_with_template/example_9fm7.json"
dump_dir="./test_outputs/sh/output_m_9fm7"
model_name="protenix_base_default_v1.0.0"

python3 runner/inference.py \
  --model_name "${model_name}" \
  --seeds "${seed}" \
  --dump_dir "${dump_dir}" \
  --input_json_path "${input_json_path}" \
  --model.N_cycle "${N_cycle}" \
  --sample_diffusion.N_sample "${N_sample}" \
  --sample_diffusion.N_step "${N_step}" \
  --triangle_attention "cuequivariance" \
  --use_seeds_in_json true \
  --triangle_multiplicative "cuequivariance" \
  --use_template true

# Test 2.2: Latest model v1.0.0 with 2025-06-30 cutoff
N_sample=1
N_step=200
N_cycle=10
seed=101
input_json_path="./examples/input.json"
dump_dir="./test_outputs/sh/output_base_20250630"
model_name="protenix_base_20250630_v1.0.0"

python3 runner/inference.py \
  --model_name "${model_name}" \
  --seeds "${seed}" \
  --dump_dir "${dump_dir}" \
  --input_json_path "${input_json_path}" \
  --model.N_cycle "${N_cycle}" \
  --sample_diffusion.N_sample "${N_sample}" \
  --sample_diffusion.N_step "${N_step}" \
  --triangle_attention "cuequivariance" \
  --triangle_multiplicative "cuequivariance" \
  --use_template true

# Test 2.3: Base v0.5.0 with triattention
N_sample=1
N_step=200
N_cycle=10
seed=101
input_json_path="./examples/example.json"
dump_dir="./test_outputs/sh/output_base"
model_name="protenix_base_default_v0.5.0"

python3 runner/inference.py \
  --model_name "${model_name}" \
  --seeds "${seed}" \
  --dump_dir "${dump_dir}" \
  --input_json_path "${input_json_path}" \
  --model.N_cycle "${N_cycle}" \
  --sample_diffusion.N_sample "${N_sample}" \
  --sample_diffusion.N_step "${N_step}" \
  --triangle_attention "triattention" \
  --triangle_multiplicative "cuequivariance"

# Test 2.4: Mini ESM v0.5.0 with cuequivariance
N_sample=1
N_step=5
N_cycle=4
seed=101
input_json_path="./examples/example.json"
dump_dir="./test_outputs/sh/output_mini_esm"
model_name="protenix_mini_esm_v0.5.0"

python3 runner/inference.py \
  --model_name "${model_name}" \
  --seeds "${seed}" \
  --dump_dir "${dump_dir}" \
  --input_json_path "${input_json_path}" \
  --model.N_cycle "${N_cycle}" \
  --sample_diffusion.N_sample "${N_sample}" \
  --sample_diffusion.N_step "${N_step}" \
  --triangle_attention "cuequivariance" \
  --triangle_multiplicative "cuequivariance"

# Test 2.5: Mini ISM v0.5.0 with deepspeed
N_sample=1
N_step=5
N_cycle=4
seed=101
input_json_path="./examples/example.json"
dump_dir="./test_outputs/sh/output_mini_ism"
model_name="protenix_mini_ism_v0.5.0"

python3 runner/inference.py \
  --model_name "${model_name}" \
  --seeds "${seed}" \
  --dump_dir "${dump_dir}" \
  --input_json_path "${input_json_path}" \
  --model.N_cycle "${N_cycle}" \
  --sample_diffusion.N_sample "${N_sample}" \
  --sample_diffusion.N_step "${N_step}" \
  --triangle_attention "deepspeed" \
  --triangle_multiplicative "cuequivariance"

# Test 2.6: Base Constraint v0.5.0 with torch attention
N_sample=1
N_step=200
N_cycle=10
seed=101
input_json_path="./examples/example_constraint_msa.json"
dump_dir="./test_outputs/sh/output_constraint"
model_name="protenix_base_constraint_v0.5.0"

python3 runner/inference.py \
  --model_name "${model_name}" \
  --seeds "${seed}" \
  --dump_dir "${dump_dir}" \
  --input_json_path "${input_json_path}" \
  --model.N_cycle "${N_cycle}" \
  --sample_diffusion.N_sample "${N_sample}" \
  --sample_diffusion.N_step "${N_step}" \
  --triangle_attention "torch" \
  --triangle_multiplicative "cuequivariance"

# Test 2.7: Mini Default v0.5.0 with torch attention/multiplicative
N_sample=1
N_step=5
N_cycle=4
seed=101
input_json_path="./examples/example.json"
dump_dir="./test_outputs/sh/output_mini"
model_name="protenix_mini_default_v0.5.0"

python3 runner/inference.py \
  --model_name "${model_name}" \
  --seeds "${seed}" \
  --dump_dir "${dump_dir}" \
  --input_json_path "${input_json_path}" \
  --model.N_cycle "${N_cycle}" \
  --sample_diffusion.N_sample "${N_sample}" \
  --sample_diffusion.N_step "${N_step}" \
  --triangle_attention "torch" \
  --triangle_multiplicative "torch"

# Test 2.8: Tiny Default v0.5.0 with torch attention/multiplicative
N_sample=1
N_step=5
N_cycle=4
seed=101
input_json_path="./examples/example.json"
dump_dir="./test_outputs/sh/output_tiny"
model_name="protenix_tiny_default_v0.5.0"

python3 runner/inference.py \
  --model_name "${model_name}" \
  --seeds "${seed}" \
  --dump_dir "${dump_dir}" \
  --input_json_path "${input_json_path}" \
  --model.N_cycle "${N_cycle}" \
  --sample_diffusion.N_sample "${N_sample}" \
  --sample_diffusion.N_step "${N_step}" \
  --triangle_attention "torch" \
  --triangle_multiplicative "torch"

# Test 2.9: Inference with Training-Free Guidance (TFG) enabled
N_sample=1
N_step=200
N_cycle=10
seed=101
input_json_path="./examples/input.json"
dump_dir="./test_outputs/sh/output_tfg"
model_name="protenix_base_default_v1.0.0"

python3 runner/inference.py \
  --model_name "${model_name}" \
  --seeds "${seed}" \
  --dump_dir "${dump_dir}" \
  --input_json_path "${input_json_path}" \
  --model.N_cycle "${N_cycle}" \
  --sample_diffusion.N_sample "${N_sample}" \
  --sample_diffusion.N_step "${N_step}" \
  --sample_diffusion.guidance.enable true

# This message is printed unconditionally; it does not reflect the exit status
# of the individual runs above.
echo "All inference tests completed."

# The following is a demo to use DDP for inference.
# NPROC, WORKER_0_HOST, WORKER_0_PORT, ID and WORKER_NUM are not set by this
# part of the script and must be supplied by the caller.
# torchrun \
#   --nproc_per_node $NPROC \
#   --master_addr $WORKER_0_HOST \
#   --master_port $WORKER_0_PORT \
#   --node_rank=$ID \
#   --nnodes=$WORKER_NUM \
#   runner/inference.py \
#   --seeds ${seed} \
#   --dump_dir ${dump_dir} \
#   --input_json_path ${input_json_path} \
#   --model.N_cycle ${N_cycle} \
#   --sample_diffusion.N_sample ${N_sample} \
#   --sample_diffusion.N_step ${N_step}