dlc

Deep learning configuration.
Table of Contents
- Linux
- SSH
- MobaXterm
- VSCode
- Git
- Conda
- Pip
- Vim
- Tmux
- Python
- PyTorch
- Seed
- Args
- Configs
- Debug
- WandB
- TensorBoard
- HuggingFace
- Gradio
Linux
- Choose Ubuntu 22.04 LTS.
- Know common commands.
ls <path> # list directory ll <path> # list long directory tree <path> -a -L <layers> # list directory tree, install by `sudo apt install tree` cd <path> # change directory pwd # print working directory clear # clear command line touch <path> # make file mkdir <path> # make directory cp -r <src_path> <dst_path> # copy mv <src_path> <dst_path> # move rm -rf <src_path> # remove ln -s <src_path> <dst_path> # soft link unlink <path> # unlink du -h <path> --max-depth <depth> # show disk usage df -h <path> # show disk free sudo apt install <pkg_name> # install package wget <url> -O <dst_path> # web get aria2c -x 16 -s 16 <url> -o <dst_path> # multi-thread download, install by `sudo apt-get install aria2` rsync -avz --progress -e "ssh -p <port>" <src_path> <username>@<ip>:<dst_path> # remote sync, local and remote both need install by `sudo apt install rsync` zip -r <dst_path>.zip <src_path> # zip unzip <src_path>.zip -d <dst_path> # unzip tar -czf <dst_path>.tar.gz <src_path> # tar tar -xzf <src_path>.tar.gz -C <dst_path> # untar ... | grep <str> # find in output which ... # find bin path [up]/[down] # use command in history history # command history ps # process status htop # show table of processes kill -9 <pid> # kill process [ctrl]+[c] # cancel process [ctrl]+[z] # pause process -
~/.bashrc: Set environment variables, renew bysource ~/.bashrc.export LD_LIBRARY_PATH=/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH -
run.sh: Run batch commands bybash run.sh.#!/bin/bash seqs=(seq1 seq2) for seq in "${seqs[@]}"; do if [ -e "$seq" ]; then python train.py --path "$seq" else echo "no $seq" fi done - Store
`data`, `model`, `software` in the big disk, store `code` in the small disk, use a soft link to connect them.
SSH
- Local: Generate keys by
`ssh-keygen -t ed25519 -C "your_email"`, saved at `~/.ssh/id_ed25519`. You can generate many, just save them under different names. - Server: Generate keys by
`ssh-keygen -t ed25519 -C "your_email"`, saved at `~/.ssh/id_ed25519`. You can generate many, just save them under different names. - Server: Authorize local keys by
`touch ~/.ssh/authorized_keys` and `echo "your_local_pub_key" >> ~/.ssh/authorized_keys`. You can authorize many, just append new lines. -
Local: Edit config in
~/.ssh/configasHost <abbr> HostName <ip> Port <port> User <username> PreferredAuthentications publickey IdentityFile "<your_local_priv_key_path>" - Tunneling: Achieved by MobaXterm.
- X11 forwarding: Achieved by MobaXterm.
MobaXterm
- Download MobaXterm.
- Set
`hostname`, `port`, `username`, `password`. - Set private key.
VSCode
- Download VSCode.
- Install
Remoteextension. - Remote config by
~/.ssh/config.
Git
-
Know common commands.
git init -b main # initialize git clone <url> # clone git status # status git config user.name "your_name" # config name git config user.email "your_email" # config email git add <path> # add git commit -m "your_message" # commit git log --all --graph --decorate # history git branch # list branches git checkout -b <br_name> # create branch git remote -vv # list remotes git remote add <name> <url> # add remote git remote set-url <name> <url> # edit remote git pull # pull git push -u <name> <local_br_name>:<remote_br_name> # push # to different branch git stash list git stash push -u -m "your_message" # stash local modification git stash pop # pop stash # with different branches or different users git worktree add ../<repo_name>.<feat_name> <br_name> # create worktree git worktree remove ../<repo_name>.<feat_name> # remove worktree .gitignoreand.gitkeepfor ignoring files.- GitHub authentication: Add public key on GitHub, check by
ssh -T git@github.com. -
Multiple users: Edit ssh config in
~/.ssh/configasHost github-<your_name> HostName ssh.github.com Port 443 User git PreferredAuthentications publickey IdentityFile "<your_priv_key_path>" -
`git-filter-repo` as `pip install git-filter-repo # install`, `git filter-repo --path <file_or_dir> --invert-paths --force # remove files`, `git filter-repo --replace-text replacements.txt --force # replace codes` with
`replacements.txt` as `foo==>bar` and `regex:foo(?:\r?\n)?==>`
Conda
- Choose Miniconda.
-
Use
`mamba` solver by `conda config --set solver libmamba` -
Use
conda-forgechannel byconda config --add channels conda-forge conda config --set channel_priority strict -
Know common commands.
conda create -n <env_name> python=3.10 # create conda env create -f environment.yml/env.lock # create from file conda create -n <env_name> --clone <old_env_name> # clone environment conda activate <env_name> # activate conda deactivate # deactivate conda remove -n <env_name> --all # remove environment conda env list # list environments conda install <pkg_name>=2.0.0 -c https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main/ # install package with tsinghua source conda uninstall <pkg_name> # uninstall package conda list # list packages conda env export > environment.yml # export package list conda list --explicit > env.lock # export package lock -
environment.ymlasname: your_env_name channels: - pytorch - nvidia - conda-forge - defaults - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main/ - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free/ - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/msys2/ dependencies: - python=3.10 - pip - pytorch::pytorch=2.0.0 - pytorch::torchvision=0.15.0 - pip: # no real-time info - numpy>2.0.0,<=2.1.0 - scipy==1.15.0 - tqdm - -i https://pypi.tuna.tsinghua.edu.cn/simple -
Know common environment variables.
CONDA_PREFIX=/path/to/miniconda3/envs/env_name # /path/to/miniconda3 for base CONDA_DEFAULT_ENV=env_name CONDA_PYTHON_EXE=/path/to/miniconda3/envs/env_name/bin/python # /path/to/miniconda3/bin/python for base CONDA_EXE=/path/to/miniconda3/bin/conda -
Customize environment activation hook by
$CONDA_PREFIX/etc/conda/activate.d/*.shand$CONDA_PREFIX/etc/conda/deactivate.d/*.sh.# $CONDA_PREFIX/etc/conda/activate.d/env_vars.sh export OLD_LD_LIBRARY_PATH="$LD_LIBRARY_PATH" export LD_LIBRARY_PATH="$CONDA_PREFIX/lib${LD_LIBRARY_PATH:+:}$LD_LIBRARY_PATH"# $CONDA_PREFIX/etc/conda/deactivate.d/env_vars.sh export LD_LIBRARY_PATH="$OLD_LD_LIBRARY_PATH"
Pip
-
Know common commands.
pip install <pkg_name>==2.0.0 -i https://pypi.tuna.tsinghua.edu.cn/simple # install with tsinghua source pip install -r requirements.txt # install from file pip install -e <pkg_path>[<extra_name>] # install editable locally pip uninstall <pkg_name> # uninstall pip list # list packages -
requirements.txtasnumpy>2.0.0,<=2.1.0 scipy==1.15.0 tqdm# generate requirements.txt pip install pipreqs pipreqs /path/to/project -
setup.pyasfrom setuptools import setup, find_packages basics = [ 'numpy>2.0.0,<=2.1.0', 'tqdm', ] extras = { 'dev': [ 'scipy==1.15.0', ], 'other': [ 'pynput', ], } extras['all'] = list(set({pkg for pkgs in extras.values() for pkg in pkgs})) setup( name = 'your_pkg_name', version = '0.0.1', license = 'MIT', description = 'your_pkg_description', author = "your_name", author_email = "your_email", maintainer = "your_name", maintainer_email = "your_email", url = "your_pkg_url", packages = find_packages(), include_package_data = True, install_requires = basics, extras_require = extras, zip_safe = False ) -
pyproject.tomlas[build-system] requires = ["setuptools>=61.0", "wheel"] build-backend = "setuptools.build_meta" [project] name = "your_pkg_name" version = "0.0.1" description = "your_pkg_description" license = { text = "MIT" } authors = [ { name = "your_name", email = "your_email" } ] maintainers = [ { name = "your_name", email = "your_email" } ] dependencies = [ "numpy>2.0.0,<=2.1.0", "tqdm", ] requires-python = ">=3.7" urls = { "Homepage" = "your_pkg_url" } [project.optional-dependencies] dev = [ "scipy==1.15.0", ] other = [ "pynput", ] all = [ "scipy==1.15.0", "pynput", ] [tool.setuptools] include-package-data = true [tool.setuptools.packages.find] where = ["."] -
Set cache path in
~/.bashrcbyexport PIP_CACHE_DIR=/data/.cache/pip, default as~/.cache/pip.
Vim
-
Know common commands.
[i] # insert [a] # append [o] # open a new line below [esc] # return [dd] # delete line [j] # down [k] # up [h] # left [l] # right [w] # next word [e] # word end [b] # back word [0] # beginning line [$] # end line [I] # insert beginning line [A] # append end line [ctrl]+[u] # scroll up [ctrl]+[d] # scroll down [gg] # beginning file [G] # end file [:wq] # save and quit [:q!] # quit without save
Tmux
- Install by
sudo apt install tmux. -
Know common commands.
tmux new -s <name> # create session tmux ls # list sessions tmux a -t <name> # attach session [ctrl]+[b] [d] # detach session [ctrl]+[d] # destroy pane, window, session [ctrl]+[b] [[] # scrollback [ctrl]+[b] [c] # create window [ctrl]+[b] [p] # change to previous window [ctrl]+[b] [n] # change to next window [ctrl]+[b] [,] # rename window [ctrl]+[b] ["] # horizontally split pane [ctrl]+[b] [%] # vertically split pane [ctrl]+[b] [up]/[down]/[left]/[right] # change to other pane
Python
- Choose
Python 3.10. - Add temporary library finding path by
PYTHONPATH=/path/to/lib python train.py. - Cell run by
# %% cell_namefor vscode/pycharm.
PyTorch
- Check CUDA driver version and GPU status by
nvidia-smi. - Install PyTorch according to the CUDA driver version.
`nvcc` (CUDA toolkit) is needed only to compile CUDA programs; check the CUDA toolkit version by `nvcc --version`. - Install CUDA Toolkit, under
/usr/local/cudaby default. You can also install byconda install -c "nvidia/label/cuda-12.4.0" cuda-toolkitfor user only. - Set environment variables in
~/.bashrcbyexport CUDA_HOME=/usr/local/cuda,export PATH=$CUDA_HOME/bin:$PATHandexport LD_LIBRARY_PATH=$CUDA_HOME/lib64:$LD_LIBRARY_PATH. - Check GPU available by
torch.cuda.is_available(). - Choose GPU by
CUDA_VISIBLE_DEVICES=0,1 python train.py. - Set cache path in
~/.bashrcbyexport TORCH_HOME=/data/.cache/torch, default as~/.cache/torch.
Seed
import os
import random
import numpy as np
import torch
def setup_seed(seed: int = 42) -> None:
    """Seed every common RNG source (python, numpy, torch) for reproducibility.

    Args:
        seed: shared seed value for all RNGs. Defaults to 42.
    """
    random.seed(seed)
    np.random.seed(seed)
    # NOTE: hash randomization is fixed at interpreter startup; setting this
    # here only affects subprocesses spawned after this call.
    os.environ['PYTHONHASHSEED'] = str(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)      # no-op when CUDA is unavailable
    torch.cuda.manual_seed_all(seed)  # seed every visible GPU
    torch.backends.cudnn.deterministic = True
    # Fix: benchmark mode autotunes kernels nondeterministically; disable it
    # so cudnn.deterministic actually yields reproducible runs.
    torch.backends.cudnn.benchmark = False
Args
-
argparse.import argparse def arg_parse() -> argparse.Namespace: parser = argparse.ArgumentParser() parser.add_argument("--verbose", action="store_true", default=False, help="whether verbose") parser.add_argument("--batch_size", type=int, required=True, help="batch size") parser.add_argument("--devices", type=str, nargs="+", default=[], help="pytorch device") args = parser.parse_args() return args args = arg_parse() print(args.verbose) -
tyro.pip install tyrofrom typing import Tuple import tyro def train(verbose: bool, batch_size: int = 16, devices: Tuple[str, ...] = ()) -> None: print(verbose) if __name__ == "__main__": tyro.cli(train)from typing import Tuple from dataclasses import dataclass import tyro @dataclass class Config: verbose: bool batch_size: int = 16 devices: Tuple[str, ...] = () if __name__ == "__main__": config = tyro.cli(Config) print(config.verbose) -
tappip install typed-argument-parserfrom typing import Tuple from tap import Tap class SimpleArgumentParser(Tap): verbose: bool # whether verbose batch_size: int = 16 # batch size devices: Tuple[str, ...] = () # pytorch device if __name__ == "__main__": args = SimpleArgumentParser().parse_args() print(args.verbose)
Configs
-
configargparse.pip install ConfigArgParseimport configargparse def config_parse() -> configargparse.Namespace: parser = configargparse.ArgumentParser() parser.add_argument('--config', is_config_file=True, help='config file path') parser.add_argument("--verbose", action="store_true", default=False, help="whether verbose") parser.add_argument("--batch_size", type=int, required=True, help="batch size") parser.add_argument("--devices", type=str, nargs="+", default=[], help="pytorch device") args = parser.parse_args() return args# config.txt verbose = False batch_size = 8 devices = [cuda:0, cuda:1] -
json.import json def load_config(path:str) -> dict: with open(path, 'r', encoding='utf-8') as f: config = json.load(f) return config def save_config(path:str, config:dict) -> None: with open(path, 'w', encoding='utf-8') as f: json.dump(config, f, ensure_ascii=False, indent=4){ "verbose": false, "batch_size": 8, "devices": ["cuda:0", "cuda:1"] } -
yaml.pip install PyYAMLimport yaml def load_config(path:str) -> dict: with open(path, 'r', encoding='utf-8') as f: config = yaml.load(f, Loader=yaml.FullLoader) return config def save_config(path:str, config:dict) -> None: with open(path, 'w', encoding='utf-8') as f: yaml.dump(config, f, allow_unicode=True, sort_keys=False)# config.yaml verbose: false batch_size: 8 devices: - cuda:0 - cuda:1 -
hydra.pip install hydra-coreimport hydra from omegaconf import DictConfig @hydra.main(config_path='./configs', config_name='config', version_base='1.2') def train(cfg:DictConfig) -> None: hydra_cfg = hydra.core.hydra_config.HydraConfig.get() output_dir = hydra_cfg['runtime']['output_dir'] pass if __name__ == '__main__': train()# configs/config.yaml defaults: - training: config # configs/training/config.yaml - _self_ abbr: 'exp0' verbose: False hydra: run: dir: "./outputs/train/${abbr}_${now:%Y_%m_%d_%H_%M_%S}"
Debug
-
import pdb; pdb.set_trace().n: next s: step c: continue l: list p: print -
debugpy.pip install debugpyimport debugpy debugpy.listen(("0.0.0.0", 5678)) print("Waiting for debugger attach...") debugpy.wait_for_client() # vscode remote debug print("Debugger attached!!!") -
rich.pip install richimport torch from rich import print # pretty print from rich import inspect # inspect types, values, properties and methods from rich.traceback import install as install_traceback # pretty traceback install_traceback(show_locals=True, suppress=[torch]) my_dict = { 'verbose': True, 'batch_size': 16, 'devices': ['cuda:0', 'cuda:1'] } print("[i]Hello[/i], [bold red]World[/bold red]!", ":thumbs_up:", my_dict) inspect(my_dict, methods=True)
WandB
# One-time setup (shell): install the client and authenticate with your API key.
pip install wandb
wandb login
import wandb
# Start a tracked run; project groups runs, name labels this experiment.
wandb.init(project="your_proj_name", name="your_exp_name")
# NOTE(review): num_epochs, train_loss, val_loss, val_acc, image are assumed
# to come from the surrounding training loop — this is an illustrative fragment.
for epoch in range(num_epochs):
# Log scalars and media in one call; slash-separated keys group panels,
# step=epoch aligns all metrics on the same x-axis.
wandb.log({
'Loss/train': train_loss,
'Loss/val': val_loss,
'Accuracy/val': val_acc,
'Input/Image': wandb.Image(image)
}, step=epoch)
# Mark the run as finished so buffered data is flushed to the server.
wandb.finish()
TensorBoard
# One-time setup (shell).
pip install tensorboard
from torch.utils.tensorboard import SummaryWriter
# All events are written under log_dir; one writer per experiment.
tb_writer = SummaryWriter(log_dir="your_log_dir")
# NOTE(review): num_epochs, train_loss, val_loss, val_acc, image are assumed
# to come from the surrounding training loop — this is an illustrative fragment.
for epoch in range(num_epochs):
# add_scalars plots multiple curves ('train'/'val') on one chart named 'Loss'.
tb_writer.add_scalars('Loss', {'train': train_loss, 'val': val_loss}, epoch)
tb_writer.add_scalar('Accuracy/val', val_acc, epoch)
tb_writer.add_image('Input/Image', image, epoch)
# Flush and release the event file.
tb_writer.close()
# View in a browser (shell):
tensorboard --logdir=<your_log_dir> --port=6006 # http://localhost:6006
HuggingFace
- Set cache hub path in
~/.bashrcbyexport HF_HOME=/data/.cache/huggingface, default as~/.cache/huggingface. - Set mirror endpoint in
~/.bashrcbyexport HF_ENDPOINT=https://hf-mirror.com.
Gradio
# One-time setup (shell).
pip install gradio
import gradio as gr
# Blocks gives full layout control; components declared inside the context
# are added to the page in order.
with gr.Blocks(title="your_title") as demo:
gr.Markdown("# Your_Title")
with gr.Column():
# Multi-file upload input.
input_images = gr.File(label="Images", file_count="multiple")
with gr.Row():
# NOTE(review): 'aligment' is a typo in a user-visible string; kept as-is
# here since this edit only adds comments.
schedule = gr.Dropdown(["linear", "cosine"], value="linear", label="Schedule", info="For aligment")
# precision=0 forces an integer value.
niter = gr.Number(value=50, precision=0, minimum=0, maximum=100, label="Iterations", info="For denoising")
name = gr.Textbox(label="Name", placeholder="NULL", info="Experiment name")
thr = gr.Slider(label="Threshold", value=5, minimum=1, maximum=10, step=1)
flag = gr.Checkbox(value=True, label="Mask")
run_btn = gr.Button("Run")
# Output components: a 3D model viewer and an image gallery (4 per row).
output_model = gr.Model3D(label="3D Result")
output_gallery = gr.Gallery(label="2D Results", columns=4)
# Event wiring — set_flag_fn and run_fn are assumed to be user-defined
# callbacks (not shown in this snippet).
flag.change(set_flag_fn, inputs=[input_images, flag], outputs=niter)
run_btn.click(run_fn, inputs=[input_images, schedule, niter, name, thr, flag], outputs=[output_model, output_gallery])
# Start the local web server (blocking).
demo.launch()