This commit is contained in:
2021-10-11 12:24:57 +09:00
parent 4fc8f4c77b
commit d807a0407f
7 changed files with 111 additions and 258 deletions

View File

@@ -71,8 +71,6 @@ reflector --protocol https --latest 30 --sort rate --save /etc/pacman.d/mirrorli
pacman -S vim man-db man-pages git base-devel
```
reflector --protocol https --latest 30 --sort rate --save /etc/pacman.d/mirrorlist --verbose
## locale
```bash
@@ -397,34 +395,34 @@ yay -S telegraf
# Configuration for telegraf agent
[agent]
interval = "10s"
round_interval = true
metric_batch_size = 1000
metric_buffer_limit = 10000
collection_jitter = "0s"
flush_interval = "10s"
flush_jitter = "0s"
precision = ""
hostname = ""
omit_hostname = false
interval = "15s"
round_interval = true
metric_batch_size = 1000
metric_buffer_limit = 10000
collection_jitter = "0s"
flush_interval = "10s"
flush_jitter = "0s"
precision = ""
hostname = ""
omit_hostname = false
# Configuration for sending metrics to InfluxDB
[[outputs.influxdb]]
urls = ["http://127.0.0.1:8086"]
database = "<db>"
username = "<user>"
password = "<password>"
urls = ["http://127.0.0.1:8086"]
database = "<db>"
username = "<user>"
password = "<password>"
# Read metrics about cpu usage
[[inputs.cpu]]
percpu = true
totalcpu = true
collect_cpu_time = false
report_active = false
percpu = true
totalcpu = true
collect_cpu_time = false
report_active = false
# Read metrics about disk usage by mount point
[[inputs.disk]]
ignore_fs = ["tmpfs", "devtmpfs", "devfs", "iso9660", "overlay", "aufs", "squashfs"]
ignore_fs = ["tmpfs", "devtmpfs", "devfs", "iso9660", "overlay", "aufs", "squashfs"]
# Read metrics about disk IO by device
[[inputs.diskio]]
@@ -443,32 +441,41 @@ ignore_fs = ["tmpfs", "devtmpfs", "devfs", "iso9660", "overlay", "aufs", "squash
# Read metrics about network interface usage
[[inputs.net]]
interfaces = ["enp5s0"]
interfaces = ["enp5s0"]
# Read metrics about docker containers
[[inputs.docker]]
endpoint = "unix:///var/run/docker.sock"
perdevice = false
total = true
endpoint = "unix:///var/run/docker.sock"
perdevice = false
total = true
[[inputs.fail2ban]]
interval = "15m"
use_sudo = true
interval = "15m"
use_sudo = true
# Pulls statistics from nvidia GPUs attached to the host
[[inputs.nvidia_smi]]
timeout = "30s"
timeout = "30s"
[[inputs.http_response]]
interval = "5m"
urls = [
"https://example.com"
]
interval = "5m"
urls = [
"https://example.com"
]
# Monitor sensors, requires lm-sensors package
[[inputs.sensors]]
interval = "60s"
remove_numbers = false
interval = "60s"
remove_numbers = false
# Run executable as long-running input plugin
[[inputs.execd]]
interval = "15s"
command = ["/metrics.sh"]
name_override = "metrics"
signal = "STDIN"
restart_delay = "20s"
data_format = "logfmt"
```
```ini /etc/sudoers.d/telegraf
@@ -502,6 +509,7 @@ notification:
```ini /etc/cfddns/domains
example.com
dev.example.com
```
```
@@ -511,110 +519,75 @@ systemctl enable --now cfddns
## backup
```bash
pacman -S borg
pacman -S restic
```
```ini /etc/backups/borg.service
```ini /etc/backup/restic.service
[Unit]
Description=Borg Daily Backup Service
Description=Daily Backup Service
[Service]
Type=simple
Nice=19
IOSchedulingClass=2
IOSchedulingPriority=7
ExecStart=/etc/backups/run.sh
ExecStart=/etc/backup/run.sh
```
```ini /etc/backups/borg.timer
```ini /etc/backup/restic.timer
[Unit]
Description=Borg Daily Backup Timer
Description=Daily Backup Timer
[Timer]
WakeSystem=false
OnCalendar=*-*-* 03:00
RandomizedDelaySec=10min
OnCalendar=*-*-* 14:00
RandomizedDelaySec=5min
[Install]
WantedBy=timers.target
```
```bash /etc/backups/run.sh
```bash /etc/backup/run.sh
#!/bin/bash -ue
# The udev rule is not terribly accurate and may trigger our service before
# the kernel has finished probing partitions. Sleep for a bit to ensure
# the kernel is done.
sleep 5
# https://restic.readthedocs.io/en/latest/040_backup.html#
#
# Script configuration
#
export BORG_PASSPHRASE="<secret>"
MOUNTPOINT=/mnt/backup
TARGET=$MOUNTPOINT/borg
export RESTIC_REPOSITORY=/path/to/backup
export RESTIC_PASSWORD=<passphrase>
export RESTIC_PROGRESS_FPS=1
# Archive name schema
DATE=$(date --iso-8601)
#
# Create backups
#
# Options for borg create
BORG_OPTS="--stats --compression lz4 --checkpoint-interval 86400"
# No one can answer if Borg asks these questions, it is better to just fail quickly instead of hanging.
export BORG_RELOCATED_REPO_ACCESS_IS_OK=no
export BORG_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK=no
# Log Borg version
borg --version
echo "Starting backup for $DATE"
echo "# system"
borg create $BORG_OPTS \
--exclude /root/.cache \
--exclude /root/.pyenv \
--exclude /root/.vscode-server \
--exclude /root/.local/share/TabNine \
--exclude 'sh:/home/*/.cache' \
--exclude 'sh:/home/*/.cargo' \
--exclude 'sh:/home/*/.pyenv' \
--exclude 'sh:/home/*/.vscode-server' \
--exclude 'sh:/home/*/.local/share/TabNine' \
# system
restic backup --tag system -v \
--one-file-system \
$TARGET::'system-{now}' \
/etc /boot /home /root /srv
--exclude .cache \
--exclude .venv \
--exclude .vscode-server \
--exclude .vscode-server-insiders \
--exclude TabNine \
--exclude node_modules \
--exclude /var/lib/docker/overlay2 \
/ /boot
echo "# data"
borg create $BORG_OPTS \
--exclude 'sh:/mnt/data/nextcloud/appdata_*/preview' \
--exclude 'sh:/mnt/data/nextcloud/appdata_*/dav-photocache' \
$TARGET::'data-{now}' \
/mnt/data
echo "# ftl"
borg create $BORG_OPTS \
$TARGET::'ftl-{now}' \
# ftl
restic backup --tag ftl -v \
/mnt/ftl
echo "Start pruning"
BORG_PRUNE_OPTS_NORMAL="--list --stats --keep-daily 7 --keep-weekly 3 --keep-monthly 3"
borg prune $BORG_PRUNE_OPTS_NORMAL --prefix 'system-' $TARGET
borg prune $BORG_PRUNE_OPTS_NORMAL --prefix 'data-' $TARGET
borg prune $BORG_PRUNE_OPTS_NORMAL --prefix 'ftl-' $TARGET
# data
restic backup --tag data -v \
--exclude 'appdata_*/preview' \
--exclude 'appdata_*/dav-photocache' \
/mnt/data
echo "Completed backup for $DATE"
restic forget --prune --group-by tags \
--keep-daily 7 --keep-weekly 3 --keep-monthly 3
# Just to be completely paranoid
sync
restic check
```
```bash
ln -sf /etc/backups/borg.{service,timer} /etc/systemd/system/
systemctl enable --now borg
chmod 700 /etc/backup/run.sh
ln -sf /etc/backup/restic.{service,timer} /etc/systemd/system/
systemctl enable --now restic
```
## Kubernetes
@@ -866,7 +839,7 @@ Audit=no
This occurs after updating the Linux kernel.
- Run `docker --rm --gpus all -it nvidia/cuda:10.2-cudnn7-runtime nvidia-smi` once.
- Run `docker run --rm --gpus all --device /dev/nvidia0 --device /dev/nvidiactl --device /dev/nvidia-modeset --device /dev/nvidia-uvm --device /dev/nvidia-uvm-tools -it nvidia/cuda:10.2-cudnn7-runtime nvidia-smi` once.
# Useful links
@@ -878,5 +851,4 @@ This occurs after updating linux kernel.
- [udev - ArchWiki](https://wiki.archlinux.org/title/Udev#Debug_output)
- [[HOWTO] Repair Broken system, system without a kernel / Forum & Wiki discussion / Arch Linux Forums](https://bbs.archlinux.org/viewtopic.php?id=18066)
- [Archboot - ArchWiki](https://wiki.archlinux.org/title/Archboot)
- [Restoring with the Borg](https://blog.jamesthebard.net/restoring-with-the-borg/)
- [Restore with Borg | BorgBase Docs](https://docs.borgbase.com/restore/borg/)
- [Restic Documentation — restic 0.12.1 documentation](https://restic.readthedocs.io/en/stable/)

View File

@@ -7,15 +7,13 @@ date: 2021-02-13T00:00:00
# 用途
- セルフホスト (Dockerized)
- セルフホスト (Docker)
- メールサーバー
- DNS サーバー
- Nextcloud(ファイル、カレンダー、連絡先等)
- GitLab
- プライベート Docker レジストリ
- VPN
- VPN
- 計算実験
- Docker Swarm ノード
- Docker Swarm マスターノード
- VS Code Remote SSH のホストマシン
# スペック
@@ -24,6 +22,8 @@ date: 2021-02-13T00:00:00
> 結果から言うとメモリはもっと必要でした。巨大な Pandas データフレームを並列処理なんかするとサクッと消えてしまいます。予算に余裕があるなら 128GB ほど用意したほうが良いかもしれません。
> 追記: メモリ異常を起因とするシステム誤動作により、`/sbin` 以下がゼロ上書きされカーネルが起動しなくなるなど様々な厄災に襲われました。後日 Hynix 製のチップを搭載した V-color 社の ECC 付き U-DIMM に交換してからは、サーバーが安定動作するようになり現在に至ります。やはり 365 日稼働し続けるサーバーには最初からケチらずに ECC 付きメモリを選んでおいた方が賢明です。
GPU は古いサーバーに突っ込んでいた NVIDIA GeForce GTX TITAN X (Maxwell)を流用しました。グラフィックメモリが 12GB ちょっとですが、最大ワークロード時でも 5GB は残るので今のところ十分です。必要になったタイミングで増やします。
記憶装置は WD HDD 3TB 2 台と Samsung 970 EVO Plus 500GB M.2 PCIe、そして古いサーバーから引っこ抜いた Samsung 870 EVO Plus 500GB SSD です。NVMe メモリは OS 用、SSD/HDD はデータとバックアップ用にします。
@@ -55,8 +55,9 @@ Arch Linux のセットアップは[個別に記事](https://uechi.io/blog/insta
# 組立ての勘所
- 半年間はすべての箱・書類を取っておく
- 筐体は無視してまずマザボ、CPU、クーラー、(オンボードグラフィックが無い CPU なら)グラボ、そして電源を繋いで通電・動作テストをする
- [MemTest86](https://www.memtest86.com/)でメモリの動作テスト
- [MemTest86](https://www.memtest86.com/)でメモリの動作テストを最後までやる(エラーが出たら交換依頼)
- USB ブートで OS の起動確認
- Ethernet が死んでいる場合は USB-Ethernet アダプターでまずネットを確保する
- ほとんどの場合 Linux カーネルのバージョンを上げると(デバイスドライバーも新しくなり)直る