update
This commit is contained in:
@@ -71,8 +71,6 @@ reflector --protocol https --latest 30 --sort rate --save /etc/pacman.d/mirrorli
|
||||
pacman -S vim man-db man-pages git base-devel
|
||||
```
|
||||
|
||||
reflector --protocol https --latest 30 --sort rate --save /etc/pacman.d/mirrorlist --verbose
|
||||
|
||||
## locale
|
||||
|
||||
```bash
|
||||
@@ -397,34 +395,34 @@ yay -S telegraf
|
||||
|
||||
# Configuration for telegraf agent
|
||||
[agent]
|
||||
interval = "10s"
|
||||
round_interval = true
|
||||
metric_batch_size = 1000
|
||||
metric_buffer_limit = 10000
|
||||
collection_jitter = "0s"
|
||||
flush_interval = "10s"
|
||||
flush_jitter = "0s"
|
||||
precision = ""
|
||||
hostname = ""
|
||||
omit_hostname = false
|
||||
interval = "15s"
|
||||
round_interval = true
|
||||
metric_batch_size = 1000
|
||||
metric_buffer_limit = 10000
|
||||
collection_jitter = "0s"
|
||||
flush_interval = "10s"
|
||||
flush_jitter = "0s"
|
||||
precision = ""
|
||||
hostname = ""
|
||||
omit_hostname = false
|
||||
|
||||
# Configuration for sending metrics to InfluxDB
|
||||
[[outputs.influxdb]]
|
||||
urls = ["http://127.0.0.1:8086"]
|
||||
database = "<db>"
|
||||
username = "<user>"
|
||||
password = "<password>"
|
||||
urls = ["http://127.0.0.1:8086"]
|
||||
database = "<db>"
|
||||
username = "<user>"
|
||||
password = "<password>"
|
||||
|
||||
# Read metrics about cpu usage
|
||||
[[inputs.cpu]]
|
||||
percpu = true
|
||||
totalcpu = true
|
||||
collect_cpu_time = false
|
||||
report_active = false
|
||||
percpu = true
|
||||
totalcpu = true
|
||||
collect_cpu_time = false
|
||||
report_active = false
|
||||
|
||||
# Read metrics about disk usage by mount point
|
||||
[[inputs.disk]]
|
||||
ignore_fs = ["tmpfs", "devtmpfs", "devfs", "iso9660", "overlay", "aufs", "squashfs"]
|
||||
ignore_fs = ["tmpfs", "devtmpfs", "devfs", "iso9660", "overlay", "aufs", "squashfs"]
|
||||
|
||||
# Read metrics about disk IO by device
|
||||
[[inputs.diskio]]
|
||||
@@ -443,32 +441,41 @@ ignore_fs = ["tmpfs", "devtmpfs", "devfs", "iso9660", "overlay", "aufs", "squash
|
||||
|
||||
# Read metrics about network interface usage
|
||||
[[inputs.net]]
|
||||
interfaces = ["enp5s0"]
|
||||
interfaces = ["enp5s0"]
|
||||
|
||||
# Read metrics about docker containers
|
||||
[[inputs.docker]]
|
||||
endpoint = "unix:///var/run/docker.sock"
|
||||
perdevice = false
|
||||
total = true
|
||||
endpoint = "unix:///var/run/docker.sock"
|
||||
perdevice = false
|
||||
total = true
|
||||
|
||||
[[inputs.fail2ban]]
|
||||
interval = "15m"
|
||||
use_sudo = true
|
||||
interval = "15m"
|
||||
use_sudo = true
|
||||
|
||||
# Pulls statistics from nvidia GPUs attached to the host
|
||||
[[inputs.nvidia_smi]]
|
||||
timeout = "30s"
|
||||
timeout = "30s"
|
||||
|
||||
[[inputs.http_response]]
|
||||
interval = "5m"
|
||||
urls = [
|
||||
"https://example.com"
|
||||
]
|
||||
interval = "5m"
|
||||
urls = [
|
||||
"https://example.com"
|
||||
]
|
||||
|
||||
# Monitor sensors, requires lm-sensors package
|
||||
[[inputs.sensors]]
|
||||
interval = "60s"
|
||||
remove_numbers = false
|
||||
interval = "60s"
|
||||
remove_numbers = false
|
||||
|
||||
# Run executable as long-running input plugin
|
||||
[[inputs.execd]]
|
||||
interval = "15s"
|
||||
command = ["/metrics.sh"]
|
||||
name_override = "metrics"
|
||||
signal = "STDIN"
|
||||
restart_delay = "20s"
|
||||
data_format = "logfmt"
|
||||
```
|
||||
|
||||
```ini /etc/sudoers.d/telegraf
|
||||
@@ -502,6 +509,7 @@ notification:
|
||||
|
||||
```ini /etc/cfddns/domains
|
||||
example.com
|
||||
dev.example.com
|
||||
```
|
||||
|
||||
```
|
||||
@@ -511,110 +519,75 @@ systemctl enable --now cfddns
|
||||
## backup
|
||||
|
||||
```bash
|
||||
pacman -S borg
|
||||
pacman -S restic
|
||||
```
|
||||
|
||||
```ini /etc/backups/borg.service
|
||||
```ini /etc/backup/restic.service
|
||||
[Unit]
|
||||
Description=Borg Daily Backup Service
|
||||
Description=Daily Backup Service
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
Nice=19
|
||||
IOSchedulingClass=2
|
||||
IOSchedulingPriority=7
|
||||
ExecStart=/etc/backups/run.sh
|
||||
ExecStart=/etc/backup/run.sh
|
||||
```
|
||||
|
||||
```ini /etc/backups/borg.timer
|
||||
```ini /etc/backup/restic.timer
|
||||
[Unit]
|
||||
Description=Borg Daily Backup Timer
|
||||
Description=Daily Backup Timer
|
||||
|
||||
[Timer]
|
||||
WakeSystem=false
|
||||
OnCalendar=*-*-* 03:00
|
||||
RandomizedDelaySec=10min
|
||||
OnCalendar=*-*-* 14:00
|
||||
RandomizedDelaySec=5min
|
||||
|
||||
[Install]
|
||||
WantedBy=timers.target
|
||||
```
|
||||
|
||||
```bash /etc/backups/run.sh
|
||||
```bash /etc/backup/run.sh
|
||||
#!/bin/bash -ue
|
||||
|
||||
# The udev rule is not terribly accurate and may trigger our service before
|
||||
# the kernel has finished probing partitions. Sleep for a bit to ensure
|
||||
# the kernel is done.
|
||||
sleep 5
|
||||
# https://restic.readthedocs.io/en/latest/040_backup.html#
|
||||
|
||||
#
|
||||
# Script configuration
|
||||
#
|
||||
export BORG_PASSPHRASE="<secret>"
|
||||
MOUNTPOINT=/mnt/backup
|
||||
TARGET=$MOUNTPOINT/borg
|
||||
export RESTIC_REPOSITORY=/path/to/backup
|
||||
export RESTIC_PASSWORD=<passphrase>
|
||||
export RESTIC_PROGRESS_FPS=1
|
||||
|
||||
# Archive name schema
|
||||
DATE=$(date --iso-8601)
|
||||
|
||||
#
|
||||
# Create backups
|
||||
#
|
||||
|
||||
# Options for borg create
|
||||
BORG_OPTS="--stats --compression lz4 --checkpoint-interval 86400"
|
||||
|
||||
# No one can answer if Borg asks these questions, it is better to just fail quickly instead of hanging.
|
||||
export BORG_RELOCATED_REPO_ACCESS_IS_OK=no
|
||||
export BORG_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK=no
|
||||
|
||||
# Log Borg version
|
||||
borg --version
|
||||
|
||||
echo "Starting backup for $DATE"
|
||||
|
||||
echo "# system"
|
||||
borg create $BORG_OPTS \
|
||||
--exclude /root/.cache \
|
||||
--exclude /root/.pyenv \
|
||||
--exclude /root/.vscode-server \
|
||||
--exclude /root/.local/share/TabNine \
|
||||
--exclude 'sh:/home/*/.cache' \
|
||||
--exclude 'sh:/home/*/.cargo' \
|
||||
--exclude 'sh:/home/*/.pyenv' \
|
||||
--exclude 'sh:/home/*/.vscode-server' \
|
||||
--exclude 'sh:/home/*/.local/share/TabNine' \
|
||||
# system
|
||||
restic backup --tag system -v \
|
||||
--one-file-system \
|
||||
$TARGET::'system-{now}' \
|
||||
/etc /boot /home /root /srv
|
||||
--exclude .cache \
|
||||
--exclude .venv \
|
||||
--exclude .vscode-server \
|
||||
--exclude .vscode-server-insiders \
|
||||
--exclude TabNine \
|
||||
--exclude node_modules \
|
||||
--exclude /var/lib/docker/overlay2 \
|
||||
/ /boot
|
||||
|
||||
echo "# data"
|
||||
borg create $BORG_OPTS \
|
||||
--exclude 'sh:/mnt/data/nextcloud/appdata_*/preview' \
|
||||
--exclude 'sh:/mnt/data/nextcloud/appdata_*/dav-photocache' \
|
||||
$TARGET::'data-{now}' \
|
||||
/mnt/data
|
||||
|
||||
echo "# ftl"
|
||||
borg create $BORG_OPTS \
|
||||
$TARGET::'ftl-{now}' \
|
||||
# ftl
|
||||
restic backup --tag ftl -v \
|
||||
/mnt/ftl
|
||||
|
||||
echo "Start pruning"
|
||||
BORG_PRUNE_OPTS_NORMAL="--list --stats --keep-daily 7 --keep-weekly 3 --keep-monthly 3"
|
||||
borg prune $BORG_PRUNE_OPTS_NORMAL --prefix 'system-' $TARGET
|
||||
borg prune $BORG_PRUNE_OPTS_NORMAL --prefix 'data-' $TARGET
|
||||
borg prune $BORG_PRUNE_OPTS_NORMAL --prefix 'ftl-' $TARGET
|
||||
# data
|
||||
restic backup --tag data -v \
|
||||
--exclude 'appdata_*/preview' \
|
||||
--exclude 'appdata_*/dav-photocache' \
|
||||
/mnt/data
|
||||
|
||||
echo "Completed backup for $DATE"
|
||||
restic forget --prune --group-by tags \
|
||||
--keep-daily 7 --keep-weekly 3 --keep-monthly 3
|
||||
|
||||
# Just to be completely paranoid
|
||||
sync
|
||||
restic check
|
||||
```
|
||||
|
||||
```bash
|
||||
ln -sf /etc/backups/borg.{service,timer} /etc/systemd/system/
|
||||
systemctl enable --now borg
|
||||
chmod 700 /etc/backup/run.sh
|
||||
ln -sf /etc/backup/restic.{service,timer} /etc/systemd/system/
|
||||
systemctl enable --now restic
|
||||
```
|
||||
|
||||
## Kubernetes
|
||||
@@ -866,7 +839,7 @@ Audit=no
|
||||
|
||||
This occurs after updating linux kernel.
|
||||
|
||||
- Run `docker --rm --gpus all -it nvidia/cuda:10.2-cudnn7-runtime nvidia-smi` once.
|
||||
- Run `docker run --rm --gpus all --device /dev/nvidia0 --device /dev/nvidiactl --device /dev/nvidia-modeset --device /dev/nvidia-uvm --device /dev/nvidia-uvm-tools -it nvidia/cuda:10.2-cudnn7-runtime nvidia-smi` once.
|
||||
|
||||
# Useful links
|
||||
|
||||
@@ -878,5 +851,4 @@ This occurs after updating linux kernel.
|
||||
- [udev - ArchWiki](https://wiki.archlinux.org/title/Udev#Debug_output)
|
||||
- [[HOWTO] Repair Broken system, system without a kernel / Forum & Wiki discussion / Arch Linux Forums](https://bbs.archlinux.org/viewtopic.php?id=18066)
|
||||
- [Archboot - ArchWiki](https://wiki.archlinux.org/title/Archboot)
|
||||
- [Restoring with the Borg](https://blog.jamesthebard.net/restoring-with-the-borg/)
|
||||
- [Restore with Borg | BorgBase Docs](https://docs.borgbase.com/restore/borg/)
|
||||
- [Restic Documentation — restic 0.12.1 documentation](https://restic.readthedocs.io/en/stable/)
|
||||
|
@@ -7,15 +7,13 @@ date: 2021-02-13T00:00:00
|
||||
|
||||
# 用途
|
||||
|
||||
- セルフホスト (Dockerized)
|
||||
- セルフホスト (Docker)
|
||||
- メールサーバー
|
||||
- DNS サーバー
|
||||
- Nextcloud(ファイル、カレンダー、連絡先等)
|
||||
- GitLab
|
||||
- プライベート Docker レジストリ
|
||||
- VPN 他
|
||||
- VPN 等
|
||||
- 計算実験
|
||||
- Docker Swarm ノード
|
||||
- Docker Swarm マスターノード
|
||||
- VS Code Remote SSH のホストマシン
|
||||
|
||||
# スペック
|
||||
@@ -24,6 +22,8 @@ date: 2021-02-13T00:00:00
|
||||
|
||||
> 結果から言うとメモリはもっと必要でした。巨大な Pandas データフレームを並列処理なんかするとサクッと消えてしまいます。予算に余裕があるなら 128GB ほど用意したほうが良いかもしれません。
|
||||
|
||||
> 追記: メモリ異常に起因するシステム誤動作により、`/sbin` 以下がゼロ上書きされカーネルが起動しなくなるなど様々な厄災に襲われました。後日 Hynix 製のチップを搭載した V-color 社の ECC 付き U-DIMM に交換してからは、サーバーが安定動作するようになり現在に至ります。やはり 365 日稼働し続けるサーバーには最初からケチらずに ECC 付きメモリを選んでおいた方が賢明です。
|
||||
|
||||
GPU は古いサーバーに突っ込んでいた NVIDIA GeForce GTX TITAN X (Maxwell)を流用しました。グラフィックメモリが 12GB ちょっとですが、最大ワークロード時でも 5GB は残るので今のところ十分です。必要になったタイミングで増やします。
|
||||
|
||||
記憶装置は WD HDD 3TB 2 台と Samsung 970 EVO Plus 500GB M.2 PCIe、そして古いサーバーから引っこ抜いた Samsung 870 EVO Plus 500GB SSD です。NVMe メモリは OS 用、SSD/HDD はデータとバックアップ用にします。
|
||||
@@ -55,8 +55,9 @@ Arch Linux のセットアップは[個別に記事](https://uechi.io/blog/insta
|
||||
|
||||
# 組立ての勘所
|
||||
|
||||
- 半年間はすべての箱・書類を取っておく
|
||||
- 筐体は無視してまずマザボ、CPU、クーラー、(オンボードグラフィックが無い CPU なら)グラボ、そして電源を繋いで通電・動作テストをする
|
||||
- [MemTest86](https://www.memtest86.com/)でメモリの動作テスト
|
||||
- [MemTest86](https://www.memtest86.com/)でメモリの動作テストを最後までやる(エラーが出たら交換依頼)
|
||||
- USB ブートで OS の起動確認
|
||||
- Ethernet が死んでいる場合は USB-Ethernet アダプターでまずネットを確保する
|
||||
- ほとんどの場合 Linux カーネルのバージョンを上げると(デバイスドライバーも新しくなり)直る
|
||||
|
Reference in New Issue
Block a user