update articles
This commit is contained in:
parent
051caa7790
commit
42c316a1fa
10
package.json
10
package.json
@ -9,10 +9,10 @@
|
|||||||
"test": "lint-staged"
|
"test": "lint-staged"
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"hexo": "^5.0.0",
|
"hexo": "^5.4.0",
|
||||||
"hexo-excerpt": "^1.1.6",
|
"hexo-excerpt": "^1.1.6",
|
||||||
"hexo-feed": "^1.1.0",
|
"hexo-feed": "^1.1.0",
|
||||||
"hexo-filter-mathjax": "^0.6.3",
|
"hexo-filter-mathjax": "^0.7.0",
|
||||||
"hexo-generator-archive": "^1.0.0",
|
"hexo-generator-archive": "^1.0.0",
|
||||||
"hexo-generator-category": "^1.0.0",
|
"hexo-generator-category": "^1.0.0",
|
||||||
"hexo-generator-index": "^2.0.0",
|
"hexo-generator-index": "^2.0.0",
|
||||||
@ -26,17 +26,17 @@
|
|||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"lint-staged": "^10.5.4",
|
"lint-staged": "^10.5.4",
|
||||||
"textlint": "^11.8.2",
|
"textlint": "^11.9.0",
|
||||||
"textlint-filter-rule-whitelist": "^2.0.0",
|
"textlint-filter-rule-whitelist": "^2.0.0",
|
||||||
"textlint-rule-common-misspellings": "^1.0.1",
|
"textlint-rule-common-misspellings": "^1.0.1",
|
||||||
"textlint-rule-no-start-duplicated-conjunction": "^2.0.2",
|
"textlint-rule-no-start-duplicated-conjunction": "^2.0.2",
|
||||||
"textlint-rule-preset-japanese": "^5.0.0"
|
"textlint-rule-preset-japanese": "^6.0.1"
|
||||||
},
|
},
|
||||||
"lint-staged": {
|
"lint-staged": {
|
||||||
"*.md": "textlint"
|
"*.md": "textlint"
|
||||||
},
|
},
|
||||||
"hexo": {
|
"hexo": {
|
||||||
"version": "5.3.0"
|
"version": "5.4.0"
|
||||||
},
|
},
|
||||||
"private": true
|
"private": true
|
||||||
}
|
}
|
@ -20,21 +20,25 @@ exponent2: 3417 (0xd59)
|
|||||||
coefficient: 17568 (0x44a0)
|
coefficient: 17568 (0x44a0)
|
||||||
```
|
```
|
||||||
|
|
||||||
$$\text{modulus} = \text{prime1} \cdot \text{prime2}$$
|
$$\text{modulus} = \text{prime1} \cdot \text{prime2}$$
|
||||||
|
|
||||||
publicExponent は $(\text{prime1} - 1)(\text{prime2} - 1)$ とお互いに素な数から選ぶ。65537 で固定、なぜなら二進数で 10000000000000001 となり、ビットがあまり立っておらず計算が早いため。
|
publicExponent は $(\text{prime1} - 1)(\text{prime2} - 1)$ とお互いに素な数から選ぶ。65537 で固定、なぜなら二進数で 10000000000000001 となり、ビットがあまり立っておらず計算が速いため。
|
||||||
|
|
||||||
privateExponent は $\text{publicExponent}^{-1} \text{mod} (\text{prime1} - 1)(\text{prime2} - 1)$
|
privateExponent は $\text{publicExponent}^{-1}\ \text{mod}\ (\text{prime1} - 1)(\text{prime2} - 1)$
|
||||||
|
|
||||||
## 中国の余剰定理
|
## 中国の剰余定理
|
||||||
|
|
||||||
[定理の詳細](https://ja.wikipedia.org/wiki/中国の剰余定理)
|
[定理の詳細](https://ja.wikipedia.org/wiki/中国の剰余定理)
|
||||||
|
|
||||||
$$\text{exponent1} = \text{privateExponent} \pmod{\text{prime1} - 1}$$
|
$$
|
||||||
|
\text{exponent1} = \text{privateExponent} \pmod{\text{prime1} - 1}
|
||||||
|
$$
|
||||||
|
|
||||||
$\text{exponent2} = \text{privateExponent} \pmod{\text{prime2} - 1} $
|
$$
|
||||||
|
\text{exponent2} = \text{privateExponent} \pmod{\text{prime2} - 1}
|
||||||
|
$$
|
||||||
|
|
||||||
$ \text{coefficient} = \text{prime2}^{-1} \pmod{\text{prime1}} $
|
$$ \text{coefficient} = \text{prime2}^{-1} \pmod{\text{prime1}} $$
|
||||||
|
|
||||||
これらは復号の簡単化のために用意された係数である。
|
これらは復号の簡単化のために用意された係数である。
|
||||||
|
|
||||||
|
@ -1,25 +1,25 @@
|
|||||||
---
|
---
|
||||||
title: Toxicity Analysis in YouTube Live Chat
|
title: Exploratory Data Analysis on Vtubers Live Chat
|
||||||
---
|
---
|
||||||
|
|
||||||
A little analysis and experiment on a flock of toxic people.
|
A little experiment and analysis on toxic people floating across YouTube.
|
||||||
|
|
||||||
# Why
|
# Why
|
||||||
|
|
||||||
The motivation is straightforward; I just feel sad when they sound suffered from toxic chats. The goal is also straightforward: design an automated system to spot toxic chat and quarantine them.
|
The motivation is straightforward; I just feel sad when streamers suffer from random toxic chats. The goal is also straightforward: design an automated system that spots toxic chats and quarantines them.
|
||||||
|
|
||||||
# Data, Data, Data
|
# Data, Data, Data
|
||||||
|
|
||||||
> I can't make bricks without clay.
|
> I can't make bricks without clay.
|
||||||
> — Sherlock Holmes
|
> — Sherlock Holmes
|
||||||
|
|
||||||
I need a myriad of live chat comments and moderation events for this.
|
I need a myriad of live chat comments and moderation events for the experiment.
|
||||||
|
|
||||||
Unfortunately, YouTube API does not offer a way to retrieve these kinds of events in real time. Which is crucial because live streams are only place we can observe moderators' actions (deletion and BAN). Once it gets archived, these activities are no longer observable.
|
Unfortunately, the YouTube API does not offer a way to retrieve these kinds of events in real time, which is crucial because live streams are the only place we can observe moderators' activities (deletion and BAN). Once a stream gets archived, these events are no longer available to fetch.
|
||||||
|
|
||||||
## Collecting Crusts
|
## Collecting Crusts
|
||||||
|
|
||||||
So, I ended up developing a library to accumulate events from a YouTube live stream, with a fancy CLI app mimics live chat. It accepts YouTube video id and save live chats in [JSON Lines](https://jsonlines.org/) format:
|
So, I ended up developing a library to accumulate events from a live stream, with a fancy CLI app that mimics live chat. It accepts a YouTube video ID and saves live chats in [JSON Lines](https://jsonlines.org/) format:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
collector <videoId>
|
collector <videoId>
|
||||||
@ -37,9 +37,11 @@ Thankfully, there's a great web service around Hololive community: [Holotools](h
|
|||||||
|
|
||||||
Here I divided my system into two components: Scheduler and workers. Scheduler periodically checks for newly scheduled live streams through Holotools API and create a job to be handled by workers. Workers are responsible for handling jobs and spawning a process to collect live chat events.
|
Here I divided my system into two components: a scheduler and workers. The scheduler periodically checks for newly scheduled live streams through the Holotools API and creates a job to be handled by workers. Workers are responsible for handling jobs and spawning a process to collect live chat events.
|
||||||
|
|
||||||
|
At this point, saving chats to text files in JSONL format became inefficient as the throughput grew tremendously, so I switched the data store to MongoDB.
|
||||||
|
|
||||||

|

|
||||||
|
|
||||||
I run the cluster for a while and by far it hoards approximately one million comments per day. Now I could reliably run my own bakery.
|
I've been running the cluster for a while, and so far it hoards approximately one million comments per day. Now I could reliably run my own bakery.
|
||||||
|
|
||||||
# Look Before You Leap
|
# Look Before You Leap
|
||||||
|
|
||||||
@ -53,11 +55,15 @@ Okay now there are five million chats sitting on MongoDB store. Let's take a clo
|
|||||||
|
|
||||||
# Creating Dataset
|
# Creating Dataset
|
||||||
|
|
||||||
## Labelling Spam & Toxic Chat
|
## Labelling Toxic Chat
|
||||||
|
|
||||||
### Utilizing Moderators' Activities
|
### Utilizing Moderators' Activities
|
||||||
|
|
||||||
### Introducing Normalized Co-occurrence Entropy
|
### Browser Extension
|
||||||
|
|
||||||
|
### Normalized Co-occurrence Entropy
|
||||||
|
|
||||||
|
Shannon entropy alone is not enough, so I combined the ideas of the [Burrows-Wheeler Transform](https://en.wikipedia.org/wiki/Burrows%E2%80%93Wheeler_transform) and [Run-length Encoding](https://en.wikipedia.org/wiki/Run-length_encoding) to formulate a new entropy which represents the "spamminess" of a given text.
|
||||||
|
|
||||||
$$
|
$$
|
||||||
NCE(T) = \frac{N_T}{RLE_{string}(BWT(T))}
|
NCE(T) = \frac{N_T}{RLE_{string}(BWT(T))}
|
||||||
@ -67,11 +73,7 @@ $$
|
|||||||
BWT[T,i] = \begin{cases} T[SA[i]-1], & \text{if }SA[i] > 0\\ \$, & \text{otherwise}\end{cases}
|
BWT[T,i] = \begin{cases} T[SA[i]-1], & \text{if }SA[i] > 0\\ \$, & \text{otherwise}\end{cases}
|
||||||
$$
|
$$
|
||||||
|
|
||||||
Shannon Entropy is not enough. So I combined the ideas of [Burrows-Wheeler Transform](https://en.wikipedia.org/wiki/Burrows%E2%80%93Wheeler_transform) and [Run-length Encoding](https://en.wikipedia.org/wiki/Run-length_encoding) to formulate a new entropy which represents "spamminess" of given text.
|
### Sentence Encoding
|
||||||
|
|
||||||
### Browser Extension
|
|
||||||
|
|
||||||
## Sentence Encoding
|
|
||||||
|
|
||||||
Here's a [t-SNE](https://en.wikipedia.org/wiki/T-distributed_stochastic_neighbor_embedding) visualization for output of Sentence Transformer. Blue dots are spam and orange dots are normal chats.
|
Here's a [t-SNE](https://en.wikipedia.org/wiki/T-distributed_stochastic_neighbor_embedding) visualization for output of Sentence Transformer. Blue dots are spam and orange dots are normal chats.
|
||||||
|
|
||||||
|
@ -6,7 +6,17 @@ redirect_from: "/blog/2017/06/16/x11forward"
|
|||||||
|
|
||||||

|

|
||||||
|
|
||||||
## Ubuntu 16.04
|
# Installation
|
||||||
|
|
||||||
|
## Remote
|
||||||
|
|
||||||
|
### Arch Linux
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pacman -S xorg-xauth xorg-fonts-100dpi xorg-xeyes
|
||||||
|
```
|
||||||
|
|
||||||
|
### Ubuntu 16.04
|
||||||
|
|
||||||
Make sure you have installed SSH, X11 and xAuth on a remote server.
|
Make sure you have installed SSH, X11 and xAuth on a remote server.
|
||||||
|
|
||||||
@ -16,12 +26,12 @@ sudo sed -i '/ForwardX11/s/.*/ForwardX11 yes/' /etc/ssh/sshd_config
|
|||||||
sudo service ssh restart
|
sudo service ssh restart
|
||||||
```
|
```
|
||||||
|
|
||||||
## macOS Sierra
|
## Client (macOS Big Sur)
|
||||||
|
|
||||||
You also need to have X11 on your local machine.
|
You also need to have X11 on your local machine.
|
||||||
|
|
||||||
```
|
```
|
||||||
brew cask install xquartz # install X11
|
brew install xquartz # install X11
|
||||||
ssh -X <remote>
|
ssh -X <remote>
|
||||||
$ xeyes # verify you have X11
|
$ xeyes # verify you have X11
|
||||||
```
|
```
|
||||||
|
@ -381,6 +381,9 @@ systemctl enable --now cfddns
|
|||||||
```bash
|
```bash
|
||||||
pacman -S smartmontools
|
pacman -S smartmontools
|
||||||
systemctl enable --now smartd
|
systemctl enable --now smartd
|
||||||
|
|
||||||
|
smartctl -t short /dev/sdc
|
||||||
|
smartctl -l selftest /dev/sdc
|
||||||
```
|
```
|
||||||
|
|
||||||
## backup
|
## backup
|
||||||
@ -455,12 +458,17 @@ borg create $BORG_OPTS \
|
|||||||
--exclude /var/lib/docker/devicemapper \
|
--exclude /var/lib/docker/devicemapper \
|
||||||
--exclude 'sh:/home/*/.cache' \
|
--exclude 'sh:/home/*/.cache' \
|
||||||
--exclude 'sh:/home/*/.cargo' \
|
--exclude 'sh:/home/*/.cargo' \
|
||||||
|
--exclude 'sh:/home/*/.pyenv' \
|
||||||
|
--exclude 'sh:/home/*/.vscode-server' \
|
||||||
|
--exclude 'sh:/home/*/.local/share/TabNine' \
|
||||||
--one-file-system \
|
--one-file-system \
|
||||||
$TARGET::'{hostname}-system-{now}' \
|
$TARGET::'{hostname}-system-{now}' \
|
||||||
/ /boot
|
/ /boot
|
||||||
|
|
||||||
echo "# data"
|
echo "# data"
|
||||||
borg create $BORG_OPTS \
|
borg create $BORG_OPTS \
|
||||||
|
--exclude 'sh:/mnt/data/nextcloud/appdata_*/preview' \
|
||||||
|
--exclude 'sh:/mnt/data/nextcloud/appdata_*/dav-photocache' \
|
||||||
$TARGET::'{hostname}-data-{now}' \
|
$TARGET::'{hostname}-data-{now}' \
|
||||||
/mnt/data /mnt/ftl
|
/mnt/data /mnt/ftl
|
||||||
|
|
||||||
|
@ -93,7 +93,7 @@ hr {
|
|||||||
margin: 40px 0;
|
margin: 40px 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
.header {
|
header.header {
|
||||||
display: flex;
|
display: flex;
|
||||||
flex-wrap: wrap;
|
flex-wrap: wrap;
|
||||||
margin-top: 60px;
|
margin-top: 60px;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user