{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "source": [
    "import os\n",
    "from os.path import join\n",
    "\n",
    "import pandas as pd\n",
    "from datasets import ClassLabel, Features, Value, load_dataset\n",
    "from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments\n",
    "\n",
    "# Dataset loading reference: https://huggingface.co/docs/datasets/loading_datasets.html\n",
    "\n",
    "# Required environment variable: indexing os.environ raises KeyError if it is unset.\n",
    "DATASET_DIR = os.environ['DATASET_DIR']"
   ],
   "outputs": [],
   "metadata": {}
  },
  {
   "cell_type": "markdown",
   "source": [
    "## Load the dataset\n",
    "\n",
    "Load `holodata/sensai` with an explicit schema: `body` as a string and `toxic` as a two-class label. Only the `train` split is kept."
   ],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "source": [
    "# Explicit schema: `body` as a string, `toxic` as a two-class label named '0' / '1'.\n",
    "sensai_features = Features(\n",
    "    {\n",
    "        \"body\": Value(\"string\"),\n",
    "        \"toxic\": ClassLabel(num_classes=2, names=['0', '1']),\n",
    "    }\n",
    ")\n",
    "\n",
    "# Request the train split directly so `dataset` is bound exactly once,\n",
    "# instead of holding the full load result and then being rebound to one split.\n",
    "dataset = load_dataset(\"holodata/sensai\", split=\"train\", features=sensai_features)"
   ],
   "outputs": [ { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, "model_id": "704789c10f1e44ddbb83262b8a826eec" }, "text/plain": [ " 0%| | 0/1 [00:00\n", " \n", " \n", " [15000/15000 31:45, Epoch 3/3]\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
StepTraining Loss
130000.687500
135000.686300
140000.637900
145000.643200
150000.627700

" ], "text/plain": [ "" ] }, "metadata": {} }, { "output_type": "stream", "name": "stderr", "text": [ "Saving model checkpoint to test_trainer/checkpoint-13000\n", "Configuration saved in test_trainer/checkpoint-13000/config.json\n", "Model weights saved in test_trainer/checkpoint-13000/pytorch_model.bin\n", "Saving model checkpoint to test_trainer/checkpoint-13500\n", "Configuration saved in test_trainer/checkpoint-13500/config.json\n", "Model weights saved in test_trainer/checkpoint-13500/pytorch_model.bin\n", "Saving model checkpoint to test_trainer/checkpoint-14000\n", "Configuration saved in test_trainer/checkpoint-14000/config.json\n", "Model weights saved in test_trainer/checkpoint-14000/pytorch_model.bin\n", "Saving model checkpoint to test_trainer/checkpoint-14500\n", "Configuration saved in test_trainer/checkpoint-14500/config.json\n", "Model weights saved in test_trainer/checkpoint-14500/pytorch_model.bin\n", "Saving model checkpoint to test_trainer/checkpoint-15000\n", "Configuration saved in test_trainer/checkpoint-15000/config.json\n", "Model weights saved in test_trainer/checkpoint-15000/pytorch_model.bin\n", "\n", "\n", "Training completed. 
Do not forget to share your model on huggingface.co/models =)\n", "\n", "\n" ] }, { "output_type": "execute_result", "data": { "text/plain": [ "TrainOutput(global_step=15000, training_loss=0.10941998901367188, metrics={'train_runtime': 1918.0916, 'train_samples_per_second': 62.562, 'train_steps_per_second': 7.82, 'total_flos': 3.24994775580672e+16, 'train_loss': 0.10941998901367188, 'epoch': 3.0})" ] }, "metadata": {}, "execution_count": 9 } ], "metadata": {} }, { "cell_type": "code", "execution_count": null, "source": [], "outputs": [], "metadata": {} } ], "metadata": { "orig_nbformat": 4, "language_info": { "name": "python", "version": "3.8.6", "mimetype": "text/x-python", "codemirror_mode": { "name": "ipython", "version": 3 }, "pygments_lexer": "ipython3", "nbconvert_exporter": "python", "file_extension": ".py" }, "kernelspec": { "name": "python3", "display_name": "Python 3.8.6 64-bit" }, "interpreter": { "hash": "c8daecebaf2d81430b8373e4b4af380b12df116248cd1bbadd3fc947f45a1f88" } }, "nbformat": 4, "nbformat_minor": 2 }