{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Hyperparameter search for text classification (Pytorch)\n", "\n", "In this tutorial we present how to use hyperparameter optimization on a text classification analysis example from the Pytorch documentation.\n", "\n", "**Reference**:\n", " This tutorial is based on materials from the Pytorch Documentation: [Text classification with the torchtext library](https://pytorch.org/tutorials/beginner/text_sentiment_ngrams_tutorial.html)" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: deephyper in /Users/romainegele/Documents/Argonne/deephyper (0.3.4)\n", "Requirement already satisfied: ConfigSpace>=0.4.20 in /Users/romainegele/miniforge3/envs/dh-env-test/lib/python3.9/site-packages (from deephyper) (0.5.0)\n", "Requirement already satisfied: dm-tree in /Users/romainegele/miniforge3/envs/dh-env-test/lib/python3.9/site-packages (from deephyper) (0.1.7)\n", "Requirement already satisfied: Jinja2<3.1 in /Users/romainegele/miniforge3/envs/dh-env-test/lib/python3.9/site-packages (from deephyper) (3.0.3)\n", "Requirement already satisfied: numpy in /Users/romainegele/miniforge3/envs/dh-env-test/lib/python3.9/site-packages (from deephyper) (1.22.4)\n", "Requirement already satisfied: pandas>=0.24.2 in /Users/romainegele/miniforge3/envs/dh-env-test/lib/python3.9/site-packages (from deephyper) (1.4.2)\n", "Requirement already satisfied: packaging in /Users/romainegele/miniforge3/envs/dh-env-test/lib/python3.9/site-packages (from deephyper) (21.3)\n", "Requirement already satisfied: scikit-learn>=0.23.1 in /Users/romainegele/miniforge3/envs/dh-env-test/lib/python3.9/site-packages (from deephyper) (1.1.1)\n", "Requirement already satisfied: scipy>=0.19.1 in /Users/romainegele/miniforge3/envs/dh-env-test/lib/python3.9/site-packages (from deephyper) (1.8.1)\n", "Requirement already satisfied: 
tqdm>=4.64.0 in /Users/romainegele/miniforge3/envs/dh-env-test/lib/python3.9/site-packages (from deephyper) (4.64.0)\n", "Requirement already satisfied: pyyaml in /Users/romainegele/miniforge3/envs/dh-env-test/lib/python3.9/site-packages (from deephyper) (6.0)\n", "Requirement already satisfied: cython in /Users/romainegele/miniforge3/envs/dh-env-test/lib/python3.9/site-packages (from ConfigSpace>=0.4.20->deephyper) (0.29.30)\n", "Requirement already satisfied: pyparsing in /Users/romainegele/miniforge3/envs/dh-env-test/lib/python3.9/site-packages (from ConfigSpace>=0.4.20->deephyper) (3.0.9)\n", "Requirement already satisfied: MarkupSafe>=2.0 in /Users/romainegele/miniforge3/envs/dh-env-test/lib/python3.9/site-packages (from Jinja2<3.1->deephyper) (2.1.1)\n", "Requirement already satisfied: python-dateutil>=2.8.1 in /Users/romainegele/miniforge3/envs/dh-env-test/lib/python3.9/site-packages (from pandas>=0.24.2->deephyper) (2.8.2)\n", "Requirement already satisfied: pytz>=2020.1 in /Users/romainegele/miniforge3/envs/dh-env-test/lib/python3.9/site-packages (from pandas>=0.24.2->deephyper) (2022.1)\n", "Requirement already satisfied: joblib>=1.0.0 in /Users/romainegele/miniforge3/envs/dh-env-test/lib/python3.9/site-packages (from scikit-learn>=0.23.1->deephyper) (1.1.0)\n", "Requirement already satisfied: threadpoolctl>=2.0.0 in /Users/romainegele/miniforge3/envs/dh-env-test/lib/python3.9/site-packages (from scikit-learn>=0.23.1->deephyper) (3.1.0)\n", "Requirement already satisfied: six>=1.5 in /Users/romainegele/miniforge3/envs/dh-env-test/lib/python3.9/site-packages (from python-dateutil>=2.8.1->pandas>=0.24.2->deephyper) (1.15.0)\n", "Requirement already satisfied: ray in /Users/romainegele/miniforge3/envs/dh-env-test/lib/python3.9/site-packages (1.12.1)\n", "Requirement already satisfied: grpcio<=1.43.0,>=1.28.1 in /Users/romainegele/miniforge3/envs/dh-env-test/lib/python3.9/site-packages (from ray) (1.42.0)\n", "Requirement already satisfied: requests in 
/Users/romainegele/miniforge3/envs/dh-env-test/lib/python3.9/site-packages (from ray) (2.27.1)\n", "Requirement already satisfied: click>=7.0 in /Users/romainegele/miniforge3/envs/dh-env-test/lib/python3.9/site-packages (from ray) (8.1.3)\n", "Requirement already satisfied: virtualenv in /Users/romainegele/miniforge3/envs/dh-env-test/lib/python3.9/site-packages (from ray) (20.14.1)\n", "Requirement already satisfied: frozenlist in /Users/romainegele/miniforge3/envs/dh-env-test/lib/python3.9/site-packages (from ray) (1.3.0)\n", "Requirement already satisfied: pyyaml in /Users/romainegele/miniforge3/envs/dh-env-test/lib/python3.9/site-packages (from ray) (6.0)\n", "Requirement already satisfied: jsonschema in /Users/romainegele/miniforge3/envs/dh-env-test/lib/python3.9/site-packages (from ray) (4.6.0)\n", "Requirement already satisfied: protobuf>=3.15.3 in /Users/romainegele/miniforge3/envs/dh-env-test/lib/python3.9/site-packages (from ray) (3.18.1)\n", "Requirement already satisfied: aiosignal in /Users/romainegele/miniforge3/envs/dh-env-test/lib/python3.9/site-packages (from ray) (1.2.0)\n", "Requirement already satisfied: attrs in /Users/romainegele/miniforge3/envs/dh-env-test/lib/python3.9/site-packages (from ray) (21.4.0)\n", "Requirement already satisfied: msgpack<2.0.0,>=1.0.0 in /Users/romainegele/miniforge3/envs/dh-env-test/lib/python3.9/site-packages (from ray) (1.0.4)\n", "Requirement already satisfied: numpy>=1.19.3 in /Users/romainegele/miniforge3/envs/dh-env-test/lib/python3.9/site-packages (from ray) (1.22.4)\n", "Requirement already satisfied: filelock in /Users/romainegele/miniforge3/envs/dh-env-test/lib/python3.9/site-packages (from ray) (3.7.1)\n", "Requirement already satisfied: six>=1.5.2 in /Users/romainegele/miniforge3/envs/dh-env-test/lib/python3.9/site-packages (from grpcio<=1.43.0,>=1.28.1->ray) (1.15.0)\n", "Requirement already satisfied: pyrsistent!=0.17.0,!=0.17.1,!=0.17.2,>=0.14.0 in 
/Users/romainegele/miniforge3/envs/dh-env-test/lib/python3.9/site-packages (from jsonschema->ray) (0.18.1)\n", "Requirement already satisfied: idna<4,>=2.5 in /Users/romainegele/miniforge3/envs/dh-env-test/lib/python3.9/site-packages (from requests->ray) (3.3)\n", "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /Users/romainegele/miniforge3/envs/dh-env-test/lib/python3.9/site-packages (from requests->ray) (1.26.9)\n", "Requirement already satisfied: certifi>=2017.4.17 in /Users/romainegele/miniforge3/envs/dh-env-test/lib/python3.9/site-packages (from requests->ray) (2022.5.18.1)\n", "Requirement already satisfied: charset-normalizer~=2.0.0 in /Users/romainegele/miniforge3/envs/dh-env-test/lib/python3.9/site-packages (from requests->ray) (2.0.12)\n", "Requirement already satisfied: distlib<1,>=0.3.1 in /Users/romainegele/miniforge3/envs/dh-env-test/lib/python3.9/site-packages (from virtualenv->ray) (0.3.4)\n", "Requirement already satisfied: platformdirs<3,>=2 in /Users/romainegele/miniforge3/envs/dh-env-test/lib/python3.9/site-packages (from virtualenv->ray) (2.5.2)\n", "Requirement already satisfied: torch in /Users/romainegele/miniforge3/envs/dh-env-test/lib/python3.9/site-packages (1.11.0)\n", "Requirement already satisfied: torchtext in /Users/romainegele/miniforge3/envs/dh-env-test/lib/python3.9/site-packages (0.12.0)\n", "Requirement already satisfied: torchdata in /Users/romainegele/miniforge3/envs/dh-env-test/lib/python3.9/site-packages (0.3.0)\n", "Requirement already satisfied: typing-extensions in /Users/romainegele/miniforge3/envs/dh-env-test/lib/python3.9/site-packages (from torch) (4.2.0)\n", "Requirement already satisfied: numpy in /Users/romainegele/miniforge3/envs/dh-env-test/lib/python3.9/site-packages (from torchtext) (1.22.4)\n", "Requirement already satisfied: tqdm in /Users/romainegele/miniforge3/envs/dh-env-test/lib/python3.9/site-packages (from torchtext) (4.64.0)\n", "Requirement already satisfied: requests in 
/Users/romainegele/miniforge3/envs/dh-env-test/lib/python3.9/site-packages (from torchtext) (2.27.1)\n", "Requirement already satisfied: urllib3>=1.25 in /Users/romainegele/miniforge3/envs/dh-env-test/lib/python3.9/site-packages (from torchdata) (1.26.9)\n", "Requirement already satisfied: idna<4,>=2.5 in /Users/romainegele/miniforge3/envs/dh-env-test/lib/python3.9/site-packages (from requests->torchtext) (3.3)\n", "Requirement already satisfied: certifi>=2017.4.17 in /Users/romainegele/miniforge3/envs/dh-env-test/lib/python3.9/site-packages (from requests->torchtext) (2022.5.18.1)\n", "Requirement already satisfied: charset-normalizer~=2.0.0 in /Users/romainegele/miniforge3/envs/dh-env-test/lib/python3.9/site-packages (from requests->torchtext) (2.0.12)\n" ] } ], "source": [ "!pip3 install deephyper\n", "!pip3 install ray\n", "!pip3 install torch torchtext torchdata" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Imports" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import ray\n", "import json\n", "import pandas as pd\n", "from functools import partial\n", "\n", "import torch\n", "\n", "from torchtext.data.utils import get_tokenizer\n", "from torchtext.data.functional import to_map_style_dataset\n", "from torchtext.vocab import build_vocab_from_iterator\n", "\n", "from torch.utils.data import DataLoader\n", "from torch.utils.data.dataset import random_split\n", "\n", "from torch import nn" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "
\n", " \n", "Note\n", " \n", "The following can be used to detect if CUDA devices are available on the current host. Therefore, this notebook will automatically adapt the parallel execution based on the resources available locally. However, it will not be the case if many compute nodes are requested.\n", " \n", "
" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "is_gpu_available = torch.cuda.is_available()\n", "n_gpus = torch.cuda.device_count()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## The dataset\n", "\n", "The torchtext library provides a few raw dataset iterators, which yield the raw text strings. For example, the `AG_NEWS` dataset iterators yield the raw data as a tuple of label and text. It has four labels (1 : World 2 : Sports 3 : Business 4 : Sci/Tec).\n", "\n", "
" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from torchtext.datasets import AG_NEWS\n", "\n", "def load_data(train_ratio):\n", " train_iter, test_iter = AG_NEWS()\n", " train_dataset = to_map_style_dataset(train_iter)\n", " test_dataset = to_map_style_dataset(test_iter)\n", " num_train = int(len(train_dataset) * train_ratio)\n", " split_train, split_valid = \\\n", " random_split(train_dataset, [num_train, len(train_dataset) - num_train])\n", "\n", " return split_train, split_valid, test_dataset" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Preprocessing pipelines and Batch generation\n", "\n", "Here is an example for typical NLP data processing with tokenizer and vocabulary. The first step is to build a vocabulary with the raw training dataset. Here we use built in\n", "factory function `build_vocab_from_iterator` which accepts iterator that yield list or iterator of tokens. Users can also pass any special symbols to be added to the\n", "vocabulary.\n", "\n", "The vocabulary block converts a list of tokens into integers.\n", "\n", "```\n", "vocab(['here', 'is', 'an', 'example'])\n", ">>> [475, 21, 30, 5286]\n", "```\n", "\n", "The text pipeline converts a text string into a list of integers based on the lookup table defined in the vocabulary. The label pipeline converts the label into integers. 
For example,\n", "\n", "```\n", "text_pipeline('here is the an example')\n", ">>> [475, 21, 2, 30, 5286]\n", "label_pipeline('10')\n", ">>> 9\n", "```" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/romainegele/miniforge3/envs/dh-env-test/lib/python3.9/site-packages/torch/utils/data/datapipes/utils/common.py:24: UserWarning: Lambda function is not supported for pickle, please use regular python function or functools.partial instead.\n", " warnings.warn(\n" ] } ], "source": [ "train_iter = AG_NEWS(split='train')\n", "num_class = 4\n", "\n", "tokenizer = get_tokenizer('basic_english')\n", "\n", "def yield_tokens(data_iter):\n", " for _, text in data_iter:\n", " yield tokenizer(text)\n", "\n", "vocab = build_vocab_from_iterator(yield_tokens(train_iter), specials=[\"\"])\n", "vocab.set_default_index(vocab[\"\"])\n", "vocab_size = len(vocab)\n", "\n", "text_pipeline = lambda x: vocab(tokenizer(x))\n", "label_pipeline = lambda x: int(x) - 1\n", "\n", "\n", "def collate_batch(batch, device):\n", " label_list, text_list, offsets = [], [], [0]\n", " for (_label, _text) in batch:\n", " label_list.append(label_pipeline(_label))\n", " processed_text = torch.tensor(text_pipeline(_text), dtype=torch.int64)\n", " text_list.append(processed_text)\n", " offsets.append(processed_text.size(0))\n", " label_list = torch.tensor(label_list, dtype=torch.int64)\n", " offsets = torch.tensor(offsets[:-1]).cumsum(dim=0)\n", " text_list = torch.cat(text_list)\n", " return label_list.to(device), text_list.to(device), offsets.to(device)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "
\n", "Note \n", "
\n", "\n", "The `collate_fn` function works on a batch of samples generated from `DataLoader`. The input to `collate_fn` is a batch of data with the batch size in `DataLoader`, and `collate_fn` processes them according to the data processing pipelines declared previously.\n", " \n", "
\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Define the model\n", "\n", "The model is composed of the [nn.EmbeddingBag](https://pytorch.org/docs/stable/nn.html?highlight=embeddingbag#torch.nn.EmbeddingBag) layer plus a linear layer for the classification purpose." ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [], "source": [ "class TextClassificationModel(nn.Module):\n", "\n", " def __init__(self, vocab_size, embed_dim, num_class):\n", " super(TextClassificationModel, self).__init__()\n", " self.embedding = nn.EmbeddingBag(vocab_size, embed_dim, sparse=True)\n", " self.fc = nn.Linear(embed_dim, num_class)\n", " self.init_weights()\n", "\n", " def init_weights(self):\n", " initrange = 0.5\n", " self.embedding.weight.data.uniform_(-initrange, initrange)\n", " self.fc.weight.data.uniform_(-initrange, initrange)\n", " self.fc.bias.data.zero_()\n", "\n", " def forward(self, text, offsets):\n", " embedded = self.embedding(text, offsets)\n", " return self.fc(embedded)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Define functions to train the model and evaluate results.\n", "---------------------------------------------------------\n", "\n", "\n" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def train(model, criterion, optimizer, dataloader):\n", " model.train()\n", "\n", " for _, (label, text, offsets) in enumerate(dataloader):\n", " optimizer.zero_grad()\n", " predicted_label = model(text, offsets)\n", " loss = criterion(predicted_label, label)\n", " loss.backward()\n", " torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)\n", " optimizer.step()\n", "\n", "def evaluate(model, dataloader):\n", " model.eval()\n", " total_acc, total_count = 0, 0\n", "\n", " with torch.no_grad():\n", " for _, (label, text, offsets) in enumerate(dataloader):\n", " predicted_label = model(text, offsets)\n", " total_acc += 
(predicted_label.argmax(1) == label).sum().item()\n", " total_count += label.size(0)\n", " return total_acc/total_count" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Define the run-function\n", "\n", "The run-function defines how the objective that we want to maximize is computed. It takes a `config` dictionary as input and often returns a scalar value that we want to maximize. The `config` contains a sample value of hyperparameters that we want to tune. In this example we will search for:\n", "\n", "* `num_epochs` (default value: `10`)\n", "* `batch_size` (default value: `64`)\n", "* `learning_rate` (default value: `5`)\n", "\n", "A hyperparameter value can be accessed easily in the dictionary through the corresponding key, for example `config[\"batch_size\"]`." ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def get_run(train_ratio=0.95):\n", " def run(config: dict):\n", " device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", "\n", " embed_dim = 64\n", " \n", " collate_fn = partial(collate_batch, device=device)\n", " split_train, split_valid, _ = load_data(train_ratio)\n", " train_dataloader = DataLoader(split_train, batch_size=int(config[\"batch_size\"]),\n", " shuffle=True, collate_fn=collate_fn)\n", " valid_dataloader = DataLoader(split_valid, batch_size=int(config[\"batch_size\"]),\n", " shuffle=True, collate_fn=collate_fn)\n", "\n", " model = TextClassificationModel(vocab_size, int(embed_dim), num_class).to(device)\n", " \n", " criterion = torch.nn.CrossEntropyLoss()\n", " optimizer = torch.optim.SGD(model.parameters(), lr=config[\"learning_rate\"])\n", "\n", " for _ in range(1, int(config[\"num_epochs\"]) + 1):\n", " train(model, criterion, optimizer, train_dataloader)\n", " \n", " accu_test = evaluate(model, valid_dataloader)\n", " return accu_test\n", " return run" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We create two versions of 
`run`, one quicker to evaluate for the search, with a small training dataset, and another one, for performance evaluation, which uses a normal training/validation ratio." ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "quick_run = get_run(train_ratio=0.3)\n", "perf_run = get_run(train_ratio=0.95)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "
\n", "Note \n", "
\n", "\n", "The objective maximised by DeepHyper is the scalar value returned by the `run`-function.\n", " \n", "
\n", "\n", "In this tutorial it corresponds to the validation accuracy of the model after training." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Define the Hyperparameter optimization problem\n", "\n", "Hyperparameter ranges are defined using the following syntax:\n", "\n", "* Discrete integer ranges are generated from a tuple `(lower: int, upper: int)`\n", "* Continuous parameters are generated from a tuple `(lower: float, upper: float)`\n", "* Categorical or nonordinal hyperparameter ranges can be given as a list of possible values `[val1, val2, ...]`\n", "\n", "We provide the default configuration of hyperparameters as a starting point of the problem." ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Configuration space object:\n", " Hyperparameters:\n", " batch_size, Type: UniformInteger, Range: [8, 512], Default: 64, on log-scale\n", " learning_rate, Type: UniformFloat, Range: [0.1, 10.0], Default: 5.0, on log-scale\n", " num_epochs, Type: UniformInteger, Range: [5, 20], Default: 10" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from deephyper.problem import HpProblem\n", "\n", "problem = HpProblem()\n", "\n", "# Discrete hyperparameter (sampled with uniform prior)\n", "problem.add_hyperparameter((5, 20), \"num_epochs\", default_value=10)\n", "\n", "# Discrete and Real hyperparameters (sampled with log-uniform)\n", "problem.add_hyperparameter((8, 512, \"log-uniform\"), \"batch_size\", default_value=64)\n", "problem.add_hyperparameter((0.1, 10, \"log-uniform\"), \"learning_rate\", default_value=5)\n", "\n", "problem" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Evaluate a default configuration\n", "\n", "We evaluate the performance of the default set of hyperparameters provided in the Pytorch tutorial." 
] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/romainegele/miniforge3/envs/dh-env-test/lib/python3.9/site-packages/torch/utils/data/datapipes/utils/common.py:24: UserWarning: Lambda function is not supported for pickle, please use regular python function or functools.partial instead.\n", " warnings.warn(\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Accuracy Default Configuration: 0.902\n" ] } ], "source": [ "# We launch the Ray run-time and execute the `run` function\n", "# with the default configuration\n", "\n", "if is_gpu_available:\n", " if not(ray.is_initialized()):\n", " ray.init(num_cpus=n_gpus, num_gpus=n_gpus, log_to_driver=False)\n", " \n", " run_default = ray.remote(num_cpus=1, num_gpus=1)(perf_run)\n", " objective_default = ray.get(run_default.remote(problem.default_configuration))\n", "else:\n", " if not(ray.is_initialized()):\n", " ray.init(num_cpus=1, log_to_driver=False)\n", " run_default = perf_run\n", " objective_default = run_default(problem.default_configuration)\n", "\n", "print(f\"Accuracy Default Configuration: {objective_default:.3f}\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Define the evaluator object\n", "\n", "The `Evaluator` object allows to change the parallelization backend used by DeepHyper. \n", "It is a standalone object which schedules the execution of remote tasks. All evaluators needs a `run_function` to be instantiated. 
\n", "Then a keyword `method` defines the backend (e.g., `\"ray\"`) and the `method_kwargs` corresponds to keyword arguments of this chosen `method`.\n", "\n", "```python\n", "evaluator = Evaluator.create(run_function, method, method_kwargs)\n", "```\n", "\n", "Once created the `evaluator.num_workers` gives access to the number of available parallel workers.\n", "\n", "Finally, to submit and collect tasks to the evaluator one just needs to use the following interface:\n", "\n", "```python\n", "configs = [...]\n", "evaluator.submit(configs)\n", "...\n", "tasks_done = evaluator.get(\"BATCH\", size=1) # For asynchronous\n", "tasks_done = evaluator.get(\"ALL\") # For batch synchronous\n", "```\n", "\n", "
\n", "\n", "Warning\n", "\n", "Each `Evaluator` saves its own state, therefore it is crucial to create a new evaluator when launching a fresh search.\n", " \n", "
\n" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Created new evaluator with 1 worker and config: {'num_cpus': 1, 'num_cpus_per_task': 1, 'callbacks': []}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/Users/romainegele/Documents/Argonne/deephyper/deephyper/evaluator/_evaluator.py:99: UserWarning: Applying nest-asyncio patch for IPython Shell!\n", " warnings.warn(\n" ] } ], "source": [ "from deephyper.evaluator import Evaluator\n", "from deephyper.evaluator.callback import TqdmCallback\n", "\n", "def get_evaluator(run_function):\n", " # Default arguments for Ray: 1 worker and 1 worker per evaluation\n", " method_kwargs = {\n", " \"num_cpus\": 1, \n", " \"num_cpus_per_task\": 1,\n", " \"callbacks\": [TqdmCallback()]\n", " }\n", "\n", " # If GPU devices are detected then it will create 'n_gpus' workers\n", " # and use 1 worker for each evaluation\n", " if is_gpu_available:\n", " method_kwargs[\"num_cpus\"] = n_gpus\n", " method_kwargs[\"num_gpus\"] = n_gpus\n", " method_kwargs[\"num_cpus_per_task\"] = 1\n", " method_kwargs[\"num_gpus_per_task\"] = 1\n", "\n", " evaluator = Evaluator.create(\n", " run_function, \n", " method=\"ray\", \n", " method_kwargs=method_kwargs\n", " )\n", " print(f\"Created new evaluator with {evaluator.num_workers} worker{'s' if evaluator.num_workers > 1 else ''} and config: {method_kwargs}\", )\n", " \n", " return evaluator\n", "\n", "evaluator_1 = get_evaluator(quick_run)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Define and run the Centralized Bayesian Optimization search (CBO)\n", "\n", "We create the CBO using the `problem` and `evaluator` defined above." ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "from deephyper.search.hps import CBO\n", "# Uncomment the following line to show the arguments of CBO.\n", "# CBO?" 
] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "# Instantiate the search with the problem and a specific evaluator\n", "search = CBO(problem, evaluator_1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "
\n", " \n", "Note\n", " \n", "All of DeepHyper's search algorithms have two stopping criteria:\n", "
    \n", "
  • `max_evals (int)`: Defines the maximum number of evaluations that we want to perform. Default to -1 for an infinite number.
  • \n", "
  • `timeout (int)`: Defines a time budget (in seconds) before stopping the search. Default to None for an infinite time budget.
  • \n", "
\n", " \n", "
" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 30/30 [07:08<00:00, 11.39s/it, objective=0.892] " ] } ], "source": [ "results = search.search(max_evals=30)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The returned `results` is a Pandas Dataframe where columns are hyperparameters and information stored by the evaluator:\n", "\n", "* `job_id` is a unique identifier corresponding to the order of creation of tasks\n", "* `objective` is the value returned by the run-function\n", "* `timestamp_submit` is the time (in seconds) when the hyperparameter configuration was submitted by the `Evaluator` relative to the creation of the evaluator.\n", "* `timestamp_gather` is the time (in seconds) when the hyperparameter configuration was collected by the `Evaluator` relative to the creation of the evaluator." ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
batch_sizelearning_ratenum_epochsjob_idobjectivetimestamp_submittimestamp_gather
0130.3961631010.8763100.16712621.861172
1230.1763521320.83681021.89340346.585527
2850.615317930.84925046.60355062.330578
394.6478541540.88852462.34827296.357171
4221.862895750.89213196.375062111.058422
5138.390420760.892155111.077491127.247439
6565.709700770.888726127.265297140.431804
7471.0739371780.891607140.449749168.683609
81160.356077890.757940168.787206183.260858
9393.1270177100.891643183.278615196.925297
105120.3487086110.478179197.162700207.922961
115123.72631917120.879036208.166723232.997138
125128.7653706130.864667233.320723244.213859
135121.9108155140.737810244.466962254.037378
145120.3139726150.471179254.293386265.038085
155120.39025417160.659024265.296111291.446844
165120.3141176170.463964291.793700302.993824
175120.4061976180.495940303.264247314.411603
185120.3207376190.476452314.679307325.618635
195120.3101326200.455762325.959387336.946107
205120.2771786210.438250337.212328348.157166
215120.2769176220.449714348.422956359.220447
225120.2739786230.453071359.485135370.697273
235120.2986666240.450143371.048602382.304554
245120.2803376250.463952382.571858393.747633
255120.2924636260.469250394.017767405.152517
265120.2738286270.459488405.424290416.641872
275120.3069016280.461964416.986107428.063347
285120.2810896290.458131428.335923439.447637
295120.2788336300.436702439.720846450.642993
\n", "
" ], "text/plain": [ " batch_size learning_rate num_epochs job_id objective \\\n", "0 13 0.396163 10 1 0.876310 \n", "1 23 0.176352 13 2 0.836810 \n", "2 85 0.615317 9 3 0.849250 \n", "3 9 4.647854 15 4 0.888524 \n", "4 22 1.862895 7 5 0.892131 \n", "5 13 8.390420 7 6 0.892155 \n", "6 56 5.709700 7 7 0.888726 \n", "7 47 1.073937 17 8 0.891607 \n", "8 116 0.356077 8 9 0.757940 \n", "9 39 3.127017 7 10 0.891643 \n", "10 512 0.348708 6 11 0.478179 \n", "11 512 3.726319 17 12 0.879036 \n", "12 512 8.765370 6 13 0.864667 \n", "13 512 1.910815 5 14 0.737810 \n", "14 512 0.313972 6 15 0.471179 \n", "15 512 0.390254 17 16 0.659024 \n", "16 512 0.314117 6 17 0.463964 \n", "17 512 0.406197 6 18 0.495940 \n", "18 512 0.320737 6 19 0.476452 \n", "19 512 0.310132 6 20 0.455762 \n", "20 512 0.277178 6 21 0.438250 \n", "21 512 0.276917 6 22 0.449714 \n", "22 512 0.273978 6 23 0.453071 \n", "23 512 0.298666 6 24 0.450143 \n", "24 512 0.280337 6 25 0.463952 \n", "25 512 0.292463 6 26 0.469250 \n", "26 512 0.273828 6 27 0.459488 \n", "27 512 0.306901 6 28 0.461964 \n", "28 512 0.281089 6 29 0.458131 \n", "29 512 0.278833 6 30 0.436702 \n", "\n", " timestamp_submit timestamp_gather \n", "0 0.167126 21.861172 \n", "1 21.893403 46.585527 \n", "2 46.603550 62.330578 \n", "3 62.348272 96.357171 \n", "4 96.375062 111.058422 \n", "5 111.077491 127.247439 \n", "6 127.265297 140.431804 \n", "7 140.449749 168.683609 \n", "8 168.787206 183.260858 \n", "9 183.278615 196.925297 \n", "10 197.162700 207.922961 \n", "11 208.166723 232.997138 \n", "12 233.320723 244.213859 \n", "13 244.466962 254.037378 \n", "14 254.293386 265.038085 \n", "15 265.296111 291.446844 \n", "16 291.793700 302.993824 \n", "17 303.264247 314.411603 \n", "18 314.679307 325.618635 \n", "19 325.959387 336.946107 \n", "20 337.212328 348.157166 \n", "21 348.422956 359.220447 \n", "22 359.485135 370.697273 \n", "23 371.048602 382.304554 \n", "24 382.571858 393.747633 \n", "25 394.017767 405.152517 \n", "26 405.424290 416.641872 
\n", "27 416.986107 428.063347 \n", "28 428.335923 439.447637 \n", "29 439.720846 450.642993 " ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "results" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Evaluate the best configuration\n", "\n", "Now that the search is over, let us print the best configuration found during this run and evaluate it on the full training dataset." ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The default configuration has an accuracy of 0.902. \n", "The best configuration found by DeepHyper has an accuracy 0.892, \n", "finished after 127.25 secondes of search.\n", "\n", "{\n", " \"batch_size\": 13.0,\n", " \"learning_rate\": 8.39041977280772,\n", " \"num_epochs\": 7.0,\n", " \"job_id\": 6.0\n", "}\n" ] } ], "source": [ "i_max = results.objective.argmax()\n", "best_config = results.iloc[i_max][:-3].to_dict()\n", "\n", "print(f\"The default configuration has an accuracy of {objective_default:.3f}. 
\\n\" \n", " f\"The best configuration found by DeepHyper has an accuracy {results['objective'].iloc[i_max]:.3f}, \\n\" \n", " f\"finished after {results['timestamp_gather'].iloc[i_max]:.2f} secondes of search.\\n\")\n", "\n", "print(json.dumps(best_config, indent=4))" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/romainegele/miniforge3/envs/dh-env-test/lib/python3.9/site-packages/torch/utils/data/datapipes/utils/common.py:24: UserWarning: Lambda function is not supported for pickle, please use regular python function or functools.partial instead.\n", " warnings.warn(\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Accuracy Best Configuration: 0.914\n" ] } ], "source": [ "objective_best = perf_run(best_config)\n", "print(f\"Accuracy Best Configuration: {objective_best:.3f}\")" ] } ], "metadata": { "interpreter": { "hash": "9dc039a2a4f4ad8a9dc018393b0776cc00e1bb4d428a37e9ad776085656c6f7f" }, "kernelspec": { "display_name": "Python 3.9.13 ('dh-env-test')", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.13" } }, "nbformat": 4, "nbformat_minor": 0 }