{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "6282ba43",
   "metadata": {},
   "source": [
    "This example shows how to run mdopt on a GPU using CuPy. It contains two code cells meant to be run in Colab: the first times an MPS-MPO contraction on the CPU and the second times the same contraction on the GPU, so that the two timings can be compared."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dac5ce7b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# %pip (rather than !pip) installs into the environment of the running kernel.\n",
    "%pip install -q \"git+https://github.com/quicophy/mdopt.git\"\n",
    "\n",
    "import time, numpy as np\n",
    "from tqdm import tqdm\n",
    "from mdopt.mps.utils import create_simple_product_state\n",
    "from mdopt.utils.utils import create_random_mpo, mpo_to_matrix\n",
    "from mdopt.contractor.contractor import mps_mpo_contract\n",
    "from mdopt.backend import array as A\n",
    "\n",
    "print(\"Backend GPU flag:\", A.GPU)  # should be False\n",
    "\n",
    "def bench(run_label=\"CPU\", num_sites=48, phys_dim=2, mpo_len=32, chi=256, reps=5):\n",
    "    \"\"\"Time repeated MPS-MPO contractions and return the average time per run.\n",
    "\n",
    "    Args:\n",
    "        run_label: Label printed in front of the timing, e.g. \"CPU\".\n",
    "        num_sites: Number of sites passed to ``create_simple_product_state``.\n",
    "        phys_dim: Physical dimension passed to both the MPS and the MPO constructors.\n",
    "        mpo_len: Number of sites passed to ``create_random_mpo``.\n",
    "        chi: Bond dimension used for each of the ``mpo_len - 1`` MPO bonds.\n",
    "        reps: Number of timed contractions; must be at least 1.\n",
    "\n",
    "    Returns:\n",
    "        Average wall-clock time, in seconds, of one timed contraction.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If ``reps`` is smaller than 1.\n",
    "    \"\"\"\n",
    "    if reps < 1:\n",
    "        raise ValueError(\"reps must be at least 1\")\n",
    "    mps = create_simple_product_state(num_sites=num_sites, which=\"0\", phys_dim=phys_dim)\n",
    "    mpo = create_random_mpo(\n",
    "        num_sites=mpo_len,\n",
    "        bond_dimensions=[chi] * (mpo_len - 1),\n",
    "        phys_dim=phys_dim,\n",
    "        which=\"uniform\",\n",
    "    )\n",
    "    start_site = 0\n",
    "    # Untimed warm-up contraction before the clock starts.\n",
    "    _ = mps_mpo_contract(mps.copy(), mpo, start_site=start_site, renormalise=False)\n",
    "    # Synchronise before reading the clock (presumably waits for pending\n",
    "    # backend work -- confirm against mdopt.backend).\n",
    "    A.synchronize()\n",
    "    t0 = time.perf_counter()\n",
    "    for _ in tqdm(range(reps)):\n",
    "        _ = mps_mpo_contract(mps.copy(), mpo, start_site=start_site, renormalise=False)\n",
    "    A.synchronize()\n",
    "    t1 = time.perf_counter()\n",
    "    seconds_per_run = (t1 - t0) / reps\n",
    "    print(f\"{run_label}: {seconds_per_run:.4f} s per run\")\n",
    "    # Return the measurement so the caller's assignment holds a number, not None.\n",
    "    return seconds_per_run\n",
    "\n",
    "# Run the CPU benchmark; bench prints the average time per run.\n",
    "cpu_time = bench(\"CPU\", num_sites=48, mpo_len=32, chi=256, reps=10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "908d389e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# In Colab, switch the runtime to GPU before running this cell.\n",
    "\n",
    "# Show the GPU visible to this runtime.\n",
    "!nvidia-smi\n",
    "# %pip (rather than !pip) installs into the environment of the running kernel.\n",
    "%pip install -q cupy-cuda12x\n",
    "%pip install -q \"git+https://github.com/quicophy/mdopt.git\"\n",
    "\n",
    "# Request the CuPy backend; set before the mdopt imports below\n",
    "# (presumably mdopt reads this variable at import time -- confirm).\n",
    "%env MDOPT_BACKEND=cupy\n",
    "\n",
    "import time, numpy as np\n",
    "from tqdm import tqdm\n",
    "from mdopt.mps.utils import create_simple_product_state\n",
    "from mdopt.utils.utils import create_random_mpo\n",
    "from mdopt.contractor.contractor import mps_mpo_contract\n",
    "from mdopt.backend import array as A\n",
    "\n",
    "print(\"Backend GPU flag:\", A.GPU)  # should be True\n",
    "\n",
    "def bench(run_label=\"GPU\", num_sites=48, phys_dim=2, mpo_len=32, chi=256, reps=5):\n",
    "    \"\"\"Time repeated MPS-MPO contractions and return the average time per run.\n",
    "\n",
    "    Args:\n",
    "        run_label: Label printed in front of the timing, e.g. \"GPU\".\n",
    "        num_sites: Number of sites passed to ``create_simple_product_state``.\n",
    "        phys_dim: Physical dimension passed to both the MPS and the MPO constructors.\n",
    "        mpo_len: Number of sites passed to ``create_random_mpo``.\n",
    "        chi: Bond dimension used for each of the ``mpo_len - 1`` MPO bonds.\n",
    "        reps: Number of timed contractions; must be at least 1.\n",
    "\n",
    "    Returns:\n",
    "        Average wall-clock time, in seconds, of one timed contraction.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If ``reps`` is smaller than 1.\n",
    "    \"\"\"\n",
    "    if reps < 1:\n",
    "        raise ValueError(\"reps must be at least 1\")\n",
    "    mps = create_simple_product_state(num_sites=num_sites, which=\"0\", phys_dim=phys_dim)\n",
    "    mpo = create_random_mpo(\n",
    "        num_sites=mpo_len,\n",
    "        bond_dimensions=[chi] * (mpo_len - 1),\n",
    "        phys_dim=phys_dim,\n",
    "        which=\"uniform\",\n",
    "    )\n",
    "    start_site = 0\n",
    "    # Untimed warm-up contraction before the clock starts.\n",
    "    _ = mps_mpo_contract(mps.copy(), mpo, start_site=start_site, renormalise=False)\n",
    "    # Synchronise before reading the clock (presumably waits for pending\n",
    "    # device work -- confirm against mdopt.backend).\n",
    "    A.synchronize()\n",
    "    t0 = time.perf_counter()\n",
    "    for _ in tqdm(range(reps)):\n",
    "        _ = mps_mpo_contract(mps.copy(), mpo, start_site=start_site, renormalise=False)\n",
    "    A.synchronize()\n",
    "    t1 = time.perf_counter()\n",
    "    seconds_per_run = (t1 - t0) / reps\n",
    "    print(f\"{run_label}: {seconds_per_run:.4f} s per run\")\n",
    "    # Return the measurement so the caller's assignment holds a number, not None.\n",
    "    return seconds_per_run\n",
    "\n",
    "# Run the GPU benchmark; bench prints the average time per run.\n",
    "gpu_time = bench(\"GPU\", num_sites=48, mpo_len=32, chi=256, reps=10)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "mdopt-ZdbamFdU-py3.10",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.10.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}