{ "cells": [ { "cell_type": "code", "execution_count": 45, "id": "20812e3f-698f-4539-964e-bfaa79d4b7ec", "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import requests\n", "import shutil\n", "import time as time\n", "from os.path import join,expanduser" ] }, { "cell_type": "code", "execution_count": 46, "id": "de4baf13-3f64-4798-bdc7-cdf16eeab494", "metadata": {}, "outputs": [], "source": [ "# user_home_dir = expanduser('~')\n", "user_home_dir = r'D:\\0_RawData\\ecco'\n", "\n", "# xarray is used to read and plot the downloaded NetCDF files\n", "import xarray as xr\n", "import sys\n", "\n", "import matplotlib.pyplot as plt\n", "\n", "# for concurrent simultaneous downloads\n", "from concurrent.futures import ThreadPoolExecutor\n", "from getpass import getpass\n", "from http.cookiejar import CookieJar\n", "from io import StringIO\n", "from itertools import repeat\n", "from pathlib import Path\n", "from platform import system\n", "from netrc import netrc\n", "from os.path import basename, isfile, isdir\n", "# progress bar\n", "from tqdm import tqdm\n", "# library to download files\n", "from urllib import request\n", "\n", "# Predict the path of the netrc file depending on os/platform type.\n", "_netrc = join(user_home_dir, \"_netrc\" if system()==\"Windows\" else \".netrc\")\n", "\n", "# load the ecco_v4_py package\n", "# here is is assumed to be installed in /home/user/ECCOv4-py\n", "# change as needed for your setup\n", "sys.path.append(join(user_home_dir,'ECCOv4-py'))\n", "import ecco_v4_py as ecco" ] }, { "cell_type": "code", "execution_count": 47, "id": "214149e4-626c-4a1a-8d07-888b97908216", "metadata": {}, "outputs": [], "source": [ "# not pretty but it works\n", "def setup_earthdata_login_auth(url: str='urs.earthdata.nasa.gov'):\n", " # look for the netrc file and use the login/password\n", " try:\n", " username, _, password = netrc(file=_netrc).authenticators(url)\n", "\n", " # if the file is not found, prompt the user for the login/password\n", " except (FileNotFoundError, TypeError):\n", " print('Please provide Earthdata Login credentials for access.')\n", " username, password = input('Username: '), getpass('Password: ')\n", "\n", " manager = request.HTTPPasswordMgrWithDefaultRealm()\n", " manager.add_password(None, url, username, password)\n", " auth = request.HTTPBasicAuthHandler(manager)\n", " jar = CookieJar()\n", " processor = request.HTTPCookieProcessor(jar)\n", " opener = request.build_opener(auth, processor)\n", " request.install_opener(opener)" ] }, { "cell_type": "code", "execution_count": 48, "id": "8921d24a-45b6-4480-b251-fa5a6e67b319", "metadata": {}, "outputs": [], "source": [ "def set_params(params: dict):\n", " params.update({'scroll': \"true\", 'page_size': 2000})\n", " return {par: val for par, val in params.items() if val is not None}\n", "\n", "def get_results(params: dict, headers: dict=None):\n", " response = requests.get(url=\"https://cmr.earthdata.nasa.gov/search/granules.csv\",\n", " params=set_params(params),\n", " headers=headers)\n", " return response, response.headers\n", "\n", "\n", "def get_granules(params: dict):\n", " response, headers = get_results(params=params)\n", " scroll = headers['CMR-Scroll-Id']\n", " hits = int(headers['CMR-Hits'])\n", " if hits==0:\n", " raise Exception(\"No granules matched your input parameters.\")\n", " df = pd.read_csv(StringIO(response.text))\n", " while hits > df.index.size:\n", " response, _ = get_results(params=params, headers={'CMR-Scroll-Id': scroll})\n", " data = 
  {
   "cell_type": "code",
   "execution_count": 48,
   "id": "8921d24a-45b6-4480-b251-fa5a6e67b319",
   "metadata": {},
   "outputs": [],
   "source": [
    "def set_params(params: dict):\n",
    "    # request scrolled results in pages of up to 2000 granules\n",
    "    params.update({'scroll': \"true\", 'page_size': 2000})\n",
    "    return {par: val for par, val in params.items() if val is not None}\n",
    "\n",
    "def get_results(params: dict, headers: dict=None):\n",
    "    response = requests.get(url=\"https://cmr.earthdata.nasa.gov/search/granules.csv\",\n",
    "                            params=set_params(params),\n",
    "                            headers=headers)\n",
    "    return response, response.headers\n",
    "\n",
    "\n",
    "def get_granules(params: dict):\n",
    "    response, headers = get_results(params=params)\n",
    "    scroll = headers['CMR-Scroll-Id']\n",
    "    hits = int(headers['CMR-Hits'])\n",
    "    if hits==0:\n",
    "        raise Exception(\"No granules matched your input parameters.\")\n",
    "    df = pd.read_csv(StringIO(response.text))\n",
    "    # keep requesting pages (via the CMR-Scroll-Id header) until the\n",
    "    # DataFrame holds all CMR-Hits rows\n",
    "    while hits > df.index.size:\n",
    "        response, _ = get_results(params=params, headers={'CMR-Scroll-Id': scroll})\n",
    "        data = pd.read_csv(StringIO(response.text))\n",
    "        df = pd.concat([df, data])\n",
    "    return df"
   ]
  },
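  {
   "cell_type": "markdown",
   "id": "cmr-paging-note",
   "metadata": {},
   "source": [
    "A note on the paging above: CMR returns at most `page_size` (here 2000) granules per response, so `get_granules` keeps replaying the search with the `CMR-Scroll-Id` header until the DataFrame holds all `CMR-Hits` rows. A search matching 5000 granules, for example, would take three requests (2000 + 2000 + 1000); the week-long search below finishes in one."
   ]
  },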
  {
   "cell_type": "code",
   "execution_count": 49,
   "id": "1fdbb861-f3b8-480b-8c8b-ff416b09f3be",
   "metadata": {},
   "outputs": [],
   "source": [
    "# To force a redownload of the file, pass **True** to the boolean argument *force* (default **False**)\n",
    "def download_file(url: str, output_dir: str, force: bool=False):\n",
    "    \"\"\"\n",
    "    url (str): the HTTPS url from which the file will download\n",
    "    output_dir (str): the local path into which the file will download\n",
    "    force (bool): download even if the file exists locally already\n",
    "\n",
    "    Returns the size of the downloaded file in bytes, or 0 if the download was skipped.\n",
    "    \"\"\"\n",
    "    if not isdir(output_dir):\n",
    "        raise Exception(f\"Output directory doesn't exist! ({output_dir})\")\n",
    "\n",
    "    target_file = join(output_dir, basename(url))\n",
    "\n",
    "    # if the file has already been downloaded, skip\n",
    "    if isfile(target_file) and force is False:\n",
    "        print(f'\\n{basename(url)} already exists, and force=False, not re-downloading')\n",
    "        return 0\n",
    "\n",
    "    # stream=True so the file is written in chunks rather than held in memory\n",
    "    with requests.get(url, stream=True) as r:\n",
    "        if not r.status_code // 100 == 2:\n",
    "            raise Exception(r.text)\n",
    "        with open(target_file, 'wb') as f:\n",
    "            total_size_in_bytes = int(r.headers.get('content-length', 0))\n",
    "            for chunk in r.iter_content(chunk_size=1024):\n",
    "                if chunk:\n",
    "                    f.write(chunk)\n",
    "\n",
    "    return total_size_in_bytes"
   ]
  },
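  {
   "cell_type": "markdown",
   "id": "download-file-usage-note",
   "metadata": {},
   "source": [
    "`download_file` also works on its own, outside the concurrent wrapper defined next. A minimal sketch, using names (`dls`, `download_dir`) that are only defined further below:\n",
    "\n",
    "```python\n",
    "# hypothetical single-file usage; returns bytes written, or 0 if skipped\n",
    "n_bytes = download_file(dls[0], str(download_dir), force=False)\n",
    "```"
   ]
  },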
  {
   "cell_type": "code",
   "execution_count": 50,
   "id": "dea39e8d-1843-4e90-b9da-f772b101bfb9",
   "metadata": {},
   "outputs": [],
   "source": [
    "def download_files_concurrently(dls, download_dir, force=False):\n",
    "    start_time = time.time()\n",
    "\n",
    "    # download with up to max_workers threads (max_workers is set in a later cell)\n",
    "    with ThreadPoolExecutor(max_workers=max_workers) as executor:\n",
    "\n",
    "        # tqdm makes a progress bar\n",
    "        results = list(tqdm(executor.map(download_file, dls, repeat(download_dir), repeat(force)),\n",
    "                            total=len(dls), desc='DL Progress', ascii=True, ncols=75, file=sys.stdout))\n",
    "\n",
    "    # add up the total downloaded file sizes\n",
    "    total_download_size_in_bytes = np.sum(np.array(results))\n",
    "    # calculate total time spent in the download\n",
    "    total_time = time.time() - start_time\n",
    "\n",
    "    print('\\n=====================================')\n",
    "    print(f'total downloaded: {np.round(total_download_size_in_bytes/1e6,2)} MB')\n",
    "    print(f'avg download speed: {np.round(total_download_size_in_bytes/1e6/total_time,2)} MB/s')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "id": "5b314c00-82ba-49c1-8ba5-052c2f2cda9e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ECCO dataset ShortName\n",
    "#ShortName = \"ECCO_L4_OCEAN_TEMPERATURE_SALINITY_LLC0090GRID_DAILY_V4R4\"\n",
    "ShortName = 'ECCO_L4_SSH_LLC0090GRID_DAILY_V4R4'\n",
    "# desired date range\n",
    "StartDate = \"2000-01-01\"\n",
    "EndDate = \"2000-01-07\"\n",
    "\n",
    "\n",
    "# Dec. 2023 update: StartDate/EndDate adjustments\n",
    "# for monthly and daily datasets, shift StartDate one day later so the\n",
    "# granule for the month or day before is not included\n",
    "if (('MONTHLY' in ShortName) or ('DAILY' in ShortName)):\n",
    "    StartDate = str(np.datetime64(StartDate,'D') + np.timedelta64(1,'D'))\n",
    "# for snapshot datasets, move EndDate one day later\n",
    "if 'SNAPSHOT' in ShortName:\n",
    "    EndDate = str(np.datetime64(EndDate,'D') + np.timedelta64(1,'D'))"
   ]
  },
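  {
   "cell_type": "markdown",
   "id": "date-shift-note",
   "metadata": {},
   "source": [
    "For the daily SSH dataset selected above, this shifts `StartDate` from `2000-01-01` to `2000-01-02`:\n",
    "\n",
    "```python\n",
    "str(np.datetime64('2000-01-01','D') + np.timedelta64(1,'D'))  # -> '2000-01-02'\n",
    "```\n",
    "\n",
    "which matches the `temporal` range in the search parameters printed a few cells below."
   ]
  },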
  {
   "cell_type": "code",
   "execution_count": 52,
   "id": "3cf3a7d9-b80b-4450-86cc-ae96b81cca9a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "created download directory D:\\0_RawData\\ecco\\ECCO_L4_SSH_LLC0090GRID_DAILY_V4R4\n"
     ]
    }
   ],
   "source": [
    "# define root directory for downloaded NetCDF files\n",
    "download_root_dir = Path(user_home_dir)\n",
    "\n",
    "# define the directory where the downloaded files will be saved\n",
    "download_dir = download_root_dir / ShortName\n",
    "\n",
    "# create the download directory\n",
    "download_dir.mkdir(exist_ok=True, parents=True)\n",
    "\n",
    "print(f'created download directory {download_dir}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "id": "df0d9d8f-f2cb-42bf-b69b-77f3aab7f79b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Please provide Earthdata Login credentials for access.\n"
     ]
    },
    {
     "name": "stdin",
     "output_type": "stream",
     "text": [
      "Username: AuroraMok\n",
      "Password: ········\n"
     ]
    }
   ],
   "source": [
    "# log in to Earthdata with this command:\n",
    "setup_earthdata_login_auth()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "id": "729ba532-8048-4d7b-bc9f-2695e189074f",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'ShortName': 'ECCO_L4_SSH_LLC0090GRID_DAILY_V4R4', 'temporal': '2000-01-02,2000-01-07'}\n"
     ]
    }
   ],
   "source": [
    "# create a Python dictionary with our search criteria: `ShortName` and `temporal`\n",
    "input_search_params = {'ShortName': ShortName,\n",
    "                       'temporal': \",\".join([StartDate, EndDate])}\n",
    "\n",
    "print(input_search_params)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "id": "a1e0ecda-310e-4ae1-883d-012981df25d3",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 7 entries, 0 to 6\n",
      "Data columns (total 9 columns):\n",
      " #   Column               Non-Null Count  Dtype  \n",
      "---  ------               --------------  -----  \n",
      " 0   Granule UR           7 non-null      object \n",
      " 1   Producer Granule ID  0 non-null      float64\n",
      " 2   Start Time           7 non-null      object \n",
      " 3   End Time             7 non-null      object \n",
      " 4   Online Access URLs   7 non-null      object \n",
      " 5   Browse URLs          0 non-null      float64\n",
      " 6   Cloud Cover          0 non-null      float64\n",
      " 7   Day/Night            7 non-null      object \n",
      " 8   Size                 7 non-null      float64\n",
      "dtypes: float64(4), object(5)\n",
      "memory usage: 636.0+ bytes\n",
      "\n",
      "Total number of matching granules: 7\n"
     ]
    }
   ],
   "source": [
    "# grans means 'granules', PO.DAAC's term for individual files in a dataset\n",
    "grans = get_granules(input_search_params)\n",
    "\n",
    "# what did we find? --- 7 granules!\n",
    "grans.info()\n",
    "\n",
    "num_grans = len(grans['Granule UR'])\n",
    "print(f'\\nTotal number of matching granules: {num_grans}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "id": "31e51208-0449-499c-83da-84ea4cad4d4b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/ECCO_L4_SSH_LLC0090GRID_DAILY_V4R4/SEA_SURFACE_HEIGHT_day_mean_2000-01-01_ECCO_V4r4_native_llc0090.nc\n"
     ]
    }
   ],
   "source": [
    "# convert the rows of the 'Online Access URLs' column to a Python list\n",
    "dls = grans['Online Access URLs'].tolist()\n",
    "\n",
    "# the url of the first file is\n",
    "print(dls[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "id": "e2222923-7490-47c9-9d58-6f8db007964e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# maximum number of concurrent download threads\n",
    "max_workers = 6"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "id": "f1794810-9868-424f-93fe-12c79d446d09",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "DL Progress:   0%|                                | 0/7 [00:02<?, ?it/s]\n"
     ]
    },
    {
     "ename": "Exception",
     "evalue": "HTTP Basic: Access denied.",
     "output_type": "error",
     "traceback": [
      "---------------------------------------------------------------------------",
      "Exception                                 Traceback (most recent call last)",
      "Cell In[63], line 3",
      "----> 3 download_files_concurrently(dls, download_dir, force)",
      "Cell In[50], line 8, in download_files_concurrently(dls, download_dir, force)",
      "----> 8 results = list(tqdm(executor.map(download_file, dls, repeat(download_dir), repeat(force)), ...)",
      "Cell In[49], line 20, in download_file(url, output_dir, force)",
      "---> 20 raise Exception(r.text)",
      "Exception: HTTP Basic: Access denied."
     ]
    }
   ],
   "source": [
    "# do not force a redownload if the file already exists locally\n",
    "force=False\n",
    "download_files_concurrently(dls, download_dir, force)"
   ]
  },
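  {
   "cell_type": "markdown",
   "id": "access-denied-note",
   "metadata": {},
   "source": [
    "`HTTP Basic: Access denied.` is what Earthdata returns when a request arrives without valid credentials. One likely cause in this notebook: `setup_earthdata_login_auth` installs its opener for `urllib.request` only, while `download_file` fetches with the `requests` library, which looks up netrc credentials on its own (first via the `NETRC` environment variable, then via a netrc file in the home directory), so with `user_home_dir` pointing at a custom location, `requests` never sees the `_netrc` file built above. A minimal sketch of one possible fix, assuming your credentials are in that file, is to point `requests` at it and rerun the download cell:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "netrc-env-sketch",
   "metadata": {},
   "outputs": [],
   "source": [
    "# sketch of a possible fix (not run here): point requests at the\n",
    "# custom-location netrc file; requests checks the NETRC environment\n",
    "# variable before falling back to a netrc file in the home directory\n",
    "import os\n",
    "os.environ['NETRC'] = _netrc"
   ]
  },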
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "35dc3299-8c2b-4657-829c-7a1aeed2b8aa",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}