In [45]:
import numpy as np
import pandas as pd
import requests
import shutil
import time as time
from os.path import join,expanduser

In [46]:
# user_home_dir = expanduser('~')
user_home_dir = r'D:\0_RawData\ecco'

# xarray is used to read and plot the downloaded NetCDF files
import xarray as xr
import sys

import matplotlib.pyplot as plt

# for concurrent simultaneous downloads
from concurrent.futures import ThreadPoolExecutor
from getpass import getpass
from http.cookiejar import CookieJar
from io import StringIO
from itertools import repeat
from pathlib import Path
from platform import system
from netrc import netrc
from os.path import basename, isfile, isdir
# progress bar
from tqdm import tqdm
# library to download files
from urllib import request

# Predict the path of the netrc file depending on os/platform type.
_netrc = join(user_home_dir, "_netrc" if system()=="Windows" else ".netrc")

# load the ecco_v4_py package
# here is is assumed to be installed in /home/user/ECCOv4-py
# change as needed for your setup
sys.path.append(join(user_home_dir,'ECCOv4-py'))
import ecco_v4_py as ecco

In [47]:
# not pretty but it works
def setup_earthdata_login_auth(url: str='urs.earthdata.nasa.gov'):
 # look for the netrc file and use the login/password
 try:
 username, _, password = netrc(file=_netrc).authenticators(url)

 # if the file is not found, prompt the user for the login/password
 except (FileNotFoundError, TypeError):
 print('Please provide Earthdata Login credentials for access.')
 username, password = input('Username: '), getpass('Password: ')

 manager = request.HTTPPasswordMgrWithDefaultRealm()
 manager.add_password(None, url, username, password)
 auth = request.HTTPBasicAuthHandler(manager)
 jar = CookieJar()
 processor = request.HTTPCookieProcessor(jar)
 opener = request.build_opener(auth, processor)
 request.install_opener(opener)

In [48]:
def set_params(params: dict):
 params.update({'scroll': "true", 'page_size': 2000})
 return {par: val for par, val in params.items() if val is not None}

def get_results(params: dict, headers: dict=None):
 response = requests.get(url="https://cmr.earthdata.nasa.gov/search/granules.csv",
 params=set_params(params),
 headers=headers)
 return response, response.headers


def get_granules(params: dict):
 response, headers = get_results(params=params)
 scroll = headers['CMR-Scroll-Id']
 hits = int(headers['CMR-Hits'])
 if hits==0:
 raise Exception("No granules matched your input parameters.")
 df = pd.read_csv(StringIO(response.text))
 while hits > df.index.size:
 response, _ = get_results(params=params, headers={'CMR-Scroll-Id': scroll})
 data = pd.read_csv(StringIO(response.text))
 df = pd.concat([df, data])
 return df

In [49]:
# To force redownload of the file, pass **True** to the boolean argument *force* (default **False**)
def download_file(url: str, output_dir: str, force: bool=False):
 """
 url (str): the HTTPS url from which the file will download
 output_dir (str): the local path into which the file will download
 force (bool): download even if the file exists locally already
 """
 if not isdir(output_dir):
 raise Exception(f"Output directory doesnt exist! ({output_dir})")

 target_file = join(output_dir, basename(url))

 # if the file has already been downloaded, skip
 if isfile(target_file) and force is False:
 print(f'\n{basename(url)} already exists, and force=False, not re-downloading')
 return 0

 with requests.get(url) as r:
 if not r.status_code // 100 == 2:
 raise Exception(r.text)
 return 0
 else:
 with open(target_file, 'wb') as f:
 total_size_in_bytes= int(r.headers.get('content-length', 0))
 for chunk in r.iter_content(chunk_size=1024):
 if chunk:
 f.write(chunk)

 return total_size_in_bytes

In [50]:
def download_files_concurrently(dls, download_dir, force=False):
 start_time = time.time()

 # use 3 threads for concurrent downloads
 with ThreadPoolExecutor(max_workers=max_workers) as executor:

 # tqdm makes a cool progress bar
 results = list(tqdm(executor.map(download_file, dls, repeat(download_dir), repeat(force)),\
 total=len(dls), desc='DL Progress', ascii=True, ncols=75, file=sys.stdout))

 # add up the total downloaded file sizes
 total_download_size_in_bytes = np.sum(np.array(results))
 # calculate total time spent in the download
 total_time = time.time() - start_time

 print('\n=====================================')
 print(f'total downloaded: {np.round(total_download_size_in_bytes/1e6,2)} Mb')
 print(f'avg download speed: {np.round(total_download_size_in_bytes/1e6/total_time,2)} Mb/s')

In [51]:
# ECCO dataset ShortName
#ShortName = "ECCO_L4_OCEAN_TEMPERATURE_SALINITY_LLC0090GRID_DAILY_V4R4"
ShortName = 'ECCO_L4_SSH_LLC0090GRID_DAILY_V4R4'
# desired date range
StartDate = "2000-01-01"
EndDate = "2000-01-07"


# # Dec. 2023 update: StartDate/EndDate adjustments
# for monthly and daily datasets, do not include the month or day before
if (('MONTHLY' in ShortName) or ('DAILY' in ShortName)):
 StartDate = str(np.datetime64(StartDate,'D') + np.timedelta64(1,'D'))
# for snapshot datasets, move EndDate one day later
if 'SNAPSHOT' in ShortName:
 EndDate = str(np.datetime64(EndDate,'D') + np.timedelta64(1,'D'))

In [52]:
# define root directory for downloaded NetCDF files
download_root_dir = Path(user_home_dir)

# define the directory where the downloaded files will be saved
download_dir = download_root_dir / ShortName

# create the download directory
download_dir.mkdir(exist_ok = True, parents=True)

print(f'created download directory {download_dir}')

created download directory D:\0_RawData\ecco\ECCO_L4_SSH_LLC0090GRID_DAILY_V4R4


In [53]:
# actually log in with this command:
setup_earthdata_login_auth()

Please provide Earthdata Login credentials for access.


Username: AuroraMok
Password: ········


In [59]:
# create a Python dictionary with our search criteria: `ShortName` and `temporal`
input_search_params = {'ShortName': ShortName,
 'temporal': ",".join([StartDate, EndDate])}

print(input_search_params)

{'ShortName': 'ECCO_L4_SSH_LLC0090GRID_DAILY_V4R4', 'temporal': '2000-01-02,2000-01-07'}


In [60]:
# grans means 'granules', PO.DAAC's term for individual files in a dataset
grans = get_granules(input_search_params)

# what did we find? --- 8 granules!
grans.info()

num_grans = len( grans['Granule UR'] )
print (f'\nTotal number of matching granules: {num_grans}')


RangeIndex: 7 entries, 0 to 6
Data columns (total 9 columns):
 # Column Non-Null Count Dtype 
--- ------ -------------- ----- 
 0 Granule UR 7 non-null object 
 1 Producer Granule ID 0 non-null float64
 2 Start Time 7 non-null object 
 3 End Time 7 non-null object 
 4 Online Access URLs 7 non-null object 
 5 Browse URLs 0 non-null float64
 6 Cloud Cover 0 non-null float64
 7 Day/Night 7 non-null object 
 8 Size 7 non-null float64
dtypes: float64(4), object(5)
memory usage: 636.0+ bytes

Total number of matching granules: 7


In [61]:
# convert the rows of the 'Online Access URLS' column to a Python list
dls = grans['Online Access URLs'].tolist()

# the url of the first file is
print(dls[0])

https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/ECCO_L4_SSH_LLC0090GRID_DAILY_V4R4/SEA_SURFACE_HEIGHT_day_mean_2000-01-01_ECCO_V4r4_native_llc0090.nc


In [62]:
max_workers = 6

In [63]:
# Ex 2) Do not force redownload if the file exists
force=False
download_files_concurrently(dls, download_dir, force)

DL Progress: 0%| | 0/7 [00:02 3[0m download_files_concurrently(dls, download_dir, force)
Cell [1;32mIn[50], line 8[0m, in [0;36mdownload_files_concurrently[1;34m(dls, download_dir, force)[0m
[0;32m 4[0m [38;5;66;03m# use 3 threads for concurrent downloads[39;00m
[0;32m 5[0m [38;5;28;01mwith[39;00m ThreadPoolExecutor(max_workers[38;5;241m=[39mmax_workers) [38;5;28;01mas[39;00m executor:
[0;32m 6[0m 
[0;32m 7[0m [38;5;66;03m# tqdm makes a cool progress bar[39;00m
[1;32m----> 8[0m results [38;5;241m=[39m [38;5;28mlist[39m(tqdm(executor[38;5;241m.[39mmap(download_file, dls, repeat(download_dir), repeat(force)),\
[0;32m 9[0m total[38;5;241m=[39m[38;5;28mlen[39m(dls), desc[38;5;241m=[39m[38;5;124m'[39m[38;5;124mDL Progress[39m[38;5;124m'[39m, ascii[38;5;241m=[39m[38;5;28;01mTrue[39;00m, ncols[38;5;241m=[39m[38;5;241m75[39m, file[38;5;241m=[39msys[38;5;241m.[39mstdout))
[0;32m 11[0m [38;5;66;03m# add up the total downloaded file siz