Reading Argo data by date#

Let’s use, as an example, data in the *Atlantic for the 11th November 2020. It is pre-downloaded in the ./Data folder, but you can download it from the Coriolis GDAC See here for instructions on how to download the data

import numpy as np
import netCDF4
import xarray as xr

import cartopy.crs as ccrs
import cartopy

import matplotlib as mpl
import matplotlib.cm as cm
from matplotlib import pyplot as plt
%matplotlib inline
# Usefull colormaps and colorbar makers:
qcmap = mpl.colors.ListedColormap(['#000000' , '#31FC03' , '#ADFC03' , '#FCBA03' ,'#FC1C03',
                                   '#324CA8' , '#000000' , '#000000' , '#B22CC9', '#000000'])
def colorbar_qc(cmap, **kwargs):
    """Adjust colorbar ticks with discrete colors for QC flags"""
    ncolors = 10
    mappable = cm.ScalarMappable(cmap=cmap)
    mappable.set_array([])
    mappable.set_clim(-0.5, ncolors+0.5)
    colorbar = plt.colorbar(mappable, **kwargs)
    colorbar.set_ticks(np.linspace(0, ncolors, ncolors))
    colorbar.set_ticklabels(range(ncolors))
    return colorbar
dayADS = xr.open_dataset('../../Data/atlantic_ocean/2020/11/20201111_prof.nc')
dayADS
<xarray.Dataset>
Dimensions:                       (N_PROF: 188, N_PARAM: 3, N_LEVELS: 1331, N_CALIB: 3, N_HISTORY: 0)
Dimensions without coordinates: N_PROF, N_PARAM, N_LEVELS, N_CALIB, N_HISTORY
Data variables: (12/64)
    DATA_TYPE                     object b'Argo profile    '
    FORMAT_VERSION                object b'3.1 '
    HANDBOOK_VERSION              object b'1.2 '
    REFERENCE_DATE_TIME           object b'19500101000000'
    DATE_CREATION                 object b'20201111082709'
    DATE_UPDATE                   object b'20211211022000'
    ...                            ...
    HISTORY_ACTION                (N_HISTORY, N_PROF) object 
    HISTORY_PARAMETER             (N_HISTORY, N_PROF) object 
    HISTORY_START_PRES            (N_HISTORY, N_PROF) float32 
    HISTORY_STOP_PRES             (N_HISTORY, N_PROF) float32 
    HISTORY_PREVIOUS_VALUE        (N_HISTORY, N_PROF) float32 
    HISTORY_QCTEST                (N_HISTORY, N_PROF) object 
Attributes:
    title:                Argo float vertical profile
    institution:          FR GDAC
    source:               Argo float
    history:              2021-12-11T02:20:00Z creation
    references:           http://www.argodatamgt.org/Documentation
    user_manual_version:  3.1
    Conventions:          Argo-3.1 CF-1.6
    featureType:          trajectoryProfile
print(f" for this day there were {dayADS.dims['N_PROF']} profiles")
 for this day there were 188 profiles

For each one of the profiles, which are the Argo Core missions ones, this is the Primary sampling, so we have all the meta-information to track the float that did the observations. Let’s see it for a few profiles:

for i1 in range(1,dayADS.dims['N_PROF'],10):
    print(f"WMO {dayADS.data_vars['PLATFORM_NUMBER'].values.astype(str)[i1]}"
          f" Data Center {dayADS.data_vars['DATA_CENTRE'].values.astype(str)[i1]}"   
          f" Data Center {dayADS.data_vars['PROJECT_NAME'].values.astype(str)[i1]}" )
WMO 4903277  Data Center AO Data Center US ARGO PROJECT                                                 
WMO 7900506  Data Center IF Data Center ARGO-BSH                                                        
WMO 6901986  Data Center IF Data Center Argo Netherlands                                                
WMO 4902117  Data Center AO Data Center US ARGO PROJECT                                                 
WMO 4902354  Data Center AO Data Center US ARGO PROJECT                                                 
WMO 3901551  Data Center BO Data Center Argo UK                                                         
WMO 6902727  Data Center IF Data Center CORIOLIS                                                        
WMO 3901869  Data Center IF Data Center MOCCA-EU                                                        
WMO 6902818  Data Center IF Data Center GMMC OVIDE                                                      
WMO 4903276  Data Center AO Data Center US ARGO PROJECT                                                 
WMO 3901823  Data Center AO Data Center US ARGO PROJECT                                                 
WMO 6903552  Data Center IF Data Center Argo-Norway                                                     
WMO 3901633  Data Center IF Data Center ARGO-BSH                                                        
WMO 6902944  Data Center IF Data Center PERLE                                                           
WMO 6900892  Data Center IF Data Center ARGO-BSH                                                        
WMO 6901254  Data Center IF Data Center ARGO SPAIN                                                      
WMO 4902505  Data Center ME Data Center Argo Canada                                                     
WMO 3901601  Data Center IF Data Center ARGO-BSH                                                        
WMO 6902965  Data Center IF Data Center MOOSE                                                           

The correspondence for the DATA_CENTRE code and the name is in the Reference table 4: data centres and institutions codes

And we have all the geographical information in LONGITUDE and LATITUDE. First, let’s read the data for the same day in the pacific_ocean and indian_ocean

dayPDS = xr.open_dataset('../../Data/pacific_ocean/2020/11/20201111_prof.nc')
dayIDS = xr.open_dataset('../../Data/indian_ocean/2020/11/20201111_prof.nc')
fig,ax = plt.subplots(figsize=(15,10),subplot_kw={'projection': ccrs.PlateCarree()})
ax.set_global()
ax.plot(dayADS.LONGITUDE,dayADS.LATITUDE,'ob')
ax.plot(dayPDS.LONGITUDE,dayPDS.LATITUDE,'ob')
ax.plot(dayIDS.LONGITUDE,dayIDS.LATITUDE,'ob')

ax.set_title(f"Data from {dayADS.JULD[0].values.astype('datetime64[D]')}")
ax.coastlines()
ax.add_feature(cartopy.feature.LAND)
ax.gridlines(draw_labels=True, x_inline=False, y_inline=False);

ax.grid()
../../_images/b70b6959fed67d2d36f2d0a9e341ec34b90c2fb869ce2ba8e6c0c808a4179c77.png

In these files there are also the core argo variables TEMP PSAL and PRE, we can just take a quick look at the data a the most superficial level for each profile.

fig,ax = plt.subplots(figsize=(18,10),subplot_kw={'projection': ccrs.PlateCarree()})
ax.set_global()

cs=ax.scatter(dayADS.LONGITUDE,dayADS.LATITUDE,c=dayADS.TEMP[:,1],cmap="RdBu_r",vmin=-2, vmax=30, edgecolor='none')
ax.scatter(dayPDS.LONGITUDE,dayPDS.LATITUDE,c=dayPDS.TEMP[:,1],cmap="RdBu_r", vmin=-2, vmax=30, edgecolor='none')
ax.scatter(dayIDS.LONGITUDE,dayIDS.LATITUDE,c=dayIDS.TEMP[:,1],cmap="RdBu_r", vmin=-2, vmax=30, edgecolor='none')

ax.set_title(f"Most superficial TEMP data on the {dayADS.JULD[0].values.astype('datetime64[D]')}")
ax.coastlines()
ax.add_feature(cartopy.feature.LAND.with_scale('110m'))
ax.gridlines(draw_labels=True, x_inline=False, y_inline=False);


cbar=fig.colorbar(cs,ax=ax,extend='both',orientation='horizontal',shrink=.8,aspect=40)
cbar.set_label(dayPDS.TEMP.long_name)
../../_images/eae7fcb2a4a47d47239eb8f36a9feddd51a7e9ddc2cd0b2128a0162f0b69762a.png

Or with a little of interpolation, the Temperature a 10 dbar:

fig,ax = plt.subplots(figsize=(15,10),subplot_kw={'projection': ccrs.PlateCarree()})
ax.set_global()

#For the 3 oceans
for filein in ['../../Data/atlantic_ocean/2020/11/20201111_prof.nc', 
               '../../Data/pacific_ocean/2020/11/20201111_prof.nc', 
              '../../Data/indian_ocean/2020/11/20201111_prof.nc']:
    DS=xr.open_dataset(filein)
    lon=DS.LONGITUDE.values
    lat=DS.LATITUDE.values
    tempi= np.zeros(lon.shape[0])
    tempi.fill(np.nan)
    for ip in range(0,DS.LONGITUDE.shape[0]):
        tempi[ip]=np.interp(10,DS.PRES[ip,:],DS.TEMP[ip,:])
    cs=ax.scatter(lon,lat,c=tempi,cmap="RdBu_r",vmin=-2, vmax=30, edgecolor='none')

ax.set_title(f"Data from {DS.JULD[0].values.astype('datetime64[D]')}")
ax.coastlines()
ax.add_feature(cartopy.feature.LAND.with_scale('110m'))

ax.gridlines(draw_labels=True, x_inline=False, y_inline=False);
ax.grid()

cbar=fig.colorbar(cs,ax=ax,extend='both',orientation='horizontal',shrink=.8,aspect=40)
cbar.set_label(dayPDS.TEMP.long_name)
../../_images/a7afacc688b8be3a429555a35032899be3b3e155f2961a9d0befe43539976237.png

To evalute the 10 dbar temperature in the last month:

fig,ax = plt.subplots(figsize=(15,10),subplot_kw={'projection': ccrs.PlateCarree()})
ax.set_global()

#For the 3 oceans
for basin in ['atlantic_ocean','pacific_ocean','indian_ocean']:
    for iday in range(1,31):
        filein=f"../../Data/{basin}/2010/11/201011{iday:02d}_prof.nc"
        DS=xr.open_dataset(filein)
        lon=DS.LONGITUDE.values
        lat=DS.LATITUDE.values
        tempi= np.zeros(lon.shape[0])
        tempi.fill(np.nan)
        for ip in range(0,lon.shape[0]):
            tempi[ip]=np.interp(10,DS.PRES[ip,:],DS.TEMP[ip,:])
        cs=ax.scatter(lon,lat,c=tempi,cmap="RdBu_r",vmin=-2, vmax=30, edgecolor='none')

ax.set_title(f"Data from {DS.JULD[0].values.astype('datetime64[M]')}")
ax.coastlines()
ax.add_feature(cartopy.feature.LAND.with_scale('110m'))

ax.gridlines(draw_labels=True, x_inline=False, y_inline=False);
ax.grid()

cbar=fig.colorbar(cs,ax=ax,extend='both',orientation='horizontal',shrink=.8,aspect=40)
cbar.set_label(dayPDS.TEMP.long_name)    
../../_images/7a02baba1937bb52e6586808d6489026f11118de422ddebe9d2c78c52915ef94.png

Let’s compare with 10 years ago:

fig,ax = plt.subplots(figsize=(15,10),subplot_kw={'projection': ccrs.PlateCarree()})
ax.set_global()

#For the 3 oceans
for basin in ['atlantic_ocean','pacific_ocean','indian_ocean']:
    for iday in range(1,31):
        filein=f"../../Data/{basin}/2010/11/201011{iday:02d}_prof.nc"
        DS=xr.open_dataset(filein)
        lon=DS.LONGITUDE.values
        lat=DS.LATITUDE.values
        tempi= np.zeros(lon.shape[0])
        tempi.fill(np.nan)
        for ip in range(0,lon.shape[0]):
            tempi[ip]=np.interp(10,DS.PRES[ip,:],DS.TEMP[ip,:])
        cs=ax.scatter(lon,lat,c=tempi,cmap="RdBu_r",vmin=-2, vmax=30, edgecolor='none')

ax.set_title(f"Data from {DS.JULD[0].values.astype('datetime64[M]')}")
ax.coastlines()
ax.add_feature(cartopy.feature.LAND.with_scale('110m'))

ax.gridlines(draw_labels=True, x_inline=False, y_inline=False);
ax.grid()

cbar=fig.colorbar(cs,ax=ax,extend='both',orientation='horizontal',shrink=.8,aspect=40)
cbar.set_label(dayPDS.TEMP.long_name)    
../../_images/ccf7eadf582e75e258fde693cc3d5715d25547f41010ad05ed1f079fd3430338.png

and it is possible to get the WMO of all the platforms measured during this month, together with its data acquisition center. Hence, we can download the netCDF files for each cycle if necessary :

WMOs=np.array([])
DACs=np.array([])
#read all the basins
for basin in ['atlantic_ocean','pacific_ocean','indian_ocean']:
    for iday in range(1,31):
        filein=f"../../Data/{basin}/2020/11/202011{iday:02d}_prof.nc"
        DS=xr.open_dataset(filein)
        #look for the WMO and DAC for each float
        DACs=np.append(DACs,DS.DATA_CENTRE.astype(str).values)
        WMOs=np.append(WMOs,DS.PLATFORM_NUMBER.astype(int).values)

#Keep just the unique set of WMOs
WMOs, indices = np.unique(WMOs, return_index=True)
DACs=DACs[indices]
print(f"During november 2020 {WMOs.shape[0]} Argo floats where active:")
for ip in range(0,WMOs.shape[0],500):
    print(f"{ip} WMO {WMOs[ip]} DAC {DACs[ip]}")
During november 2020 3903 Argo floats where active:
0 WMO 1901302.0 DAC BO
500 WMO 2902785.0 DAC HZ
1000 WMO 3901867.0 DAC IF
1500 WMO 4902549.0 DAC ME
2000 WMO 5904196.0 DAC AO
2500 WMO 5905188.0 DAC CS
3000 WMO 5906066.0 DAC AO
3500 WMO 6902874.0 DAC IF
WMOs=np.array([])
DACs=np.array([])
#read all the basins
for basin in ['atlantic_ocean','pacific_ocean','indian_ocean']:
    for iday in range(1,31):
        filein=f"../../Data/{basin}/2020/11/202011{iday:02d}_prof.nc"
        DS=xr.open_dataset(filein)
        #look for the WMO and DAC for each float
        DACs=np.append(DACs,DS.DATA_CENTRE.astype(str).values)
        WMOs=np.append(WMOs,DS.PLATFORM_NUMBER.astype(int).values)

#Keep just the unique set of WMOs
WMOs, indices = np.unique(WMOs, return_index=True)
DACs=DACs[indices]
fig,ax = plt.subplots(figsize=(15,10))
ax.plot(dayADS.PSAL.transpose(),dayADS.PRES.transpose(),'o-')
ax.set_ylim(0,2000)
ax.invert_yaxis()
ax.set_xlabel(f"{dayADS.PSAL.long_name}")
ax.set_ylabel(f"{dayADS.PRES.long_name}");
../../_images/db4e00ef59cd1c6a37c7b703fffa09db4963c9ae1aa7645a9b89954113d0f249.png

There is some incorrect, or at least suspicious data. In the data variables of the netCDF file, we realise that there are a lot of the _QC variables. In the case of PSAL_QC, it changes for some profiles

fig, ax = plt.subplots(figsize=(20,10))
sc = ax.scatter(dayADS.PSAL, dayADS.PRES, c=dayADS.PSAL_QC, vmin=0, vmax=9, cmap=qcmap)
colorbar_qc(qcmap, ax=ax)
ax.grid()
ax.set_ylim(0,2000)
ax.invert_yaxis()
ax.set_xlabel(f"{dayADS.PSAL.long_name}")
ax.set_ylabel('Pressure')
ax.set_title(f"PSAL {dayADS.PSAL_QC.long_name}");
../../_images/afef449f0d5106d462a9e7fd1073825700d741586187cf23f2544078fec772e2.png

In sections Real Time quality control data and Delayed mode data you may learn everything about the use of this quality control flags, so you can choose the best data available!