Commit c4add0c5 authored by ph290

updated to make the model run faster and to write out netcdf files directly if the required modules are available

parent 6c70dcfe
@@ -13,23 +13,50 @@ import uuid
# you may need to change things here #
##################################################
base_directory = '/gpfs/ts0/home/ph290/s2p3_rv2.0/'
base_directory = '/home/ph290/s2p3_rv2.0/'
num_procs = mp.cpu_count() # this will use all available processors. Note that on a multi-node machine it can only use the processors on one node
# num_procs = 1
output_directory = '/gpfs/ts0/projects/Research_Project-148395/s2p3_rv2.0/output/' #where you want the output to go
output_file_name = 'sw_uk_lw_res'
# output_directory = '/gpfs/ts0/projects/Research_Project-148395/s2p3_rv2.0/output/era5/test/' #where you want the output to go
output_directory = '/data/ph290/s2p3_rv2.0/output/era5_global/' #where you want the output to go
#output_file_name = 'global_tropics_era5_test'
#meterological_file_name = 'meterological_data'
#domain_file_name = 's12_m2_s2_n2_h_map_test.dat'
#nutrient_file_name = 'initial_nitrate_test.dat'
#executable_file_name = 's2p3_rv2.0'
output_file_name = 'global_tropics_era5'
meterological_file_name = 'meterological_data'
domain_file_name = 's12_m2_s2_n2_h_map.dat'
nutrient_file_name = 'initial_nitrate.dat'
domain_file_name = 's12_m2_s2_n2_h_map_1801803030_0.1.dat'
nutrient_file_name = 'initial_nitrate_1801803030_0.1.dat'
executable_file_name = 's2p3_rv2.0'
met_data_location = '/gpfs/ts0/projects/Research_Project-148395/s2p3_rv2.0/test/' # The location containing the tar.gz met files (in the format met_data_year.tar.gz)
met_data_temporary_location = base_directory+'met/spatial_data/' # The location that met data for each year will be untarred (gunzipped) into
start_year = 2006
end_year = 2006 # setting this the same as start_year results in a 1-year run
# output_file_name = 'gbr_coarse'
# meterological_file_name = 'meterological_data'
# domain_file_name = 's12_m2_s2_n2_h_map_gbr_coarse.dat'
# nutrient_file_name = 'initial_nitrate_gbr_coarse.dat'
# executable_file_name = 's2p3_rv2.0'
met_data_location = '/data/ph290/s2p3_rv2.0/met_data/era5_global/' # The location containing the tar.gz met files (in the format met_data_year.tar.gz)
# met_data_location = '/data/ph290/s2p3_rv2.0/met_data/gbr_coarse/' # The location containing the tar.gz met files (in the format met_data_year.tar.gz)
# met_data_temporary_location = base_directory+'met/spatial_data/' # The location that met data for each year will be untarred (gunzipped) into
met_data_temporary_location = '/mnt/ramdisk/' # The location that met data for each year will be untarred (gunzipped) into
# each grid point has to read in a new meteorology dataset from disk each year, so it may make sense to make this temporary location a RAM disk (see readme)
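# a RAM disk can be created on Linux with tmpfs before launching the run, e.g. (illustrative commands, adjust size and mount point to your system):
#   sudo mkdir -p /mnt/ramdisk
#   sudo mount -t tmpfs -o size=2G tmpfs /mnt/ramdisk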
start_year = 1979
# having to restart because of isca restart
end_year = 2017 # setting this the same as start_year results in a 1-year run
depth_min = 10 # NOTE that these numbers MUST be the same as those used in the scripts used to produce the meteorology and nutrient files, otherwise data will not be taken for the correct lats/lons and/or the script will fail
depth_max = 100
depth_max = 50
write_error_output = False
parallel_processing = True
generate_netcdf_files = True
#######################################################
# Variables to output from model #
# =1 means output, =0 means do not output #
@@ -41,8 +68,8 @@ include_depth_output=1
include_temp_surface_output=1
include_temp_bottom_output=1
include_chlorophyll_surface_output=1
include_phyto_biomass_surface_output=1
include_phyto_biomass_bottom_output=1
include_phyto_biomass_surface_output=0
include_phyto_biomass_bottom_output=0
include_PAR_surface_output=1 # daily mean surface PAR / W m-2
include_PAR_bottom_output=1 # daily mean bottom PAR / W m-2
include_windspeed_output=0 # windspeed
@@ -60,25 +87,84 @@ include_tpn1_output=0 # total water column net production / mg C m-2 d-1
include_tpg1_output=0 # total water column gross production / mg C m-2 hd-1
include_speed3_output=0 # depth-mean current speed
columns = [include_depth_output,include_temp_surface_output,include_temp_bottom_output,include_chlorophyll_surface_output,include_phyto_biomass_surface_output,include_phyto_biomass_bottom_output,include_PAR_surface_output,include_PAR_bottom_output,include_windspeed_output,include_stressx_output,include_stressy_output,include_Etide_output,include_Ewind_output,include_u_mean_surface_output,include_u_mean_bottom_output,include_grow1_mean_surface_output,include_grow1_mean_bottom_output,include_uptake1_mean_surface_output,include_uptake1_mean_bottom_output,include_tpn1_output,include_tpg1_output,include_speed3_output]
column_names_all = ['depth','surface temperature','bottom temperature','surface chlorophyll','surface phyto biomass','bottom phyto biomass','surface PAR','bottom PAR','windspeed','stress_x','stress_y','Etide','Ewind','u_mean_surface','u_mean_bottom','grow1_mean_surface','grow1_mean_bottom','uptake1_mean_surface','uptake1_mean_bottom','tpn1','tpg1','speed3']
##################################################
# functions used by the script #
##################################################
def distance(lat1, lon1, lat2, lon2):
p = 0.017453292519943295
a = 0.5 - cos((lat2-lat1)*p)/2 + cos(lat1*p)*cos(lat2*p) * (1-cos((lon2-lon1)*p)) / 2
return 12742 * asin(sqrt(a))
def closest(data, lat1,lon1):
return min(data, key=lambda p: distance(lat1,lon1,p[0],p[1]))
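# distance() is the compact haversine formula, returning the great-circle distance in km (12742 km = Earth's diameter);
# closest() returns the (lat, lon) pair in 'data' nearest to (lat1, lon1).
# illustrative usage with made-up points:
#   closest([(50.0, -5.0), (51.0, -4.0)], 50.2, -4.9)  # -> (50.0, -5.0)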
# def return_domain_lon(filename,i):
# f=open(filename)
# lines=f.readlines()
# return filter(lambda a: a != '', lines[i+1].split(' '))[0:2]
#
#
if generate_netcdf_files:
import numpy as np
import iris
import pandas as pd
from itertools import compress
from cf_units import Unit
column_names = ['day','longitude','latitude']+list(compress(column_names_all, map(bool,columns)))
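# itertools.compress keeps only the names whose include_* flag above is set to 1,
# e.g. (illustrative) columns = [1,1,0,...] would keep 'depth' and 'surface temperature' but drop 'bottom temperature'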
specifying_names = False
## If specifying_names above is set to True, specify the below. If not, ignore ##
standard_name=['sea_surface_temperature','sea_surface_temperature','sea_surface_temperature','sea_surface_temperature','sea_surface_temperature']
long_name=['Sea Surface Temperature','Sea Surface Temperature','Sea Surface Temperature','Sea Surface Temperature','Sea Surface Temperature']
var_name=['tos','tos','tos','tos','tos']
units=['K','K','K','K','K']
if not(specifying_names):
standard_name=np.tile('sea_surface_temperature',len(column_names))
long_name=np.tile('Sea Surface Temperature',len(column_names))
var_name=np.tile('tos',len(column_names))
units=np.tile('K',len(column_names))
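# note that when specifying_names is False every variable is written with the same placeholder
# metadata ('sea_surface_temperature'/'tos'/K), so the attributes in the resulting netcdf files
# are placeholders rather than correct CF metadata for each variable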
def put_data_into_cube(df,df_domain,variable,specifying_names,standard_name,long_name,var_name,units,run_start_date):
latitudes = np.unique(df_domain['lat'].values)
longitudes = np.unique(df_domain['lon'].values)
latitudes_run = np.unique(df['latitude'].values)
longitudes_run = np.unique(df['longitude'].values)
times = np.unique(df['day'].values)
latitude = iris.coords.DimCoord(latitudes, standard_name='latitude', units='degrees')
longitude = iris.coords.DimCoord(longitudes, standard_name='longitude', units='degrees')
time = iris.coords.DimCoord(times, standard_name='time', units='days')
time = iris.coords.DimCoord(times, standard_name='time', units=Unit('days since '+run_start_date+' 00:00:0.0', calendar='gregorian'))
if specifying_names:
cube = iris.cube.Cube(np.zeros((times.size,latitudes.size, longitudes.size), np.float32),standard_name=standard_name, long_name=long_name, var_name=var_name, units=units,dim_coords_and_dims=[(time,0), (latitude, 1), (longitude, 2)])
else:
cube = iris.cube.Cube(np.zeros((times.size,latitudes.size, longitudes.size), np.float32),standard_name=None, long_name=None, var_name=None, units=None,dim_coords_and_dims=[(time,0), (latitude, 1), (longitude, 2)])
Z,X,Y = np.meshgrid(cube.coord('time').points,cube.coord('longitude').points,cube.coord('latitude').points)
data = cube.data.copy()
data[:] = -999.99
days = np.unique(df['day'].values)
shape = [X.shape[0],X.shape[2]]
for i,day in enumerate(days):
df_tmp = df.loc[df['day'] == day]
for j,lat in enumerate(df_tmp['latitude'].values):
lon = df_tmp['longitude'].values[j]
lat_loc = np.where(np.around(cube.coord('latitude').points,decimals=6) == np.around(lat,decimals=6))[0][0]
lon_loc = np.where(np.around(cube.coord('longitude').points,decimals=6) == np.around(lon,decimals=6))[0][0]
data[i,lat_loc,lon_loc] = df_tmp[variable].values[j]
data = np.ma.masked_where((data.data < -999.9) & (data.data > -1000.0),data)
# data = np.ma.masked_where(data.data == 0.0,data)
cube.data = data
cube.data.fill_value = -999.99
cube.data.data[~(np.isfinite(cube.data.data))]=-999.99
cube.data = np.ma.masked_where(cube.data == -999.99,cube.data)
return cube
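# put_data_into_cube scatters the per-day, per-point model output in 'df' onto the regular
# (time, latitude, longitude) grid taken from the domain file, filling grid cells with no
# model point with -999.99 and masking them in the returned iris cube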
def output_netcdf(year,column_names,df,df_domain,specifying_names,standard_name,long_name,var_name,units,run_start_date,output_directory,output_file_name,i):
column_name = column_names[i]
output_cube = put_data_into_cube(df,df_domain,column_name,specifying_names,standard_name,long_name,var_name,units,run_start_date)
iris.fileformats.netcdf.save(output_cube, output_directory+output_file_name+'_'+column_name.replace(" ", "")+'_'+str(year)+'.nc', zlib=True, complevel=2)
return output_directory+output_file_name+'_'+column_name.replace(" ", "")+'_'+str(year)+'.nc written'
domain_file_for_run = base_directory+'domain/'+domain_file_name
fwidths=[8,8,6,6,6,6,6,6,6,6,6,6,8]
df_domain = pd.read_fwf(domain_file_for_run,names=['lon','lat','t1','t2','t3','t4','t5','t6','t7','t8','t9','t10','depth'],widths = fwidths,
skiprows=[0],dtype={'lon':float,'lat':float,'t1':float,'t2':float,'t3':float,'t4':float,'t5':float,'t6':float,'t7':float,'t8':float,'t9':float,'t10':float,'depth':float},usecols=['lon','lat','depth'])
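# the domain file is fixed-width: longitude, latitude, ten further columns (t1-t10) and depth;
# only lon, lat and depth are read in here, and the lat/lon values define the grid the netcdf output is written on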
f=open(base_directory+'domain/'+domain_file_name)
@@ -124,107 +210,102 @@ for i,line in enumerate(lines[1::]):
def run_model(domain_file_name,lats_lons,year,start_year,unique_job_id,met_data_temporary_location,lon_domain,lat_domain,smaj1,smin1,smaj2,smin2,smaj3,smin3,smaj4,smin4,smaj5,smin5,woa_nutrient,alldepth,include_depth_output,include_temp_surface_output,include_temp_bottom_output,include_chlorophyll_surface_output,include_phyto_biomass_surface_output,include_phyto_biomass_bottom_output,include_PAR_surface_output,include_PAR_bottom_output,include_windspeed_output,include_stressx_output,include_stressy_output,include_Etide_output,include_Ewind_output,include_u_mean_surface_output,include_u_mean_bottom_output,include_grow1_mean_surface_output,include_grow1_mean_bottom_output,include_uptake1_mean_surface_output,include_uptake1_mean_bottom_output,include_tpn1_output,include_tpg1_output,include_speed3_output,i):
#modifying so that the fortran code looks for the correct met file, rather than us having to copy it into the working dorectory
#modifying so that the fortran code looks for the correct met file, rather than us having to copy it into the working directory
# lon,lat = return_domain_lon(base_directory+'domain/'+domain_file_name,i)
lon_domain_tmp = float(lon_domain[i])
if lon_domain_tmp < 0.0:
lon_domain_tmp = 360.0+lon_domain_tmp
# forcing_lat_lon = closest(lats_lons, float(lat_domain[i]),float(lon_domain[i]))
# print i
# print str(forcing_lat_lon[0])
# print str(forcing_lat_lon[1])
# print str(float(lat_domain[i]))
# print str(lon_domain_tmp)
run_command = """./"""+executable_file_name+""" << EOF
"""+str(start_year)+"""
"""+str(year)+"""
"""+str(float(lat_domain[i]))+"""
"""+str(lon_domain_tmp)+"""
../domain/"""+domain_file_name+"""
../domain/"""+nutrient_file_name+"""
"""+unique_job_id+"""
"""+met_data_temporary_location+"""
map
"""+str(i+1)+"""
"""+str(smaj1[i])+"""
"""+str(smin1[i])+"""
"""+str(smaj2[i])+"""
"""+str(smin2[i])+"""
"""+str(smaj3[i])+"""
"""+str(smin3[i])+"""
"""+str(smaj4[i])+"""
"""+str(smin4[i])+"""
"""+str(smaj5[i])+"""
"""+str(smin5[i])+"""
"""+str(woa_nutrient[i])+"""
"""+str(alldepth[i])+"""
"""+str(include_depth_output)+"""
"""+str(include_temp_surface_output)+"""
"""+str(include_temp_bottom_output)+"""
"""+str(include_chlorophyll_surface_output)+"""
"""+str(include_phyto_biomass_surface_output)+"""
"""+str(include_phyto_biomass_bottom_output)+"""
"""+str(include_PAR_surface_output)+"""
"""+str(include_PAR_bottom_output)+"""
"""+str(include_windspeed_output)+"""
"""+str(include_stressx_output)+"""
"""+str(include_stressy_output)+"""
"""+str(include_Etide_output)+"""
"""+str(include_Ewind_output)+"""
"""+str(include_u_mean_surface_output)+"""
"""+str(include_u_mean_bottom_output)+"""
"""+str(include_grow1_mean_surface_output)+"""
"""+str(include_grow1_mean_bottom_output)+"""
"""+str(include_uptake1_mean_surface_output)+"""
"""+str(include_uptake1_mean_bottom_output)+"""
"""+str(include_tpn1_output)+"""
"""+str(include_tpg1_output)+"""
"""+str(include_speed3_output)+"""
"""+str(start_year)+"""
"""+str(year)+"""
"""+str(float(lat_domain[i]))+"""
"""+str(lon_domain_tmp)+"""
../domain/"""+domain_file_name+"""
../domain/"""+nutrient_file_name+"""
"""+unique_job_id+"""
"""+met_data_temporary_location+"""
map
"""+str(i+1)+"""
"""+str(smaj1[i])+"""
"""+str(smin1[i])+"""
"""+str(smaj2[i])+"""
"""+str(smin2[i])+"""
"""+str(smaj3[i])+"""
"""+str(smin3[i])+"""
"""+str(smaj4[i])+"""
"""+str(smin4[i])+"""
"""+str(smaj5[i])+"""
"""+str(smin5[i])+"""
"""+str(woa_nutrient[i])+"""
"""+str(alldepth[i])+"""
"""+str(include_depth_output)+"""
"""+str(include_temp_surface_output)+"""
"""+str(include_temp_bottom_output)+"""
"""+str(include_chlorophyll_surface_output)+"""
"""+str(include_phyto_biomass_surface_output)+"""
"""+str(include_phyto_biomass_bottom_output)+"""
"""+str(include_PAR_surface_output)+"""
"""+str(include_PAR_bottom_output)+"""
"""+str(include_windspeed_output)+"""
"""+str(include_stressx_output)+"""
"""+str(include_stressy_output)+"""
"""+str(include_Etide_output)+"""
"""+str(include_Ewind_output)+"""
"""+str(include_u_mean_surface_output)+"""
"""+str(include_u_mean_bottom_output)+"""
"""+str(include_grow1_mean_surface_output)+"""
"""+str(include_grow1_mean_bottom_output)+"""
"""+str(include_uptake1_mean_surface_output)+"""
"""+str(include_uptake1_mean_bottom_output)+"""
"""+str(include_tpn1_output)+"""
"""+str(include_tpg1_output)+"""
"""+str(include_speed3_output)+"""
EOF"""
run_command = '\n'.join(['./{} << EOF'.format(executable_file_name),
str(start_year),
str(year),
str(float(lat_domain[i])),
str(lon_domain_tmp),
'../domain/{}'.format(domain_file_name),
'../domain/{}'.format(nutrient_file_name),
unique_job_id,
met_data_temporary_location,
'map',
str(i+1),
str(smaj1[i]),
str(smin1[i]),
str(smaj2[i]),
str(smin2[i]),
str(smaj3[i]),
str(smin3[i]),
str(smaj4[i]),
str(smin4[i]),
str(smaj5[i]),
str(smin5[i]),
str(woa_nutrient[i]),
str(alldepth[i]),
str(include_depth_output),
str(include_temp_surface_output),
str(include_temp_bottom_output),
str(include_chlorophyll_surface_output),
str(include_phyto_biomass_surface_output),
str(include_phyto_biomass_bottom_output),
str(include_PAR_surface_output),
str(include_PAR_bottom_output),
str(include_windspeed_output),
str(include_stressx_output),
str(include_stressy_output),
str(include_Etide_output),
str(include_Ewind_output),
str(include_u_mean_surface_output),
str(include_u_mean_bottom_output),
str(include_grow1_mean_surface_output),
str(include_grow1_mean_bottom_output),
str(include_uptake1_mean_surface_output),
str(include_uptake1_mean_bottom_output),
str(include_tpn1_output),
str(include_tpg1_output),
str(include_speed3_output),
str(start_year),
str(year),
str(float(lat_domain[i])),
str(lon_domain_tmp),
'../domain/{}'.format(domain_file_name),
'../domain/{}'.format(nutrient_file_name),
unique_job_id,
met_data_temporary_location,
'map',
str(i+1),
str(smaj1[i]),
str(smin1[i]),
str(smaj2[i]),
str(smin2[i]),
str(smaj3[i]),
str(smin3[i]),
str(smaj4[i]),
str(smin4[i]),
str(smaj5[i]),
str(smin5[i]),
str(woa_nutrient[i]),
str(alldepth[i]),
str(include_depth_output),
str(include_temp_surface_output),
str(include_temp_bottom_output),
str(include_chlorophyll_surface_output),
str(include_phyto_biomass_surface_output),
str(include_phyto_biomass_bottom_output),
str(include_PAR_surface_output),
str(include_PAR_bottom_output),
str(include_windspeed_output),
str(include_stressx_output),
str(include_stressy_output),
str(include_Etide_output),
str(include_Ewind_output),
str(include_u_mean_surface_output),
str(include_u_mean_bottom_output),
str(include_grow1_mean_surface_output),
str(include_grow1_mean_bottom_output),
str(include_uptake1_mean_surface_output),
str(include_uptake1_mean_bottom_output),
str(include_tpn1_output),
str(include_tpg1_output),
str(include_speed3_output),
'EOF'
])
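# the here-document (<< EOF ... EOF) feeds these values to the executable's standard input,
# one value per line, in the order the Fortran code expects to read them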
# print run_command
proc = subprocess.Popen([run_command], shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
out, err = proc.communicate()
@@ -241,7 +322,7 @@ unique_job_id = str(uuid.uuid4())
num_lines = sum(1 for line in open(base_directory+'domain/'+domain_file_name)) - 1
# num_lines = 10
print 'initial unzipping of met data to extrcat lat and lon data'
print 'initial unzipping of met data to extract lat and lon data'
subprocess.call('tar -C '+met_data_temporary_location+' -zxf '+met_data_location+'met_data_'+str(start_year)+'.tar.gz', shell=True)
files = glob.glob(met_data_temporary_location+'*_'+str(start_year)+'.dat')
@@ -267,41 +348,80 @@ for i,file in enumerate(files):
print 'looping through years'
#
# for year in range(start_year,end_year+1):
year = start_year
print year
# clean up any pre-existing met files
try:
files_to_delete = glob.glob(met_data_temporary_location+'*.dat')
[os.remove(f) for f in files_to_delete]
except:
print 'no met files to clean up'
subprocess.call('tar -C '+met_data_temporary_location+' -zxf '+met_data_location+'met_data_'+str(year)+'.tar.gz', shell=True)
try:
shutil.move(output_directory+output_file_name+'_'+str(year), output_directory+output_file_name+'_'+str(year)+'_previous')
except:
print 'no previous output file to move'
for year in range(start_year,end_year+1):
print year
# clean up any pre-existing met files
try:
files_to_delete = glob.glob(met_data_temporary_location+'*.dat')
[os.remove(f) for f in files_to_delete]
except:
print 'no met files to clean up'
subprocess.call('tar -C '+met_data_temporary_location+' -zxf '+met_data_location+'met_data_'+str(year)+'.tar.gz', shell=True)
try:
shutil.move(output_directory+output_file_name+'_'+str(year), output_directory+output_file_name+'_'+str(year)+'_previous')
except:
print 'no previous output file to move'
if parallel_processing:
pool = mp.Pool(processes=num_procs)
func = partial(run_model, domain_file_name, lats_lons, year, start_year, unique_job_id, met_data_temporary_location,lon_domain,lat_domain,smaj1,smin1,smaj2,smin2,smaj3,smin3,smaj4,smin4,smaj5,smin5,woa_nutrient,alldepth,include_depth_output,include_temp_surface_output,include_temp_bottom_output,include_chlorophyll_surface_output,include_phyto_biomass_surface_output,include_phyto_biomass_bottom_output,include_PAR_surface_output,include_PAR_bottom_output,include_windspeed_output,include_stressx_output,include_stressy_output,include_Etide_output,include_Ewind_output,include_u_mean_surface_output,include_u_mean_bottom_output,include_grow1_mean_surface_output,include_grow1_mean_bottom_output,include_uptake1_mean_surface_output,include_uptake1_mean_bottom_output,include_tpn1_output,include_tpg1_output,include_speed3_output)
# results,errors = pool.map(func, range(num_lines))
results, errors = zip(*pool.map(func, range(len(lat_domain))))
# results = pool.map(func, range(num_lines))
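# each run_model call is expected to return an (output, error) pair from proc.communicate(),
# so zip(*...) splits the pooled results into one tuple of outputs and one of errors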
with open(output_directory+output_file_name+'_'+str(year),'w') as fout:
if generate_netcdf_files:
# run_start_date = str(year)+'-01-01'
# df = pd.DataFrame(columns=(column_names))
# i=0
# for result in results:
# lines = result.split('\n')[:-1]
# for line in lines:
# # print line
# df.loc[i] = map(float,line.split())
# i+=1
#
# for column_name in column_names[4::]:
# output_cube = put_data_into_cube(df,df_domain,column_name,specifying_names,standard_name,long_name,var_name,units,run_start_date)
# iris.fileformats.netcdf.save(output_cube, output_directory+output_file_name+'_'+column_name.replace(" ", "")+'_'+str(year)+'.nc', zlib=True, complevel=2)
run_start_date = str(year)+'-01-01'
df = pd.DataFrame(columns=(column_names))
i=0
for result in results:
fout.write(result)
if write_error_output:
with open(output_directory+output_file_name+'_error_'+str(year),'w') as fout:
for error in errors:
fout.write(error)
# clean up any leftover met files
try:
files_to_delete = glob.glob(met_data_temporary_location+'*.dat')
[os.remove(f) for f in files_to_delete]
except:
print 'no met files to clean up'
lines = result.split('\n')[:-1]
for line in lines:
# print line
df.loc[i] = map(float,line.split())
i+=1
func = partial(output_netcdf,year,column_names,df,df_domain,specifying_names,standard_name,long_name,var_name,units,run_start_date,output_directory,output_file_name)
my_log = pool.map(func, range(4,len(column_names)))
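# this writes one netcdf file per output variable (column index 4 onwards), farming the
# writes out across the existing worker pool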
"""
else:
with open(output_directory+output_file_name+'_'+str(year),'w') as fout:
for result in results:
fout.write(result)
if write_error_output:
with open(output_directory+output_file_name+'_error_'+str(year),'w') as fout:
for error in errors:
fout.write(error)
"""
pool.close()
# clean up any leftover met files
try:
files_to_delete = glob.glob(met_data_temporary_location+'*.dat')
[os.remove(f) for f in files_to_delete]
except:
print 'no met files to clean up'
remove_files = glob.glob(base_directory+'main/*'+unique_job_id+'*')
try:
remove_files.remove(base_directory+'main/restart'+unique_job_id+'.dat')
......