Commit e5c8307c authored by ph290

update

parent 423490e6
@@ -19,8 +19,8 @@ num_procs = mp.cpu_count() # this will use all available processors. Note that o
# output_directory = '/gpfs/ts0/projects/Research_Project-148395/s2p3_rv2.0/output/era5/test/' #where you want the output to go
# output_directory = '/data/ph290/s2p3_rv2.0/output/era5_global/' #where you want the output to go
output_directory = '/data/ph290/s2p3_rv2.0/output/gbrish/' #where you want the output to go
# output_directory = '/data/ph290/s2p3_rv2.0/output/era5_global_tropics/' #where you want the output to go
output_directory = '/data/ph290/s2p3_rv2.0/output/gbr_coarse/' #where you want the output to go
# output_file_name = 'global_tropics_era5'
# meterological_file_name = 'meterological_data'
@@ -28,10 +28,10 @@ output_directory = '/data/ph290/s2p3_rv2.0/output/gbrish/' #where you want the
# nutrient_file_name = 'initial_nitrate_1801803030_0.1.dat'
# executable_file_name = 's2p3_rv2.0'
output_file_name = 'gbrish_era5'
output_file_name = 'gbr_coarse'
meterological_file_name = 'meterological_data'
domain_file_name = 's12_m2_s2_n2_h_map_gbrish.dat'
nutrient_file_name = 'initial_nitrate_gbrish.dat'
domain_file_name = 's12_m2_s2_n2_h_map_gbr_coarse.dat'
nutrient_file_name = 'initial_nitrate_gbr_coarse.dat'
executable_file_name = 's2p3_rv2.0'
@@ -41,24 +41,25 @@ executable_file_name = 's2p3_rv2.0'
# nutrient_file_name = 'initial_nitrate_gbr_coarse.dat'
# executable_file_name = 's2p3_rv2.0'
met_data_location = '/data/ph290/s2p3_rv2.0/met_data/era5_global/' # The location containing the tar.gz met files (in the format met_data_year.tar.gz)
# met_data_location = '/data/ph290/s2p3_rv2.0/met_data/gbr_coarse/' # The location containing the tar.gz met files (in the format met_data_year.tar.gz)
# met_data_location = '/data/ph290/s2p3_rv2.0/met_data/era5_global_tropics/' # The location containing the tar.gz met files (in the format met_data_year.tar.gz)
met_data_location = '/data/ph290/s2p3_rv2.0/met_data/gbr_coarse/' # The location containing the tar.gz met files (in the format met_data_year.tar.gz)
# met_data_temporary_location = base_directory+'met/spatial_data/' # The location that the met data for each year will be extracted (un-tar.gz'd) into
met_data_temporary_location = '/mnt/ramdisk/' # The location that the met data for each year will be extracted (un-tar.gz'd) into
# each grid point has to read in a new meteorology dataset from disk each year, so it may make sense to make this temporary location a RAM disk (see readme)
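# (for example, assuming root access, a tmpfs RAM disk can be created with something like 'sudo mkdir -p /mnt/ramdisk && sudo mount -t tmpfs -o size=4g tmpfs /mnt/ramdisk'; the 4g size is only illustrative, see the readme for the recommended setup)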
start_year = 1998
start_year = 1997
#having to restart because of isca restart
end_year = 2017 # same as start year results in a 1-year run
depth_min = 10 # NOTE that these numbers MUST be the same as those used in the scripts used to produce the meteorology and nutrient files, otherwise data will not be taken for the correct lats/lons and/or the script will fail
depth_min = 4 # NOTE that these numbers MUST be the same as those used in the scripts used to produce the meteorology and nutrient files, otherwise data will not be taken for the correct lats/lons and/or the script will fail
depth_max = 50
write_error_output = False
parallel_processing = True
generate_netcdf_files = True
#note: does not output error data even if write_error_output is set to True
#######################################################
# Variables to output from model #
@@ -105,8 +106,6 @@ if generate_netcdf_files:
import pandas as pd
from itertools import compress
from cf_units import Unit
column_names = ['day','longitude','latitude']+list(compress(column_names_all, map(bool,columns)))
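# column_names holds the three coordinate columns (day, longitude, latitude) followed by the names of the output variables switched on in 'columns'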
specifying_names = False
## If specifying_names above is set to True, specify the below. If not, ignore ##
@@ -114,7 +113,6 @@ if generate_netcdf_files:
long_name=['Sea Surface Temperature','Sea Surface Temperature','Sea Surface Temperature','Sea Surface Temperature','Sea Surface Temperature']
var_name=['tos','tos','tos','tos','tos']
units=['K','K','K','K','K']
if not(specifying_names):
standard_name=np.tile('sea_surface_temperature',len(column_names))
long_name=np.tile('Sea Surface Temperature',len(column_names))
@@ -131,17 +129,17 @@ def put_data_into_cube(df,df_domain,variable,specifying_names,standard_name,long
times = np.unique(df['day'].values)
latitude = iris.coords.DimCoord(latitudes, standard_name='latitude', units='degrees')
longitude = iris.coords.DimCoord(longitudes, standard_name='longitude', units='degrees')
time = iris.coords.DimCoord(times, standard_name='time', units='days')
# time = iris.coords.DimCoord(times, standard_name='time', units='days')
time = iris.coords.DimCoord(times, standard_name='time', units=Unit('days since '+run_start_date+' 00:00:0.0', calendar='gregorian'))
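# giving the time coordinate a reference date ('days since <run start date>') means the netcdf time axis is interpreted as real calendar dates rather than bare day numbers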
if specifying_names:
cube = iris.cube.Cube(np.zeros((times.size,latitudes.size, longitudes.size), np.float32),standard_name=standard_name, long_name=long_name, var_name=var_name, units=units,dim_coords_and_dims=[(time,0), (latitude, 1), (longitude, 2)])
cube = iris.cube.Cube(np.full((times.size,latitudes.size, longitudes.size),-999.99, np.float32),standard_name=standard_name, long_name=long_name, var_name=var_name, units=units,dim_coords_and_dims=[(time,0), (latitude, 1), (longitude, 2)])
else:
cube = iris.cube.Cube(np.zeros((times.size,latitudes.size, longitudes.size), np.float32),standard_name=None, long_name=None, var_name=None, units=None,dim_coords_and_dims=[(time,0), (latitude, 1), (longitude, 2)])
Z,X,Y = np.meshgrid(cube.coord('time').points,cube.coord('longitude').points,cube.coord('latitude').points)
cube = iris.cube.Cube(np.full((times.size,latitudes.size, longitudes.size),-999.99, np.float32),standard_name=None, long_name=None, var_name=None, units=None,dim_coords_and_dims=[(time,0), (latitude, 1), (longitude, 2)])
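# -999.99 acts as the missing-data sentinel: any grid cell not filled in below is masked at the end of this function via np.ma.masked_where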
# Z,X,Y = np.meshgrid(cube.coord('time').points,cube.coord('longitude').points,cube.coord('latitude').points)
data = cube.data.copy()
data[:] = -999.99
# data[:] = -999.99
days = np.unique(df['day'].values)
shape = [X.shape[0],X.shape[2]]
# shape = [X.shape[0],X.shape[2]]
for i,day in enumerate(days):
df_tmp = df.loc[df['day'] == day]
for j,lat in enumerate(df_tmp['latitude'].values):
@@ -157,7 +155,7 @@ def put_data_into_cube(df,df_domain,variable,specifying_names,standard_name,long
cube.data = np.ma.masked_where(cube.data == -999.99,cube.data)
return cube
def output_netcdf(year,column_names,df,df_domain,specifying_names,standard_name,long_name,var_name,units,run_start_date,output_cube, output_directory,output_file_name,i):
def output_netcdf(year,column_names,df,df_domain,specifying_names,standard_name,long_name,var_name,units,run_start_date, output_directory,output_file_name,i):
column_name = column_names[i]
output_cube = put_data_into_cube(df,df_domain,column_name,specifying_names,standard_name,long_name,var_name,units,run_start_date)
iris.fileformats.netcdf.save(output_cube, output_directory+output_file_name+'_'+column_name.replace(" ", "")+'_'+str(year)+'.nc', zlib=True, complevel=2)
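# each variable is written to its own yearly netcdf file, with zlib compression (complevel=2) to keep file sizes down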
@@ -216,8 +214,8 @@ def run_model(domain_file_name,lats_lons,year,start_year,unique_job_id,met_data_
#modified so that the fortran code looks for the correct met file, rather than having to copy it into the working directory
# lon,lat = return_domain_lon(base_directory+'domain/'+domain_file_name,i)
lon_domain_tmp = float(lon_domain[i])
if lon_domain_tmp < 0.0:
lon_domain_tmp = 360.0+lon_domain_tmp
# if lon_domain_tmp < 0.0:
# lon_domain_tmp = 360.0+lon_domain_tmp
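# (the 0-360 longitude wrap is disabled here, presumably because the gbr_coarse domain and met files already use the same longitude convention)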
run_command = '\n'.join(['./{} << EOF'.format(executable_file_name),
str(start_year),
str(year),
@@ -351,76 +349,81 @@ for i,file in enumerate(files):
print 'looping through years'
#
# for year in range(start_year,end_year+1):
year = start_year
print year
#clean up any preexisting met files
try:
files_to_delete = glob.glob(met_data_temporary_location+'*.dat')
[os.remove(f) for f in files_to_delete]
except:
print 'no met files to clean up'
subprocess.call('tar -C '+met_data_temporary_location+' -zxf '+met_data_location+'met_data_'+str(year)+'.tar.gz', shell=True)
try:
shutil.move(output_directory+output_file_name+'_'+str(year), output_directory+output_file_name+'_'+str(year)+'_previous')
except:
print 'no previous output file to move'
if parallel_processing:
pool = mp.Pool(processes=num_procs)
func = partial(run_model, domain_file_name, lats_lons, year, start_year, unique_job_id, met_data_temporary_location,lon_domain,lat_domain,smaj1,smin1,smaj2,smin2,smaj3,smin3,smaj4,smin4,smaj5,smin5,woa_nutrient,alldepth,include_depth_output,include_temp_surface_output,include_temp_bottom_output,include_chlorophyll_surface_output,include_phyto_biomass_surface_output,include_phyto_biomass_bottom_output,include_PAR_surface_output,include_PAR_bottom_output,include_windspeed_output,include_stressx_output,include_stressy_output,include_Etide_output,include_Ewind_output,include_u_mean_surface_output,include_u_mean_bottom_output,include_grow1_mean_surface_output,include_grow1_mean_bottom_output,include_uptake1_mean_surface_output,include_uptake1_mean_bottom_output,include_tpn1_output,include_tpg1_output,include_speed3_output)
# results,errors = pool.map(func, range(num_lines))
results, errors = zip(*pool.map(func, range(len(lat_domain))))
# results = pool.map(func, range(num_lines))
if generate_netcdf_files:
# run_start_date = str(year)+'-01-01'
# df = pd.DataFrame(columns=(column_names))
# i=0
# for result in results:
# lines = result.split('\n')[:-1]
# for line in lines:
# # print line
# df.loc[i] = map(float,line.split())
# i+=1
#
# for column_name in column_names[4::]:
# output_cube = put_data_into_cube(df,df_domain,column_name,specifying_names,standard_name,long_name,var_name,units,run_start_date)
# iris.fileformats.netcdf.save(output_cube, output_directory+output_file_name+'_'+column_name.replace(" ", "")+'_'+str(year)+'.nc', zlib=True, complevel=2)
run_start_date = str(year)+'-01-01'
df = pd.DataFrame(columns=(column_names))
i=0
for result in results:
lines = result.split('\n')[:-1]
for line in lines:
# print line
df.loc[i] = map(float,line.split())
i+=1
func = partial(output_netcdf,year,column_names,df,df_domain,specifying_names,standard_name,long_name,var_name,units,run_start_date,output_cube, output_directory,output_file_name)
my_log = zip(*pool.map(func, range(4,len(column_names))))
else:
with open(output_directory+output_file_name+'_'+str(year),'w') as fout:
for year in range(start_year,end_year+1):
# year = start_year
print year
#clean up any preexisting met files
try:
files_to_delete = glob.glob(met_data_temporary_location+'*.dat')
[os.remove(f) for f in files_to_delete]
except:
print 'no met files to clean up'
subprocess.call('tar -C '+met_data_temporary_location+' -zxf '+met_data_location+'met_data_'+str(year)+'.tar.gz', shell=True)
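# unpack this year's met files (met_data_<year>.tar.gz) into the temporary (ideally RAM disk) location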
try:
shutil.move(output_directory+output_file_name+'_'+str(year), output_directory+output_file_name+'_'+str(year)+'_previous')
except:
print 'no previous output file to move'
if parallel_processing:
pool = mp.Pool(processes=num_procs)
func = partial(run_model, domain_file_name, lats_lons, year, start_year, unique_job_id, met_data_temporary_location,lon_domain,lat_domain,smaj1,smin1,smaj2,smin2,smaj3,smin3,smaj4,smin4,smaj5,smin5,woa_nutrient,alldepth,include_depth_output,include_temp_surface_output,include_temp_bottom_output,include_chlorophyll_surface_output,include_phyto_biomass_surface_output,include_phyto_biomass_bottom_output,include_PAR_surface_output,include_PAR_bottom_output,include_windspeed_output,include_stressx_output,include_stressy_output,include_Etide_output,include_Ewind_output,include_u_mean_surface_output,include_u_mean_bottom_output,include_grow1_mean_surface_output,include_grow1_mean_bottom_output,include_uptake1_mean_surface_output,include_uptake1_mean_bottom_output,include_tpn1_output,include_tpg1_output,include_speed3_output)
# results,errors = pool.map(func, range(num_lines))
results, errors = zip(*pool.map(func, range(len(lat_domain))))
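# functools.partial freezes the arguments shared by every grid point, so pool.map only varies the grid-point index; each worker returns a (result, error) pair and zip(*...) separates these into two tuples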
# results = pool.map(func, range(num_lines))
if generate_netcdf_files:
# run_start_date = str(year)+'-01-01'
# df = pd.DataFrame(columns=(column_names))
# i=0
# for result in results:
# lines = result.split('\n')[:-1]
# for line in lines:
# # print line
# df.loc[i] = map(float,line.split())
# i+=1
#
# for column_name in column_names[4::]:
# output_cube = put_data_into_cube(df,df_domain,column_name,specifying_names,standard_name,long_name,var_name,units,run_start_date)
# iris.fileformats.netcdf.save(output_cube, output_directory+output_file_name+'_'+column_name.replace(" ", "")+'_'+str(year)+'.nc', zlib=True, complevel=2)
run_start_date = str(year)+'-01-01'
df = pd.DataFrame(columns=(column_names))
i=0
tmp_array = np.zeros([len(column_names),np.sum([len(result.split('\n')[:-1]) for result in results])])
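# filling a pre-allocated numpy array and building the DataFrame from it in one go (below) avoids the slow row-by-row df.loc[i] appends used in the commented-out code above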
for result in results:
fout.write(result)
if write_error_output:
with open(output_directory+output_file_name+'_error_'+str(year),'w') as fout:
for error in errors:
fout.write(error)
pool.close()
#clean up any leftover met files
try:
files_to_delete = glob.glob(met_data_temporary_location+'*.dat')
[os.remove(f) for f in files_to_delete]
except:
print 'no met files to clean up'
lines = result.split('\n')[:-1]
for line in lines:
# print line
# df.loc[i] = map(float,line.split())
tmp_array[:,i] = map(float,line.split())
i+=1
# df = pd.DataFrame({column_names[0]: tmp_array[0,:], column_names[1]: tmp_array[1,:], column_names[2]: tmp_array[2,:], column_names[3]: tmp_array[3,:], column_names[4]: tmp_array[4,:], column_names[5]: tmp_array[5,:], column_names[6]: tmp_array[6,:], column_names[7]: tmp_array[7,:], column_names[8]: tmp_array[8,:]})
# need to make this generic based on the number of column_names
df = pd.DataFrame({column_names[0]: tmp_array[0,:]})
for i in range(len(column_names)-1):
df[column_names[i+1]] = tmp_array[i+1,:]
func = partial(output_netcdf,year,column_names,df,df_domain,specifying_names,standard_name,long_name,var_name,units,run_start_date, output_directory,output_file_name)
my_log = zip(*pool.map(func, range(4,len(column_names))))
else:
with open(output_directory+output_file_name+'_'+str(year),'w') as fout:
for result in results:
fout.write(result)
if write_error_output:
with open(output_directory+output_file_name+'_error_'+str(year),'w') as fout:
for error in errors:
fout.write(error)
pool.close()
#clean up any leftover met files
try:
files_to_delete = glob.glob(met_data_temporary_location+'*.dat')
[os.remove(f) for f in files_to_delete]
except:
print 'no met files to clean up'
remove_files = glob.glob(base_directory+'main/*'+unique_job_id+'*')
@@ -430,13 +433,3 @@ except:
pass
for remove_file in remove_files:
os.remove(remove_file)
# for year in range(start_year,end_year+1):
# pool = mp.Pool(processes=num_procs)
# func = partial(run_model2, domain_file_name, lats_lons,year,start_year,unique_job_id, met_data_temporary_location)
# # results,errors = pool.map(func, range(num_lines))
# results = pool.map(func, range(num_lines))
# with open(output_directory+output_file_name+'_'+str(year)+'_error','w') as fout:
# for result in results:
# fout.write(result)