MOM_write_cputime.F90

1! This file is part of MOM6, the Modular Ocean Model version 6.
2! See the LICENSE file for licensing information.
3! SPDX-License-Identifier: Apache-2.0
4
5!> A module to monitor the overall CPU time used by MOM6 and project when to stop the model
7
8use mom_coms, only : sum_across_pes, num_pes
9use mom_error_handler, only : mom_error, mom_mesg, fatal, is_root_pe
10use mom_io, only : open_ascii_file, close_file, append_file, writeonly_file
11use mom_file_parser, only : get_param, log_param, log_version, param_file_type
12use mom_time_manager, only : time_type, get_time, operator(>)
13
14implicit none ; private
15
17
18!-----------------------------------------------------------------------
19
20integer :: clocks_per_sec = 1000 !< The number of clock ticks per second, as reported by SYSTEM_CLOCK (COUNT_RATE)
21integer :: max_ticks = 1000 !< The largest tick count SYSTEM_CLOCK can return before rolling over (COUNT_MAX)
22
!> The control structure for the MOM_write_cputime module, holding the settings and the
!! running totals that are used to log CPU time and to decide when a run should stop itself
type, public :: write_cputime_cs
  private
  logical :: initialized = .false. !< True once MOM_write_cputime_init has set up this structure.
  real :: maxcpu                   !< The maximum amount of CPU time per processor for which MOM
                                   !! should run before saving a restart file and quitting with a
                                   !! return value that indicates that further execution is
                                   !! required to complete the simulation [wall-clock seconds].
                                   !! Automatic stopping is only considered when this is positive.
  type(time_type) :: start_time    !< The model time at the start of the simulation, as passed
                                   !! to MOM_write_cputime_init.
  real :: startup_cputime          !< The CPU time used in the startup phase of the model, taken
                                   !! from the first call to write_cputime [clock_cycles].
  real :: prev_cputime = 0.0       !< The most recent SYSTEM_CLOCK reading [clock_cycles].
  real :: dn_dcpu_min = -1.0       !< The minimum derivative of timestep with CPU time, or a
                                   !! negative value if it has not been set [steps clock_cycles-1].
  real :: cputime2 = 0.0           !< The accumulated CPU time [clock_cycles].
  integer :: previous_calls = 0    !< The number of times write_cputime has been called.
  integer :: prev_n = 0            !< The value of the step number n from the last call.
  integer :: filecpu_ascii = -1    !< The unit number of the CPU time file.
  character(len=200) :: cpufile    !< The name of the CPU time file.
end type write_cputime_cs
42
43contains
44
!> Record the SYSTEM_CLOCK reading at the start of a run, allocating the control
!! structure first if the caller has not already done so
subroutine write_cputime_start_clock(CS)
  type(write_cputime_cs), pointer :: CS !< The control structure for this module; it is
                                        !! allocated here if it is not yet associated.

  ! Local variables
  integer :: clock_count ! The tick count returned by SYSTEM_CLOCK [clock_cycles]

  if (.not.associated(CS)) then
    allocate(CS)
  endif

  ! This call also refreshes the module-level clock rate and maximum tick count.
  call system_clock(count=clock_count, count_rate=clocks_per_sec, count_max=max_ticks)
  CS%prev_cputime = clock_count
end subroutine write_cputime_start_clock
55
!> Initialize the MOM_write_cputime module.  This reads and logs the run-time parameters
!! MAXCPU and CPU_TIME_FILE, builds the full path of the CPU time file and stores the model
!! start time.  If the control structure is not yet associated it is allocated here and the
!! current SYSTEM_CLOCK reading is recorded as the starting point for CPU time differences.
subroutine mom_write_cputime_init(param_file, directory, Input_start_time, CS)
  type(param_file_type),  intent(in) :: param_file !< A structure to parse for run-time parameters
  character(len=*),       intent(in) :: directory  !< The directory where the CPU time file goes.
  type(time_type),        intent(in) :: Input_start_time !< The start model time of the simulation.
  type(write_cputime_cs), pointer    :: CS         !< A pointer that may be set to point to the
                                                   !! control structure for this module.

  ! Local variables
  integer :: new_cputime   ! The CPU time returned by SYSTEM_CLOCK [clock_cycles]
  ! This include declares and sets the variable "version".
# include "version_variable.h"
  character(len=40) :: mdl = 'MOM_write_cputime' ! This module's name.
  logical :: all_default   ! If true, all parameters are using their default values.

  if (.not.associated(CS)) then
    ! The clock was not started by an earlier call to write_cputime_start_clock, so start it now.
    allocate(CS)
    call system_clock(new_cputime, clocks_per_sec, max_ticks)
    CS%prev_cputime = new_cputime
  endif

  CS%initialized = .true.

  ! Read all relevant parameters and write them to the model log.

  ! Determine whether all parameters are set to their default values.  These first reads
  ! are not logged; they only decide how the module version is reported.
  call get_param(param_file, mdl, "MAXCPU", CS%maxcpu, units="wall-clock seconds", default=-1.0, do_not_log=.true.)
  call get_param(param_file, mdl, "CPU_TIME_FILE", CS%CPUfile, default="CPU_stats", do_not_log=.true.)
  all_default = (CS%maxcpu == -1.0) .and. (trim(CS%CPUfile) == trim("CPU_stats"))

  call log_version(param_file, mdl, version, "", all_default=all_default)
  call get_param(param_file, mdl, "MAXCPU", CS%maxcpu, &
                 "The maximum amount of cpu time per processor for which "//&
                 "MOM should run before saving a restart file and "//&
                 "quitting with a return value that indicates that a "//&
                 "further run is required to complete the simulation. "//&
                 "If automatic restarts are not desired, use a negative "//&
                 "value for MAXCPU. MAXCPU has units of wall-clock "//&
                 "seconds, so the actual CPU time used is larger by a "//&
                 "factor of the number of processors used.", &
                 units="wall-clock seconds", default=-1.0)
  call get_param(param_file, mdl, "CPU_TIME_FILE", CS%CPUfile, &
                 "The file into which CPU time is written.",default="CPU_stats")
  CS%CPUfile = trim(directory)//trim(CS%CPUfile)
  call log_param(param_file, mdl, "directory/CPU_TIME_FILE", CS%CPUfile)
#ifdef STATSLABEL
  ! The preprocessor is case-sensitive, so the macro must be spelled exactly as it is
  ! tested above for its value to be substituted here.
  CS%CPUfile = trim(CS%CPUfile)//"."//trim(adjustl(STATSLABEL))
#endif

  CS%Start_time = Input_start_time

end subroutine mom_write_cputime_init
108
!> Shut down the MOM_write_cputime module, flushing and closing the CPU time file on the
!! root PE and freeing the control structure.  Nothing is done if CS is not associated.
subroutine mom_write_cputime_end(CS)
  type(write_cputime_cs), pointer :: CS !< The control structure set up by a previous
                                        !! call to MOM_write_cputime_init.

  if (associated(CS)) then
    ! Only the root PE writes the file, and only a positive unit number is treated as open.
    if (is_root_pe() .and. CS%fileCPU_ascii > 0) then
      flush(CS%fileCPU_ascii)
      call close_file(CS%fileCPU_ascii)
    endif

    deallocate(CS)
  endif

end subroutine mom_write_cputime_end
125
!> This subroutine assesses how much CPU time the model has taken and determines how long the model
!! should be run before it saves a restart file and stops itself.  The CPU time used since the
!! previous call is summed across PEs and a line is appended to the CPU time file on the root PE.
!! Optionally this may also be used to trigger this module's end routine.
subroutine write_cputime(day, n, CS, nmax, call_end)
  type(time_type),        intent(inout) :: day  !< The current model time.
  integer,                intent(in)    :: n    !< The time step number of the current execution.
  type(write_cputime_cs), pointer       :: CS   !< The control structure set up by a previous
                                                !! call to MOM_write_cputime_init.
  integer,      optional, intent(inout) :: nmax !< The number of iterations after which to stop so
                                                !! that the simulation will not run out of CPU time.
                                                !! It is only reset when MAXCPU is positive and a
                                                !! stepping rate has been measured.
  logical,      optional, intent(in)    :: call_end !< If true, also call MOM_write_cputime_end.

  ! Local variables
  real    :: d_cputime     ! The change in CPU time since the last call to
                           ! this subroutine, summed across PEs [clock_cycles]
  integer :: new_cputime   ! The CPU time returned by SYSTEM_CLOCK [clock_cycles]
  real    :: reday         ! The time in days, including fractional days [days]
  integer :: start_of_day  ! The number of seconds since the start of the day
  integer :: num_days      ! The number of days in the time

  if (.not.associated(CS)) call mom_error(fatal, &
         "write_cputime: Module must be initialized before it is used.")

  if (.not.CS%initialized) call mom_error(fatal, &
         "write_cputime: Module must be initialized before it is used.")

  call system_clock(new_cputime, clocks_per_sec, max_ticks)
  ! The following lines extract useful information even if the clock has rolled
  ! over, assuming a 32-bit SYSTEM_CLOCK.  With more bits, rollover is essentially
  ! impossible.  Negative fluctuations of less than 10 seconds are not interpreted
  ! as the clock rolling over.  This should be unnecessary but is sometimes needed
  ! on the GFDL SGI/O3k.
  if (new_cputime < CS%prev_cputime-(10.0*clocks_per_sec)) then
    d_cputime = new_cputime - CS%prev_cputime + max_ticks
  else
    d_cputime = new_cputime - CS%prev_cputime
  endif

  call sum_across_pes(d_cputime)
  ! The interval ending at the first call is recorded as the startup cost.
  if (CS%previous_calls == 0) CS%startup_cputime = d_cputime

  CS%cputime2 = CS%cputime2 + d_cputime

  if ((CS%previous_calls >= 1) .and. (CS%maxcpu > 0.0)) then
    ! Determine the slowest rate at which time steps are executed.  The stored rate is
    ! replaced when it has not yet been set (negative) or when the rate over the latest
    ! interval is lower, so that the projection of the remaining steps is conservative.
    if ((n > CS%prev_n) .and. (d_cputime > 0.0) .and. &
        ((CS%dn_dcpu_min*d_cputime > (n - CS%prev_n)) .or. &
         (CS%dn_dcpu_min < 0.0))) &
      CS%dn_dcpu_min = (n - CS%prev_n) / d_cputime
    if (present(nmax) .and. (CS%dn_dcpu_min >= 0.0)) then
      ! Have the model stop itself after 95% of the CPU time has been used.
      ! NOTE(review): cputime2 already includes the startup interval, so startup_cputime is
      ! counted twice here; this may be a deliberate safety margin - confirm before changing.
      nmax = n + int( CS%dn_dcpu_min * &
          (0.95*CS%maxcpu * real(num_pes())*clocks_per_sec - &
           (CS%startup_cputime + CS%cputime2)) )
!     write(mesg,*) "Resetting nmax to ",nmax," at day",reday
!     call MOM_mesg(mesg)
    endif
  endif
  CS%prev_cputime = new_cputime ; CS%prev_n = n

  call get_time(day, start_of_day, num_days)
  reday = real(num_days)+ (real(start_of_day)/86400.0)

  ! Reopen or create a text output file.  The file is appended to when the model time is
  ! already past the start time, and is otherwise opened afresh for writing.
  if ((CS%previous_calls == 0) .and. (is_root_pe())) then
    if (day > CS%Start_time) then
      call open_ascii_file(CS%fileCPU_ascii, trim(CS%CPUfile), action=append_file)
    else
      call open_ascii_file(CS%fileCPU_ascii, trim(CS%CPUfile), action=writeonly_file)
    endif
  endif

  if (is_root_pe()) then
    if (CS%previous_calls == 0) then
      ! Write the startup summary and the column header once, on the first call.
      write(CS%fileCPU_ascii, &
        '("Startup CPU time: ", F12.3, " sec summed across", I5, " PEs.")') &
                            (CS%startup_cputime / clocks_per_sec), num_pes()
      write(CS%fileCPU_ascii,*)" Day, Step number, CPU time, CPU time change"
    endif
    write(CS%fileCPU_ascii,'(F12.3,", ",I11,", ",F12.3,", ",F12.3)') &
           reday, n, (CS%cputime2 / real(clocks_per_sec)), &
           d_cputime / real(clocks_per_sec)

    flush(CS%fileCPU_ascii)
  endif
  CS%previous_calls = CS%previous_calls + 1

  if (present(call_end)) then
    if (call_end) call mom_write_cputime_end(CS)
  endif

end subroutine write_cputime
218
219!> \namespace mom_write_cputime
220!!
221!! By Robert Hallberg, May 2006.
222!!
223!! This file contains the subroutine (write_cputime) that writes
224!! the summed CPU time across all processors to an output file. In
225!! addition, write_cputime estimates how many more time steps can be
226!! taken before 95% of the available CPU time is used, so that the
227!! model can be checkpointed at that time.
228
229end module mom_write_cputime