benchmark_train.sh 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294
  1. #!/bin/bash
  2. source test_tipc/common_func.sh
  # Run the training benchmark.
  # Usage:
  #   bash test_tipc/benchmark_train.sh config.txt benchmark_train params
  # or
  #   bash test_tipc/benchmark_train.sh config.txt benchmark_train
  #######################################
  # Extracts the value part of a "key=value" string.
  # Arguments:
  #   $1 - string to split on "="
  # Outputs:
  #   Writes the second "="-separated field to stdout (an empty line when
  #   there is no second field).
  #######################################
  function func_parser_params() {
    local strs=$1
    local -a fields=()
    # IFS is scoped to this single read, so the caller's IFS is left alone,
    # and read -a (unlike an unquoted array assignment) never glob-expands.
    IFS="=" read -r -a fields <<< "${strs}"
    echo "${fields[1]-}"
  }
  #######################################
  # Scales an epoch count by the two numbers of a device spec.
  # The first character of the spec is dropped and the rest is split on "C",
  # e.g. "N1C8" -> 1 and 8 (presumably node count and card count - confirm
  # with the benchmark docs).
  # Arguments:
  #   $1 - device spec such as N1C8
  #   $2 - base epoch count
  # Outputs:
  #   Writes $2 * <number before C> * <number after C> to stdout.
  # Returns:
  #   1 (with a message on stderr and nothing on stdout) on malformed input.
  #######################################
  function set_dynamic_epoch() {
    local string=$1
    local num=$2
    # Drop the leading letter. No length cap, so multi-digit specs such as
    # N100C800 are read in full.
    local spec=${string:1}
    local -a parts=()
    # IFS is scoped to the read so the caller's IFS is not modified.
    IFS="C" read -r -a parts <<< "${spec}"
    local first=${parts[0]-}
    local second=${parts[1]-}
    if [[ ! "${num}" =~ ^[0-9]+$ || ! "${first}" =~ ^[0-9]+$ || ! "${second}" =~ ^[0-9]+$ ]]; then
      echo "set_dynamic_epoch: invalid arguments '${string}' '${num}'" >&2
      return 1
    fi
    # 10# forces base 10 so values with leading zeros are not read as octal.
    echo $((10#${num} * 10#${first} * 10#${second}))
  }
  #######################################
  # Rewrites one "key:value" line of a config file in place, keeping the key
  # (the text before the first ":") and replacing everything after it.
  # Arguments:
  #   $1 - path of the file to edit
  #   $2 - 1-based number of the line to rewrite
  #   $3 - new value to store after "key:"
  # Outputs:
  #   None; the file is modified with sed -i.
  #######################################
  function func_sed_params() {
    local filename=$1
    local line=$2
    local param_value=$3
    local params key new_params escaped
    params=$(sed -n "${line}p" "${filename}")
    # Text before the first ":"; the whole line when it contains no ":".
    key=${params%%:*}
    new_params="${key}:${param_value}"
    # Backslash-escape the characters that are special in a sed replacement
    # (\, / and &) so that values such as paths are written literally.
    escaped=$(printf '%s\n' "${new_params}" | sed -e 's|[\\/&]|\\&|g')
    # sed is invoked directly: no eval of a string-built command and no
    # global IFS change.
    sed -i "${line}s/.*/${escaped}/" "${filename}"
  }
  #######################################
  # Builds a comma-separated GPU id list from a device spec.
  # The first character of the spec is dropped and the rest is split on "C"
  # into M and P (e.g. "N1C8" -> M=1, P=8); the ids 0..(P-1)/M are emitted.
  # Arguments:
  #   $1 - device spec such as N1C8
  # Outputs:
  #   Writes the id list (e.g. "0,1,2,3,4,5,6,7") to stdout.
  # Returns:
  #   1 (with a message on stderr and nothing on stdout) on malformed input.
  #######################################
  function set_gpu_id() {
    local string=$1
    # Drop the leading letter. No length cap, so multi-digit specs such as
    # N100C800 are read in full.
    local spec=${string:1}
    local -a parts=()
    # IFS is scoped to the read so the caller's IFS is not modified.
    IFS="C" read -r -a parts <<< "${spec}"
    local m=${parts[0]-}
    local p=${parts[1]-}
    if [[ ! "${m}" =~ ^[0-9]+$ || ! "${p}" =~ ^[0-9]+$ ]] || ((10#${m} == 0)); then
      echo "set_gpu_id: invalid device spec '${string}'" >&2
      return 1
    fi
    # Highest id; integer division truncates toward zero, as expr did.
    local last=$(((10#${p} - 1) / 10#${m}))
    local ids=""
    local i
    for ((i = 0; i <= last; i++)); do
      ids+="${ids:+,}${i}"
    done
    echo "${ids}"
  }
  #######################################
  # Prints the last path component of the current working directory.
  # Arguments:
  #   None
  # Outputs:
  #   Writes the directory name to stdout (an empty line when the working
  #   directory is "/").
  #######################################
  function get_repo_name() {
    local cur_dir
    cur_dir=$(pwd)
    # Strip everything up to the last "/": no IFS juggling, no glob
    # expansion of path components, no negative array index.
    echo "${cur_dir##*/}"
  }
  # Positional arguments: config file, mode and optional case description.
  FILENAME=$1
  # MODE must be one of ['benchmark_train']
  MODE=$2
  PARAMS=$3
  if [[ -z "${FILENAME}" ]]; then
    echo "Usage: bash test_tipc/benchmark_train.sh <config.txt> benchmark_train [params]" >&2
    exit 1
  fi
  # All later edits are made on a private copy of the config, never on the
  # file the caller passed in.
  new_filename="./test_tipc/benchmark_train.txt"
  # cp refuses to copy a file onto itself, so skip the copy in that case.
  if [[ ! "${FILENAME}" -ef "${new_filename}" ]]; then
    # Abort instead of silently benchmarking a stale copy from an earlier run.
    if ! cp -f -- "${FILENAME}" "${new_filename}"; then
      echo "benchmark_train.sh: cannot copy '${FILENAME}' to '${new_filename}'" >&2
      exit 1
    fi
  fi
  FILENAME=$new_filename
  to_static=""
  # parse "to_static" options and modify trainer into "to_static_trainer"
  if [[ "${PARAMS}" == *dynamicTostatic* ]]; then
    to_static="d2sT_"
    sed -i 's/trainer:norm_train/trainer:to_static_train/g' "${FILENAME}"
    # clear PARAM contents
    # NOTE(review): PARAMS contains "dynamicTostatic" here, so it can never
    # equal "to_static" and this branch is unreachable - confirm the intent.
    if [[ "${PARAMS}" == "to_static" ]]; then
      PARAMS=""
    fi
  fi
  # bash test_tipc/benchmark_train.sh test_tipc/configs/det_mv3_db_v2_0/train_benchmark.txt benchmark_train dynamic_bs8_fp32_DP_N1C8
  # bash test_tipc/benchmark_train.sh test_tipc/configs/det_mv3_db_v2_0/train_benchmark.txt benchmark_train dynamicTostatic_bs8_fp32_DP_N1C8
  # bash test_tipc/benchmark_train.sh test_tipc/configs/det_mv3_db_v2_0/train_benchmark.txt benchmark_train dynamic_bs8_null_DP_N1C1
  IFS=$'\n'
  # parser params from train_benchmark.txt
  dataline=$(cat "${FILENAME}")
  # Split the config into one array entry per non-empty line. Empty lines are
  # skipped (as the newline-IFS word split did before), but lines are no
  # longer subject to glob expansion.
  lines=()
  while IFS= read -r _cfg_line; do
    if [[ -n "${_cfg_line}" ]]; then
      lines+=("${_cfg_line}")
    fi
  done <<< "${dataline}"
  model_name=$(func_parser_value "${lines[1]}")
  python_name=$(func_parser_value "${lines[2]}")
  # set env
  python=python
  export str_tmp=$(echo $(pip list | grep paddlepaddle-gpu | awk -F ' ' '{print $2}'))
  # Drop a ".post*" suffix from the installed version string.
  export frame_version=${str_tmp%%.post*}
  export frame_commit=$(echo $(${python} -c "import paddle;print(paddle.version.commit)"))
  # Find the line number of the "train_benchmark_params" marker.
  line_num=$(grep -n -w "train_benchmark_params" "${FILENAME}" | cut -d ":" -f 1)
  if [[ ! "${line_num}" =~ ^[0-9]+$ ]]; then
    echo "benchmark_train.sh: expected exactly one 'train_benchmark_params' line in ${FILENAME}" >&2
    exit 1
  fi
  # for train log parser
  # grep -n counts from 1 while `lines` is indexed from 0, so lines[line_num]
  # is the entry after the marker (assumes no blank lines precede it in the
  # file, since those are not stored in `lines` - TODO confirm).
  batch_size=$(func_parser_value "${lines[line_num]}")
  line_num=$((line_num + 1))
  fp_items=$(func_parser_value "${lines[line_num]}")
  line_num=$((line_num + 1))
  epoch=$(func_parser_value "${lines[line_num]}")
  line_num=$((line_num + 1))
  profile_option_key=$(func_parser_key "${lines[line_num]}")
  profile_option_params=$(func_parser_value "${lines[line_num]}")
  profile_option="${profile_option_key}:${profile_option_params}"
  line_num=$((line_num + 1))
  flags_value=$(func_parser_value "${lines[line_num]}")
  # set flags
  IFS=";"
  # Intentionally unquoted: split the ";"-separated flag list into entries.
  flags_list=(${flags_value})
  for _flag in ${flags_list[*]}; do
    # NOTE(review): eval runs text taken from the config file; only use
    # config files from a trusted source.
    cmd="export ${_flag}"
    eval "${cmd}"
  done
  # Names and locations used for every log written below.
  repo_name=$(get_repo_name)
  # Logs go under $BENCHMARK_LOG_DIR when it is set, else under the cwd.
  SAVE_LOG=${BENCHMARK_LOG_DIR:-$(pwd)} # */benchmark_log
  status_log="${SAVE_LOG}/benchmark_log/results.log"
  mkdir -p "${SAVE_LOG}/benchmark_log/"
  # get benchmark profiling params : PROFILING_TIMER_ONLY=no|True|False
  : "${PROFILING_TIMER_ONLY:=True}"
  # Line numbers (1-based) of the config entries that get rewritten per run.
  line_python=3
  line_gpuid=4
  line_precision=6
  line_epoch=7
  line_batchsize=9
  line_profile=13
  line_eval_py=24
  line_export_py=30
  # Store "null" on the eval and export lines, then set the python entry.
  for _null_line in "${line_eval_py}" "${line_export_py}"; do
    func_sed_params "$FILENAME" "${_null_line}" "null"
  done
  func_sed_params "$FILENAME" "${line_python}" "$python"
  # Decide which (batch size, precision, device layout) cases to run.
  if [[ -z "${PARAMS}" || "${PARAMS}" == "dynamicTostatic" ]]; then
    # No concrete case was requested: sweep every batch size and precision
    # listed in the config on the fixed N1C4 layout in DP mode.
    if [[ -n "${PARAMS}" ]]; then
      model_type=$PARAMS
    fi
    IFS="|"
    # Intentionally unquoted: split the "|"-separated config values.
    batch_size_list=(${batch_size})
    fp_items_list=(${fp_items})
    device_num_list=(N1C4)
    run_mode="DP"
  else
    # parser params from input: modeltype_bs${bs_item}_${fp_item}_${run_mode}_${device_num}
    IFS="_"
    # Intentionally unquoted: split PARAMS on "_".
    params_list=(${PARAMS})
    model_type=${params_list[0]}
    # Keep only the digits of the "bs<N>" field.
    batch_size=${params_list[1]//[!0-9]/}
    precision=${params_list[2]}
    run_mode=${params_list[3]}
    device_num=${params_list[4]}
    IFS=";"
    if [[ -z "${batch_size}" || -z "${precision}" || -z "${run_mode}" || -z "${device_num}" ]]; then
      echo "benchmark_train.sh: malformed params '${PARAMS}', expected modeltype_bs<N>_<fp_item>_<run_mode>_<device_num>" >&2
      exit 1
    fi
    # The config file spells fp16 training as "amp".
    if [[ "${precision}" == "fp16" ]]; then
      precision="amp"
    fi
    epoch=$(set_dynamic_epoch "${device_num}" "${epoch}")
    fp_items_list=("${precision}")
    batch_size_list=("${batch_size}")
    device_num_list=("${device_num}")
  fi
  #######################################
  # Runs one training job without profiling for the current loop variables,
  # prints its log and passes the log to analysis.py.
  # Globals read:
  #   SAVE_LOG repo_name model_name batch_size precision run_mode device_num
  #   to_static FILENAME line_profile python BENCHMARK_ROOT status_log
  # Globals written:
  #   log_path speed_log_path log_name speed_log_name cmd job_bt job_et
  #   model_run_time (exported) _model_name last_status
  # Arguments:
  #   $1 - value passed to analysis.py as --speed_unit
  #######################################
  function _benchmark_train_and_analyze() {
    local speed_unit=$1

    log_path="$SAVE_LOG/train_log"
    speed_log_path="$SAVE_LOG/index"
    mkdir -p "${log_path}"
    mkdir -p "${speed_log_path}"
    log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_mode}_${device_num}_${to_static}log"
    speed_log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_mode}_${device_num}_${to_static}speed"
    func_sed_params "$FILENAME" "${line_profile}" "null" # sed --profile_option as null
    cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 "
    echo "${cmd}"
    # Measure with epoch seconds: subtracting %Y%m%d%H%M%S stamps gives a
    # wrong duration whenever the run crosses a minute boundary.
    job_bt=$(date '+%s')
    eval "${cmd}"
    job_et=$(date '+%s')
    export model_run_time=$((job_et - job_bt))
    cat "${log_path}/${log_name}"
    # parser log
    _model_name="${model_name}_bs${batch_size}_${precision}_${run_mode}"
    # Built piecewise so the logged command contains no indentation.
    cmd="${python} ${BENCHMARK_ROOT}/scripts/analysis.py --filename ${log_path}/${log_name} "
    cmd+="--speed_log_file '${speed_log_path}/${speed_log_name}' "
    cmd+="--model_name ${_model_name} "
    cmd+="--base_batch_size ${batch_size} "
    cmd+="--run_mode ${run_mode} "
    cmd+="--fp_item ${precision} "
    cmd+="--keyword ips: "
    cmd+="--skip_steps 2 "
    cmd+="--device_num ${device_num} "
    cmd+="--speed_unit ${speed_unit} "
    cmd+="--convergence_key loss: "
    echo "${cmd}"
    eval "${cmd}"
    last_status=$?
    status_check "${last_status}" "${cmd}" "${status_log}"
  }

  IFS="|"
  # Keep the configured profiler option untouched so that the timer_only
  # suffix is added at most once per run instead of once more per iteration.
  base_profile_option=${profile_option}
  for batch_size in "${batch_size_list[@]}"; do
    for train_precision in "${fp_items_list[@]}"; do
      for device_num in "${device_num_list[@]}"; do
        # sed batchsize and precision
        # Log names use fp16/fp32 while the config file uses amp/<other>.
        if [[ "${train_precision}" == "amp" ]]; then
          precision="fp16"
        else
          precision="fp32"
        fi
        func_sed_params "$FILENAME" "${line_precision}" "$train_precision"
        func_sed_params "$FILENAME" "${line_batchsize}" "$MODE=$batch_size"
        func_sed_params "$FILENAME" "${line_epoch}" "$MODE=$epoch"
        gpu_id=$(set_gpu_id "${device_num}")
        # An id list of at most one character means a single GPU.
        if ((${#gpu_id} <= 1)); then
          func_sed_params "$FILENAME" "${line_gpuid}" "0" # sed used gpu_id
          if [[ "${PROFILING_TIMER_ONLY}" != "no" ]]; then
            echo "run profile"
            # The default value of profile_option's timer_only parameter is True
            profile_option=${base_profile_option}
            if [[ "${PROFILING_TIMER_ONLY}" == "False" ]]; then
              profile_option="${profile_option};timer_only=False"
            fi
            log_path="$SAVE_LOG/profiling_log"
            mkdir -p "${log_path}"
            log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_mode}_${device_num}_${to_static}profiling"
            # set profile_option params (the whole line becomes the quoted option)
            sed -i "${line_profile}s/.*/\"${profile_option}\"/" "${FILENAME}"
            # run test_train_inference_python.sh, capped at five minutes
            cmd="timeout 5m bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 "
            echo "${cmd}"
            eval "${cmd}"
            cat "${log_path}/${log_name}"
          fi
          echo "run without profile"
          # without profile
          _benchmark_train_and_analyze "samples/s"
        else
          IFS=";"
          # NOTE(review): the previous `unset_env=$(unset CUDA_VISIBLE_DEVICES)`
          # ran in a subshell and never changed this shell's environment, so it
          # was removed. Add a plain `unset CUDA_VISIBLE_DEVICES` here only if
          # clearing the variable is really wanted.
          func_sed_params "$FILENAME" "${line_gpuid}" "$gpu_id" # sed used gpu_id
          # NOTE(review): this branch reports images/s while the single-GPU
          # branch reports samples/s - kept as found, confirm which is intended.
          _benchmark_train_and_analyze "images/s"
        fi
      done
    done
  done
  271. done