(reading from stats_20250223_154500.csv)
The following statistics are based on the latest data available in the file stats_20250223_154500.csv, covering the previous 6 months. They do not take into account interactive sessions run via VNC, nor nodes reserved directly by the individuals who are permitted to do so; only jobs that were run through the SLURM scheduler are counted.
Number of users and the number of jobs per week:
Identify the latest stats file from the date-time stamp embedded in its file name, then read that file.
# Read the semicolon-delimited stats file into `stats`.
# Elapsed and CPUTime are pinned to character so that readr's column-type
# guessing cannot parse "HH:MM:SS" values as times; both columns are parsed
# as strings further down by convert_time_to_seconds(). Every other column
# is still guessed, and supplying col_types keeps the column-spec message
# silent, as the original `cols()` did.
stats <- read_delim(
  latest_stats,
  delim = ";",
  escape_double = FALSE,
  col_types = cols(
    Elapsed = col_character(),
    CPUTime = col_character()
  ),
  trim_ws = TRUE
)
# Convert duration strings of the form "D-HH:MM:SS" or "HH:MM:SS" to seconds.
#
# Args:
#   time_str: character vector of durations (a single string works too).
#
# Returns:
#   A numeric vector of the same length as `time_str`, in seconds.
#   NA input yields NA instead of raising an error, and a string with too
#   few fields yields NA. Non-numeric fields yield NA with the usual
#   as.numeric() coercion warning.
convert_time_to_seconds <- function(time_str) {
  parse_one <- function(value) {
    # A missing duration stays missing rather than crashing the `if` below.
    if (is.na(value)) {
      return(NA_real_)
    }

    fields <- as.numeric(strsplit(value, "[-:]")[[1]])

    # A dash means the first field is a day count.
    if (grepl("-", value, fixed = TRUE)) {
      weights <- c(86400, 3600, 60, 1)
    } else {
      weights <- c(3600, 60, 1)
    }

    # Indexing past the end of `fields` gives NA, so short inputs return NA;
    # extra trailing fields are ignored.
    sum(fields[seq_along(weights)] * weights)
  }

  vapply(as.character(time_str), parse_one, numeric(1), USE.NAMES = FALSE)
}
# Format a number of seconds as an "HH:MM:SS" string.
#
# Args:
#   seconds: numeric vector of durations in seconds.
#
# Returns:
#   A character vector with one "HH:MM:SS" string per element. The hour
#   field is not capped at 24, and any fractional part of a second is
#   discarded.
convert_seconds_to_hms <- function(seconds) {
  # Whole hours contained in the duration.
  hh <- as.integer(floor(seconds / 3600))
  # Whole minutes left over once the full hours are taken out.
  mm <- as.integer(floor((seconds %% 3600) / 60))
  # Seconds left over within the current minute; as.integer() drops fractions.
  ss <- as.integer(seconds %% 60)

  sprintf("%02d:%02d:%02d", hh, mm, ss)
}
# Replace the Elapsed and CPUTime duration strings with numeric seconds.
# vapply() is used instead of sapply() so that each column is always a plain,
# unnamed numeric vector, including when the table has zero rows (sapply()
# would return an empty list in that case).
stats <- stats %>%
  mutate(
    Elapsed = vapply(Elapsed, convert_time_to_seconds, numeric(1),
                     USE.NAMES = FALSE),
    CPUTime = vapply(CPUTime, convert_time_to_seconds, numeric(1),
                     USE.NAMES = FALSE)
  )

# Keep only completed jobs whose elapsed time is at least 120 seconds.
filtered_stats <- stats %>%
  filter(State == "COMPLETED", Elapsed >= 120)
# Data analysis ----

# Counts over every job record in the stats table.
total_jobs <- nrow(stats)
completed_jobs <- stats %>%
  filter(State == "COMPLETED") %>%
  nrow()
failed_jobs <- stats %>%
  filter(State == "FAILED") %>%
  nrow()
unique_users <- n_distinct(stats$User)

# The same counts restricted to filtered_stats. Both job counts are the row
# count of filtered_stats, exactly as before.
filtered_total_jobs <- nrow(filtered_stats)
filtered_completed_jobs <- nrow(filtered_stats)
filtered_unique_users <- n_distinct(filtered_stats$User)

# Elapsed time (in seconds) of each filtered job, and the number of filtered
# jobs submitted by each user (count() stores it in column `n`).
runtime <- filtered_stats$Elapsed
jobs_per_user <- count(filtered_stats, User)

# Mean, median and maximum runtime, each formatted as "HH:MM:SS".
runtime_stats <- lapply(
  list(
    avg_runtime = mean(runtime, na.rm = TRUE),
    median_runtime = median(runtime, na.rm = TRUE),
    max_runtime = max(runtime, na.rm = TRUE)
  ),
  convert_seconds_to_hms
)

# Mean, median and maximum number of filtered jobs per user.
jobs_per_user_stats <- list(
  avg_jobs_per_user = mean(jobs_per_user$n, na.rm = TRUE),
  median_jobs_per_user = median(jobs_per_user$n, na.rm = TRUE),
  max_jobs_per_user = max(jobs_per_user$n, na.rm = TRUE)
)