(reading from stats_20250223_154500.csv)

Overview

The following statistics are based on the latest data available in the file stats_20250223_154500.csv, covering the previous 6 months. They do not include interactive sessions run via VNC or nodes reserved directly by users who are permitted to do so; only jobs submitted through the SLURM scheduler are counted.

Statistics

After filtering out jobs that did not finish in the COMPLETED state and jobs shorter than 2 minutes:

  • Filtered total jobs: 16537
  • Filtered completed jobs: 16537
  • Filtered unique users: 11

Runtime statistics (HH:MM:SS):

  • Average runtime: 00:58:26
  • Median runtime: 00:08:36
  • Max runtime: 289:03:01

Jobs per user statistics:

  • Average jobs per user: 1503.36363636364
  • Median jobs per user: 8
  • Max jobs per user: 10334

Plots

Number of users and the number of jobs per week:

Appendix

Load, Clean, and Analyze Data

Identify the latest stats file from the date-time stamp embedded in its file name, then read it.

# Read the newest semicolon-delimited stats export into `stats`.
# Elapsed and CPUTime are pinned to character rather than type-guessed: the
# conversion to seconds further down splits these values as strings, and
# readr's guesser may otherwise parse clock-like text as a time column.
# All remaining columns keep the default guessing behaviour of cols().
stats <- read_delim(latest_stats,
                    delim = ";", escape_double = FALSE,
                    col_types = cols(
                      Elapsed = col_character(),
                      CPUTime = col_character()
                    ),
                    trim_ws = TRUE)

# Convert SLURM-style duration strings to a number of seconds.
#
# Recognised layouts are "D-HH:MM:SS", "HH:MM:SS" and "MM:SS"; the seconds
# field may carry a fractional part. The input may be a single string or a
# whole vector; the result is an unnamed numeric vector of the same length.
#
# Missing or empty values yield NA without complaint. Values that are present
# but do not match a recognised layout also yield NA, and a single warning
# reports how many there were, so one bad row cannot abort the whole run.
convert_time_to_seconds <- function(time_str) {
  txt <- trimws(as.character(time_str))

  # grepl() returns FALSE for NA, so missing values are never "valid".
  layout <- "^([0-9]+-)?([0-9]+:)?[0-9]+:[0-9]+(\\.[0-9]+)?$"
  valid <- grepl(layout, txt)

  malformed <- !is.na(txt) & nzchar(txt) & !valid
  if (any(malformed)) {
    warning(
      sum(malformed), " duration value(s) could not be parsed, e.g. \"",
      txt[malformed][1], "\"; returning NA for them",
      call. = FALSE
    )
  }

  # Parses one string that has already passed the layout check.
  parse_one <- function(one) {
    pieces <- strsplit(one, "-", fixed = TRUE)[[1]]
    days <- if (length(pieces) == 2) as.numeric(pieces[1]) else 0
    clock <- pieces[length(pieces)]

    fields <- as.numeric(strsplit(clock, ":", fixed = TRUE)[[1]])
    # Right-align so the last field is always seconds; absent hours count as 0.
    fields <- c(rep(0, 3 - length(fields)), fields)

    days * 86400 + sum(fields * c(3600, 60, 1))
  }

  out <- rep(NA_real_, length(txt))
  out[valid] <- vapply(txt[valid], parse_one, numeric(1), USE.NAMES = FALSE)
  out
}

# Format a duration given in seconds as an "HH:MM:SS" string.
#
# Hours are not wrapped at 24, so long durations print as e.g. "289:03:01".
# Fractional seconds are truncated. Each field is zero-padded to at least two
# digits. The arithmetic and sprintf() are vectorised, so a numeric vector
# yields a character vector of the same length.
convert_seconds_to_hms <- function(seconds) {
  hh <- as.integer(floor(seconds / 3600))
  within_hour <- seconds %% 3600
  mm <- as.integer(floor(within_hour / 60))
  ss <- as.integer(seconds %% 60)
  sprintf("%02d:%02d:%02d", hh, mm, ss)
}

# Replace the Elapsed and CPUTime duration strings with numeric seconds.
# vapply() is used instead of sapply() so each column is always a plain,
# unnamed double vector: sapply() would label every element with its source
# string and would hand back an empty list for a zero-row table.
# as.character() keeps the parser fed with text even if a column was not
# read in as character.
stats <- stats %>%
  mutate(
    Elapsed = vapply(as.character(Elapsed), convert_time_to_seconds,
                     numeric(1), USE.NAMES = FALSE),
    CPUTime = vapply(as.character(CPUTime), convert_time_to_seconds,
                     numeric(1), USE.NAMES = FALSE)
  )

# Keep only jobs whose State is COMPLETED and whose Elapsed time (already
# converted to seconds above) is at least 120, i.e. two minutes or longer.
filtered_stats <- filter(stats, State == "COMPLETED", Elapsed >= 120)

# Data analysis

# Counts over every job in the export, before any filtering.
total_jobs <- nrow(stats)
completed_jobs <- stats %>%
  filter(State == "COMPLETED") %>%
  nrow()
failed_jobs <- stats %>%
  filter(State == "FAILED") %>%
  nrow()
unique_users <- n_distinct(stats$User)

# Counts over the filtered table; both job counts are taken from the same
# filtered table and are therefore equal.
filtered_total_jobs <- nrow(filtered_stats)
filtered_completed_jobs <- filtered_total_jobs
filtered_unique_users <- n_distinct(filtered_stats$User)

# Per-job runtimes (seconds) and the number of filtered jobs for each user.
runtime <- filtered_stats$Elapsed
jobs_per_user <- count(filtered_stats, User)

# Mean, median and maximum runtime, each rendered as an HH:MM:SS string.
runtime_stats <- lapply(
  list(
    avg_runtime = mean(runtime, na.rm = TRUE),
    median_runtime = median(runtime, na.rm = TRUE),
    max_runtime = max(runtime, na.rm = TRUE)
  ),
  convert_seconds_to_hms
)

# Mean, median and maximum of the per-user job counts (column `n`).
jobs_per_user_stats <- with(
  jobs_per_user,
  list(
    avg_jobs_per_user = mean(n, na.rm = TRUE),
    median_jobs_per_user = median(n, na.rm = TRUE),
    max_jobs_per_user = max(n, na.rm = TRUE)
  )
)