# Source: Hugging Face Space "cjerzak / Multiscaler" (status: Running)
# File size: 13,398 Bytes

# setwd('~/Dropbox/ImageSeq/')

# Set the `error` option to NULL, i.e. no custom error handler is installed
# for the rest of the session.
options(error = NULL)
library(shiny)
library(dplyr)
library(fields)  # For image.plot in heatMap
library(akima)   # For interpolation

# Load the data from sm.csv (path is relative to the working directory).
# The rest of this script reads the columns `MaxImageDims`, `application`,
# `optimizeImageRep`, and whichever metric column is selected in the UI.
sm <- read.csv("sm.csv")

# Convert a vector to numeric by way of its character representation, so that
# factors are converted by their labels rather than their integer codes.
f2n <- function(x) {
  as_text <- as.character(x)
  as.numeric(as_text)
}

# Compute MaxImageDimsLeft and MaxImageDimsRight from MaxImageDims.
# MaxImageDims is split on "_" and the parts are sorted numerically:
#   MaxImageDimsLeft  = smallest part
#   MaxImageDimsRight = second-smallest part (NA when there is only one part)
# The column is coerced to character first because strsplit() rejects
# non-character input (e.g. a column that read.csv parsed as integer/factor).
sm <- local({
  sorted_dims <- lapply(
    strsplit(as.character(sm$MaxImageDims), split = "_", fixed = TRUE),
    function(parts) sort(f2n(parts))
  )
  # vapply() guarantees one numeric value per row, even for zero-row input.
  sm$MaxImageDimsLeft <- vapply(sorted_dims, function(d) d[1], numeric(1))
  sm$MaxImageDimsRight <- vapply(sorted_dims, function(d) d[2], numeric(1))
  sm
})

#' Draw an interpolated heatmap of scattered (x, y, z) points
#'
#' Interpolates `z` onto a regular `N` x `N` grid spanning the ranges of `x`
#' and `y` using `akima::interp()` (duplicate points are averaged), then draws
#' the grid with `fields::image.plot()` (when `add.legend = TRUE`) or
#' `graphics::image()` (otherwise).
#'
#' @param x,y Numeric coordinates of the observed points.
#' @param z Numeric values observed at (`x`, `y`).
#' @param main Plot title.
#' @param N Number of grid points per axis used for the interpolation.
#' @param yaxt Forwarded to the image function as `yaxt`.
#' @param xlab,ylab Axis labels.
#' @param horizontal,legend.width,legend.only Legend options, forwarded to
#'   `fields::image.plot()` only (i.e. ignored when `add.legend = FALSE`).
#' @param useLog String of axis letters forwarded as `log`. If it contains
#'   "z", the interpolated z values are floored at 0.001 and log-transformed
#'   before plotting, and the legend ticks are labelled on the original scale.
#' @param ylim,xlim Axis limits; default to the range of the interpolation grid.
#' @param zlim Colour-scale limits, forwarded unchanged (also when "z" is in
#'   `useLog`, in which case the plotted values are `log(z)`).
#' @param add.legend Draw with a colour legend (`fields::image.plot`)?
#' @param vline,hline Optional positions of vertical / horizontal lines.
#' @param col_vline,col_hline Colours of those lines.
#' @param cex.lab,cex.main Text expansion for axis labels and title.
#' @param myCol Colour vector forwarded as `col`.
#' @param includeMarginals Add jittered tick marks of the observed `x` and `y`
#'   values near the lower/left plot edges?
#' @param marginalJitterSD_x,marginalJitterSD_y Jitter standard deviation as a
#'   multiple of `sd(x)` / `sd(y)`.
#' @param openBrowser If `TRUE`, enter `browser()` on entry (debugging aid).
#' @param optimal_point Optional list with elements `x` and `y`; a green star
#'   is drawn at that location.
#' @return `NULL`, invisibly. Called for its plotting side effect.
heatMap <- function(x, y, z,
                    main = "",
                    N, yaxt = NULL,
                    xlab = "",
                    ylab = "",
                    horizontal = FALSE,
                    useLog = "",
                    legend.width = 1,
                    ylim = NULL,
                    xlim = NULL,
                    zlim = NULL,
                    add.legend = TRUE,
                    legend.only = FALSE,
                    vline = NULL,
                    col_vline = "black",
                    hline = NULL,
                    col_hline = "black",
                    cex.lab = 2,
                    cex.main = 2,
                    myCol = NULL,
                    includeMarginals = FALSE,
                    marginalJitterSD_x = 0.01,
                    marginalJitterSD_y = 0.01,
                    openBrowser = FALSE,
                    optimal_point = NULL) {
  if (openBrowser) { browser() }

  s_ <- akima::interp(x = x, y = y, z = z,
                      xo = seq(min(x), max(x), length.out = N),
                      yo = seq(min(y), max(y), length.out = N),
                      duplicate = "mean")
  if (is.null(xlim)) { xlim <- range(s_$x, finite = TRUE) }
  if (is.null(ylim)) { ylim <- range(s_$y, finite = TRUE) }

  # Arguments that only fields::image.plot() understands.
  legend_args <- list(legend.width = legend.width,
                      horizontal = horizontal,
                      legend.only = legend.only)

  if (!grepl(useLog, pattern = "z")) {
    # The interpolation result is passed as a single list(x, y, z) argument.
    grid_args <- list(s_)
  } else {
    # Log-scale the colour dimension: "z" is stripped from the axis-log string
    # and the z values themselves are transformed.
    useLog <- gsub(useLog, pattern = "z", replace = "")
    zTicks <- summary(c(s_$z))
    ep_ <- 0.001
    zTicks[zTicks < ep_] <- ep_  # floor so that log() is finite
    zTicks <- exp(seq(log(min(zTicks)), log(max(zTicks)), length.out = 10))
    zTicks <- round(zTicks, abs(min(log(zTicks, base = 10))))
    s_$z[s_$z < ep_] <- ep_
    grid_args <- list(s_$x, s_$y, log(s_$z))
    # Legend ticks sit at log(z) positions but are labelled with z itself.
    legend_args$axis.args <- list(at = log(zTicks), labels = zTicks)
  }

  # list() keeps NULL-valued entries, so image.plot() receives exactly the
  # same arguments as before.
  plot_args <- list(xlab = xlab, ylab = ylab, log = useLog,
                    cex.lab = cex.lab, main = main, cex.main = cex.main,
                    col = myCol, xlim = xlim, ylim = ylim,
                    yaxt = yaxt, zlim = zlim)

  if (add.legend) {
    do.call(fields::image.plot, c(grid_args, plot_args, legend_args))
  } else {
    # graphics::image() has no legend: drop the legend-only arguments and any
    # unset (NULL) options so that its own defaults apply.
    do.call(graphics::image, c(grid_args, Filter(Negate(is.null), plot_args)))
  }

  if (!is.null(vline)) { abline(v = vline, lwd = 10, col = col_vline) }
  if (!is.null(hline)) { abline(h = hline, lwd = 10, col = col_hline) }

  if (includeMarginals) {
    # Jittered tick marks of the observed x values along the lower edge ...
    points(x + rnorm(length(x), sd = marginalJitterSD_x * sd(x)),
           rep(ylim[1] * 1.1, length(x)), pch = "|", col = "darkgray")
    # ... and of the observed y values along the left edge.
    points(rep(xlim[1] * 1.1, length(y)),
           y + rnorm(length(y), sd = sd(y) * marginalJitterSD_y),
           pch = "-", col = "darkgray")
  }

  # Add green star at optimal point if provided
  if (!is.null(optimal_point)) {
    points(optimal_point$x, optimal_point$y,
           pch = 8, col = "green", cex = 3, lwd = 4)
  }

  invisible(NULL)
}

# UI Definition
# Sidebar: selectors for application, model and metric plus a checkbox that
# switches the display to "improvement over the best single scale".
# Main panel: the heatmap with an explanatory note underneath.
ui <- local({
  application_choices <- unique(sm$application)
  model_choices <- unique(sm$optimizeImageRep)
  metric_choices <- c(
    "AUTOC_rate_std_ratio_mean", "AUTOC_rate_mean", "AUTOC_rate_std_mean",
    "AUTOC_rate_std_ratio_mean_pc", "AUTOC_rate_mean_pc", "AUTOC_rate_std_mean_pc",
    "MeanVImportHalf1", "MeanVImportHalf2", "FracTopkHalf1", "RMSE"
  )

  controls <- sidebarPanel(
    selectInput("application", "Application",
                choices = application_choices,
                selected = application_choices[1]),
    selectInput("model", "Model",
                choices = model_choices,
                selected = "clip"),
    selectInput("metric", "Metric",
                choices = metric_choices,
                selected = "AUTOC_rate_std_ratio_mean"),
    checkboxInput("compareToBest", "Compare to best single scale", value = FALSE)
  )

  display <- mainPanel(
    plotOutput("heatmapPlot", height = "600px"),
    div(style = "margin-top: 10px; font-style: italic;", uiOutput("contextNote"))
  )

  fluidPage(
    titlePanel("Multiscale Heatmap Explorer"),
    sidebarLayout(controls, display)
  )
})

# Server Definition
#
# Reactive pipeline:
#   filteredData()       rows of `sm` for the selected application and model
#   groupedData()        mean of the selected metric per (left, right)
#                        dimension pair, plus improvement over the best
#                        single-scale pair
#   plotPoints()         finite (x, y, z) triples to plot, or NULL if there
#                        are too few to interpolate
#   interpolated_data()  50 x 50 interpolated grid and its optimal point
server <- function(input, output) {
  # Direction in which a metric is "better": metrics whose name contains
  # "std_mean" or "RMSE" are minimised, all others are maximised.
  get_better_direction <- function(metric) {
    if (grepl(metric, pattern = "std_mean|RMSE")) "min" else "max"
  }

  # Rows for the selected application/model. Where the right dimension is NA
  # it falls back to the left one, so such rows lie on the
  # left == right diagonal. Returns NULL when nothing matches.
  filteredData <- reactive({
    df <- sm %>%
      filter(application == input$application,
             optimizeImageRep == input$model) %>%
      mutate(MaxImageDimsRight = ifelse(is.na(MaxImageDimsRight),
                                        MaxImageDimsLeft,
                                        MaxImageDimsRight))
    if (nrow(df) == 0) return(NULL)
    df
  })

  # Per-pair mean of the selected metric and its improvement over the best
  # single-scale (left == right) pair. Shared by the interpolation and the
  # plot so the two can never disagree.
  groupedData <- reactive({
    data <- filteredData()
    metric <- input$metric
    # Guard against a metric column that is absent from the data; `.data[[ ]]`
    # below only ever looks at columns, never at objects in the environment.
    if (is.null(data) || !(metric %in% names(data))) return(NULL)

    grouped <- data %>%
      group_by(MaxImageDimsLeft, MaxImageDimsRight) %>%
      summarise(
        mean_metric = mean(as.numeric(.data[[metric]]), na.rm = TRUE),
        .groups = "drop"
      )

    better_dir <- get_better_direction(metric)
    on_diagonal <- grouped$MaxImageDimsLeft == grouped$MaxImageDimsRight
    single_scale <- grouped$mean_metric[on_diagonal]
    single_scale <- single_scale[is.finite(single_scale)]
    best_single_scale_metric <- if (length(single_scale) > 0) {
      if (better_dir == "max") max(single_scale) else min(single_scale)
    } else {
      NA_real_
    }

    # Improvement is signed so that larger is always better.
    grouped$improvement <- if (better_dir == "max") {
      grouped$mean_metric - best_single_scale_metric
    } else {
      best_single_scale_metric - grouped$mean_metric
    }
    grouped
  })

  # Finite (x, y, z) points to interpolate and plot; z is either the metric
  # mean or the improvement, depending on the checkbox. akima::interp()
  # rejects missing/infinite values, so they are removed here. Returns NULL
  # when too few points remain for an interpolation.
  plotPoints <- reactive({
    grouped <- groupedData()
    if (is.null(grouped)) return(NULL)

    z <- if (input$compareToBest) grouped$improvement else grouped$mean_metric
    x <- grouped$MaxImageDimsLeft
    y <- grouped$MaxImageDimsRight
    keep <- is.finite(x) & is.finite(y) & is.finite(z)
    x <- x[keep]
    y <- y[keep]
    z <- z[keep]

    # Check if interpolation is possible
    if (length(unique(x)) < 2 || length(unique(y)) < 2 || length(z) < 3) {
      return(NULL)
    }
    list(x = x, y = y, z = z)
  })

  # Reactive expression to compute interpolated data and optimal point
  interpolated_data <- reactive({
    pts <- plotPoints()
    if (is.null(pts)) return(NULL)

    # Compute interpolated grid; a failure (e.g. degenerate point layout) is
    # reported as a warning and treated as "insufficient data".
    s_ <- tryCatch(
      akima::interp(x = pts$x,
                    y = pts$y,
                    z = pts$z,
                    xo = seq(min(pts$x), max(pts$x), length.out = 50),
                    yo = seq(min(pts$y), max(pts$y), length.out = 50),
                    duplicate = "mean"),
      error = function(e) {
        warning("Interpolation failed: ", conditionMessage(e), call. = FALSE)
        NULL
      }
    )
    # which.max()/which.min() return integer(0) on an all-NA grid.
    if (is.null(s_) || !any(is.finite(s_$z))) return(NULL)

    # Find optimal point from interpolated grid. Improvement is always
    # "larger is better"; a raw metric follows its own direction.
    better_dir <- get_better_direction(input$metric)
    best_idx <- if (input$compareToBest || better_dir == "max") {
      which.max(s_$z)
    } else {
      which.min(s_$z)
    }

    row_col <- arrayInd(best_idx, .dim = dim(s_$z))
    list(s_ = s_,
         optimal_point = list(x = s_$x[row_col[1, 1]],
                              y = s_$y[row_col[1, 2]],
                              z = s_$z[row_col[1, 1], row_col[1, 2]]))
  })

  # Heatmap Output
  output$heatmapPlot <- renderPlot({
    interp_data <- interpolated_data()
    if (is.null(interp_data)) {
      plot.new()
      text(0.5, 0.5, "Insufficient data for interpolation", cex = 1.5)
      return(NULL)
    }

    # Non-NULL whenever interpolated_data() is non-NULL.
    pts <- plotPoints()
    main <- if (input$compareToBest) {
      paste(input$application, "-", input$metric, "improvement over best single scale")
    } else {
      paste(input$application, "-", input$metric)
    }

    customPalette <- colorRampPalette(c("blue", "white", "red"))(50)
    heatMap(x = pts$x,
            y = pts$y,
            z = pts$z,
            N = 50,
            main = main,
            xlab = "Image Dimension 1",
            ylab = "Image Dimension 2",
            useLog = "xy",
            myCol = customPalette,
            cex.lab = 1.4,
            zlim = range(pts$z),
            optimal_point = interp_data$optimal_point)
  })

  # Text shared by both variants of the contextual note.
  note_suffix <- paste(
    "The Peru RCT involves a multifaceted graduation program treatment to reduce poverty outcomes.",
    "The Uganda RCT involves a cash grant program to stimulate human capital and living conditions among the poor.",
    "For more information, see <a href='https://arxiv.org/abs/2411.02134' target='_blank'>https://arxiv.org/abs/2411.02134</a>",
    "<div style='font-size: 10px; line-height: 1.5;'>",
    "<b>Glossary:</b><br>",
    "• <b>Model:</b> The neural-network backbone (e.g., clip-rsicd) transforming satellite images into numerical representations.<br>",
    "• <b>Metric:</b> The criterion (e.g., RATE Ratio, RMSE) measuring performance or heterogeneity detection.<br>",
    "• <b>Compare to best single-scale:</b> Toggle showing metric improvement relative to the best single-scale baseline.<br>",
    "• <b>ImageDim1, ImageDim2:</b> Image sizes (e.g., 64×64, 128×128) for multi-scale analysis.<br>",
    "• <b>RATE Ratio:</b> Statistic indicating how well the model detects treatment-effect variation.<br>",
    "• <b>PC:</b> Principal Components; a compression step of neural representations.<br>",
    "• <b>MeanDiff, MeanDiff_pc:</b> Gain in RATE Ratio from multi-scale vs. single-scale, with '_pc' for compressed data.<br>",
    "• <b>RMSE:</b> Root Mean Squared Error, measuring prediction accuracy in simulations.<br>",
    "</div>"
  )

  # Contextual Note Output
  # The note contains HTML markup and is shown through uiOutput(), so it is
  # rendered with renderUI() and marked as HTML rather than sent as plain text.
  output$contextNote <- renderUI({
    intro <- if (input$compareToBest) {
      paste("This heatmap shows the improvement in", input$metric,
            "over the best single scale for", input$application,
            "using the", input$model, "model. The green star marks the optimal point.")
    } else {
      paste("This heatmap displays", input$metric,
            "for", input$application,
            "using the", input$model,
            "model across different image dimension combinations. The green star marks the optimal point.")
    }
    HTML(paste(intro, note_suffix))
  })
}

# Run the Shiny App: build the app object from the `ui` and `server` objects
# defined earlier in this file.
shinyApp(ui = ui, server = server)