diff --git a/evalbench/reporting/bqstore.py b/evalbench/reporting/bqstore.py index 997472ef..8b94f28f 100644 --- a/evalbench/reporting/bqstore.py +++ b/evalbench/reporting/bqstore.py @@ -109,6 +109,7 @@ def store(self, results, type: STORETYPE): ) job_config = bigquery.LoadJobConfig() job_config.autodetect = True + job_config.allow_quoted_newlines = True job_config.schema_update_options = [ bigquery.SchemaUpdateOption.ALLOW_FIELD_ADDITION, bigquery.SchemaUpdateOption.ALLOW_FIELD_RELAXATION, diff --git a/evalbench/util/sessionmgr.py b/evalbench/util/sessionmgr.py index 96706d81..62810d7d 100644 --- a/evalbench/util/sessionmgr.py +++ b/evalbench/util/sessionmgr.py @@ -41,11 +41,28 @@ def __init__( self.sessions = {} self.ttl = 10800 self.lock = RWLock() + self.load_sessions_from_disk() logging.debug("Starting reaper...") reaper = Thread(target=self.reaper, args=[]) reaper.daemon = True reaper.start() + def load_sessions_from_disk(self): + try: + if not os.path.exists(SESSION_RESOURCES_PATH): + return + for sid in os.listdir(SESSION_RESOURCES_PATH): + dir_path = os.path.join(SESSION_RESOURCES_PATH, sid) + if os.path.isdir(dir_path): + mtime = os.path.getmtime(dir_path) + logging.info(f"Loading session {sid} from disk with mtime {mtime}.") + self.sessions[sid] = { + "create_ts": mtime, + "session_id": sid, + } + except Exception as e: + logging.error(f"Error loading sessions from disk: {e}") + def set_ttl(self, ttl): self.ttl = ttl @@ -78,7 +95,10 @@ def prune_resource_files(self, session_id): os.remove(file_path) for dir in dirs: dir_path = os.path.join(root, dir) - os.rmdir(dir_path) + if os.path.islink(dir_path): + os.unlink(dir_path) + else: + os.rmdir(dir_path) os.rmdir(path) def create_session(self, session_id): diff --git a/evalbench_service/k8s/evalbench.yaml b/evalbench_service/k8s/evalbench.yaml index 9f4f233d..083fdd14 100644 --- a/evalbench_service/k8s/evalbench.yaml +++ b/evalbench_service/k8s/evalbench.yaml @@ -37,11 +37,11 @@ spec: name: evalbench-eval resources: requests: - cpu: "42" - memory: "168Gi" + cpu: "20" + memory: "80Gi" limits: - cpu: "42" - memory: "168Gi" + cpu: "20" + memory: "80Gi" securityContext: allowPrivilegeEscalation: true capabilities: diff --git a/viewer/main.py b/viewer/main.py index 75891a92..a893da77 100644 --- a/viewer/main.py +++ b/viewer/main.py @@ -237,6 +237,19 @@ def on_load(e: me.LoadEvent): if job_id and job_id in directories: state.selected_directory = job_id + tab = me.query_params.get("tab") + eval1 = me.query_params.get("eval1") + eval2 = me.query_params.get("eval2") + + if tab == "compare" and eval1 and eval2: + state.selected_main_tab = "Compare" + state.compare_tab_visible = True + state.compare_evals = json.dumps([eval1, eval2]) + # Trigger the AI comparison + state.ai_comparison = compare_evals(eval1, eval2) + + + def status_component(): @@ -2281,36 +2294,25 @@ def on_next_conversation(e: me.ClickEvent): from trends import trends_component state = me.state(State) + def on_main_tab_change(e: me.ButtonToggleChangeEvent): + st = me.state(State) + st.selected_main_tab = e.value + logging.info(f"Main tab changed to: {e.value}") + with me.box(style=me.Style(margin=me.Margin(bottom="12px"))): - tabs = ["Status", "List", "Charts"] + buttons = [ + me.ButtonToggleButton(label="Status", value="Status"), + me.ButtonToggleButton(label="List", value="List"), + me.ButtonToggleButton(label="Charts", value="Charts"), + ] if state.compare_tab_visible: - tabs.append("Compare") - for tab in tabs: - is_active = state.selected_main_tab == tab - tab_text = tab - if tab == "Compare" and state.ai_comparison == "Comparing...": - tab_text += " (Loading...)" - - click_handler = None - if tab == "Status": click_handler = on_status_tab_click - elif tab == "List": click_handler = on_list_tab_click - elif tab == "Charts": click_handler = on_charts_tab_click - elif tab == "Compare": click_handler = on_compare_tab_click + buttons.append(me.ButtonToggleButton(label="Compare", value="Compare")) - me.button( - tab_text, - on_click=click_handler, - style=me.Style( - padding=me.Padding.symmetric(vertical="6px", horizontal="12px"), - background="#1e293b" if is_active else "#f1f5f9", - color="#ffffff" if is_active else "#475569", - border_radius="4px", - cursor="pointer", - font_weight="600" if is_active else "500", - font_size="14px", - margin=me.Margin(right="8px") - ), - ) + me.button_toggle( + value=state.selected_main_tab, + buttons=buttons, + on_change=on_main_tab_change, + ) if state.selected_main_tab == "List": try: diff --git a/viewer/static/chart.js b/viewer/static/chart.js index e81a4c78..1ebf0ee2 100644 --- a/viewer/static/chart.js +++ b/viewer/static/chart.js @@ -1,10 +1,12 @@ function drawChart() { const data = window.chartData; - const xCol = window.chartConfig.xCol; - const yCol = window.chartConfig.yCol; - const hueCol = window.chartConfig.hueCol; - const title = window.chartConfig.title; - const ylabel = window.chartConfig.ylabel; + const config = window.chartConfig; + + const xCol = config.xCol; + const yCol = config.yCol; + const hueCol = config.hueCol; + const title = config.title; + const ylabel = config.ylabel; const margin = { top: 60, right: 350, bottom: 60, left: 60 }; const container = document.getElementById('chart-container'); @@ -14,18 +16,22 @@ function drawChart() { const height = 500 - margin.top - margin.bottom; // Clear previous SVG - d3.select("#chart").selectAll("*").remove(); + d3.select('#chart').selectAll("*").remove(); - const svg = d3.select("#chart") + // Sort data by time + data.sort((a, b) => new Date(a[xCol]) - new Date(b[xCol])); + data.forEach((d, i) => d.index = i); + + const svg = d3.select('#chart') .append("svg") .attr("width", width + margin.left + margin.right) .attr("height", height + margin.top + margin.bottom) .append("g") .attr("transform", `translate(${margin.left},${margin.top})`); - // X axis - const x = d3.scalePoint() - .domain(data.map(d => d[xCol])) + // X axis - Use linear scale with indices to support zooming while maintaining even spacing + const x = d3.scaleLinear() + .domain([0, data.length - 1]) .range([0, width]); // Y axis @@ -43,11 +49,21 @@ function drawChart() { .attr("class", "grid") .call(d3.axisLeft(y).tickSize(-width).tickFormat("")); - // Axes - svg.append("g") + // X Axis Label formatter + const xAxis = d3.axisBottom(x) + .tickFormat(i => { + const idx = Math.round(i); + if (Math.abs(i - idx) < 0.1 && data[idx]) { + return data[idx][xCol]; + } + return ""; + }); + + const gX = svg.append("g") .attr("transform", `translate(0,${height})`) - .call(d3.axisBottom(x)) - .selectAll("text") + .call(xAxis); + + gX.selectAll("text") .attr("transform", "rotate(-45)") .style("text-anchor", "end") .attr("class", "axis-label"); @@ -67,25 +83,54 @@ function drawChart() { // Group data by product const dataByProduct = d3.group(data, d => d[hueCol]); + // Clip path + svg.append("defs").append("clipPath") + .attr("id", "clip") + .append("rect") + .attr("width", width) + .attr("height", height); + + // Chart body + const chartBody = svg.append("g") + .attr("clip-path", "url(#clip)"); + + // Zoom + const zoom = d3.zoom() + .scaleExtent([1, 20]) + .translateExtent([[0, 0], [width, height]]) + .extent([[0, 0], [width, height]]) + .on("zoom", zoomed); + + // Zoom rect + chartBody.append("rect") + .attr("width", width) + .attr("height", height) + .style("fill", "none") + .style("pointer-events", "all") + .call(zoom); + // Draw smooth lines and areas dataByProduct.forEach((productData, product) => { + const productIndex = products.indexOf(product); // Area - svg.append("path") + chartBody.append("path") .datum(productData) - .attr("class", "area") + .attr("class", `area product-${productIndex}`) .attr("d", d3.area() - .x(d => x(d[xCol])) + .x(d => x(d.index)) .y0(height) .y1(d => y(d[yCol])) ) - .style("fill", color(product)); + .style("fill", color(product)) + .style("opacity", 0.1) + .style("pointer-events", "none"); // Line - svg.append("path") + chartBody.append("path") .datum(productData) - .attr("class", "line") + .attr("class", `line product-${productIndex}`) .attr("d", d3.line() - .x(d => x(d[xCol])) + .x(d => x(d.index)) .y(d => y(d[yCol])) ) .style("stroke", color(product)); @@ -94,32 +139,29 @@ function drawChart() { // Add dots and tooltips const tooltip = d3.select("#tooltip"); - data.forEach(d => { - svg.append("circle") - .attr("cx", x(d[xCol])) - .attr("cy", y(d[yCol])) - .attr("r", 5) - .attr("fill", color(d[hueCol])) - .attr("class", "dot") - .on("mouseover", function (event) { - d3.select(this).attr("r", 8).style("stroke-width", "3px"); - tooltip.style("opacity", 1) - .html(`Product: ${d[hueCol]}
Time: ${d[xCol]}
Value: ${d[yCol]}
Eval ID: ${d.job_id}`); - }) - .on("mousemove", function (event) { - tooltip.style("left", (event.pageX + 15) + "px") - .style("top", (event.pageY - 28) + "px"); - }) - .on("mouseout", function () { - d3.select(this).attr("r", 5).style("stroke-width", "2px"); - tooltip.style("opacity", 0); - }) - .on("click", function(event, d) { - if (d && d.job_id) { - window.open("/?job_id=" + d.job_id, "_blank"); - } - }); - }); + chartBody.selectAll(".dot") + .data(data) + .enter() + .append("circle") + .attr("cx", d => x(d.index)) + .attr("cy", d => y(d[yCol])) + .attr("r", 5) + .attr("fill", d => color(d[hueCol])) + .attr("class", d => `dot product-${products.indexOf(d[hueCol])}`) + .on("mouseover", function (event, d) { + d3.select(this).attr("r", 8).style("stroke-width", "3px"); + tooltip.style("opacity", 1) + .html(`Product: ${d[hueCol]}
Time: ${d[xCol]}
Value: ${d[yCol]}
Eval ID: ${d.job_id}`); + }) + .on("mousemove", function (event) { + tooltip.style("left", (event.pageX + 15) + "px") + .style("top", (event.pageY - 28) + "px"); + }) + .on("mouseout", function () { + d3.select(this).attr("r", 5).style("stroke-width", "2px"); + tooltip.style("opacity", 0); + }); + // Add Title svg.append("text") @@ -145,7 +187,57 @@ function drawChart() { .data(products) .enter().append("g") .attr("class", "legend") - .attr("transform", (d, i) => `translate(${width + 20}, ${i * 25})`); + .attr("transform", (d, i) => `translate(${width + 20}, ${i * 25})`) + .style("cursor", "pointer") + .on("click", function(event, product) { + const productIndex = products.indexOf(product); + + // Check if ANY OTHER line is visible + let anyOtherVisible = false; + products.forEach((p, i) => { + if (i !== productIndex) { + const el = d3.selectAll(`.line.product-${i}`); + if (el.style("opacity") !== "0") { + anyOtherVisible = true; + } + } + }); + + if (anyOtherVisible) { + // ISOLATE + products.forEach((p, i) => { + const newOpacity = (i === productIndex) ? 1 : 0; + const areaOpacity = (i === productIndex) ? 0.1 : 0; + + d3.selectAll(`.line.product-${i}, .dot.product-${i}`) + .transition().duration(200).style("opacity", newOpacity) + .style("pointer-events", newOpacity === 0 ? "none" : "all"); + + d3.selectAll(`.area.product-${i}`) + .transition().duration(200).style("opacity", areaOpacity); + + // Update legend + const leg = d3.selectAll(".legend").filter(d => d === p); + leg.select("rect").style("opacity", newOpacity === 0 ? 0.3 : 1); + leg.select("text").style("opacity", newOpacity === 0 ? 0.5 : 1); + }); + } else { + // RESTORE + products.forEach((p, i) => { + d3.selectAll(`.line.product-${i}, .dot.product-${i}`) + .transition().duration(200).style("opacity", 1) + .style("pointer-events", "all"); + + d3.selectAll(`.area.product-${i}`) + .transition().duration(200).style("opacity", 0.1); + + // Update legend + const leg = d3.selectAll(".legend").filter(d => d === p); + leg.select("rect").style("opacity", 1); + leg.select("text").style("opacity", 1); + }); + } + }); legend.append("rect") .attr("x", 0) @@ -160,6 +252,39 @@ function drawChart() { .attr("dy", ".35em") .style("text-anchor", "start") .text(d => d.replace('.json', '')); + + function zoomed(event) { + const newX = event.transform.rescaleX(x); + + // Update axis + gX.call(xAxis.scale(newX)); + gX.selectAll("text") + .attr("transform", "rotate(-45)") + .style("text-anchor", "end"); + + // Update lines + chartBody.selectAll(".line") + .attr("d", function(d) { + return d3.line() + .x(p => newX(p.index)) + .y(p => y(p[yCol])) + (d); + }); + + // Update areas + chartBody.selectAll(".area") + .attr("d", function(d) { + return d3.area() + .x(p => newX(p.index)) + .y0(height) + .y1(p => y(p[yCol])) + (d); + }); + + // Update dots + chartBody.selectAll(".dot") + .attr("cx", d => newX(d.index)); + } } // Initial draw diff --git a/viewer/trends.py b/viewer/trends.py index cbd43de8..30642717 100644 --- a/viewer/trends.py +++ b/viewer/trends.py @@ -37,34 +37,27 @@ def generate_d3_chart(df, x_col, y_col, hue_col, title, ylabel): @@ -89,9 +82,6 @@ def generate_d3_chart(df, x_col, y_col, hue_col, title, ylabel): """ return html - - - def trends_component(): results_dir = get_results_dir() @@ -221,59 +211,54 @@ def handler(e: me.ClickEvent): st = me.state(State) st.trends_product_filter = val st.open_dropdown = "" - handler.__name__ = f"click_trends_product_{val}" return handler - with me.box(style=me.Style(display="flex", align_items="center", gap="8px", margin=me.Margin(bottom="16px"))): - me.text("Filter by Product:", style=me.Style(font_weight="600")) - - with me.box(style=me.Style(position="relative", width="200px")): - # Trigger + with me.box(style=me.Style(position="relative", width="300px")): + with me.box( + style=me.Style( + padding=me.Padding.all("12px"), + background="#f8fafc", + border=me.Border.all(me.BorderSide(width="1px", color="#e2e8f0")), + border_radius="6px", + cursor="pointer", + display="flex", + justify_content="space-between", + align_items="center", + ), + on_click=toggle_trends_product_dropdown, + ): + me.text(state.trends_product_filter if state.trends_product_filter else "All Products", style=me.Style(font_weight="500")) + me.text("▼", style=me.Style(font_size="10px", color="#64748b")) + + if state.open_dropdown == "trends_product": with me.box( style=me.Style( + position="absolute", + top="100%", + left="0", + z_index=10, background="#ffffff", border=me.Border.all(me.BorderSide(width="1px", color="#e2e8f0")), border_radius="4px", - padding=me.Padding.all("8px"), - cursor="pointer", - ), - on_click=toggle_trends_product_dropdown, - ): - me.text( - state.trends_product_filter if state.trends_product_filter else "All Products", - style=me.Style(color="#1f2937"), + width="100%", + max_height="200px", + overflow_y="auto", ) - - # Popup - if state.open_dropdown == "trends_product": + ): + # All option with me.box( - style=me.Style( - position="absolute", - top="100%", - left="0", - z_index=10, - background="#ffffff", - border=me.Border.all(me.BorderSide(width="1px", color="#e2e8f0")), - border_radius="4px", - width="100%", - max_height="200px", - overflow_y="auto", - ) + style=me.Style(padding=me.Padding.all("8px"), cursor="pointer"), + on_click=make_product_handler(""), ): - # All option + me.text("All Products", style=me.Style(color="#1f2937")) + + # Product options + for p in all_products: with me.box( style=me.Style(padding=me.Padding.all("8px"), cursor="pointer"), - on_click=make_product_handler(""), + on_click=make_product_handler(p), ): - me.text("All Products", style=me.Style(color="#1f2937")) - - # Product options - for p in all_products: - with me.box( - style=me.Style(padding=me.Padding.all("8px"), cursor="pointer"), - on_click=make_product_handler(p), - ): - me.text(p, style=me.Style(color="#1f2937")) + me.text(p, style=me.Style(color="#1f2937")) with me.box(style=me.Style(display="flex", flex_direction="column", gap="16px", width="100%")): me.text("AI Score", style=me.Style(font_size="16px", font_weight="600"))