diff --git a/evalbench/reporting/bqstore.py b/evalbench/reporting/bqstore.py
index 997472ef..8b94f28f 100644
--- a/evalbench/reporting/bqstore.py
+++ b/evalbench/reporting/bqstore.py
@@ -109,6 +109,7 @@ def store(self, results, type: STORETYPE):
)
job_config = bigquery.LoadJobConfig()
job_config.autodetect = True
+ job_config.allow_quoted_newlines = True
job_config.schema_update_options = [
bigquery.SchemaUpdateOption.ALLOW_FIELD_ADDITION,
bigquery.SchemaUpdateOption.ALLOW_FIELD_RELAXATION,
diff --git a/evalbench/util/sessionmgr.py b/evalbench/util/sessionmgr.py
index 96706d81..62810d7d 100644
--- a/evalbench/util/sessionmgr.py
+++ b/evalbench/util/sessionmgr.py
@@ -41,11 +41,28 @@ def __init__(
self.sessions = {}
self.ttl = 10800
self.lock = RWLock()
+ self.load_sessions_from_disk()
logging.debug("Starting reaper...")
reaper = Thread(target=self.reaper, args=[])
reaper.daemon = True
reaper.start()
+ def load_sessions_from_disk(self):
+ try:
+ if not os.path.exists(SESSION_RESOURCES_PATH):
+ return
+ for sid in os.listdir(SESSION_RESOURCES_PATH):
+ dir_path = os.path.join(SESSION_RESOURCES_PATH, sid)
+ if os.path.isdir(dir_path):
+ mtime = os.path.getmtime(dir_path)
+ logging.info(f"Loading session {sid} from disk with mtime {mtime}.")
+ self.sessions[sid] = {
+ "create_ts": mtime,
+ "session_id": sid,
+ }
+ except Exception as e:
+ logging.error(f"Error loading sessions from disk: {e}")
+
def set_ttl(self, ttl):
self.ttl = ttl
@@ -78,7 +95,10 @@ def prune_resource_files(self, session_id):
os.remove(file_path)
for dir in dirs:
dir_path = os.path.join(root, dir)
- os.rmdir(dir_path)
+ if os.path.islink(dir_path):
+ os.unlink(dir_path)
+ else:
+ os.rmdir(dir_path)
os.rmdir(path)
def create_session(self, session_id):
diff --git a/evalbench_service/k8s/evalbench.yaml b/evalbench_service/k8s/evalbench.yaml
index 9f4f233d..083fdd14 100644
--- a/evalbench_service/k8s/evalbench.yaml
+++ b/evalbench_service/k8s/evalbench.yaml
@@ -37,11 +37,11 @@ spec:
name: evalbench-eval
resources:
requests:
- cpu: "42"
- memory: "168Gi"
+ cpu: "20"
+ memory: "80Gi"
limits:
- cpu: "42"
- memory: "168Gi"
+ cpu: "20"
+ memory: "80Gi"
securityContext:
allowPrivilegeEscalation: true
capabilities:
diff --git a/viewer/main.py b/viewer/main.py
index 75891a92..a893da77 100644
--- a/viewer/main.py
+++ b/viewer/main.py
@@ -237,6 +237,19 @@ def on_load(e: me.LoadEvent):
if job_id and job_id in directories:
state.selected_directory = job_id
+ tab = me.query_params.get("tab")
+ eval1 = me.query_params.get("eval1")
+ eval2 = me.query_params.get("eval2")
+
+ if tab == "compare" and eval1 and eval2:
+ state.selected_main_tab = "Compare"
+ state.compare_tab_visible = True
+ state.compare_evals = json.dumps([eval1, eval2])
+ # Trigger the AI comparison
+ state.ai_comparison = compare_evals(eval1, eval2)
+
+
+
def status_component():
@@ -2281,36 +2294,25 @@ def on_next_conversation(e: me.ClickEvent):
from trends import trends_component
state = me.state(State)
+ def on_main_tab_change(e: me.ButtonToggleChangeEvent):
+ st = me.state(State)
+ st.selected_main_tab = e.value
+ logging.info(f"Main tab changed to: {e.value}")
+
with me.box(style=me.Style(margin=me.Margin(bottom="12px"))):
- tabs = ["Status", "List", "Charts"]
+ buttons = [
+ me.ButtonToggleButton(label="Status", value="Status"),
+ me.ButtonToggleButton(label="List", value="List"),
+ me.ButtonToggleButton(label="Charts", value="Charts"),
+ ]
if state.compare_tab_visible:
- tabs.append("Compare")
- for tab in tabs:
- is_active = state.selected_main_tab == tab
- tab_text = tab
- if tab == "Compare" and state.ai_comparison == "Comparing...":
- tab_text += " (Loading...)"
-
- click_handler = None
- if tab == "Status": click_handler = on_status_tab_click
- elif tab == "List": click_handler = on_list_tab_click
- elif tab == "Charts": click_handler = on_charts_tab_click
- elif tab == "Compare": click_handler = on_compare_tab_click
+ buttons.append(me.ButtonToggleButton(label="Compare", value="Compare"))
- me.button(
- tab_text,
- on_click=click_handler,
- style=me.Style(
- padding=me.Padding.symmetric(vertical="6px", horizontal="12px"),
- background="#1e293b" if is_active else "#f1f5f9",
- color="#ffffff" if is_active else "#475569",
- border_radius="4px",
- cursor="pointer",
- font_weight="600" if is_active else "500",
- font_size="14px",
- margin=me.Margin(right="8px")
- ),
- )
+ me.button_toggle(
+ value=state.selected_main_tab,
+ buttons=buttons,
+ on_change=on_main_tab_change,
+ )
if state.selected_main_tab == "List":
try:
diff --git a/viewer/static/chart.js b/viewer/static/chart.js
index e81a4c78..1ebf0ee2 100644
--- a/viewer/static/chart.js
+++ b/viewer/static/chart.js
@@ -1,10 +1,12 @@
function drawChart() {
const data = window.chartData;
- const xCol = window.chartConfig.xCol;
- const yCol = window.chartConfig.yCol;
- const hueCol = window.chartConfig.hueCol;
- const title = window.chartConfig.title;
- const ylabel = window.chartConfig.ylabel;
+ const config = window.chartConfig;
+
+ const xCol = config.xCol;
+ const yCol = config.yCol;
+ const hueCol = config.hueCol;
+ const title = config.title;
+ const ylabel = config.ylabel;
const margin = { top: 60, right: 350, bottom: 60, left: 60 };
const container = document.getElementById('chart-container');
@@ -14,18 +16,22 @@ function drawChart() {
const height = 500 - margin.top - margin.bottom;
// Clear previous SVG
- d3.select("#chart").selectAll("*").remove();
+ d3.select('#chart').selectAll("*").remove();
- const svg = d3.select("#chart")
+ // Sort data by time
+ data.sort((a, b) => new Date(a[xCol]) - new Date(b[xCol]));
+ data.forEach((d, i) => d.index = i);
+
+ const svg = d3.select('#chart')
.append("svg")
.attr("width", width + margin.left + margin.right)
.attr("height", height + margin.top + margin.bottom)
.append("g")
.attr("transform", `translate(${margin.left},${margin.top})`);
- // X axis
- const x = d3.scalePoint()
- .domain(data.map(d => d[xCol]))
+ // X axis - Use linear scale with indices to support zooming while maintaining even spacing
+ const x = d3.scaleLinear()
+ .domain([0, data.length - 1])
.range([0, width]);
// Y axis
@@ -43,11 +49,21 @@ function drawChart() {
.attr("class", "grid")
.call(d3.axisLeft(y).tickSize(-width).tickFormat(""));
- // Axes
- svg.append("g")
+ // X Axis Label formatter
+ const xAxis = d3.axisBottom(x)
+ .tickFormat(i => {
+ const idx = Math.round(i);
+ if (Math.abs(i - idx) < 0.1 && data[idx]) {
+ return data[idx][xCol];
+ }
+ return "";
+ });
+
+ const gX = svg.append("g")
.attr("transform", `translate(0,${height})`)
- .call(d3.axisBottom(x))
- .selectAll("text")
+ .call(xAxis);
+
+ gX.selectAll("text")
.attr("transform", "rotate(-45)")
.style("text-anchor", "end")
.attr("class", "axis-label");
@@ -67,25 +83,54 @@ function drawChart() {
// Group data by product
const dataByProduct = d3.group(data, d => d[hueCol]);
+ // Clip path
+ svg.append("defs").append("clipPath")
+ .attr("id", "clip")
+ .append("rect")
+ .attr("width", width)
+ .attr("height", height);
+
+ // Chart body
+ const chartBody = svg.append("g")
+ .attr("clip-path", "url(#clip)");
+
+ // Zoom
+ const zoom = d3.zoom()
+ .scaleExtent([1, 20])
+ .translateExtent([[0, 0], [width, height]])
+ .extent([[0, 0], [width, height]])
+ .on("zoom", zoomed);
+
+ // Zoom rect
+ chartBody.append("rect")
+ .attr("width", width)
+ .attr("height", height)
+ .style("fill", "none")
+ .style("pointer-events", "all")
+ .call(zoom);
+
// Draw smooth lines and areas
dataByProduct.forEach((productData, product) => {
+ const productIndex = products.indexOf(product);
// Area
- svg.append("path")
+ chartBody.append("path")
.datum(productData)
- .attr("class", "area")
+ .attr("class", `area product-${productIndex}`)
.attr("d", d3.area()
- .x(d => x(d[xCol]))
+ .x(d => x(d.index))
.y0(height)
.y1(d => y(d[yCol]))
)
- .style("fill", color(product));
+ .style("fill", color(product))
+ .style("opacity", 0.1)
+ .style("pointer-events", "none");
// Line
- svg.append("path")
+ chartBody.append("path")
.datum(productData)
- .attr("class", "line")
+ .attr("class", `line product-${productIndex}`)
.attr("d", d3.line()
- .x(d => x(d[xCol]))
+ .x(d => x(d.index))
.y(d => y(d[yCol]))
)
.style("stroke", color(product));
@@ -94,32 +139,29 @@ function drawChart() {
// Add dots and tooltips
const tooltip = d3.select("#tooltip");
- data.forEach(d => {
- svg.append("circle")
- .attr("cx", x(d[xCol]))
- .attr("cy", y(d[yCol]))
- .attr("r", 5)
- .attr("fill", color(d[hueCol]))
- .attr("class", "dot")
- .on("mouseover", function (event) {
- d3.select(this).attr("r", 8).style("stroke-width", "3px");
- tooltip.style("opacity", 1)
- .html(`Product: ${d[hueCol]}
Time: ${d[xCol]}
Value: ${d[yCol]}
Eval ID: ${d.job_id}`);
- })
- .on("mousemove", function (event) {
- tooltip.style("left", (event.pageX + 15) + "px")
- .style("top", (event.pageY - 28) + "px");
- })
- .on("mouseout", function () {
- d3.select(this).attr("r", 5).style("stroke-width", "2px");
- tooltip.style("opacity", 0);
- })
- .on("click", function(event, d) {
- if (d && d.job_id) {
- window.open("/?job_id=" + d.job_id, "_blank");
- }
- });
- });
+ chartBody.selectAll(".dot")
+ .data(data)
+ .enter()
+ .append("circle")
+ .attr("cx", d => x(d.index))
+ .attr("cy", d => y(d[yCol]))
+ .attr("r", 5)
+ .attr("fill", d => color(d[hueCol]))
+ .attr("class", d => `dot product-${products.indexOf(d[hueCol])}`)
+ .on("mouseover", function (event, d) {
+ d3.select(this).attr("r", 8).style("stroke-width", "3px");
+ tooltip.style("opacity", 1)
+ .html(`Product: ${d[hueCol]}
Time: ${d[xCol]}
Value: ${d[yCol]}
Eval ID: ${d.job_id}`);
+ })
+ .on("mousemove", function (event) {
+ tooltip.style("left", (event.pageX + 15) + "px")
+ .style("top", (event.pageY - 28) + "px");
+ })
+ .on("mouseout", function () {
+ d3.select(this).attr("r", 5).style("stroke-width", "2px");
+ tooltip.style("opacity", 0);
+ });
+
// Add Title
svg.append("text")
@@ -145,7 +187,57 @@ function drawChart() {
.data(products)
.enter().append("g")
.attr("class", "legend")
- .attr("transform", (d, i) => `translate(${width + 20}, ${i * 25})`);
+ .attr("transform", (d, i) => `translate(${width + 20}, ${i * 25})`)
+ .style("cursor", "pointer")
+ .on("click", function(event, product) {
+ const productIndex = products.indexOf(product);
+
+ // Check if ANY OTHER line is visible
+ let anyOtherVisible = false;
+ products.forEach((p, i) => {
+ if (i !== productIndex) {
+ const el = d3.selectAll(`.line.product-${i}`);
+ if (el.style("opacity") !== "0") {
+ anyOtherVisible = true;
+ }
+ }
+ });
+
+ if (anyOtherVisible) {
+ // ISOLATE
+ products.forEach((p, i) => {
+ const newOpacity = (i === productIndex) ? 1 : 0;
+ const areaOpacity = (i === productIndex) ? 0.1 : 0;
+
+ d3.selectAll(`.line.product-${i}, .dot.product-${i}`)
+ .transition().duration(200).style("opacity", newOpacity)
+ .style("pointer-events", newOpacity === 0 ? "none" : "all");
+
+ d3.selectAll(`.area.product-${i}`)
+ .transition().duration(200).style("opacity", areaOpacity);
+
+ // Update legend
+ const leg = d3.selectAll(".legend").filter(d => d === p);
+ leg.select("rect").style("opacity", newOpacity === 0 ? 0.3 : 1);
+ leg.select("text").style("opacity", newOpacity === 0 ? 0.5 : 1);
+ });
+ } else {
+ // RESTORE
+ products.forEach((p, i) => {
+ d3.selectAll(`.line.product-${i}, .dot.product-${i}`)
+ .transition().duration(200).style("opacity", 1)
+ .style("pointer-events", "all");
+
+ d3.selectAll(`.area.product-${i}`)
+ .transition().duration(200).style("opacity", 0.1);
+
+ // Update legend
+ const leg = d3.selectAll(".legend").filter(d => d === p);
+ leg.select("rect").style("opacity", 1);
+ leg.select("text").style("opacity", 1);
+ });
+ }
+ });
legend.append("rect")
.attr("x", 0)
@@ -160,6 +252,39 @@ function drawChart() {
.attr("dy", ".35em")
.style("text-anchor", "start")
.text(d => d.replace('.json', ''));
+
+ function zoomed(event) {
+ const newX = event.transform.rescaleX(x);
+
+ // Update axis
+ gX.call(xAxis.scale(newX));
+ gX.selectAll("text")
+ .attr("transform", "rotate(-45)")
+ .style("text-anchor", "end");
+
+ // Update lines
+ chartBody.selectAll(".line")
+ .attr("d", function(d) {
+ return d3.line()
+ .x(p => newX(p.index))
+ .y(p => y(p[yCol]))
+ (d);
+ });
+
+ // Update areas
+ chartBody.selectAll(".area")
+ .attr("d", function(d) {
+ return d3.area()
+ .x(p => newX(p.index))
+ .y0(height)
+ .y1(p => y(p[yCol]))
+ (d);
+ });
+
+ // Update dots
+ chartBody.selectAll(".dot")
+ .attr("cx", d => newX(d.index));
+ }
}
// Initial draw
diff --git a/viewer/trends.py b/viewer/trends.py
index cbd43de8..30642717 100644
--- a/viewer/trends.py
+++ b/viewer/trends.py
@@ -37,34 +37,27 @@ def generate_d3_chart(df, x_col, y_col, hue_col, title, ylabel):