Skip to content
Draft
669 changes: 352 additions & 317 deletions lumen/ai/agents.py

Large diffs are not rendered by default.

5 changes: 3 additions & 2 deletions lumen/ai/coordinator.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,14 +196,15 @@ async def _retry_from_provider(self, provider_index: int, failed_index: int, err
for tidx, t in enumerate(self)
)
self._coordinator._todos.object = todos

# Run with mutated history
kwargs = {"agents": self.agents} if 'agents' in task.param else {}
with task.param.update(
memory=self.memory, interface=self.interface, steps_layout=self.steps_layout,
history=retry_history, **kwargs
):
outputs += await task.execute(**kwargs)
output = await task.execute(**kwargs)
breakpoint() # TODO: investigate; is it caching??
outputs += output
retry_step.success_title = f"✅ {task.title} successfully completed on retry"
return outputs

Expand Down
129 changes: 119 additions & 10 deletions lumen/ai/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,17 @@ class PartialBaseModel(BaseModel, PartialLiteralMixin):
class EscapeBaseModel(PartialBaseModel):

insufficient_context_reason: str = Field(
description="If lacking sufficient context, explain why; else use ''. Do not base off the user query; only from the data context provided.",
description=(
"If the model lacks sufficient context from the provided data, explain why. "
"Do not infer from the user query. For example, if values such as '899 ms' or '1.06s' "
"are not correctly converted to numeric seconds (0.899, 1.06), describe the missing conversion logic. "
"Leave empty ('') if context is sufficient."
),
examples=[
"A timeseries is requested but SQL only provides customer and order data; please include a time dimension",
"The previous result is one aggregated value; try a different aggregation or more dimensions",
""
]
"Time values include units (e.g. '899 ms', '1.06s') but are not normalized to numeric seconds; conversion logic is required.",
"A timeseries is requested but SQL only provides customer and order data; please include a time dimension.",
"The previous result is one aggregated value; try a different aggregation or more dimensions.",
],
)

insufficient_context: bool = Field(
Expand Down Expand Up @@ -132,7 +137,97 @@ def make_sql_model(is_final: bool = False):
return SqlQueries


class VegaLiteSpec(EscapeBaseModel):
class VegaLiteRow(BaseModel):
    """A row in the layout, containing one or more plots arranged horizontally."""

    # Each slug must match the `slug` of a VegaLitePlotSpec declared in the
    # same layout plan; list order determines left-to-right placement when the
    # row is rendered as an hconcat.
    # NOTE(review): nothing here validates that slugs are unique or resolvable —
    # presumably the consumer of the plan checks this; confirm.
    plot_slugs: list[str] = Field(
        description="""
        List of plot slugs for this row, in left-to-right order.
        All plots in a row are arranged side-by-side (hconcat).
        """
    )


class VegaLitePlotSpec(BaseModel):
    """Specification for a single plot to be generated independently."""

    # Identifier referenced by VegaLiteRow.plot_slugs to position this plot
    # within the overall layout.
    slug: str = Field(
        description="""
        Unique identifier for this plot (e.g., 'revenue_trend', 'top_5_categories').
        Must be unique within the layout and descriptive of what's shown.
        """
    )

    # Self-contained prompt for the downstream plot generator; per the
    # description it must not rely on sibling plots for context, since each
    # plot is generated independently (in parallel).
    instruction: str = Field(
        description="""
        Detailed natural language instructions for generating this plot.
        Include: mark type (bar/line/point/etc), x-axis field and type,
        y-axis field and type, any color encoding, aggregations, filters,
        and styling preferences. Be specific enough that each plot can be
        generated independently without additional context.

        Example: "Create a line chart showing revenue over time. X-axis:
        'month' (temporal), Y-axis: 'revenue' (quantitative, aggregated as sum).
        Use blue color for the line. Add points to mark each data point."
        """
    )

    # None signals that the plot-generation step should produce a title itself.
    title: str | None = Field(
        default=None,
        description="Optional title for this subplot. If omitted, will be generated during plot creation."
    )


class VegaLiteLayoutPlan(EscapeBaseModel):
    """
    Plan for creating a multi-plot visualization.

    Layout structure:
    - Single row with 1 plot → Simple plot (no concat)
    - Single row with N plots → hconcat (side-by-side)
    - Multiple rows with 1 plot each → vconcat (stacked)
    - Multiple rows with varying plots → vconcat of hconcats (grid)
    """

    # Free-form reasoning field; requested first so the model commits to a
    # strategy before emitting the structured plots/rows below.
    chain_of_thought: str = Field(
        description="""
        Explain your visualization strategy:
        - What story are you telling with this layout?
        - Why did you choose these specific plots?
        - Why this arrangement (side-by-side vs stacked)?
        - How do the plots complement each other?
        """
    )

    overall_title: str | None = Field(
        default=None,
        description="Optional overall title for the entire visualization dashboard."
    )

    # Optional (None) — presumably so the model can escape via the
    # insufficient_context fields inherited from EscapeBaseModel without
    # producing a plan; TODO confirm against the consuming agent.
    plots: list[VegaLitePlotSpec] | None = Field(
        default=None,
        description="""
        All plots to generate, listed in the order they should appear.
        Each plot will be generated independently in parallel.
        """
    )

    # Rows reference plots by slug; every slug listed here should correspond
    # to an entry in `plots`.
    # NOTE(review): no cross-field validator enforces that correspondence —
    # confirm the consumer tolerates or rejects dangling slugs.
    rows: list[VegaLiteRow] | None = Field(
        default=None,
        description="""
        Layout structure defining how plots are arranged.
        Each row contains plot slugs that will be arranged horizontally.
        Multiple rows are stacked vertically.

        Examples:
        - Side-by-side: [{"plot_slugs": ["plot1", "plot2", "plot3"]}]
        - Stacked: [{"plot_slugs": ["plot1"]}, {"plot_slugs": ["plot2"]}]
        - Grid: [{"plot_slugs": ["plot1", "plot2"]}, {"plot_slugs": ["plot3"]}]
        """
    )


class VegaLiteSubplotSpec(BaseModel):

chain_of_thought: str = Field(
description="""Explain your design choices based on visualization theory:
Expand All @@ -143,18 +238,32 @@ class VegaLiteSpec(EscapeBaseModel):
Then describe the basic plot structure."""
)
yaml_spec: str = Field(
description="A basic vega-lite YAML specification with core plot elements only (data, mark, basic x/y encoding)."
description="""A vega-lite YAML specification using layer array structure.
CRITICAL: ALWAYS use layer array, even for single marks.
Example: layer: [{mark: bar, encoding: {...}}]"""
)
mode: Literal["update", "replace", "append_layers"] = Field(
default="update",
description="Merge mode (always 'update' for initial plot creation)."
)


class VegaLiteSpecUpdate(BaseModel):
class VegaLiteUpdateSpec(BaseModel):
chain_of_thought: str = Field(
description="Explain what changes you're making to the Vega-Lite spec and why."
)
yaml_update: str = Field(
description="""Partial YAML with ONLY modified properties (unchanged values omitted).
Respect your step's scope; don't override previous steps."""
description="""Partial YAML with ONLY modified properties, BUT always include full hierarchy.
For layered specs: MUST wrap in 'layer' array even if only updating one property.
Example - to change color in layered spec: layer: [{encoding: {color: {value: red}}}]"""
)
mode: Literal["update", "replace", "append_layers"] = Field(
description="""Merge strategy:
- 'update': Deep merge (fixes/refinements that preserve structure)
- 'replace': Full replacement (changing mark type or complete restructure)
- 'append_layers': Add new layers (annotations like reference lines/labels)"""
)


class LineChange(BaseModel):
line_no: int = Field(description="The line number in the original text that needs to be changed.")
Expand Down
7 changes: 2 additions & 5 deletions lumen/ai/prompts/LumenBaseAgent/retry_output.jinja2
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,8 @@ Available schema context:
Language: {{ language }}
{% endif %}

Original code with line numbers:
Original code:
```{% if language %}{{ language }}{% endif %}
{{ numbered_text }}
{{ original_text }}
```

Please restate this feedback in your own words, and then try to fix the code based on the feedback provided:
{{ feedback }}
{% endblock %}
3 changes: 3 additions & 0 deletions lumen/ai/prompts/Planner/main.jinja2
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ Ground Rules:
- Never mention a lack of data in your plan - assume your actors will handle data discovery
- Do not ignore the actor's exclusions and conditions
- When keys are already present in memory, utilize them to construct your plan efficiently—avoid assigning an actor to produce memory keys that are already available
- For follow-ups, when the user asks to add metrics/dimensions to an existing visualization, instruct data agents to modify the original query so it returns all existing and new metrics in a single result set (do not run separate queries), since multi-plot comparisons require metrics in the same table
- If the user query is vague, return all the data without limits

{%- if tools %}
- Tools require actor interpretation - always follow-up tools with agents
{%- endif %}
Expand Down
19 changes: 11 additions & 8 deletions lumen/ai/prompts/SQLAgent/main.jinja2
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,20 @@
Write a SQL query for the user's data transformation request, focusing on intent over literal interpretation.

## 🎯 PRIMARY RULES:
1. Reuse materialized data - Check "Current Knowledge" first before writing CTEs
2. Column quotes - Use double quotes: `"column_name"`
3. String quotes - Single quotes: `'string_value'`
4. No CREATE statements - System handles materialization
5. No LIMIT clauses unless explicitly requested by the user - Pagination is automatically handled
6. Progressive approach - Each step builds on previous
1. **Reuse and extend** - When asked to "add" or "include" additional metrics, MODIFY the existing query to include ALL metrics in a SINGLE result set
2. Reuse materialized data - Check "Current Knowledge" first before writing CTEs
3. Column quotes - Use double quotes: `"column_name"`
4. String quotes - Single quotes: `'string_value'`
5. No CREATE statements - System handles materialization
6. No LIMIT clauses unless explicitly requested by the user - Pagination is automatically handled
7. Progressive approach - Each step builds on previous
8. If the user query is vague, return all the data (e.g. `SELECT * FROM ...`) without any LIMIT clause

## Query Patterns:
- NULL handling: Exclude unless specified
- Clean data: `TRIM("column") != ''`, filter -9999, empty strings
- Clean data: `TRIM("column") != ''`, filter -9999, empty strings
- Headers: Use OFFSET 1 for header/metadata rows
- Mixed units: Use CASE to normalize before aggregating
- Mixed units: REGEXP_REPLACE to strip suffix, scale appropriately (ms/1000, min*60)
- Temporal data: Check MIN/MAX dates before joining, ensure overlap validation
- No inline comments in SQL code
- Use CTEs only when necessary
Expand Down Expand Up @@ -58,6 +60,7 @@ Use `{{ dialect }}` SQL dialect.
- Date selection: CAST(col as DATE) == '2023-06-01'
- Window filtering: `QUALIFY ROW_NUMBER() OVER (...) <= 5`
- Safe conversions: Use `TRY_CAST` for type conversions
- Unit strings: `TRY_CAST(REGEXP_REPLACE(col, '[a-z]+$', '') AS DOUBLE)` then scale
- Use materialized tables when available
{% endif %}
{%- if dialect == 'snowflake' %}
Expand Down
114 changes: 54 additions & 60 deletions lumen/ai/prompts/VegaLiteAgent/annotate_plot.jinja2
Original file line number Diff line number Diff line change
@@ -1,82 +1,76 @@
{% extends 'Actor/main.jinja2' %}
{% extends 'LumenBaseAgent/retry_output.jinja2' %}

{% block instructions %}
Add strategic visual annotations to highlight key insights in your data.
Add strategic visual annotations to highlight key insights.

# Annotation Patterns
CRITICAL: Always use mode='append_layers' for annotations.

**Reference lines**: Horizontal/vertical rules for meaningful thresholds
- Use for goals, benchmarks, or natural breakpoints (e.g., "5,000+ turbines = major wind state")
- Position with `y: {datum: value}` or `x: {datum: value}`
- Use subtle styling: `opacity: 0.5`, muted colors, thin lines
- Anchor labels to the line, not floating positions
For yaml_update, return ONLY the new layer(s) to add as a list:
```yaml
- mark: {type: rule, color: red}
encoding: {y: {datum: 100}}
```

# LAYERS

Wrap chart in `layer` array, then add annotation marks (rules, text, highlights). All layers share data/scales unless overridden.

**Value labels**: Text on specific data points
- Use only for outliers or endpoints that need emphasis
- Apply `transform` with `filter` to select points
- Don't label every bar - let the axis provide that information
# PATTERNS

**Reference lines**: Horizontal/vertical rules for thresholds (goals, benchmarks)
- Position: `y: {datum: value}` or `x: {datum: value}`
- Style: subtle colors, `opacity: 0.5`, `strokeDash: [4,4]`
- Anchor labels to line

**Value labels**: Text on outliers/endpoints only
- Filter: `transform: [{filter: "datum.field > threshold"}]`
- Don't label every point—let axes handle that

**Conditional highlights**: Emphasize data meeting criteria
- Layer filtered data with distinct color
- Use `transform: [{filter: "datum.field > threshold"}]`
- Make sure the threshold is meaningful, not arbitrary
{% endblock %}

{% block context %}
Build off the following Vega-Lite yaml:
```yaml
{{ vega_spec }}
```
- Use meaningful thresholds
{% endblock %}

{% block examples %}
# EXAMPLES

Reference line with subtle styling:
## Converting to layers (reference line):

Before:
```yaml
mark: bar
encoding:
x: {field: category, type: ordinal}
y: {field: value, type: quantitative}
```

After (mode='append_layers'):
```yaml
layer:
- mark: bar
encoding:
x: {field: category, type: ordinal}
y: {field: value, type: quantitative}
- mark: {type: rule, color: '#666666', size: 1, strokeDash: [4,4], opacity: 0.5}
encoding:
y: {datum: 5000}
- mark: {type: text, align: left, dx: 5, dy: -5, fontSize: 10, color: '#666666'}
encoding:
text: {value: "5,000"}
y: {datum: 5000}
x: {datum: 0}
- mark: {type: rule, color: '#666', size: 1, strokeDash: [4,4], opacity: 0.5}
encoding: {y: {datum: 5000}}
- mark: {type: text, align: left, dx: 5, dy: -5, fontSize: 10, color: '#666'}
encoding:
text: {value: "5,000"}
y: {datum: 5000}
x: {datum: 0}
```

Value labels with filter:
## Value labels with filter:
```yaml
layer:
- mark: line
encoding:
x: {field: year, type: ordinal}
y: {field: value, type: quantitative}
- mark: {type: text, align: left, dx: 5, fontSize: 11}
transform:
- filter: "datum.year == 2022" # Only label final point
encoding:
x: {field: year, type: ordinal}
y: {field: value, type: quantitative}
text: {field: value, type: quantitative, format: ".0f"}
- mark: {type: text, align: left, dx: 5, fontSize: 11}
transform: [{filter: "datum.year == 2022"}]
encoding:
x: {field: year, type: ordinal}
y: {field: value, type: quantitative}
text: {field: value, type: quantitative, format: ".0f"}
```

Conditional highlighting:
## Conditional highlighting:
```yaml
layer:
- mark: bar
encoding:
x: {field: category, type: nominal}
y: {field: value, type: quantitative}
- mark: {type: bar, color: "#e74c3c"}
transform:
- filter: "datum.value > 500"
encoding:
x: {field: category, type: nominal}
y: {field: value, type: quantitative}
- mark: {type: bar, color: "#e74c3c"}
transform: [{filter: "datum.value > 500"}]
encoding:
x: {field: category, type: nominal}
y: {field: value, type: quantitative}
```
{% endblock %}
Loading
Loading