Skip to content

Commit 7f1d3df

Browse files
authored
fix: Correct DataFrame widget rendering in Colab (#2319)
This PR fixes the _get_anywidget_bundle method. Previously, when the underlying widget's _repr_mimebundle_ method returned a (data, metadata) tuple, the code extracted only the data portion and discarded the metadata. As a result, the widget did not render correctly in environments such as Colab, which rely on this metadata. The change corrects this by unpacking the tuple into widget_repr and widget_metadata. The method now preserves the metadata and returns it along with the data, ensuring that the information needed for widget rendering is passed on. We also revert commit 4df3428 to reapply "refactor: Migrate DataFrame display to use IPython's repr_mimebundle() protocol for anywidget mode (#2271)". A test case is added to verify this change. We also verified the fix in Colab: screen/AzGa5RMTJnMH5NH Fixes #<466155761> 🦕
1 parent 2526448 commit 7f1d3df

File tree

9 files changed

+701
-151
lines changed

9 files changed

+701
-151
lines changed

bigframes/core/indexes/base.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -376,9 +376,7 @@ def __repr__(self) -> __builtins__.str:
376376
# metadata, like we do with DataFrame.
377377
opts = bigframes.options.display
378378
max_results = opts.max_rows
379-
# anywdiget mode uses the same display logic as the "deferred" mode
380-
# for faster execution
381-
if opts.repr_mode in ("deferred", "anywidget"):
379+
if opts.repr_mode == "deferred":
382380
_, dry_run_query_job = self._block._compute_dry_run()
383381
return formatter.repr_query_job(dry_run_query_job)
384382

bigframes/dataframe.py

Lines changed: 124 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -789,9 +789,7 @@ def __repr__(self) -> str:
789789

790790
opts = bigframes.options.display
791791
max_results = opts.max_rows
792-
# anywdiget mode uses the same display logic as the "deferred" mode
793-
# for faster execution
794-
if opts.repr_mode in ("deferred", "anywidget"):
792+
if opts.repr_mode == "deferred":
795793
return formatter.repr_query_job(self._compute_dry_run())
796794

797795
# TODO(swast): pass max_columns and get the true column count back. Maybe
@@ -829,68 +827,149 @@ def __repr__(self) -> str:
829827
lines.append(f"[{row_count} rows x {column_count} columns]")
830828
return "\n".join(lines)
831829

832-
def _repr_html_(self) -> str:
833-
"""
834-
Returns an html string primarily for use by notebooks for displaying
835-
a representation of the DataFrame. Displays 20 rows by default since
836-
many notebooks are not configured for large tables.
837-
"""
838-
opts = bigframes.options.display
839-
max_results = opts.max_rows
840-
if opts.repr_mode == "deferred":
841-
return formatter.repr_query_job(self._compute_dry_run())
842-
843-
# Process blob columns first, regardless of display mode
844-
self._cached()
845-
df = self.copy()
830+
def _get_display_df_and_blob_cols(self) -> tuple[DataFrame, list[str]]:
831+
"""Process blob columns for display."""
832+
df = self
833+
blob_cols = []
846834
if bigframes.options.display.blob_display:
847835
blob_cols = [
848836
series_name
849-
for series_name, series in df.items()
837+
for series_name, series in self.items()
850838
if series.dtype == bigframes.dtypes.OBJ_REF_DTYPE
851839
]
852-
for col in blob_cols:
853-
# TODO(garrettwu): Not necessary to get access urls for all the rows. Update when having a to get URLs from local data.
854-
df[col] = df[col].blob._get_runtime(mode="R", with_metadata=True)
840+
if blob_cols:
841+
df = self.copy()
842+
for col in blob_cols:
843+
# TODO(garrettwu): Not necessary to get access urls for all the rows. Update when having a to get URLs from local data.
844+
df[col] = df[col].blob._get_runtime(mode="R", with_metadata=True)
845+
return df, blob_cols
846+
847+
def _get_anywidget_bundle(
848+
self, include=None, exclude=None
849+
) -> tuple[dict[str, Any], dict[str, Any]]:
850+
"""
851+
Helper method to create and return the anywidget mimebundle.
852+
This function encapsulates the logic for anywidget display.
853+
"""
854+
from bigframes import display
855+
856+
df, blob_cols = self._get_display_df_and_blob_cols()
857+
858+
# Create and display the widget
859+
widget = display.TableWidget(df)
860+
widget_repr_result = widget._repr_mimebundle_(include=include, exclude=exclude)
861+
862+
# Handle both tuple (data, metadata) and dict returns
863+
if isinstance(widget_repr_result, tuple):
864+
widget_repr, widget_metadata = widget_repr_result
855865
else:
856-
blob_cols = []
866+
widget_repr = widget_repr_result
867+
widget_metadata = {}
868+
869+
widget_repr = dict(widget_repr)
870+
871+
# At this point, we have already executed the query as part of the
872+
# widget construction. Let's use the information available to render
873+
# the HTML and plain text versions.
874+
widget_repr["text/html"] = self._create_html_representation(
875+
widget._cached_data,
876+
widget.row_count,
877+
len(self.columns),
878+
blob_cols,
879+
)
857880

858-
if opts.repr_mode == "anywidget":
859-
try:
860-
from IPython.display import display as ipython_display
881+
widget_repr["text/plain"] = self._create_text_representation(
882+
widget._cached_data, widget.row_count
883+
)
884+
885+
return widget_repr, widget_metadata
886+
887+
def _create_text_representation(
888+
self, pandas_df: pandas.DataFrame, total_rows: typing.Optional[int]
889+
) -> str:
890+
"""Create a text representation of the DataFrame."""
891+
opts = bigframes.options.display
892+
with display_options.pandas_repr(opts):
893+
import pandas.io.formats
894+
895+
# safe to mutate this, this dict is owned by this code, and does not affect global config
896+
to_string_kwargs = (
897+
pandas.io.formats.format.get_dataframe_repr_params() # type: ignore
898+
)
899+
if not self._has_index:
900+
to_string_kwargs.update({"index": False})
901+
902+
# We add our own dimensions string, so don't want pandas to.
903+
to_string_kwargs.update({"show_dimensions": False})
904+
repr_string = pandas_df.to_string(**to_string_kwargs)
861905

862-
from bigframes import display
906+
lines = repr_string.split("\n")
863907

864-
# Always create a new widget instance for each display call
865-
# This ensures that each cell gets its own widget and prevents
866-
# unintended sharing between cells
867-
widget = display.TableWidget(df.copy())
908+
if total_rows is not None and total_rows > len(pandas_df):
909+
lines.append("...")
868910

869-
ipython_display(widget)
870-
return "" # Return empty string since we used display()
911+
lines.append("")
912+
column_count = len(self.columns)
913+
lines.append(f"[{total_rows or '?'} rows x {column_count} columns]")
914+
return "\n".join(lines)
871915

872-
except (AttributeError, ValueError, ImportError):
873-
# Fallback if anywidget is not available
916+
def _repr_mimebundle_(self, include=None, exclude=None):
917+
"""
918+
Custom display method for IPython/Jupyter environments.
919+
This is called by IPython's display system when the object is displayed.
920+
"""
921+
# TODO(b/467647693): Anywidget integration has been tested in Jupyter, VS Code, and
922+
# BQ Studio, but there is a known compatibility issue with Marimo that needs to be addressed.
923+
opts = bigframes.options.display
924+
# Only handle widget display in anywidget mode
925+
if opts.repr_mode == "anywidget":
926+
try:
927+
return self._get_anywidget_bundle(include=include, exclude=exclude)
928+
929+
except ImportError:
930+
# Anywidget is an optional dependency, so warn rather than fail.
931+
# TODO(shuowei): When Anywidget becomes the default for all repr modes,
932+
# remove this warning.
874933
warnings.warn(
875934
"Anywidget mode is not available. "
876935
"Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use interactive tables. "
877-
f"Falling back to deferred mode. Error: {traceback.format_exc()}"
936+
f"Falling back to static HTML. Error: {traceback.format_exc()}"
878937
)
879-
return formatter.repr_query_job(self._compute_dry_run())
880938

881-
# Continue with regular HTML rendering for non-anywidget modes
882-
# TODO(swast): pass max_columns and get the true column count back. Maybe
883-
# get 1 more column than we have requested so that pandas can add the
884-
# ... for us?
939+
# In non-anywidget mode, fetch data once and use it for both HTML
940+
# and plain text representations to avoid multiple queries.
941+
opts = bigframes.options.display
942+
max_results = opts.max_rows
943+
944+
df, blob_cols = self._get_display_df_and_blob_cols()
945+
885946
pandas_df, row_count, query_job = df._block.retrieve_repr_request_results(
886947
max_results
887948
)
888-
889949
self._set_internal_query_job(query_job)
890950
column_count = len(pandas_df.columns)
891951

952+
html_string = self._create_html_representation(
953+
pandas_df, row_count, column_count, blob_cols
954+
)
955+
956+
text_representation = self._create_text_representation(pandas_df, row_count)
957+
958+
return {"text/html": html_string, "text/plain": text_representation}
959+
960+
def _create_html_representation(
961+
self,
962+
pandas_df: pandas.DataFrame,
963+
row_count: int,
964+
column_count: int,
965+
blob_cols: list[str],
966+
) -> str:
967+
"""Create an HTML representation of the DataFrame."""
968+
opts = bigframes.options.display
892969
with display_options.pandas_repr(opts):
893-
# Allows to preview images in the DataFrame. The implementation changes the string repr as well, that it doesn't truncate strings or escape html charaters such as "<" and ">". We may need to implement a full-fledged repr module to better support types not in pandas.
970+
# TODO(shuowei, b/464053870): Escaping HTML would be useful, but
971+
# `escape=False` is needed to show images. We may need to implement
972+
# a full-fledged repr module to better support types not in pandas.
894973
if bigframes.options.display.blob_display and blob_cols:
895974

896975
def obj_ref_rt_to_html(obj_ref_rt) -> str:
@@ -919,15 +998,12 @@ def obj_ref_rt_to_html(obj_ref_rt) -> str:
919998

920999
# set max_colwidth so not to truncate the image url
9211000
with pandas.option_context("display.max_colwidth", None):
922-
max_rows = pandas.get_option("display.max_rows")
923-
max_cols = pandas.get_option("display.max_columns")
924-
show_dimensions = pandas.get_option("display.show_dimensions")
9251001
html_string = pandas_df.to_html(
9261002
escape=False,
9271003
notebook=True,
928-
max_rows=max_rows,
929-
max_cols=max_cols,
930-
show_dimensions=show_dimensions,
1004+
max_rows=pandas.get_option("display.max_rows"),
1005+
max_cols=pandas.get_option("display.max_columns"),
1006+
show_dimensions=pandas.get_option("display.show_dimensions"),
9311007
formatters=formatters, # type: ignore
9321008
)
9331009
else:

bigframes/streaming/dataframe.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -291,13 +291,13 @@ def __repr__(self, *args, **kwargs):
291291

292292
__repr__.__doc__ = _curate_df_doc(inspect.getdoc(dataframe.DataFrame.__repr__))
293293

294-
def _repr_html_(self, *args, **kwargs):
295-
return _return_type_wrapper(self._df._repr_html_, StreamingDataFrame)(
294+
def _repr_mimebundle_(self, *args, **kwargs):
295+
return _return_type_wrapper(self._df._repr_mimebundle_, StreamingDataFrame)(
296296
*args, **kwargs
297297
)
298298

299-
_repr_html_.__doc__ = _curate_df_doc(
300-
inspect.getdoc(dataframe.DataFrame._repr_html_)
299+
_repr_mimebundle_.__doc__ = _curate_df_doc(
300+
inspect.getdoc(dataframe.DataFrame._repr_mimebundle_)
301301
)
302302

303303
@property

0 commit comments

Comments
 (0)