This repository was archived by the owner on Apr 1, 2026. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 67
refactor: Migrate DataFrame display to use IPython's _repr_mimebundle_() protocol for anywidget mode #2271
Merged
Merged
refactor: Migrate DataFrame display to use IPython's _repr_mimebundle_() protocol for anywidget mode #2271
Changes from all commits
Commits
Show all changes
27 commits
Select commit
Hold shift + click to select a range
0dc287e
feat: Refactor anywidget display to use _ipython_display_
shuoweil 94c05d7
Revert accidental changes to README.rst
shuoweil 1b15ef0
add testcase
shuoweil 53953bb
ipython_display_ to set fallback mimetypes
shuoweil 5397a59
remove _repr_html_()
shuoweil 689fa74
Revert "remove _repr_html_()"
shuoweil 6b407d4
change the fallback function name to better reflect the change
shuoweil 3cd960e
code refactor
shuoweil b0f1525
fix mypy
shuoweil 7e47895
Revert unncessary change
shuoweil 736929a
use test/plain for print(df)
shuoweil eb61fc0
fix failed testcase
shuoweil 3079a3c
reuse available data and optimize error handling
shuoweil 86e0b6a
update the testcase due to my optimization reduce the query calls number
shuoweil d53eea3
code refactor
shuoweil 8b9a2cf
function polish
shuoweil 1042a9e
Merge branch 'main' into shuowei-anywidget-html-repr
shuoweil 83513b5
Merge branch 'main' into shuowei-anywidget-html-repr
shuoweil 05a9245
notebook change
shuoweil 9b4b18a
Merge branch 'main' into shuowei-anywidget-html-repr
shuoweil 8325f8d
remove dead code
shuoweil 0a1df42
Merge branch 'main' into shuowei-anywidget-html-repr
shuoweil cbae5d8
Merge branch 'main' into shuowei-anywidget-html-repr
shuoweil d61e03f
merge from main
shuoweil bf661c7
comments update
shuoweil f47f87b
testcase update due to merge from main
shuoweil 09ee218
Merge branch 'main' into shuowei-anywidget-html-repr
shuoweil File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Some comments aren't visible on the classic Files Changed page.
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -789,9 +789,7 @@ def __repr__(self) -> str: | |
|
|
||
| opts = bigframes.options.display | ||
| max_results = opts.max_rows | ||
| # anywdiget mode uses the same display logic as the "deferred" mode | ||
| # for faster execution | ||
| if opts.repr_mode in ("deferred", "anywidget"): | ||
| if opts.repr_mode == "deferred": | ||
| return formatter.repr_query_job(self._compute_dry_run()) | ||
|
|
||
| # TODO(swast): pass max_columns and get the true column count back. Maybe | ||
|
|
@@ -829,68 +827,138 @@ def __repr__(self) -> str: | |
| lines.append(f"[{row_count} rows x {column_count} columns]") | ||
| return "\n".join(lines) | ||
|
|
||
| def _repr_html_(self) -> str: | ||
| """ | ||
| Returns an html string primarily for use by notebooks for displaying | ||
| a representation of the DataFrame. Displays 20 rows by default since | ||
| many notebooks are not configured for large tables. | ||
| """ | ||
| opts = bigframes.options.display | ||
| max_results = opts.max_rows | ||
| if opts.repr_mode == "deferred": | ||
| return formatter.repr_query_job(self._compute_dry_run()) | ||
|
|
||
| # Process blob columns first, regardless of display mode | ||
| self._cached() | ||
| df = self.copy() | ||
| def _get_display_df_and_blob_cols(self) -> tuple[DataFrame, list[str]]: | ||
| """Process blob columns for display.""" | ||
| df = self | ||
| blob_cols = [] | ||
| if bigframes.options.display.blob_display: | ||
| blob_cols = [ | ||
| series_name | ||
| for series_name, series in df.items() | ||
| for series_name, series in self.items() | ||
| if series.dtype == bigframes.dtypes.OBJ_REF_DTYPE | ||
| ] | ||
| for col in blob_cols: | ||
| # TODO(garrettwu): Not necessary to get access urls for all the rows. Update when having a to get URLs from local data. | ||
| df[col] = df[col].blob._get_runtime(mode="R", with_metadata=True) | ||
| if blob_cols: | ||
| df = self.copy() | ||
| for col in blob_cols: | ||
| # TODO(garrettwu): Not necessary to get access urls for all the rows. Update when having a to get URLs from local data. | ||
| df[col] = df[col].blob._get_runtime(mode="R", with_metadata=True) | ||
| return df, blob_cols | ||
|
|
||
| def _get_anywidget_bundle(self, include=None, exclude=None): | ||
| """ | ||
| Helper method to create and return the anywidget mimebundle. | ||
| This function encapsulates the logic for anywidget display. | ||
| """ | ||
| from bigframes import display | ||
|
|
||
| # TODO(shuowei): Keep blob_cols and pass them to TableWidget so that they can render properly. | ||
| df, _ = self._get_display_df_and_blob_cols() | ||
|
|
||
| # Create and display the widget | ||
| widget = display.TableWidget(df) | ||
| widget_repr_result = widget._repr_mimebundle_(include=include, exclude=exclude) | ||
|
|
||
| # Handle both tuple (data, metadata) and dict returns | ||
| if isinstance(widget_repr_result, tuple): | ||
| widget_repr = dict(widget_repr_result[0]) # Extract data dict from tuple | ||
| else: | ||
| blob_cols = [] | ||
| widget_repr = dict(widget_repr_result) | ||
|
|
||
| if opts.repr_mode == "anywidget": | ||
| try: | ||
| from IPython.display import display as ipython_display | ||
| # At this point, we have already executed the query as part of the | ||
| # widget construction. Let's use the information available to render | ||
| # the HTML and plain text versions. | ||
| widget_repr["text/html"] = widget.table_html | ||
|
|
||
| widget_repr["text/plain"] = self._create_text_representation( | ||
| widget._cached_data, widget.row_count | ||
| ) | ||
|
|
||
| return widget_repr | ||
|
|
||
| def _create_text_representation( | ||
| self, pandas_df: pandas.DataFrame, total_rows: typing.Optional[int] | ||
| ) -> str: | ||
| """Create a text representation of the DataFrame.""" | ||
| opts = bigframes.options.display | ||
| with display_options.pandas_repr(opts): | ||
| import pandas.io.formats | ||
|
|
||
| # safe to mutate this, this dict is owned by this code, and does not affect global config | ||
| to_string_kwargs = ( | ||
| pandas.io.formats.format.get_dataframe_repr_params() # type: ignore | ||
| ) | ||
| if not self._has_index: | ||
| to_string_kwargs.update({"index": False}) | ||
|
|
||
| # We add our own dimensions string, so don't want pandas to. | ||
| to_string_kwargs.update({"show_dimensions": False}) | ||
| repr_string = pandas_df.to_string(**to_string_kwargs) | ||
|
|
||
| from bigframes import display | ||
| lines = repr_string.split("\n") | ||
|
|
||
| # Always create a new widget instance for each display call | ||
| # This ensures that each cell gets its own widget and prevents | ||
| # unintended sharing between cells | ||
| widget = display.TableWidget(df.copy()) | ||
| if total_rows is not None and total_rows > len(pandas_df): | ||
| lines.append("...") | ||
|
|
||
| ipython_display(widget) | ||
| return "" # Return empty string since we used display() | ||
| lines.append("") | ||
| column_count = len(self.columns) | ||
| lines.append(f"[{total_rows or '?'} rows x {column_count} columns]") | ||
| return "\n".join(lines) | ||
|
|
||
| except (AttributeError, ValueError, ImportError): | ||
| # Fallback if anywidget is not available | ||
| def _repr_mimebundle_(self, include=None, exclude=None): | ||
| """ | ||
| Custom display method for IPython/Jupyter environments. | ||
| This is called by IPython's display system when the object is displayed. | ||
| """ | ||
| opts = bigframes.options.display | ||
| # Only handle widget display in anywidget mode | ||
| if opts.repr_mode == "anywidget": | ||
| try: | ||
| return self._get_anywidget_bundle(include=include, exclude=exclude) | ||
|
|
||
| except ImportError: | ||
| # Anywidget is an optional dependency, so warn rather than fail. | ||
| # TODO(shuowei): When Anywidget becomes the default for all repr modes, | ||
| # remove this warning. | ||
| warnings.warn( | ||
| "Anywidget mode is not available. " | ||
| "Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use interactive tables. " | ||
| f"Falling back to deferred mode. Error: {traceback.format_exc()}" | ||
| f"Falling back to static HTML. Error: {traceback.format_exc()}" | ||
| ) | ||
| return formatter.repr_query_job(self._compute_dry_run()) | ||
|
|
||
| # Continue with regular HTML rendering for non-anywidget modes | ||
| # TODO(swast): pass max_columns and get the true column count back. Maybe | ||
| # get 1 more column than we have requested so that pandas can add the | ||
| # ... for us? | ||
| # In non-anywidget mode, fetch data once and use it for both HTML | ||
| # and plain text representations to avoid multiple queries. | ||
| opts = bigframes.options.display | ||
| max_results = opts.max_rows | ||
|
|
||
| df, blob_cols = self._get_display_df_and_blob_cols() | ||
|
|
||
| pandas_df, row_count, query_job = df._block.retrieve_repr_request_results( | ||
| max_results | ||
| ) | ||
|
|
||
| self._set_internal_query_job(query_job) | ||
| column_count = len(pandas_df.columns) | ||
|
|
||
| html_string = self._create_html_representation( | ||
| pandas_df, row_count, column_count, blob_cols | ||
| ) | ||
|
|
||
| text_representation = self._create_text_representation(pandas_df, row_count) | ||
|
|
||
| return {"text/html": html_string, "text/plain": text_representation} | ||
|
|
||
| def _create_html_representation( | ||
| self, | ||
| pandas_df: pandas.DataFrame, | ||
| row_count: int, | ||
| column_count: int, | ||
| blob_cols: list[str], | ||
| ) -> str: | ||
| """Create an HTML representation of the DataFrame.""" | ||
| opts = bigframes.options.display | ||
| with display_options.pandas_repr(opts): | ||
| # Allows to preview images in the DataFrame. The implementation changes the string repr as well, that it doesn't truncate strings or escape html charaters such as "<" and ">". We may need to implement a full-fledged repr module to better support types not in pandas. | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe convert this comment into a TODO? Escaping HTML seems like it'd be useful.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. filed b/464053870 |
||
| # TODO(shuowei, b/464053870): Escaping HTML would be useful, but | ||
| # `escape=False` is needed to show images. We may need to implement | ||
| # a full-fledged repr module to better support types not in pandas. | ||
| if bigframes.options.display.blob_display and blob_cols: | ||
|
|
||
| def obj_ref_rt_to_html(obj_ref_rt) -> str: | ||
|
|
@@ -919,15 +987,12 @@ def obj_ref_rt_to_html(obj_ref_rt) -> str: | |
|
|
||
| # set max_colwidth so not to truncate the image url | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This comment seems important, too. |
||
| with pandas.option_context("display.max_colwidth", None): | ||
| max_rows = pandas.get_option("display.max_rows") | ||
| max_cols = pandas.get_option("display.max_columns") | ||
| show_dimensions = pandas.get_option("display.show_dimensions") | ||
| html_string = pandas_df.to_html( | ||
| escape=False, | ||
| notebook=True, | ||
| max_rows=max_rows, | ||
| max_cols=max_cols, | ||
| show_dimensions=show_dimensions, | ||
| max_rows=pandas.get_option("display.max_rows"), | ||
| max_cols=pandas.get_option("display.max_columns"), | ||
| show_dimensions=pandas.get_option("display.show_dimensions"), | ||
| formatters=formatters, # type: ignore | ||
| ) | ||
| else: | ||
|
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.