Import python venv for stability

2026-02-15 21:24:16 -08:00
parent 1343e93a59
commit 7d784705c9
4997 changed files with 1628270 additions and 0 deletions
@@ -0,0 +1,13 @@
+# ruff: noqa: TC004
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    # import modules that have public classes/functions
+    from pandas.io import (
+        formats,
+        json,
+        stata,
+    )
+
+    # mark only those modules as public
+    __all__ = ["formats", "json", "stata"]
@@ -0,0 +1,169 @@
+from __future__ import annotations
+
+from typing import (
+    TYPE_CHECKING,
+    Literal,
+)
+
+import numpy as np
+
+from pandas._config import using_string_dtype
+
+from pandas._libs import lib
+from pandas.compat import (
+    pa_version_under18p0,
+    pa_version_under19p0,
+)
+from pandas.compat._optional import import_optional_dependency
+
+from pandas.core.dtypes.common import pandas_dtype
+
+import pandas as pd
+
+if TYPE_CHECKING:
+    from collections.abc import (
+        Callable,
+        Hashable,
+        Sequence,
+    )
+
+    import pyarrow
+
+    from pandas._typing import (
+        DtypeArg,
+        DtypeBackend,
+    )
+
+
+def _arrow_dtype_mapping() -> dict:
+    pa = import_optional_dependency("pyarrow")
+    return {
+        pa.int8(): pd.Int8Dtype(),
+        pa.int16(): pd.Int16Dtype(),
+        pa.int32(): pd.Int32Dtype(),
+        pa.int64(): pd.Int64Dtype(),
+        pa.uint8(): pd.UInt8Dtype(),
+        pa.uint16(): pd.UInt16Dtype(),
+        pa.uint32(): pd.UInt32Dtype(),
+        pa.uint64(): pd.UInt64Dtype(),
+        pa.bool_(): pd.BooleanDtype(),
+        pa.string(): pd.StringDtype(),
+        pa.float32(): pd.Float32Dtype(),
+        pa.float64(): pd.Float64Dtype(),
+        pa.string(): pd.StringDtype(),
+        pa.large_string(): pd.StringDtype(),
+    }
+
+
+def _arrow_string_types_mapper() -> Callable:
+    pa = import_optional_dependency("pyarrow")
+
+    mapping = {
+        pa.string(): pd.StringDtype(na_value=np.nan),
+        pa.large_string(): pd.StringDtype(na_value=np.nan),
+    }
+    if not pa_version_under18p0:
+        mapping[pa.string_view()] = pd.StringDtype(na_value=np.nan)
+
+    return mapping.get
+
+
+def arrow_table_to_pandas(
+    table: pyarrow.Table,
+    dtype_backend: DtypeBackend | Literal["numpy"] | lib.NoDefault = lib.no_default,
+    null_to_int64: bool = False,
+    to_pandas_kwargs: dict | None = None,
+    dtype: DtypeArg | None = None,
+    names: Sequence[Hashable] | None = None,
+) -> pd.DataFrame:
+    pa = import_optional_dependency("pyarrow")
+
+    to_pandas_kwargs = {} if to_pandas_kwargs is None else to_pandas_kwargs
+
+    types_mapper: type[pd.ArrowDtype] | None | Callable
+    if dtype_backend == "numpy_nullable":
+        mapping = _arrow_dtype_mapping()
+        if null_to_int64:
+            # Modify the default mapping to also map null to Int64
+            # (to match other engines - only for CSV parser)
+            mapping[pa.null()] = pd.Int64Dtype()
+        types_mapper = mapping.get
+    elif dtype_backend == "pyarrow":
+        types_mapper = pd.ArrowDtype
+    elif using_string_dtype():
+        if pa_version_under19p0:
+            types_mapper = _arrow_string_types_mapper()
+        elif dtype is not None:
+            # GH#56136 Avoid lossy conversion to float64
+            # We'll convert to numpy below if
+            types_mapper = {
+                pa.int8(): pd.Int8Dtype(),
+                pa.int16(): pd.Int16Dtype(),
+                pa.int32(): pd.Int32Dtype(),
+                pa.int64(): pd.Int64Dtype(),
+            }.get
+        else:
+            types_mapper = None
+    elif dtype_backend is lib.no_default or dtype_backend == "numpy":
+        if dtype is not None:
+            # GH#56136 Avoid lossy conversion to float64
+            # We'll convert to numpy below if
+            types_mapper = {
+                pa.int8(): pd.Int8Dtype(),
+                pa.int16(): pd.Int16Dtype(),
+                pa.int32(): pd.Int32Dtype(),
+                pa.int64(): pd.Int64Dtype(),
+            }.get
+        else:
+            types_mapper = None
+    else:
+        raise NotImplementedError
+
+    df = table.to_pandas(types_mapper=types_mapper, **to_pandas_kwargs)
+    return _post_convert_dtypes(df, dtype_backend, dtype, names)
+
+
+def _post_convert_dtypes(
+    df: pd.DataFrame,
+    dtype_backend: DtypeBackend | Literal["numpy"] | lib.NoDefault,
+    dtype: DtypeArg | None,
+    names: Sequence[Hashable] | None,
+) -> pd.DataFrame:
+    if dtype is not None and (
+        dtype_backend is lib.no_default or dtype_backend == "numpy"
+    ):
+        # GH#56136 apply any user-provided dtype, and convert any IntegerDtype
+        #  columns the user didn't explicitly ask for.
+        if isinstance(dtype, dict):
+            if names is not None:
+                df.columns = names
+
+            cmp_dtypes = {
+                pd.Int8Dtype(),
+                pd.Int16Dtype(),
+                pd.Int32Dtype(),
+                pd.Int64Dtype(),
+            }
+            for col in df.columns:
+                if col not in dtype and df[col].dtype in cmp_dtypes:
+                    # Any key that the user didn't explicitly specify
+                    #  that got converted to IntegerDtype now gets converted
+                    #  to numpy dtype.
+                    dtype[col] = df[col].dtype.numpy_dtype
+
+            # Ignore non-existent columns from dtype mapping
+            # like other parsers do
+            dtype = {
+                key: pandas_dtype(dtype[key]) for key in dtype if key in df.columns
+            }
+
+        else:
+            dtype = pandas_dtype(dtype)
+
+        try:
+            df = df.astype(dtype)
+        except TypeError as err:
+            # GH#44901 reraise to keep api consistent
+            raise ValueError(str(err)) from err
+
+    return df
@@ -0,0 +1,65 @@
+"""
+Data I/O API
+"""
+
+from pandas.io.clipboards import read_clipboard
+from pandas.io.excel import (
+    ExcelFile,
+    ExcelWriter,
+    read_excel,
+)
+from pandas.io.feather_format import read_feather
+from pandas.io.html import read_html
+from pandas.io.iceberg import read_iceberg
+from pandas.io.json import read_json
+from pandas.io.orc import read_orc
+from pandas.io.parquet import read_parquet
+from pandas.io.parsers import (
+    read_csv,
+    read_fwf,
+    read_table,
+)
+from pandas.io.pickle import (
+    read_pickle,
+    to_pickle,
+)
+from pandas.io.pytables import (
+    HDFStore,
+    read_hdf,
+)
+from pandas.io.sas import read_sas
+from pandas.io.spss import read_spss
+from pandas.io.sql import (
+    read_sql,
+    read_sql_query,
+    read_sql_table,
+)
+from pandas.io.stata import read_stata
+from pandas.io.xml import read_xml
+
+__all__ = [
+    "ExcelFile",
+    "ExcelWriter",
+    "HDFStore",
+    "read_clipboard",
+    "read_csv",
+    "read_excel",
+    "read_feather",
+    "read_fwf",
+    "read_hdf",
+    "read_html",
+    "read_iceberg",
+    "read_json",
+    "read_orc",
+    "read_parquet",
+    "read_pickle",
+    "read_sas",
+    "read_spss",
+    "read_sql",
+    "read_sql_query",
+    "read_sql_table",
+    "read_stata",
+    "read_table",
+    "read_xml",
+    "to_pickle",
+]
@@ -0,0 +1,747 @@
+"""
+Pyperclip
+
+A cross-platform clipboard module for Python,
+with copy & paste functions for plain text.
+By Al Sweigart al@inventwithpython.com
+Licence at LICENSES/PYPERCLIP_LICENSE
+
+Usage:
+  import pyperclip
+  pyperclip.copy('The text to be copied to the clipboard.')
+  spam = pyperclip.paste()
+
+  if not pyperclip.is_available():
+    print("Copy functionality unavailable!")
+
+On Windows, no additional modules are needed.
+On Mac, the pyobjc module is used, falling back to the pbcopy and pbpaste cli
+    commands. (These commands should come with OS X.)
+On Linux, install xclip, xsel, or wl-clipboard (for "wayland" sessions) via
+package manager.
+For example, in Debian:
+    sudo apt-get install xclip
+    sudo apt-get install xsel
+    sudo apt-get install wl-clipboard
+
+Otherwise on Linux, you will need the PyQt5 modules installed.
+
+This module does not work with PyGObject yet.
+
+Cygwin is currently not supported.
+
+Security Note: This module runs programs with these names:
+    - pbcopy
+    - pbpaste
+    - xclip
+    - xsel
+    - wl-copy/wl-paste
+    - klipper
+    - qdbus
+A malicious user could rename or add programs with these names, tricking
+Pyperclip into running them with whatever permissions the Python process has.
+
+"""
+
+__version__ = "1.8.2"
+
+
+import contextlib
+import ctypes
+from ctypes import (
+    c_size_t,
+    c_wchar,
+    c_wchar_p,
+    get_errno,
+    sizeof,
+)
+import os
+import platform
+from shutil import which as _executable_exists
+import subprocess
+import time
+import warnings
+
+from pandas.errors import (
+    PyperclipException,
+    PyperclipWindowsException,
+)
+from pandas.util._exceptions import find_stack_level
+
+# `import PyQt4` sys.exit()s if DISPLAY is not in the environment.
+# Thus, we need to detect the presence of $DISPLAY manually
+# and not load PyQt4 if it is absent.
+# NOTE: this holds the raw value of $DISPLAY (a str, or None when unset),
+# not a bool; determine_clipboard() only tests it for truthiness.
+HAS_DISPLAY = os.getenv("DISPLAY")
+
+# Message of the PyperclipException raised by the placeholder callables
+# that init_no_clipboard() returns.
+EXCEPT_MSG = """
+    Pyperclip could not find a copy/paste mechanism for your system.
+    For more information, please visit
+    https://pyperclip.readthedocs.io/en/latest/index.html#not-implemented-error
+    """
+
+# Encoding used to convert text to/from the bytes exchanged with the
+# external clipboard commands.
+ENCODING = "utf-8"
+
+
+class PyperclipTimeoutException(PyperclipException):
+    pass
+
+
+def _stringifyText(text) -> str:
+    acceptedTypes = (str, int, float, bool)
+    if not isinstance(text, acceptedTypes):
+        raise PyperclipException(
+            f"only str, int, float, and bool values "
+            f"can be copied to the clipboard, not {type(text).__name__}"
+        )
+    return str(text)
+
+
+def init_osx_pbcopy_clipboard():
+    def copy_osx_pbcopy(text):
+        text = _stringifyText(text)  # Converts non-str values to str.
+        with subprocess.Popen(
+            ["pbcopy", "w"], stdin=subprocess.PIPE, close_fds=True
+        ) as p:
+            p.communicate(input=text.encode(ENCODING))
+
+    def paste_osx_pbcopy():
+        with subprocess.Popen(
+            ["pbpaste", "r"], stdout=subprocess.PIPE, close_fds=True
+        ) as p:
+            stdout = p.communicate()[0]
+        return stdout.decode(ENCODING)
+
+    return copy_osx_pbcopy, paste_osx_pbcopy
+
+
+def init_osx_pyobjc_clipboard():
+    def copy_osx_pyobjc(text):
+        """Copy string argument to clipboard"""
+        text = _stringifyText(text)  # Converts non-str values to str.
+        newStr = Foundation.NSString.stringWithString_(text).nsstring()
+        newData = newStr.dataUsingEncoding_(Foundation.NSUTF8StringEncoding)
+        board = AppKit.NSPasteboard.generalPasteboard()
+        board.declareTypes_owner_([AppKit.NSStringPboardType], None)
+        board.setData_forType_(newData, AppKit.NSStringPboardType)
+
+    def paste_osx_pyobjc():
+        """Returns contents of clipboard"""
+        board = AppKit.NSPasteboard.generalPasteboard()
+        content = board.stringForType_(AppKit.NSStringPboardType)
+        return content
+
+    return copy_osx_pyobjc, paste_osx_pyobjc
+
+
+def init_qt_clipboard():
+    global QApplication
+    # $DISPLAY should exist
+
+    # Try to import from qtpy, but if that fails try PyQt5 then PyQt4
+    try:
+        from qtpy.QtWidgets import QApplication
+    except ImportError:
+        try:
+            from PyQt5.QtWidgets import QApplication
+        except ImportError:
+            from PyQt4.QtGui import QApplication
+
+    app = QApplication.instance()
+    if app is None:
+        app = QApplication([])
+
+    def copy_qt(text):
+        text = _stringifyText(text)  # Converts non-str values to str.
+        cb = app.clipboard()
+        cb.setText(text)
+
+    def paste_qt() -> str:
+        cb = app.clipboard()
+        return str(cb.text())
+
+    return copy_qt, paste_qt
+
+
+def init_xclip_clipboard():
+    DEFAULT_SELECTION = "c"
+    PRIMARY_SELECTION = "p"
+
+    def copy_xclip(text, primary=False):
+        text = _stringifyText(text)  # Converts non-str values to str.
+        selection = DEFAULT_SELECTION
+        if primary:
+            selection = PRIMARY_SELECTION
+        with subprocess.Popen(
+            ["xclip", "-selection", selection], stdin=subprocess.PIPE, close_fds=True
+        ) as p:
+            p.communicate(input=text.encode(ENCODING))
+
+    def paste_xclip(primary=False):
+        selection = DEFAULT_SELECTION
+        if primary:
+            selection = PRIMARY_SELECTION
+        with subprocess.Popen(
+            ["xclip", "-selection", selection, "-o"],
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            close_fds=True,
+        ) as p:
+            stdout = p.communicate()[0]
+        # Intentionally ignore extraneous output on stderr when clipboard is empty
+        return stdout.decode(ENCODING)
+
+    return copy_xclip, paste_xclip
+
+
+def init_xsel_clipboard():
+    DEFAULT_SELECTION = "-b"
+    PRIMARY_SELECTION = "-p"
+
+    def copy_xsel(text, primary=False):
+        text = _stringifyText(text)  # Converts non-str values to str.
+        selection_flag = DEFAULT_SELECTION
+        if primary:
+            selection_flag = PRIMARY_SELECTION
+        with subprocess.Popen(
+            ["xsel", selection_flag, "-i"], stdin=subprocess.PIPE, close_fds=True
+        ) as p:
+            p.communicate(input=text.encode(ENCODING))
+
+    def paste_xsel(primary=False):
+        selection_flag = DEFAULT_SELECTION
+        if primary:
+            selection_flag = PRIMARY_SELECTION
+        with subprocess.Popen(
+            ["xsel", selection_flag, "-o"], stdout=subprocess.PIPE, close_fds=True
+        ) as p:
+            stdout = p.communicate()[0]
+        return stdout.decode(ENCODING)
+
+    return copy_xsel, paste_xsel
+
+
+def init_wl_clipboard():
+    PRIMARY_SELECTION = "-p"
+
+    def copy_wl(text, primary=False):
+        text = _stringifyText(text)  # Converts non-str values to str.
+        args = ["wl-copy"]
+        if primary:
+            args.append(PRIMARY_SELECTION)
+        if not text:
+            args.append("--clear")
+            subprocess.check_call(args, close_fds=True)
+        else:
+            p = subprocess.Popen(args, stdin=subprocess.PIPE, close_fds=True)
+            p.communicate(input=text.encode(ENCODING))
+
+    def paste_wl(primary=False):
+        args = ["wl-paste", "-n"]
+        if primary:
+            args.append(PRIMARY_SELECTION)
+        p = subprocess.Popen(args, stdout=subprocess.PIPE, close_fds=True)
+        stdout, _stderr = p.communicate()
+        return stdout.decode(ENCODING)
+
+    return copy_wl, paste_wl
+
+
+def init_klipper_clipboard():
+    def copy_klipper(text):
+        text = _stringifyText(text)  # Converts non-str values to str.
+        with subprocess.Popen(
+            [
+                "qdbus",
+                "org.kde.klipper",
+                "/klipper",
+                "setClipboardContents",
+                text.encode(ENCODING),
+            ],
+            stdin=subprocess.PIPE,
+            close_fds=True,
+        ) as p:
+            p.communicate(input=None)
+
+    def paste_klipper():
+        with subprocess.Popen(
+            ["qdbus", "org.kde.klipper", "/klipper", "getClipboardContents"],
+            stdout=subprocess.PIPE,
+            close_fds=True,
+        ) as p:
+            stdout = p.communicate()[0]
+
+        # Workaround for https://bugs.kde.org/show_bug.cgi?id=342874
+        # TODO: https://github.com/asweigart/pyperclip/issues/43
+        clipboardContents = stdout.decode(ENCODING)
+        # even if blank, Klipper will append a newline at the end
+        assert len(clipboardContents) > 0
+        # make sure that newline is there
+        assert clipboardContents.endswith("\n")
+        if clipboardContents.endswith("\n"):
+            clipboardContents = clipboardContents[:-1]
+        return clipboardContents
+
+    return copy_klipper, paste_klipper
+
+
+def init_dev_clipboard_clipboard():
+    def copy_dev_clipboard(text):
+        text = _stringifyText(text)  # Converts non-str values to str.
+        if text == "":
+            warnings.warn(
+                "Pyperclip cannot copy a blank string to the clipboard on Cygwin. "
+                "This is effectively a no-op.",
+                stacklevel=find_stack_level(),
+            )
+        if "\r" in text:
+            warnings.warn(
+                "Pyperclip cannot handle \\r characters on Cygwin.",
+                stacklevel=find_stack_level(),
+            )
+
+        with open("/dev/clipboard", "w", encoding="utf-8") as fd:
+            fd.write(text)
+
+    def paste_dev_clipboard() -> str:
+        with open("/dev/clipboard", encoding="utf-8") as fd:
+            content = fd.read()
+        return content
+
+    return copy_dev_clipboard, paste_dev_clipboard
+
+
+def init_no_clipboard():
+    class ClipboardUnavailable:
+        def __call__(self, *args, **kwargs):
+            raise PyperclipException(EXCEPT_MSG)
+
+        def __bool__(self) -> bool:
+            return False
+
+    return ClipboardUnavailable(), ClipboardUnavailable()
+
+
+# Windows-related clipboard functions:
+class CheckedCall:
+    def __init__(self, f) -> None:
+        super().__setattr__("f", f)
+
+    def __call__(self, *args):
+        ret = self.f(*args)
+        if not ret and get_errno():
+            raise PyperclipWindowsException("Error calling " + self.f.__name__)
+        return ret
+
+    def __setattr__(self, key, value):
+        setattr(self.f, key, value)
+
+
+def init_windows_clipboard():
+    """
+    Build the Windows clipboard backend on top of user32/kernel32 via ctypes.
+
+    Returns
+    -------
+    tuple
+        ``(copy_windows, paste_windows)``.
+    """
+    global HGLOBAL, LPVOID, DWORD, LPCSTR, INT
+    global HWND, HINSTANCE, HMENU, BOOL, UINT, HANDLE
+    from ctypes.wintypes import (
+        BOOL,
+        DWORD,
+        HANDLE,
+        HGLOBAL,
+        HINSTANCE,
+        HMENU,
+        HWND,
+        INT,
+        LPCSTR,
+        LPVOID,
+        UINT,
+    )
+
+    windll = ctypes.windll
+    msvcrt = ctypes.CDLL("msvcrt")
+
+    # Each "safe" entry point below is wrapped in CheckedCall, which raises
+    # PyperclipWindowsException on a falsy return with a nonzero errno and
+    # forwards the argtypes/restype assignments to the wrapped function.
+    safeCreateWindowExA = CheckedCall(windll.user32.CreateWindowExA)
+    safeCreateWindowExA.argtypes = [
+        DWORD,
+        LPCSTR,
+        LPCSTR,
+        DWORD,
+        INT,
+        INT,
+        INT,
+        INT,
+        HWND,
+        HMENU,
+        HINSTANCE,
+        LPVOID,
+    ]
+    safeCreateWindowExA.restype = HWND
+
+    safeDestroyWindow = CheckedCall(windll.user32.DestroyWindow)
+    safeDestroyWindow.argtypes = [HWND]
+    safeDestroyWindow.restype = BOOL
+
+    # Deliberately not wrapped: clipboard() below retries OpenClipboard and
+    # raises on its own once the retry window is exhausted.
+    OpenClipboard = windll.user32.OpenClipboard
+    OpenClipboard.argtypes = [HWND]
+    OpenClipboard.restype = BOOL
+
+    safeCloseClipboard = CheckedCall(windll.user32.CloseClipboard)
+    safeCloseClipboard.argtypes = []
+    safeCloseClipboard.restype = BOOL
+
+    safeEmptyClipboard = CheckedCall(windll.user32.EmptyClipboard)
+    safeEmptyClipboard.argtypes = []
+    safeEmptyClipboard.restype = BOOL
+
+    safeGetClipboardData = CheckedCall(windll.user32.GetClipboardData)
+    safeGetClipboardData.argtypes = [UINT]
+    safeGetClipboardData.restype = HANDLE
+
+    safeSetClipboardData = CheckedCall(windll.user32.SetClipboardData)
+    safeSetClipboardData.argtypes = [UINT, HANDLE]
+    safeSetClipboardData.restype = HANDLE
+
+    safeGlobalAlloc = CheckedCall(windll.kernel32.GlobalAlloc)
+    safeGlobalAlloc.argtypes = [UINT, c_size_t]
+    safeGlobalAlloc.restype = HGLOBAL
+
+    safeGlobalLock = CheckedCall(windll.kernel32.GlobalLock)
+    safeGlobalLock.argtypes = [HGLOBAL]
+    safeGlobalLock.restype = LPVOID
+
+    safeGlobalUnlock = CheckedCall(windll.kernel32.GlobalUnlock)
+    safeGlobalUnlock.argtypes = [HGLOBAL]
+    safeGlobalUnlock.restype = BOOL
+
+    wcslen = CheckedCall(msvcrt.wcslen)
+    wcslen.argtypes = [c_wchar_p]
+    wcslen.restype = UINT
+
+    # Flag passed to GlobalAlloc and the clipboard format id passed to
+    # GetClipboardData/SetClipboardData.
+    GMEM_MOVEABLE = 0x0002
+    CF_UNICODETEXT = 13
+
+    @contextlib.contextmanager
+    def window():
+        """
+        Context that provides a valid Windows hwnd.
+        """
+        # we really just need the hwnd, so setting "STATIC"
+        # as predefined lpClass is just fine.
+        hwnd = safeCreateWindowExA(
+            0, b"STATIC", None, 0, 0, 0, 0, 0, None, None, None, None
+        )
+        try:
+            yield hwnd
+        finally:
+            # Destroy the window even if the body of the ``with`` raised.
+            safeDestroyWindow(hwnd)
+
+    @contextlib.contextmanager
+    def clipboard(hwnd):
+        """
+        Context manager that opens the clipboard and prevents
+        other applications from modifying the clipboard content.
+        """
+        # We may not get the clipboard handle immediately because
+        # some other application is accessing it (?)
+        # We try for at least 500ms to get the clipboard.
+        t = time.time() + 0.5
+        success = False
+        while time.time() < t:
+            success = OpenClipboard(hwnd)
+            if success:
+                break
+            # Back off briefly before the next attempt.
+            time.sleep(0.01)
+        if not success:
+            raise PyperclipWindowsException("Error calling OpenClipboard")
+
+        try:
+            yield
+        finally:
+            # Always release the clipboard once it has been opened.
+            safeCloseClipboard()
+
+    def copy_windows(text):
+        # This function is heavily based on
+        # http://msdn.com/ms649016#_win32_Copying_Information_to_the_Clipboard
+
+        text = _stringifyText(text)  # Converts non-str values to str.
+
+        with window() as hwnd:
+            # http://msdn.com/ms649048
+            # If an application calls OpenClipboard with hwnd set to NULL,
+            # EmptyClipboard sets the clipboard owner to NULL;
+            # this causes SetClipboardData to fail.
+            # => We need a valid hwnd to copy something.
+            with clipboard(hwnd):
+                safeEmptyClipboard()
+
+                # Empty text: the clipboard is emptied and nothing is stored.
+                if text:
+                    # http://msdn.com/ms649051
+                    # If the hMem parameter identifies a memory object,
+                    # the object must have been allocated using the
+                    # function with the GMEM_MOVEABLE flag.
+                    # NOTE(review): the +1 presumably reserves room for the
+                    # terminating NUL character - confirm.
+                    count = wcslen(text) + 1
+                    handle = safeGlobalAlloc(GMEM_MOVEABLE, count * sizeof(c_wchar))
+                    locked_handle = safeGlobalLock(handle)
+
+                    ctypes.memmove(
+                        c_wchar_p(locked_handle),
+                        c_wchar_p(text),
+                        count * sizeof(c_wchar),
+                    )
+
+                    safeGlobalUnlock(handle)
+                    safeSetClipboardData(CF_UNICODETEXT, handle)
+
+    def paste_windows():
+        # Reading opens the clipboard without a window handle.
+        with clipboard(None):
+            handle = safeGetClipboardData(CF_UNICODETEXT)
+            if not handle:
+                # GetClipboardData may return NULL with errno == NO_ERROR
+                # if the clipboard is empty.
+                # (Also, it may return a handle to an empty buffer,
+                # but technically that's not empty)
+                return ""
+            return c_wchar_p(handle).value
+
+    return copy_windows, paste_windows
+
+
+def init_wsl_clipboard():
+    def copy_wsl(text):
+        text = _stringifyText(text)  # Converts non-str values to str.
+        with subprocess.Popen(["clip.exe"], stdin=subprocess.PIPE, close_fds=True) as p:
+            p.communicate(input=text.encode(ENCODING))
+
+    def paste_wsl():
+        with subprocess.Popen(
+            ["powershell.exe", "-command", "Get-Clipboard"],
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            close_fds=True,
+        ) as p:
+            stdout = p.communicate()[0]
+        # WSL appends "\r\n" to the contents.
+        return stdout[:-2].decode(ENCODING)
+
+    return copy_wsl, paste_wsl
+
+
+# Automatic detection of clipboard mechanisms
+# and importing is done in determine_clipboard():
+def determine_clipboard():
+    """
+    Determine the OS/platform and set the copy() and paste() functions
+    accordingly.
+
+    The checks run in this order: Cygwin, Windows, WSL, macOS, then the
+    display-based Linux mechanisms (wl-clipboard, xsel, xclip, klipper, Qt).
+
+    Returns
+    -------
+    tuple
+        The ``(copy, paste)`` pair produced by the first matching ``init_*``
+        function, or the placeholders from ``init_no_clipboard()`` when
+        nothing matches.
+    """
+    global Foundation, AppKit, qtpy, PyQt4, PyQt5
+
+    # Setup for the CYGWIN platform:
+    if (
+        "cygwin" in platform.system().lower()
+    ):  # Cygwin has a variety of values returned by platform.system(),
+        # such as 'CYGWIN_NT-6.1'
+        # FIXME(pyperclip#55): pyperclip currently does not support Cygwin,
+        # see https://github.com/asweigart/pyperclip/issues/55
+        # Without /dev/clipboard, execution falls through to the checks below.
+        if os.path.exists("/dev/clipboard"):
+            warnings.warn(
+                "Pyperclip's support for Cygwin is not perfect, "
+                "see https://github.com/asweigart/pyperclip/issues/55",
+                stacklevel=find_stack_level(),
+            )
+            return init_dev_clipboard_clipboard()
+
+    # Setup for the WINDOWS platform:
+    elif os.name == "nt" or platform.system() == "Windows":
+        return init_windows_clipboard()
+
+    # Linux with wslconfig.exe on the PATH is treated as WSL.
+    if platform.system() == "Linux":
+        if _executable_exists("wslconfig.exe"):
+            return init_wsl_clipboard()
+
+    # Setup for the macOS platform:
+    if os.name == "mac" or platform.system() == "Darwin":
+        try:
+            import AppKit
+            import Foundation  # check if pyobjc is installed
+        except ImportError:
+            return init_osx_pbcopy_clipboard()
+        else:
+            return init_osx_pyobjc_clipboard()
+
+    # Setup for the LINUX platform:
+    # Every mechanism below is only considered when $DISPLAY is set.
+    if HAS_DISPLAY:
+        if os.environ.get("WAYLAND_DISPLAY") and _executable_exists("wl-copy"):
+            return init_wl_clipboard()
+        if _executable_exists("xsel"):
+            return init_xsel_clipboard()
+        if _executable_exists("xclip"):
+            return init_xclip_clipboard()
+        if _executable_exists("klipper") and _executable_exists("qdbus"):
+            return init_klipper_clipboard()
+
+        try:
+            # qtpy is a small abstraction layer that lets you write applications
+            # using a single api call to either PyQt or PySide.
+            # https://pypi.python.org/project/QtPy
+            import qtpy  # check if qtpy is installed
+        except ImportError:
+            # If qtpy isn't installed, fall back on PyQt5 and then PyQt4.
+            try:
+                import PyQt5  # check if PyQt5 is installed
+            except ImportError:
+                try:
+                    import PyQt4  # check if PyQt4 is installed
+                except ImportError:
+                    pass  # We want to fail fast for all non-ImportError exceptions.
+                else:
+                    return init_qt_clipboard()
+            else:
+                return init_qt_clipboard()
+        else:
+            return init_qt_clipboard()
+
+    # Nothing matched: hand back placeholders that raise when called.
+    return init_no_clipboard()
+
+
+def set_clipboard(clipboard):
+    """
+    Explicitly sets the clipboard mechanism. The "clipboard mechanism" is how
+    the copy() and paste() functions interact with the operating system to
+    implement the copy/paste feature. The clipboard parameter must be one of:
+        - pbcopy
+        - pyobjc (default on macOS)
+        - qt
+        - xclip
+        - xsel
+        - klipper
+        - windows (default on Windows)
+        - no (this is what is set when no clipboard mechanism can be found)
+    """
+    global copy, paste
+
+    clipboard_types = {
+        "pbcopy": init_osx_pbcopy_clipboard,
+        "pyobjc": init_osx_pyobjc_clipboard,
+        "qt": init_qt_clipboard,  # TODO - split this into 'qtpy', 'pyqt4', and 'pyqt5'
+        "xclip": init_xclip_clipboard,
+        "xsel": init_xsel_clipboard,
+        "wl-clipboard": init_wl_clipboard,
+        "klipper": init_klipper_clipboard,
+        "windows": init_windows_clipboard,
+        "no": init_no_clipboard,
+    }
+
+    if clipboard not in clipboard_types:
+        allowed_clipboard_types = [repr(_) for _ in clipboard_types]
+        raise ValueError(
+            f"Argument must be one of {', '.join(allowed_clipboard_types)}"
+        )
+
+    # Sets pyperclip's copy() and paste() functions:
+    copy, paste = clipboard_types[clipboard]()
+
+
+def lazy_load_stub_copy(text):
+    """
+    A stub function for copy(), which will load the real copy() function when
+    called so that the real copy() function is used for later calls.
+
+    This allows users to import pyperclip without having determine_clipboard()
+    automatically run, which will automatically select a clipboard mechanism.
+    This could be a problem if it selects, say, the memory-heavy PyQt4 module
+    but the user was just going to immediately call set_clipboard() to use a
+    different clipboard mechanism.
+
+    The lazy loading this stub function implements gives the user a chance to
+    call set_clipboard() to pick another clipboard mechanism. Or, if the user
+    simply calls copy() or paste() without calling set_clipboard() first,
+    will fall back on whatever clipboard mechanism that determine_clipboard()
+    automatically chooses.
+    """
+    global copy, paste
+    copy, paste = determine_clipboard()
+    return copy(text)
+
+
+def lazy_load_stub_paste():
+    """
+    A stub function for paste(), which will load the real paste() function when
+    called so that the real paste() function is used for later calls.
+
+    This allows users to import pyperclip without having determine_clipboard()
+    automatically run, which will automatically select a clipboard mechanism.
+    This could be a problem if it selects, say, the memory-heavy PyQt4 module
+    but the user was just going to immediately call set_clipboard() to use a
+    different clipboard mechanism.
+
+    The lazy loading this stub function implements gives the user a chance to
+    call set_clipboard() to pick another clipboard mechanism. Or, if the user
+    simply calls copy() or paste() without calling set_clipboard() first,
+    will fall back on whatever clipboard mechanism that determine_clipboard()
+    automatically chooses.
+    """
+    global copy, paste
+    copy, paste = determine_clipboard()
+    return paste()
+
+
+def is_available() -> bool:
+    return copy != lazy_load_stub_copy and paste != lazy_load_stub_paste
+
+
+# Initially, copy() and paste() are set to lazy loading wrappers which will
+# set `copy` and `paste` to real functions the first time they're used, unless
+# set_clipboard() or determine_clipboard() is called first.
+# NOTE: is_available() detects this initial state by comparing against these
+# same stub functions.
+copy, paste = lazy_load_stub_copy, lazy_load_stub_paste
+
+
+def waitForPaste(timeout=None):
+    """This function call blocks until a non-empty text string exists on the
+    clipboard. It returns this text.
+
+    This function raises PyperclipTimeoutException if timeout was set to
+    a number of seconds that has elapsed without non-empty text being put on
+    the clipboard."""
+    startTime = time.time()
+    while True:
+        clipboardText = paste()
+        if clipboardText != "":
+            return clipboardText
+        time.sleep(0.01)
+
+        if timeout is not None and time.time() > startTime + timeout:
+            raise PyperclipTimeoutException(
+                "waitForPaste() timed out after " + str(timeout) + " seconds."
+            )
+
+
+def waitForNewPaste(timeout=None):
+    """This function call blocks until a new text string exists on the
+    clipboard that is different from the text that was there when the function
+    was first called. It returns this text.
+
+    This function raises PyperclipTimeoutException if timeout was set to
+    a number of seconds that has elapsed without non-empty text being put on
+    the clipboard."""
+    startTime = time.time()
+    originalText = paste()
+    while True:
+        currentText = paste()
+        if currentText != originalText:
+            return currentText
+        time.sleep(0.01)
+
+        if timeout is not None and time.time() > startTime + timeout:
+            raise PyperclipTimeoutException(
+                "waitForNewPaste() timed out after " + str(timeout) + " seconds."
+            )
+
+
+# Names exported by ``from <module> import *``.
+__all__ = [
+    "copy",
+    "paste",
+    "waitForPaste",
+    "waitForNewPaste",
+    "set_clipboard",
+    "determine_clipboard",
+]
+
+# pandas aliases
+# NOTE: these are bound at import time to the lazy-loading stubs, so they
+# keep referring to the stubs even after ``copy``/``paste`` are rebound;
+# every call through an alias therefore runs determine_clipboard() again.
+clipboard_get = paste
+clipboard_set = copy
@@ -0,0 +1,200 @@
+"""io on the clipboard"""
+
+from __future__ import annotations
+
+from io import StringIO
+from typing import TYPE_CHECKING
+import warnings
+
+from pandas._libs import lib
+from pandas.util._decorators import set_module
+from pandas.util._exceptions import find_stack_level
+from pandas.util._validators import check_dtype_backend
+
+from pandas.core.dtypes.generic import ABCDataFrame
+
+from pandas import (
+    get_option,
+    option_context,
+)
+
+if TYPE_CHECKING:
+    from pandas._typing import DtypeBackend
+
+
+@set_module("pandas")
+def read_clipboard(
+    sep: str = r"\s+",
+    dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
+    **kwargs,
+):  # pragma: no cover
+    r"""
+    Read text from clipboard and pass to :func:`~pandas.read_csv`.
+
+    Parses clipboard contents similar to how CSV files are parsed
+    using :func:`~pandas.read_csv`.
+
+    Parameters
+    ----------
+    sep : str, default '\\s+'
+        A string or regex delimiter. The default of ``'\\s+'`` denotes
+        one or more whitespace characters.
+
+    dtype_backend : {'numpy_nullable', 'pyarrow'}
+        Back-end data type applied to the resultant :class:`DataFrame`
+        (still experimental). If not specified, the default behavior
+        is to not use nullable data types. If specified, the behavior
+        is as follows:
+
+        * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame`
+        * ``"pyarrow"``: returns pyarrow-backed nullable
+          :class:`ArrowDtype` :class:`DataFrame`
+
+        .. versionadded:: 2.0
+
+    **kwargs
+        See :func:`~pandas.read_csv` for the full argument list.
+
+    Returns
+    -------
+    DataFrame
+        A parsed :class:`~pandas.DataFrame` object.
+
+    See Also
+    --------
+    DataFrame.to_clipboard : Copy object to the system clipboard.
+    read_csv : Read a comma-separated values (csv) file into DataFrame.
+    read_fwf : Read a table of fixed-width formatted lines into DataFrame.
+
+    Examples
+    --------
+    >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=["A", "B", "C"])
+    >>> df.to_clipboard()  # doctest: +SKIP
+    >>> pd.read_clipboard()  # doctest: +SKIP
+         A  B  C
+    0    1  2  3
+    1    4  5  6
+    """
+    encoding = kwargs.pop("encoding", "utf-8")
+
+    # only utf-8 is valid for passed value because that's what clipboard
+    # supports
+    if encoding is not None and encoding.lower().replace("-", "") != "utf8":
+        raise NotImplementedError("reading from clipboard only supports utf-8 encoding")
+
+    check_dtype_backend(dtype_backend)
+
+    from pandas.io.clipboard import clipboard_get
+    from pandas.io.parsers import read_csv
+
+    text = clipboard_get()
+
+    # Try to decode (if needed, as "text" might already be a string here).
+    try:
+        text = text.decode(kwargs.get("encoding") or get_option("display.encoding"))
+    except AttributeError:
+        pass
+
+    # Excel copies into clipboard with \t separation
+    # inspect no more then the 10 first lines, if they
+    # all contain an equal number (>0) of tabs, infer
+    # that this came from excel and set 'sep' accordingly
+    lines = text[:10000].split("\n")[:-1][:10]
+
+    # Need to remove leading white space, since read_csv
+    # accepts:
+    #    a  b
+    # 0  1  2
+    # 1  3  4
+
+    counts = {x.lstrip(" ").count("\t") for x in lines}
+    if len(lines) > 1 and len(counts) == 1 and counts.pop() != 0:
+        sep = "\t"
+        # check the number of leading tabs in the first line
+        # to account for index columns
+        index_length = len(lines[0]) - len(lines[0].lstrip(" \t"))
+        if index_length != 0:
+            kwargs.setdefault("index_col", list(range(index_length)))
+
+    elif not isinstance(sep, str):
+        raise ValueError(f"{sep=} must be a string")
+
+    # Regex separator currently only works with python engine.
+    # Default to python if separator is multi-character (regex)
+    if len(sep) > 1 and kwargs.get("engine") is None:
+        kwargs["engine"] = "python"
+    elif len(sep) > 1 and kwargs.get("engine") == "c":
+        warnings.warn(
+            "read_clipboard with regex separator does not work properly with c engine.",
+            stacklevel=find_stack_level(),
+        )
+
+    return read_csv(StringIO(text), sep=sep, dtype_backend=dtype_backend, **kwargs)
+
+
+def to_clipboard(
+    obj, excel: bool | None = True, sep: str | None = None, **kwargs
+) -> None:  # pragma: no cover
+    """
+    Attempt to write text representation of object to the system clipboard
+    The clipboard can be then pasted into Excel for example.
+
+    Parameters
+    ----------
+    obj : the object to write to the clipboard
+    excel : bool, defaults to True
+            if True, use the provided separator, writing in a csv
+            format for allowing easy pasting into excel.
+            if False, write a string representation of the object
+            to the clipboard
+    sep : optional, defaults to tab
+    other keywords are passed to to_csv
+
+    Notes
+    -----
+    Requirements for your platform
+      - Linux: xclip, or xsel (with PyQt4 modules)
+      - Windows:
+      - OS X:
+    """
+    encoding = kwargs.pop("encoding", "utf-8")
+
+    # testing if an invalid encoding is passed to clipboard
+    if encoding is not None and encoding.lower().replace("-", "") != "utf8":
+        raise ValueError("clipboard only supports utf-8 encoding")
+
+    from pandas.io.clipboard import clipboard_set
+
+    if excel is None:
+        excel = True
+
+    if excel:
+        try:
+            if sep is None:
+                sep = "\t"
+            buf = StringIO()
+
+            # clipboard_set (pyperclip) expects unicode
+            obj.to_csv(buf, sep=sep, encoding="utf-8", **kwargs)
+            text = buf.getvalue()
+
+            clipboard_set(text)
+            return
+        except TypeError:
+            warnings.warn(
+                "to_clipboard in excel mode requires a single character separator.",
+                stacklevel=find_stack_level(),
+            )
+    elif sep is not None:
+        warnings.warn(
+            "to_clipboard with excel=False ignores the sep argument.",
+            stacklevel=find_stack_level(),
+        )
+
+    if isinstance(obj, ABCDataFrame):
+        # str(df) has various unhelpful defaults, like truncation
+        with option_context("display.max_colwidth", None):
+            objstr = obj.to_string(**kwargs)
+    else:
+        objstr = str(obj)
+    clipboard_set(objstr)
@@ -0,0 +1,19 @@
+from pandas.io.excel._base import (
+    ExcelFile,
+    ExcelWriter,
+    read_excel,
+)
+from pandas.io.excel._odswriter import ODSWriter as _ODSWriter
+from pandas.io.excel._openpyxl import OpenpyxlWriter as _OpenpyxlWriter
+from pandas.io.excel._util import register_writer
+from pandas.io.excel._xlsxwriter import XlsxWriter as _XlsxWriter
+
+# Names exported by ``from <this package> import *``.
+__all__ = ["ExcelFile", "ExcelWriter", "read_excel"]
+
+
+# Pass each writer class imported above to ``register_writer`` when this
+# package is imported.
+# NOTE(review): presumably this is what makes the engines selectable through
+# ExcelWriter -- confirm against pandas.io.excel._util.register_writer.
+register_writer(_OpenpyxlWriter)
+
+register_writer(_XlsxWriter)
+
+
+register_writer(_ODSWriter)
@@ -0,0 +1,129 @@
+from __future__ import annotations
+
+from datetime import (
+    date,
+    datetime,
+    time,
+    timedelta,
+)
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    TypeAlias,
+)
+
+from pandas.compat._optional import import_optional_dependency
+
+from pandas.io.excel._base import BaseExcelReader
+
+if TYPE_CHECKING:
+    from python_calamine import (
+        CalamineSheet,
+        CalamineWorkbook,
+    )
+
+    from pandas._typing import (
+        FilePath,
+        NaTType,
+        ReadBuffer,
+        Scalar,
+        StorageOptions,
+    )
+
+# Python types a raw cell value may have before CalamineReader converts it;
+# used to annotate the rows returned by ``sheet.to_python`` in get_sheet_data.
+_CellValue: TypeAlias = int | float | str | bool | time | date | datetime | timedelta
+
+
+class CalamineReader(BaseExcelReader["CalamineWorkbook"]):
+    def __init__(
+        self,
+        filepath_or_buffer: FilePath | ReadBuffer[bytes],
+        storage_options: StorageOptions | None = None,
+        engine_kwargs: dict | None = None,
+    ) -> None:
+        """
+        Reader using calamine engine (xlsx/xls/xlsb/ods).
+
+        Parameters
+        ----------
+        filepath_or_buffer : str, path to be parsed or
+            an open readable stream.
+        storage_options : dict, optional
+            Extra options that make sense for a particular storage connection, e.g.
+            host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
+            are forwarded to ``urllib.request.Request`` as header options. For other
+            URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are
+            forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more
+            details, and for more examples on storage options refer `here
+            <https://pandas.pydata.org/docs/user_guide/io.html?
+            highlight=storage_options#reading-writing-remote-files>`_.
+        engine_kwargs : dict, optional
+            Arbitrary keyword arguments passed to excel engine.
+        """
+        import_optional_dependency("python_calamine")
+        super().__init__(
+            filepath_or_buffer,
+            storage_options=storage_options,
+            engine_kwargs=engine_kwargs,
+        )
+
+    @property
+    def _workbook_class(self) -> type[CalamineWorkbook]:
+        from python_calamine import CalamineWorkbook
+
+        return CalamineWorkbook
+
+    def load_workbook(
+        self, filepath_or_buffer: FilePath | ReadBuffer[bytes], engine_kwargs: Any
+    ) -> CalamineWorkbook:
+        from python_calamine import load_workbook
+
+        return load_workbook(
+            filepath_or_buffer,
+            **engine_kwargs,
+        )
+
+    @property
+    def sheet_names(self) -> list[str]:
+        from python_calamine import SheetTypeEnum
+
+        return [
+            sheet.name
+            for sheet in self.book.sheets_metadata
+            if sheet.typ == SheetTypeEnum.WorkSheet
+        ]
+
+    def get_sheet_by_name(self, name: str) -> CalamineSheet:
+        self.raise_if_bad_sheet_by_name(name)
+        return self.book.get_sheet_by_name(name)
+
+    def get_sheet_by_index(self, index: int) -> CalamineSheet:
+        self.raise_if_bad_sheet_by_index(index)
+        return self.book.get_sheet_by_index(index)
+
+    def get_sheet_data(
+        self, sheet: CalamineSheet, file_rows_needed: int | None = None
+    ) -> list[list[Scalar | NaTType | time]]:
+        def _convert_cell(value: _CellValue) -> Scalar | NaTType | time:
+            if isinstance(value, float):
+                val = int(value)
+                if val == value:
+                    return val
+                else:
+                    return value
+            elif isinstance(value, (datetime, timedelta)):
+                # Return as-is to match openpyxl behavior (GH#59186)
+                return value
+            elif isinstance(value, date):
+                # Convert date to datetime to match openpyxl behavior (GH#59186)
+                return datetime(value.year, value.month, value.day)
+            elif isinstance(value, time):
+                return value
+
+            return value
+
+        rows: list[list[_CellValue]] = sheet.to_python(
+            skip_empty_area=False, nrows=file_rows_needed
+        )
+        data = [[_convert_cell(cell) for cell in row] for row in rows]
+
+        return data
@@ -0,0 +1,249 @@
+from __future__ import annotations
+
+from typing import (
+    TYPE_CHECKING,
+    cast,
+)
+
+import numpy as np
+
+from pandas._typing import (
+    FilePath,
+    ReadBuffer,
+    Scalar,
+    StorageOptions,
+)
+from pandas.compat._optional import import_optional_dependency
+from pandas.util._decorators import doc
+
+import pandas as pd
+from pandas.core.shared_docs import _shared_docs
+
+from pandas.io.excel._base import BaseExcelReader
+
+if TYPE_CHECKING:
+    from odf.opendocument import OpenDocument
+
+    from pandas._libs.tslibs.nattype import NaTType
+
+
+@doc(storage_options=_shared_docs["storage_options"])
+class ODFReader(BaseExcelReader["OpenDocument"]):
+    def __init__(
+        self,
+        filepath_or_buffer: FilePath | ReadBuffer[bytes],
+        storage_options: StorageOptions | None = None,
+        engine_kwargs: dict | None = None,
+    ) -> None:
+        """
+        Read tables out of OpenDocument formatted files.
+
+        Parameters
+        ----------
+        filepath_or_buffer : str, path to be parsed or
+            an open readable stream.
+        {storage_options}
+        engine_kwargs : dict, optional
+            Arbitrary keyword arguments passed to excel engine.
+        """
+        import_optional_dependency("odf")
+        super().__init__(
+            filepath_or_buffer,
+            storage_options=storage_options,
+            engine_kwargs=engine_kwargs,
+        )
+
+    @property
+    def _workbook_class(self) -> type[OpenDocument]:
+        from odf.opendocument import OpenDocument
+
+        return OpenDocument
+
+    def load_workbook(
+        self, filepath_or_buffer: FilePath | ReadBuffer[bytes], engine_kwargs
+    ) -> OpenDocument:
+        from odf.opendocument import load
+
+        return load(filepath_or_buffer, **engine_kwargs)
+
+    @property
+    def empty_value(self) -> str:
+        """Property for compat with other readers."""
+        return ""
+
+    @property
+    def sheet_names(self) -> list[str]:
+        """Return a list of sheet names present in the document"""
+        from odf.table import Table
+
+        tables = self.book.getElementsByType(Table)
+        return [t.getAttribute("name") for t in tables]
+
+    def get_sheet_by_index(self, index: int):
+        from odf.table import Table
+
+        self.raise_if_bad_sheet_by_index(index)
+        tables = self.book.getElementsByType(Table)
+        return tables[index]
+
+    def get_sheet_by_name(self, name: str):
+        from odf.table import Table
+
+        self.raise_if_bad_sheet_by_name(name)
+        tables = self.book.getElementsByType(Table)
+
+        for table in tables:
+            if table.getAttribute("name") == name:
+                return table
+
+        self.close()
+        raise ValueError(f"sheet {name} not found")
+
+    def get_sheet_data(
+        self, sheet, file_rows_needed: int | None = None
+    ) -> list[list[Scalar | NaTType]]:
+        """
+        Parse an ODF Table into a list of lists
+        """
+        from odf.table import (
+            CoveredTableCell,
+            TableCell,
+            TableRow,
+        )
+
+        covered_cell_name = CoveredTableCell().qname
+        table_cell_name = TableCell().qname
+        cell_names = {covered_cell_name, table_cell_name}
+
+        sheet_rows = sheet.getElementsByType(TableRow)
+        empty_rows = 0
+        max_row_len = 0
+
+        table: list[list[Scalar | NaTType]] = []
+
+        for sheet_row in sheet_rows:
+            empty_cells = 0
+            table_row: list[Scalar | NaTType] = []
+
+            for sheet_cell in sheet_row.childNodes:
+                if hasattr(sheet_cell, "qname") and sheet_cell.qname in cell_names:
+                    if sheet_cell.qname == table_cell_name:
+                        value = self._get_cell_value(sheet_cell)
+                    else:
+                        value = self.empty_value
+
+                    column_repeat = self._get_column_repeat(sheet_cell)
+
+                    # Queue up empty values, writing only if content succeeds them
+                    if value == self.empty_value:
+                        empty_cells += column_repeat
+                    else:
+                        table_row.extend([self.empty_value] * empty_cells)
+                        empty_cells = 0
+                        table_row.extend([value] * column_repeat)
+
+            if max_row_len < len(table_row):
+                max_row_len = len(table_row)
+
+            row_repeat = self._get_row_repeat(sheet_row)
+            if len(table_row) == 0:
+                empty_rows += row_repeat
+            else:
+                # add blank rows to our table
+                table.extend([[self.empty_value]] * empty_rows)
+                empty_rows = 0
+                table.extend(table_row for _ in range(row_repeat))
+            if file_rows_needed is not None and len(table) >= file_rows_needed:
+                break
+
+        # Make our table square
+        for row in table:
+            if len(row) < max_row_len:
+                row.extend([self.empty_value] * (max_row_len - len(row)))
+
+        return table
+
+    def _get_row_repeat(self, row) -> int:
+        """
+        Return number of times this row was repeated
+        Repeating an empty row appeared to be a common way
+        of representing sparse rows in the table.
+        """
+        from odf.namespaces import TABLENS
+
+        return int(row.attributes.get((TABLENS, "number-rows-repeated"), 1))
+
+    def _get_column_repeat(self, cell) -> int:
+        from odf.namespaces import TABLENS
+
+        return int(cell.attributes.get((TABLENS, "number-columns-repeated"), 1))
+
+    def _get_cell_value(self, cell) -> Scalar | NaTType:
+        from odf.namespaces import OFFICENS
+
+        if str(cell) == "#N/A":
+            return np.nan
+
+        cell_type = cell.attributes.get((OFFICENS, "value-type"))
+        if cell_type == "boolean":
+            if str(cell) == "TRUE":
+                return True
+            return False
+        if cell_type is None:
+            return self.empty_value
+        elif cell_type == "float":
+            # GH5394
+            cell_value = float(cell.attributes.get((OFFICENS, "value")))
+            val = int(cell_value)
+            if val == cell_value:
+                return val
+            return cell_value
+        elif cell_type == "percentage":
+            cell_value = cell.attributes.get((OFFICENS, "value"))
+            return float(cell_value)
+        elif cell_type == "string":
+            return self._get_cell_string_value(cell)
+        elif cell_type == "currency":
+            cell_value = cell.attributes.get((OFFICENS, "value"))
+            return float(cell_value)
+        elif cell_type == "date":
+            cell_value = cell.attributes.get((OFFICENS, "date-value"))
+            return pd.Timestamp(cell_value)
+        elif cell_type == "time":
+            stamp = pd.Timestamp(str(cell))
+            # cast needed here because Scalar doesn't include datetime.time
+            return cast(Scalar, stamp.time())
+        else:
+            self.close()
+            raise ValueError(f"Unrecognized type {cell_type}")
+
+    def _get_cell_string_value(self, cell) -> str:
+        """
+        Find and decode OpenDocument text:s tags that represent
+        a run length encoded sequence of space characters.
+        """
+        from odf.element import Element
+        from odf.namespaces import TEXTNS
+        from odf.office import Annotation
+        from odf.text import S
+
+        office_annotation = Annotation().qname
+        text_s = S().qname
+
+        value = []
+
+        for fragment in cell.childNodes:
+            if isinstance(fragment, Element):
+                if fragment.qname == text_s:
+                    spaces = int(fragment.attributes.get((TEXTNS, "c"), 1))
+                    value.append(" " * spaces)
+                elif fragment.qname == office_annotation:
+                    continue
+                else:
+                    # recursive impl needed in case of nested fragments
+                    # with multiple spaces
+                    # https://github.com/pandas-dev/pandas/pull/36175#discussion_r484639704
+                    value.append(self._get_cell_string_value(fragment))
+            else:
+                value.append(str(fragment).strip("\n"))
+        return "".join(value)
@@ -0,0 +1,362 @@
+from __future__ import annotations
+
+from collections import defaultdict
+import datetime
+import json
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    DefaultDict,
+    cast,
+    overload,
+)
+
+from pandas.io.excel._base import ExcelWriter
+from pandas.io.excel._util import (
+    combine_kwargs,
+    validate_freeze_panes,
+)
+
+if TYPE_CHECKING:
+    from odf.opendocument import OpenDocumentSpreadsheet
+
+    from pandas._typing import (
+        ExcelWriterIfSheetExists,
+        FilePath,
+        StorageOptions,
+        WriteExcelBuffer,
+    )
+
+    from pandas.io.formats.excel import ExcelCell
+
+
+class ODSWriter(ExcelWriter):
+    _engine = "odf"
+    _supported_extensions = (".ods",)
+
+    def __init__(  # pyright: ignore[reportInconsistentConstructor]
+        self,
+        path: FilePath | WriteExcelBuffer | ExcelWriter,
+        engine: str | None = None,
+        date_format: str | None = None,
+        datetime_format: str | None = None,
+        mode: str = "w",
+        storage_options: StorageOptions | None = None,
+        if_sheet_exists: ExcelWriterIfSheetExists | None = None,
+        engine_kwargs: dict[str, Any] | None = None,
+        **kwargs: Any,
+    ) -> None:
+        from odf.opendocument import OpenDocumentSpreadsheet
+
+        if mode == "a":
+            raise ValueError("Append mode is not supported with odf!")
+
+        engine_kwargs = combine_kwargs(engine_kwargs, kwargs)
+        self._book = OpenDocumentSpreadsheet(**engine_kwargs)
+
+        super().__init__(
+            path,
+            mode=mode,
+            storage_options=storage_options,
+            if_sheet_exists=if_sheet_exists,
+            engine_kwargs=engine_kwargs,
+        )
+
+        self._style_dict: dict[str, str] = {}
+
+    @property
+    def book(self) -> OpenDocumentSpreadsheet:
+        """
+        Book instance of class odf.opendocument.OpenDocumentSpreadsheet.
+
+        This attribute can be used to access engine-specific features.
+        """
+        return self._book
+
+    @property
+    def sheets(self) -> dict[str, Any]:
+        """Mapping of sheet names to sheet objects."""
+        from odf.table import Table
+
+        result = {
+            sheet.getAttribute("name"): sheet
+            for sheet in self.book.getElementsByType(Table)
+        }
+        return result
+
+    def _save(self) -> None:
+        """
+        Save workbook to disk.
+        """
+        for sheet in self.sheets.values():
+            self.book.spreadsheet.addElement(sheet)
+        self.book.save(self._handles.handle)
+
+    def _write_cells(
+        self,
+        cells: list[ExcelCell],
+        sheet_name: str | None = None,
+        startrow: int = 0,
+        startcol: int = 0,
+        freeze_panes: tuple[int, int] | None = None,
+        autofilter_range: str | None = None,
+    ) -> None:
+        """
+        Write the frame cells using odf
+        """
+
+        if autofilter_range:
+            raise ValueError("Autofilter is not supported with odf!")
+
+        from odf.table import (
+            Table,
+            TableCell,
+            TableRow,
+        )
+        from odf.text import P
+
+        sheet_name = self._get_sheet_name(sheet_name)
+        assert sheet_name is not None
+
+        if sheet_name in self.sheets:
+            wks = self.sheets[sheet_name]
+        else:
+            wks = Table(name=sheet_name)
+            self.book.spreadsheet.addElement(wks)
+
+        if validate_freeze_panes(freeze_panes):
+            freeze_panes = cast(tuple[int, int], freeze_panes)
+            self._create_freeze_panes(sheet_name, freeze_panes)
+
+        for _ in range(startrow):
+            wks.addElement(TableRow())
+
+        rows: DefaultDict = defaultdict(TableRow)
+        col_count: DefaultDict = defaultdict(int)
+
+        for cell in sorted(cells, key=lambda cell: (cell.row, cell.col)):
+            # only add empty cells if the row is still empty
+            if not col_count[cell.row]:
+                for _ in range(startcol):
+                    rows[cell.row].addElement(TableCell())
+
+            # fill with empty cells if needed
+            for _ in range(cell.col - col_count[cell.row]):
+                rows[cell.row].addElement(TableCell())
+                col_count[cell.row] += 1
+
+            pvalue, tc = self._make_table_cell(cell)
+            rows[cell.row].addElement(tc)
+            col_count[cell.row] += 1
+            p = P(text=pvalue)
+            tc.addElement(p)
+
+        # add all rows to the sheet
+        if len(rows) > 0:
+            for row_nr in range(max(rows.keys()) + 1):
+                wks.addElement(rows[row_nr])
+
+    def _make_table_cell_attributes(self, cell: ExcelCell) -> dict[str, int | str]:
+        """Convert cell attributes to OpenDocument attributes
+
+        Parameters
+        ----------
+        cell : ExcelCell
+            Spreadsheet cell data
+
+        Returns
+        -------
+        attributes : Dict[str, Union[int, str]]
+            Dictionary with attributes and attribute values
+        """
+        attributes: dict[str, int | str] = {}
+        style_name = self._process_style(cell.style)
+        if style_name is not None:
+            attributes["stylename"] = style_name
+        if cell.mergestart is not None and cell.mergeend is not None:
+            attributes["numberrowsspanned"] = max(1, cell.mergestart)
+            attributes["numbercolumnsspanned"] = cell.mergeend
+        return attributes
+
+    def _make_table_cell(self, cell: ExcelCell) -> tuple[object, Any]:
+        """Convert cell data to an OpenDocument spreadsheet cell
+
+        Parameters
+        ----------
+        cell : ExcelCell
+            Spreadsheet cell data
+
+        Returns
+        -------
+        pvalue, cell : Tuple[str, TableCell]
+            Display value, Cell value
+        """
+        from odf.table import TableCell
+
+        attributes = self._make_table_cell_attributes(cell)
+        val, fmt = self._value_with_fmt(cell.val)
+        pvalue = value = val
+        if isinstance(val, bool):
+            value = str(val).lower()
+            pvalue = str(val).upper()
+            return (
+                pvalue,
+                TableCell(
+                    valuetype="boolean",
+                    booleanvalue=value,
+                    attributes=attributes,
+                ),
+            )
+        elif isinstance(val, datetime.datetime):
+            # Fast formatting
+            value = val.isoformat()
+            # Slow but locale-dependent
+            pvalue = val.strftime("%c")
+            return (
+                pvalue,
+                TableCell(valuetype="date", datevalue=value, attributes=attributes),
+            )
+        elif isinstance(val, datetime.date):
+            # Fast formatting
+            value = f"{val.year}-{val.month:02d}-{val.day:02d}"
+            # Slow but locale-dependent
+            pvalue = val.strftime("%x")
+            return (
+                pvalue,
+                TableCell(valuetype="date", datevalue=value, attributes=attributes),
+            )
+        elif isinstance(val, str):
+            return (
+                pvalue,
+                TableCell(
+                    valuetype="string",
+                    stringvalue=value,
+                    attributes=attributes,
+                ),
+            )
+        else:
+            return (
+                pvalue,
+                TableCell(
+                    valuetype="float",
+                    value=value,
+                    attributes=attributes,
+                ),
+            )
+
+    @overload
+    def _process_style(self, style: dict[str, Any]) -> str: ...
+
+    @overload
+    def _process_style(self, style: None) -> None: ...
+
+    def _process_style(self, style: dict[str, Any] | None) -> str | None:
+        """Convert a style dictionary to an OpenDocument style sheet
+
+        Parameters
+        ----------
+        style : Dict
+            Style dictionary
+
+        Returns
+        -------
+        style_key : str
+            Unique style key for later reference in sheet
+        """
+        from odf.style import (
+            ParagraphProperties,
+            Style,
+            TableCellProperties,
+            TextProperties,
+        )
+
+        if style is None:
+            return None
+        style_key = json.dumps(style)
+        if style_key in self._style_dict:
+            return self._style_dict[style_key]
+        name = f"pd{len(self._style_dict) + 1}"
+        self._style_dict[style_key] = name
+        odf_style = Style(name=name, family="table-cell")
+        if "font" in style:
+            font = style["font"]
+            if font.get("bold", False):
+                odf_style.addElement(TextProperties(fontweight="bold"))
+        if "borders" in style:
+            borders = style["borders"]
+            for side, thickness in borders.items():
+                thickness_translation = {"thin": "0.75pt solid #000000"}
+                odf_style.addElement(
+                    TableCellProperties(
+                        attributes={f"border{side}": thickness_translation[thickness]}
+                    )
+                )
+        if "alignment" in style:
+            alignment = style["alignment"]
+            horizontal = alignment.get("horizontal")
+            if horizontal:
+                odf_style.addElement(ParagraphProperties(textalign=horizontal))
+            vertical = alignment.get("vertical")
+            if vertical:
+                odf_style.addElement(TableCellProperties(verticalalign=vertical))
+        self.book.styles.addElement(odf_style)
+        return name
+
+    def _create_freeze_panes(
+        self, sheet_name: str, freeze_panes: tuple[int, int]
+    ) -> None:
+        """
+        Create freeze panes in the sheet.
+
+        Parameters
+        ----------
+        sheet_name : str
+            Name of the spreadsheet
+        freeze_panes : tuple of (int, int)
+            Freeze pane location x and y
+        """
+        from odf.config import (
+            ConfigItem,
+            ConfigItemMapEntry,
+            ConfigItemMapIndexed,
+            ConfigItemMapNamed,
+            ConfigItemSet,
+        )
+
+        config_item_set = ConfigItemSet(name="ooo:view-settings")
+        self.book.settings.addElement(config_item_set)
+
+        config_item_map_indexed = ConfigItemMapIndexed(name="Views")
+        config_item_set.addElement(config_item_map_indexed)
+
+        config_item_map_entry = ConfigItemMapEntry()
+        config_item_map_indexed.addElement(config_item_map_entry)
+
+        config_item_map_named = ConfigItemMapNamed(name="Tables")
+        config_item_map_entry.addElement(config_item_map_named)
+
+        config_item_map_entry = ConfigItemMapEntry(name=sheet_name)
+        config_item_map_named.addElement(config_item_map_entry)
+
+        config_item_map_entry.addElement(
+            ConfigItem(name="HorizontalSplitMode", type="short", text="2")
+        )
+        config_item_map_entry.addElement(
+            ConfigItem(name="VerticalSplitMode", type="short", text="2")
+        )
+        config_item_map_entry.addElement(
+            ConfigItem(
+                name="HorizontalSplitPosition", type="int", text=str(freeze_panes[0])
+            )
+        )
+        config_item_map_entry.addElement(
+            ConfigItem(
+                name="VerticalSplitPosition", type="int", text=str(freeze_panes[1])
+            )
+        )
+        config_item_map_entry.addElement(
+            ConfigItem(name="PositionRight", type="int", text=str(freeze_panes[0]))
+        )
+        config_item_map_entry.addElement(
+            ConfigItem(name="PositionBottom", type="int", text=str(freeze_panes[1]))
+        )
@@ -0,0 +1,646 @@
+from __future__ import annotations
+
+import mmap
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    cast,
+)
+
+import numpy as np
+
+from pandas.compat._optional import import_optional_dependency
+from pandas.util._decorators import doc
+
+from pandas.core.shared_docs import _shared_docs
+
+from pandas.io.excel._base import (
+    BaseExcelReader,
+    ExcelWriter,
+)
+from pandas.io.excel._util import (
+    combine_kwargs,
+    validate_freeze_panes,
+)
+
+if TYPE_CHECKING:
+    from openpyxl import Workbook
+    from openpyxl.descriptors.serialisable import Serialisable
+    from openpyxl.styles import Fill
+
+    from pandas._typing import (
+        ExcelWriterIfSheetExists,
+        FilePath,
+        ReadBuffer,
+        Scalar,
+        StorageOptions,
+        WriteExcelBuffer,
+    )
+
+
+class OpenpyxlWriter(ExcelWriter):
+    _engine = "openpyxl"
+    _supported_extensions = (".xlsx", ".xlsm")
+
+    def __init__(  # pyright: ignore[reportInconsistentConstructor]
+        self,
+        path: FilePath | WriteExcelBuffer | ExcelWriter,
+        engine: str | None = None,
+        date_format: str | None = None,
+        datetime_format: str | None = None,
+        mode: str = "w",
+        storage_options: StorageOptions | None = None,
+        if_sheet_exists: ExcelWriterIfSheetExists | None = None,
+        engine_kwargs: dict[str, Any] | None = None,
+        **kwargs,
+    ) -> None:
+        # Use the openpyxl module as the Excel writer.
+        from openpyxl.workbook import Workbook
+
+        engine_kwargs = combine_kwargs(engine_kwargs, kwargs)
+
+        super().__init__(
+            path,
+            mode=mode,
+            storage_options=storage_options,
+            if_sheet_exists=if_sheet_exists,
+            engine_kwargs=engine_kwargs,
+        )
+
+        # ExcelWriter replaced "a" by "r+" to allow us to first read the excel file from
+        # the file and later write to it
+        if "r+" in self._mode:  # Load from existing workbook
+            from openpyxl import load_workbook
+
+            try:
+                self._book = load_workbook(self._handles.handle, **engine_kwargs)
+            except TypeError:
+                self._handles.handle.close()
+                raise
+            self._handles.handle.seek(0)
+        else:
+            # Create workbook object with default optimized_write=True.
+            try:
+                self._book = Workbook(**engine_kwargs)
+            except TypeError:
+                self._handles.handle.close()
+                raise
+
+            if self.book.worksheets:
+                self.book.remove(self.book.worksheets[0])
+
+    @property
+    def book(self) -> Workbook:
+        """
+        Book instance of class openpyxl.workbook.Workbook.
+
+        This attribute can be used to access engine-specific features.
+        """
+        return self._book
+
+    @property
+    def sheets(self) -> dict[str, Any]:
+        """Mapping of sheet names to sheet objects."""
+        result = {name: self.book[name] for name in self.book.sheetnames}
+        return result
+
+    def _save(self) -> None:
+        """
+        Save workbook to disk.
+        """
+        self.book.save(self._handles.handle)
+        if "r+" in self._mode and not isinstance(self._handles.handle, mmap.mmap):
+            # truncate file to the written content
+            self._handles.handle.truncate()
+
+    @classmethod
+    def _convert_to_style_kwargs(
+        cls, style_dict: dict[str, Serialisable]
+    ) -> dict[str, Serialisable]:
+        """
+        Convert a style_dict to a set of kwargs suitable for initializing
+        or updating-on-copy an openpyxl v2 style object.
+
+        Parameters
+        ----------
+        style_dict : dict
+            A dict with zero or more of the following keys (or their synonyms).
+                'font'
+                'fill'
+                'border' ('borders')
+                'alignment'
+                'number_format'
+                'protection'
+
+        Returns
+        -------
+        style_kwargs : dict
+            A dict with the same, normalized keys as ``style_dict`` but each
+            value has been replaced with a native openpyxl style object of the
+            appropriate class.
+        """
+        _style_key_map = {"borders": "border"}
+
+        style_kwargs: dict[str, Serialisable] = {}
+        for k, v in style_dict.items():
+            k = _style_key_map.get(k, k)
+            _conv_to_x = getattr(cls, f"_convert_to_{k}", lambda x: None)
+            new_v = _conv_to_x(v)
+            if new_v:
+                style_kwargs[k] = new_v
+
+        return style_kwargs
+
+    @classmethod
+    def _convert_to_color(cls, color_spec):
+        """
+        Convert ``color_spec`` to an openpyxl v2 Color object.
+
+        Parameters
+        ----------
+        color_spec : str, dict
+            A 32-bit ARGB hex string, or a dict with zero or more of the
+            following keys.
+                'rgb'
+                'indexed'
+                'auto'
+                'theme'
+                'tint'
+                'index'
+                'type'
+
+        Returns
+        -------
+        color : openpyxl.styles.Color
+        """
+        from openpyxl.styles import Color
+
+        if isinstance(color_spec, str):
+            return Color(color_spec)
+        else:
+            return Color(**color_spec)
+
+    @classmethod
+    def _convert_to_font(cls, font_dict):
+        """
+        Convert ``font_dict`` to an openpyxl v2 Font object.
+
+        Parameters
+        ----------
+        font_dict : dict
+            A dict with zero or more of the following keys (or their synonyms).
+                'name'
+                'size' ('sz')
+                'bold' ('b')
+                'italic' ('i')
+                'underline' ('u')
+                'strikethrough' ('strike')
+                'color'
+                'vertAlign' ('vertalign')
+                'charset'
+                'scheme'
+                'family'
+                'outline'
+                'shadow'
+                'condense'
+
+        Returns
+        -------
+        font : openpyxl.styles.Font
+        """
+        from openpyxl.styles import Font
+
+        _font_key_map = {
+            "sz": "size",
+            "b": "bold",
+            "i": "italic",
+            "u": "underline",
+            "strike": "strikethrough",
+            "vertalign": "vertAlign",
+        }
+
+        font_kwargs = {}
+        for k, v in font_dict.items():
+            k = _font_key_map.get(k, k)
+            if k == "color":
+                v = cls._convert_to_color(v)
+            font_kwargs[k] = v
+
+        return Font(**font_kwargs)
+
+    @classmethod
+    def _convert_to_stop(cls, stop_seq):
+        """
+        Convert ``stop_seq`` to a list of openpyxl v2 Color objects,
+        suitable for initializing the ``GradientFill`` ``stop`` parameter.
+
+        Parameters
+        ----------
+        stop_seq : iterable
+            An iterable that yields objects suitable for consumption by
+            ``_convert_to_color``.
+
+        Returns
+        -------
+        stop : list of openpyxl.styles.Color
+        """
+        return map(cls._convert_to_color, stop_seq)
+
+    @classmethod
+    def _convert_to_fill(cls, fill_dict: dict[str, Any]) -> Fill:
+        """
+        Convert ``fill_dict`` to an openpyxl v2 Fill object.
+
+        Parameters
+        ----------
+        fill_dict : dict
+            A dict with one or more of the following keys (or their synonyms),
+                'fill_type' ('patternType', 'patterntype')
+                'start_color' ('fgColor', 'fgcolor')
+                'end_color' ('bgColor', 'bgcolor')
+            or one or more of the following keys (or their synonyms).
+                'type' ('fill_type')
+                'degree'
+                'left'
+                'right'
+                'top'
+                'bottom'
+                'stop'
+
+        Returns
+        -------
+        fill : openpyxl.styles.Fill
+        """
+        from openpyxl.styles import (
+            GradientFill,
+            PatternFill,
+        )
+
+        _pattern_fill_key_map = {
+            "patternType": "fill_type",
+            "patterntype": "fill_type",
+            "fgColor": "start_color",
+            "fgcolor": "start_color",
+            "bgColor": "end_color",
+            "bgcolor": "end_color",
+        }
+
+        _gradient_fill_key_map = {"fill_type": "type"}
+
+        pfill_kwargs = {}
+        gfill_kwargs = {}
+        for k, v in fill_dict.items():
+            pk = _pattern_fill_key_map.get(k)
+            gk = _gradient_fill_key_map.get(k)
+            if pk in ["start_color", "end_color"]:
+                v = cls._convert_to_color(v)
+            if gk == "stop":
+                v = cls._convert_to_stop(v)
+            if pk:
+                pfill_kwargs[pk] = v
+            elif gk:
+                gfill_kwargs[gk] = v
+            else:
+                pfill_kwargs[k] = v
+                gfill_kwargs[k] = v
+
+        try:
+            return PatternFill(**pfill_kwargs)
+        except TypeError:
+            return GradientFill(**gfill_kwargs)
+
+    @classmethod
+    def _convert_to_side(cls, side_spec):
+        """
+        Convert ``side_spec`` to an openpyxl v2 Side object.
+
+        Parameters
+        ----------
+        side_spec : str, dict
+            A string specifying the border style, or a dict with zero or more
+            of the following keys (or their synonyms).
+                'style' ('border_style')
+                'color'
+
+        Returns
+        -------
+        side : openpyxl.styles.Side
+        """
+        from openpyxl.styles import Side
+
+        _side_key_map = {"border_style": "style"}
+
+        if isinstance(side_spec, str):
+            return Side(style=side_spec)
+
+        side_kwargs = {}
+        for k, v in side_spec.items():
+            k = _side_key_map.get(k, k)
+            if k == "color":
+                v = cls._convert_to_color(v)
+            side_kwargs[k] = v
+
+        return Side(**side_kwargs)
+
+    @classmethod
+    def _convert_to_border(cls, border_dict):
+        """
+        Convert ``border_dict`` to an openpyxl v2 Border object.
+
+        Parameters
+        ----------
+        border_dict : dict
+            A dict with zero or more of the following keys (or their synonyms).
+                'left'
+                'right'
+                'top'
+                'bottom'
+                'diagonal'
+                'diagonal_direction'
+                'vertical'
+                'horizontal'
+                'diagonalUp' ('diagonalup')
+                'diagonalDown' ('diagonaldown')
+                'outline'
+
+        Returns
+        -------
+        border : openpyxl.styles.Border
+        """
+        from openpyxl.styles import Border
+
+        _border_key_map = {"diagonalup": "diagonalUp", "diagonaldown": "diagonalDown"}
+
+        border_kwargs = {}
+        for k, v in border_dict.items():
+            k = _border_key_map.get(k, k)
+            if k == "color":
+                v = cls._convert_to_color(v)
+            if k in ["left", "right", "top", "bottom", "diagonal"]:
+                v = cls._convert_to_side(v)
+            border_kwargs[k] = v
+
+        return Border(**border_kwargs)
+
+    @classmethod
+    def _convert_to_alignment(cls, alignment_dict):
+        """
+        Convert ``alignment_dict`` to an openpyxl v2 Alignment object.
+
+        Parameters
+        ----------
+        alignment_dict : dict
+            A dict with zero or more of the following keys (or their synonyms).
+                'horizontal'
+                'vertical'
+                'text_rotation'
+                'wrap_text'
+                'shrink_to_fit'
+                'indent'
+        Returns
+        -------
+        alignment : openpyxl.styles.Alignment
+        """
+        from openpyxl.styles import Alignment
+
+        return Alignment(**alignment_dict)
+
+    @classmethod
+    def _convert_to_number_format(cls, number_format_dict):
+        """
+        Convert ``number_format_dict`` to an openpyxl v2.1.0 number format
+        initializer.
+
+        Parameters
+        ----------
+        number_format_dict : dict
+            A dict with zero or more of the following keys.
+                'format_code' : str
+
+        Returns
+        -------
+        number_format : str
+        """
+        return number_format_dict["format_code"]
+
+    @classmethod
+    def _convert_to_protection(cls, protection_dict):
+        """
+        Convert ``protection_dict`` to an openpyxl v2 Protection object.
+
+        Parameters
+        ----------
+        protection_dict : dict
+            A dict with zero or more of the following keys.
+                'locked'
+                'hidden'
+
+        Returns
+        -------
+        """
+        from openpyxl.styles import Protection
+
+        return Protection(**protection_dict)
+
+    def _write_cells(
+        self,
+        cells,
+        sheet_name: str | None = None,
+        startrow: int = 0,
+        startcol: int = 0,
+        freeze_panes: tuple[int, int] | None = None,
+        autofilter_range: str | None = None,
+    ) -> None:
+        # Write the frame cells using openpyxl.
+        sheet_name = self._get_sheet_name(sheet_name)
+
+        _style_cache: dict[str, dict[str, Serialisable]] = {}
+
+        if sheet_name in self.sheets and self._if_sheet_exists != "new":
+            if "r+" in self._mode:
+                if self._if_sheet_exists == "replace":
+                    old_wks = self.sheets[sheet_name]
+                    target_index = self.book.index(old_wks)
+                    del self.book[sheet_name]
+                    wks = self.book.create_sheet(sheet_name, target_index)
+                elif self._if_sheet_exists == "error":
+                    raise ValueError(
+                        f"Sheet '{sheet_name}' already exists and "
+                        f"if_sheet_exists is set to 'error'."
+                    )
+                elif self._if_sheet_exists == "overlay":
+                    wks = self.sheets[sheet_name]
+                else:
+                    raise ValueError(
+                        f"'{self._if_sheet_exists}' is not valid for if_sheet_exists. "
+                        "Valid options are 'error', 'new', 'replace' and 'overlay'."
+                    )
+            else:
+                wks = self.sheets[sheet_name]
+        else:
+            wks = self.book.create_sheet()
+            wks.title = sheet_name
+
+        if validate_freeze_panes(freeze_panes):
+            freeze_panes = cast(tuple[int, int], freeze_panes)
+            wks.freeze_panes = wks.cell(
+                row=freeze_panes[0] + 1, column=freeze_panes[1] + 1
+            )
+
+        for cell in cells:
+            xcell = wks.cell(
+                row=startrow + cell.row + 1, column=startcol + cell.col + 1
+            )
+            xcell.value, fmt = self._value_with_fmt(cell.val)
+            if fmt:
+                xcell.number_format = fmt
+
+            style_kwargs: dict[str, Serialisable] | None = {}
+            if cell.style:
+                key = str(cell.style)
+                style_kwargs = _style_cache.get(key)
+                if style_kwargs is None:
+                    style_kwargs = self._convert_to_style_kwargs(cell.style)
+                    _style_cache[key] = style_kwargs
+
+            if style_kwargs:
+                for k, v in style_kwargs.items():
+                    setattr(xcell, k, v)
+
+            if cell.mergestart is not None and cell.mergeend is not None:
+                wks.merge_cells(
+                    start_row=startrow + cell.row + 1,
+                    start_column=startcol + cell.col + 1,
+                    end_column=startcol + cell.mergeend + 1,
+                    end_row=startrow + cell.mergestart + 1,
+                )
+
+                # When cells are merged only the top-left cell is preserved
+                # The behaviour of the other cells in a merged range is
+                # undefined
+                if style_kwargs:
+                    first_row = startrow + cell.row + 1
+                    last_row = startrow + cell.mergestart + 1
+                    first_col = startcol + cell.col + 1
+                    last_col = startcol + cell.mergeend + 1
+
+                    for row in range(first_row, last_row + 1):
+                        for col in range(first_col, last_col + 1):
+                            if row == first_row and col == first_col:
+                                # Ignore first cell. It is already handled.
+                                continue
+                            xcell = wks.cell(column=col, row=row)
+                            for k, v in style_kwargs.items():
+                                setattr(xcell, k, v)
+
+        if autofilter_range:
+            wks.auto_filter.ref = autofilter_range
+
+
+class OpenpyxlReader(BaseExcelReader["Workbook"]):
+    @doc(storage_options=_shared_docs["storage_options"])
+    def __init__(
+        self,
+        filepath_or_buffer: FilePath | ReadBuffer[bytes],
+        storage_options: StorageOptions | None = None,
+        engine_kwargs: dict | None = None,
+    ) -> None:
+        """
+        Reader using openpyxl engine.
+
+        Parameters
+        ----------
+        filepath_or_buffer : str, path object or Workbook
+            Object to be parsed.
+        {storage_options}
+        engine_kwargs : dict, optional
+            Arbitrary keyword arguments passed to excel engine.
+        """
+        import_optional_dependency("openpyxl")
+        super().__init__(
+            filepath_or_buffer,
+            storage_options=storage_options,
+            engine_kwargs=engine_kwargs,
+        )
+
+    @property
+    def _workbook_class(self) -> type[Workbook]:
+        from openpyxl import Workbook
+
+        return Workbook
+
+    def load_workbook(
+        self, filepath_or_buffer: FilePath | ReadBuffer[bytes], engine_kwargs
+    ) -> Workbook:
+        from openpyxl import load_workbook
+
+        default_kwargs = {"read_only": True, "data_only": True, "keep_links": False}
+
+        return load_workbook(
+            filepath_or_buffer,
+            **(default_kwargs | engine_kwargs),
+        )
+
+    @property
+    def sheet_names(self) -> list[str]:
+        return [sheet.title for sheet in self.book.worksheets]
+
+    def get_sheet_by_name(self, name: str):
+        self.raise_if_bad_sheet_by_name(name)
+        return self.book[name]
+
+    def get_sheet_by_index(self, index: int):
+        self.raise_if_bad_sheet_by_index(index)
+        return self.book.worksheets[index]
+
+    def _convert_cell(self, cell) -> Scalar:
+        from openpyxl.cell.cell import (
+            TYPE_ERROR,
+            TYPE_NUMERIC,
+        )
+
+        if cell.value is None:
+            return ""  # compat with xlrd
+        elif cell.data_type == TYPE_ERROR:
+            return np.nan
+        elif cell.data_type == TYPE_NUMERIC:
+            val = int(cell.value)
+            if val == cell.value:
+                return val
+            return float(cell.value)
+
+        return cell.value
+
+    def get_sheet_data(
+        self, sheet, file_rows_needed: int | None = None
+    ) -> list[list[Scalar]]:
+        if self.book.read_only:
+            sheet.reset_dimensions()
+
+        data: list[list[Scalar]] = []
+        last_row_with_data = -1
+        for row_number, row in enumerate(sheet.rows):
+            converted_row = [self._convert_cell(cell) for cell in row]
+            while converted_row and converted_row[-1] == "":
+                # trim trailing empty elements
+                converted_row.pop()
+            if converted_row:
+                last_row_with_data = row_number
+            data.append(converted_row)
+            if file_rows_needed is not None and len(data) >= file_rows_needed:
+                break
+
+        # Trim trailing empty rows
+        data = data[: last_row_with_data + 1]
+
+        if len(data) > 0:
+            # extend rows to max width
+            max_width = max(len(data_row) for data_row in data)
+            if min(len(data_row) for data_row in data) < max_width:
+                empty_cell: list[Scalar] = [""]
+                data = [
+                    data_row + (max_width - len(data_row)) * empty_cell
+                    for data_row in data
+                ]
+
+        return data
@@ -0,0 +1,131 @@
+# pyright: reportMissingImports=false
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from pandas.compat._optional import import_optional_dependency
+
+from pandas.io.excel._base import BaseExcelReader
+
+if TYPE_CHECKING:
+    from pyxlsb import Workbook
+
+    from pandas._typing import (
+        FilePath,
+        ReadBuffer,
+        Scalar,
+        StorageOptions,
+    )
+
+
+class PyxlsbReader(BaseExcelReader["Workbook"]):
+    def __init__(
+        self,
+        filepath_or_buffer: FilePath | ReadBuffer[bytes],
+        storage_options: StorageOptions | None = None,
+        engine_kwargs: dict | None = None,
+    ) -> None:
+        """
+        Reader using pyxlsb engine.
+
+        Parameters
+        ----------
+        filepath_or_buffer : str, path object, or Workbook
+            Object to be parsed.
+        storage_options : dict, optional
+            Extra options that make sense for a particular storage connection, e.g.
+            host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
+            are forwarded to ``urllib.request.Request`` as header options. For other
+            URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are
+            forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more
+            details, and for more examples on storage options refer `here
+            <https://pandas.pydata.org/docs/user_guide/io.html?
+            highlight=storage_options#reading-writing-remote-files>`_.
+        engine_kwargs : dict, optional
+            Arbitrary keyword arguments passed to excel engine.
+        """
+        import_optional_dependency("pyxlsb")
+        # This will call load_workbook on the filepath or buffer
+        # And set the result to the book-attribute
+        super().__init__(
+            filepath_or_buffer,
+            storage_options=storage_options,
+            engine_kwargs=engine_kwargs,
+        )
+
+    @property
+    def _workbook_class(self) -> type[Workbook]:
+        from pyxlsb import Workbook
+
+        return Workbook
+
+    def load_workbook(
+        self, filepath_or_buffer: FilePath | ReadBuffer[bytes], engine_kwargs
+    ) -> Workbook:
+        from pyxlsb import open_workbook
+
+        # TODO: hack in buffer capability
+        # This might need some modifications to the Pyxlsb library
+        # Actual work for opening it is in xlsbpackage.py, line 20-ish
+
+        return open_workbook(filepath_or_buffer, **engine_kwargs)
+
+    @property
+    def sheet_names(self) -> list[str]:
+        return self.book.sheets
+
+    def get_sheet_by_name(self, name: str):
+        self.raise_if_bad_sheet_by_name(name)
+        return self.book.get_sheet(name)
+
+    def get_sheet_by_index(self, index: int):
+        self.raise_if_bad_sheet_by_index(index)
+        # pyxlsb sheets are indexed from 1 onwards
+        # There's a fix for this in the source, but the pypi package doesn't have it
+        return self.book.get_sheet(index + 1)
+
+    def _convert_cell(self, cell) -> Scalar:
+        # TODO: there is no way to distinguish between floats and datetimes in pyxlsb
+        # This means that there is no way to read datetime types from an xlsb file yet
+        if cell.v is None:
+            return ""  # Prevents non-named columns from not showing up as Unnamed: i
+        if isinstance(cell.v, float):
+            val = int(cell.v)
+            if val == cell.v:
+                return val
+            else:
+                return float(cell.v)
+
+        return cell.v
+
+    def get_sheet_data(
+        self,
+        sheet,
+        file_rows_needed: int | None = None,
+    ) -> list[list[Scalar]]:
+        data: list[list[Scalar]] = []
+        previous_row_number = -1
+        # When sparse=True the rows can have different lengths and empty rows are
+        # not returned. The cells are namedtuples of row, col, value (r, c, v).
+        for row in sheet.rows(sparse=True):
+            row_number = row[0].r
+            converted_row = [self._convert_cell(cell) for cell in row]
+            while converted_row and converted_row[-1] == "":
+                # trim trailing empty elements
+                converted_row.pop()
+            if converted_row:
+                data.extend([[]] * (row_number - previous_row_number - 1))
+                data.append(converted_row)
+                previous_row_number = row_number
+            if file_rows_needed is not None and len(data) >= file_rows_needed:
+                break
+        if data:
+            # extend rows to max_width
+            max_width = max(len(data_row) for data_row in data)
+            if min(len(data_row) for data_row in data) < max_width:
+                empty_cell: list[Scalar] = [""]
+                data = [
+                    data_row + (max_width - len(data_row)) * empty_cell
+                    for data_row in data
+                ]
+        return data
@@ -0,0 +1,328 @@
+from __future__ import annotations
+
+from collections.abc import (
+    Callable,
+    Hashable,
+    Iterable,
+    MutableMapping,
+    Sequence,
+)
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Literal,
+    TypeVar,
+    overload,
+)
+
+from pandas.compat._optional import import_optional_dependency
+
+from pandas.core.dtypes.common import (
+    is_integer,
+    is_list_like,
+)
+
+if TYPE_CHECKING:
+    from pandas.io.excel._base import ExcelWriter
+
+    ExcelWriter_t = type[ExcelWriter]
+    usecols_func = TypeVar("usecols_func", bound=Callable[[Hashable], object])
+
+_writers: MutableMapping[str, ExcelWriter_t] = {}
+
+
+def register_writer(klass: ExcelWriter_t) -> None:
+    """
+    Add engine to the excel writer registry.io.excel.
+
+    You must use this method to integrate with ``to_excel``.
+
+    Parameters
+    ----------
+    klass : ExcelWriter
+    """
+    if not callable(klass):
+        raise ValueError("Can only register callables as engines")
+    engine_name = klass._engine
+    _writers[engine_name] = klass
+
+
+def get_default_engine(ext: str, mode: Literal["reader", "writer"] = "reader") -> str:
+    """
+    Return the default reader/writer for the given extension.
+
+    Parameters
+    ----------
+    ext : str
+        The excel file extension for which to get the default engine.
+    mode : str {'reader', 'writer'}
+        Whether to get the default engine for reading or writing.
+        Either 'reader' or 'writer'
+
+    Returns
+    -------
+    str
+        The default engine for the extension.
+    """
+    _default_readers = {
+        "xlsx": "openpyxl",
+        "xlsm": "openpyxl",
+        "xlsb": "pyxlsb",
+        "xls": "xlrd",
+        "ods": "odf",
+    }
+    _default_writers = {
+        "xlsx": "openpyxl",
+        "xlsm": "openpyxl",
+        "xlsb": "pyxlsb",
+        "ods": "odf",
+    }
+    assert mode in ["reader", "writer"]
+    if mode == "writer":
+        # Prefer xlsxwriter over openpyxl if installed
+        xlsxwriter = import_optional_dependency("xlsxwriter", errors="warn")
+        if xlsxwriter:
+            _default_writers["xlsx"] = "xlsxwriter"
+        return _default_writers[ext]
+    else:
+        return _default_readers[ext]
+
+
+def get_writer(engine_name: str) -> ExcelWriter_t:
+    try:
+        return _writers[engine_name]
+    except KeyError as err:
+        raise ValueError(f"No Excel writer '{engine_name}'") from err
+
+
+def _excel2num(x: str) -> int:
+    """
+    Convert Excel column name like 'AB' to 0-based column index.
+
+    Parameters
+    ----------
+    x : str
+        The Excel column name to convert to a 0-based column index.
+
+    Returns
+    -------
+    num : int
+        The column index corresponding to the name.
+
+    Raises
+    ------
+    ValueError
+        Part of the Excel column name was invalid.
+    """
+    index = 0
+
+    for c in x.upper().strip():
+        cp = ord(c)
+
+        if cp < ord("A") or cp > ord("Z"):
+            raise ValueError(f"Invalid column name: {x}")
+
+        index = index * 26 + cp - ord("A") + 1
+
+    return index - 1
+
+
+def _range2cols(areas: str) -> list[int]:
+    """
+    Convert comma separated list of column names and ranges to indices.
+
+    Parameters
+    ----------
+    areas : str
+        A string containing a sequence of column ranges (or areas).
+
+    Returns
+    -------
+    cols : list
+        A list of 0-based column indices.
+
+    Examples
+    --------
+    >>> _range2cols("A:E")
+    [0, 1, 2, 3, 4]
+    >>> _range2cols("A,C,Z:AB")
+    [0, 2, 25, 26, 27]
+    """
+    cols: list[int] = []
+
+    for rng in areas.split(","):
+        if ":" in rng:
+            rngs = rng.split(":")
+            cols.extend(range(_excel2num(rngs[0]), _excel2num(rngs[1]) + 1))
+        else:
+            cols.append(_excel2num(rng))
+
+    return cols
+
+
+@overload
+def maybe_convert_usecols(usecols: str | list[int]) -> list[int]: ...
+
+
+@overload
+def maybe_convert_usecols(usecols: list[str]) -> list[str]: ...
+
+
+@overload
+def maybe_convert_usecols(usecols: usecols_func) -> usecols_func: ...
+
+
+@overload
+def maybe_convert_usecols(usecols: None) -> None: ...
+
+
+def maybe_convert_usecols(
+    usecols: str | list[int] | list[str] | usecols_func | None,
+) -> None | list[int] | list[str] | usecols_func:
+    """
+    Convert `usecols` into a compatible format for parsing in `parsers.py`.
+
+    Parameters
+    ----------
+    usecols : object
+        The use-columns object to potentially convert.
+
+    Returns
+    -------
+    converted : object
+        The compatible format of `usecols`.
+    """
+    if usecols is None:
+        return usecols
+
+    if is_integer(usecols):
+        raise ValueError(
+            "Passing an integer for `usecols` is no longer supported.  "
+            "Please pass in a list of int from 0 to `usecols` inclusive instead."
+        )
+
+    if isinstance(usecols, str):
+        return _range2cols(usecols)
+
+    return usecols
+
+
+@overload
+def validate_freeze_panes(freeze_panes: tuple[int, int]) -> Literal[True]: ...
+
+
+@overload
+def validate_freeze_panes(freeze_panes: None) -> Literal[False]: ...
+
+
+def validate_freeze_panes(freeze_panes: tuple[int, int] | None) -> bool:
+    if freeze_panes is not None:
+        if len(freeze_panes) == 2 and all(
+            isinstance(item, int) for item in freeze_panes
+        ):
+            return True
+
+        raise ValueError(
+            "freeze_panes must be of form (row, column) "
+            "where row and column are integers"
+        )
+
+    # freeze_panes wasn't specified, return False so it won't be applied
+    # to output sheet
+    return False
+
+
+def fill_mi_header(
+    row: list[Hashable], control_row: list[bool]
+) -> tuple[list[Hashable], list[bool]]:
+    """
+    Forward fill blank entries in row but only inside the same parent index.
+
+    Used for creating headers in Multiindex.
+
+    Parameters
+    ----------
+    row : list
+        List of items in a single row.
+    control_row : list of bool
+        Helps to determine if particular column is in same parent index as the
+        previous value. Used to stop propagation of empty cells between
+        different indexes.
+
+    Returns
+    -------
+    Returns changed row and control_row
+    """
+    last = row[0]
+    for i in range(1, len(row)):
+        if not control_row[i]:
+            last = row[i]
+
+        if row[i] == "" or row[i] is None:
+            row[i] = last
+        else:
+            control_row[i] = False
+            last = row[i]
+
+    return row, control_row
+
+
+def pop_header_name(
+    row: list[Hashable], index_col: int | Sequence[int]
+) -> tuple[Hashable | None, list[Hashable]]:
+    """
+    Pop the header name for MultiIndex parsing.
+
+    Parameters
+    ----------
+    row : list
+        The data row to parse for the header name.
+    index_col : int, list
+        The index columns for our data. Assumed to be non-null.
+
+    Returns
+    -------
+    header_name : str
+        The extracted header name.
+    trimmed_row : list
+        The original data row with the header name removed.
+    """
+    # Pop out header name and fill w/blank.
+    if is_list_like(index_col):
+        assert isinstance(index_col, Iterable)
+        i = max(index_col)
+    else:
+        assert not isinstance(index_col, Iterable)
+        i = index_col
+
+    header_name = row[i]
+    header_name = None if header_name == "" else header_name
+
+    return header_name, [*row[:i], "", *row[i + 1 :]]
+
+
+def combine_kwargs(engine_kwargs: dict[str, Any] | None, kwargs: dict) -> dict:
+    """
+    Used to combine two sources of kwargs for the backend engine.
+
+    Use of kwargs is deprecated, this function is solely for use in 1.3 and should
+    be removed in 1.4/2.0. Also _base.ExcelWriter.__new__ ensures either engine_kwargs
+    or kwargs must be None or empty respectively.
+
+    Parameters
+    ----------
+    engine_kwargs: dict
+        kwargs to be passed through to the engine.
+    kwargs: dict
+        kwargs to be psased through to the engine (deprecated)
+
+    Returns
+    -------
+    engine_kwargs combined with kwargs
+    """
+    if engine_kwargs is None:
+        result = {}
+    else:
+        result = engine_kwargs.copy()
+    result.update(kwargs)
+    return result
@@ -0,0 +1,147 @@
+from __future__ import annotations
+
+from datetime import time
+import math
+from typing import TYPE_CHECKING
+
+import numpy as np
+
+from pandas.compat._optional import import_optional_dependency
+
+from pandas.io.excel._base import BaseExcelReader
+
+if TYPE_CHECKING:
+    from xlrd import Book
+
+    from pandas._typing import (
+        Scalar,
+        StorageOptions,
+    )
+
+
+class XlrdReader(BaseExcelReader["Book"]):
+    def __init__(
+        self,
+        filepath_or_buffer,
+        storage_options: StorageOptions | None = None,
+        engine_kwargs: dict | None = None,
+    ) -> None:
+        """
+        Excel reader backed by the xlrd engine.
+
+        Parameters
+        ----------
+        filepath_or_buffer : str, path object or Workbook
+            Object to be parsed.
+        storage_options : dict, optional
+            Extra options that make sense for a particular storage connection,
+            e.g. host, port, username, password, etc. For HTTP(S) URLs the
+            key-value pairs are forwarded to ``urllib.request.Request`` as
+            header options. For other URLs (e.g. starting with "s3://", and
+            "gcs://") the key-value pairs are forwarded to ``fsspec.open``.
+            Please see ``fsspec`` and ``urllib`` for more details, and for more
+            examples on storage options refer `here <https://pandas.pydata.org/
+            pandas-docs/stable/user_guide/io.html?
+            highlight=storage_options#reading-writing-remote-files>`__.
+        engine_kwargs : dict, optional
+            Arbitrary keyword arguments passed to excel engine.
+        """
+        # Check the optional xlrd dependency before the base class starts
+        # working with the input; the text is passed along as the install hint.
+        import_optional_dependency(
+            "xlrd", extra="Install xlrd >= 2.0.1 for xls Excel support"
+        )
+        super().__init__(
+            filepath_or_buffer,
+            storage_options=storage_options,
+            engine_kwargs=engine_kwargs,
+        )
+
+    @property
+    def _workbook_class(self) -> type[Book]:
+        # Imported lazily so that xlrd stays an optional dependency.
+        from xlrd import Book
+
+        return Book
+
+    def load_workbook(self, filepath_or_buffer, engine_kwargs) -> Book:
+        """
+        Open the workbook from a path or from a readable buffer.
+
+        Objects exposing ``read`` are consumed fully and handed to xlrd as
+        ``file_contents``; anything else is passed through as the filename.
+        """
+        from xlrd import open_workbook
+
+        if not hasattr(filepath_or_buffer, "read"):
+            return open_workbook(filepath_or_buffer, **engine_kwargs)
+
+        contents = filepath_or_buffer.read()
+        return open_workbook(file_contents=contents, **engine_kwargs)
+
+    @property
+    def sheet_names(self):
+        return self.book.sheet_names()
+
+    def get_sheet_by_name(self, name):
+        self.raise_if_bad_sheet_by_name(name)
+        return self.book.sheet_by_name(name)
+
+    def get_sheet_by_index(self, index):
+        self.raise_if_bad_sheet_by_index(index)
+        return self.book.sheet_by_index(index)
+
+    def get_sheet_data(
+        self, sheet, file_rows_needed: int | None = None
+    ) -> list[list[Scalar]]:
+        """
+        Return the sheet contents as a list of rows of converted cell values.
+
+        Parameters
+        ----------
+        sheet : xlrd sheet
+            Sheet to read; ``nrows``, ``row_values`` and ``row_types`` are used.
+        file_rows_needed : int, optional
+            Upper bound on the number of rows read from the top of the sheet.
+        """
+        from xlrd import (
+            XL_CELL_BOOLEAN,
+            XL_CELL_DATE,
+            XL_CELL_ERROR,
+            XL_CELL_NUMBER,
+            xldate,
+        )
+
+        # Truthy when the workbook uses the 1904 date system.
+        epoch1904 = self.book.datemode
+
+        def _parse_cell(cell_contents, cell_typ):
+            """
+            Convert the raw contents of one cell into a pandas appropriate object.
+            """
+            if cell_typ == XL_CELL_DATE:
+                # Use the newer xlrd datetime handling.
+                try:
+                    stamp = xldate.xldate_as_datetime(cell_contents, epoch1904)
+                except OverflowError:
+                    # Not representable as a datetime: keep the raw value.
+                    return cell_contents
+
+                # Excel doesn't distinguish between dates and time, so a
+                # value falling on the epoch day is treated as a time only.
+                # Excel supports both the 1900 and the 1904 epochs.
+                ymd = stamp.timetuple()[0:3]
+                epoch_day = (1904, 1, 1) if epoch1904 else (1899, 12, 31)
+                if ymd != epoch_day:
+                    return stamp
+                return time(
+                    stamp.hour,
+                    stamp.minute,
+                    stamp.second,
+                    stamp.microsecond,
+                )
+
+            if cell_typ == XL_CELL_ERROR:
+                return np.nan
+
+            if cell_typ == XL_CELL_BOOLEAN:
+                return bool(cell_contents)
+
+            # GH5394 - Excel 'numbers' are always floats
+            # it's a minimal perf hit and less surprising
+            # GH54564 - don't attempt to convert NaN/Inf
+            if cell_typ == XL_CELL_NUMBER and math.isfinite(cell_contents):
+                as_int = int(cell_contents)
+                if as_int == cell_contents:
+                    return as_int
+
+            return cell_contents
+
+        row_count = sheet.nrows
+        if file_rows_needed is not None:
+            row_count = min(row_count, file_rows_needed)
+
+        rows = []
+        for i in range(row_count):
+            cells = zip(sheet.row_values(i), sheet.row_types(i), strict=True)
+            rows.append([_parse_cell(value, typ) for value, typ in cells])
+        return rows
@@ -0,0 +1,288 @@
+from __future__ import annotations
+
+import json
+from typing import (
+    TYPE_CHECKING,
+    Any,
+)
+
+from pandas.io.excel._base import ExcelWriter
+from pandas.io.excel._util import (
+    combine_kwargs,
+    validate_freeze_panes,
+)
+
+if TYPE_CHECKING:
+    from pandas._typing import (
+        ExcelWriterIfSheetExists,
+        FilePath,
+        StorageOptions,
+        WriteExcelBuffer,
+    )
+
+
+class _XlsxStyler:
+    # Map from openpyxl-oriented styles to flatter xlsxwriter representation
+    # Ordering necessary for both determinism and because some are keyed by
+    # prefixes of others.
+    STYLE_MAPPING: dict[str, list[tuple[tuple[str, ...], str]]] = {
+        "font": [
+            (("name",), "font_name"),
+            (("sz",), "font_size"),
+            (("size",), "font_size"),
+            (("color", "rgb"), "font_color"),
+            (("color",), "font_color"),
+            (("b",), "bold"),
+            (("bold",), "bold"),
+            (("i",), "italic"),
+            (("italic",), "italic"),
+            (("u",), "underline"),
+            (("underline",), "underline"),
+            (("strike",), "font_strikeout"),
+            (("vertAlign",), "font_script"),
+            (("vertalign",), "font_script"),
+        ],
+        "number_format": [(("format_code",), "num_format"), ((), "num_format")],
+        "protection": [(("locked",), "locked"), (("hidden",), "hidden")],
+        "alignment": [
+            (("horizontal",), "align"),
+            (("vertical",), "valign"),
+            (("text_rotation",), "rotation"),
+            (("wrap_text",), "text_wrap"),
+            (("indent",), "indent"),
+            (("shrink_to_fit",), "shrink"),
+        ],
+        "fill": [
+            (("patternType",), "pattern"),
+            (("patterntype",), "pattern"),
+            (("fill_type",), "pattern"),
+            (("start_color", "rgb"), "fg_color"),
+            (("fgColor", "rgb"), "fg_color"),
+            (("fgcolor", "rgb"), "fg_color"),
+            (("start_color",), "fg_color"),
+            (("fgColor",), "fg_color"),
+            (("fgcolor",), "fg_color"),
+            (("end_color", "rgb"), "bg_color"),
+            (("bgColor", "rgb"), "bg_color"),
+            (("bgcolor", "rgb"), "bg_color"),
+            (("end_color",), "bg_color"),
+            (("bgColor",), "bg_color"),
+            (("bgcolor",), "bg_color"),
+        ],
+        "border": [
+            (("color", "rgb"), "border_color"),
+            (("color",), "border_color"),
+            (("style",), "border"),
+            (("top", "color", "rgb"), "top_color"),
+            (("top", "color"), "top_color"),
+            (("top", "style"), "top"),
+            (("top",), "top"),
+            (("right", "color", "rgb"), "right_color"),
+            (("right", "color"), "right_color"),
+            (("right", "style"), "right"),
+            (("right",), "right"),
+            (("bottom", "color", "rgb"), "bottom_color"),
+            (("bottom", "color"), "bottom_color"),
+            (("bottom", "style"), "bottom"),
+            (("bottom",), "bottom"),
+            (("left", "color", "rgb"), "left_color"),
+            (("left", "color"), "left_color"),
+            (("left", "style"), "left"),
+            (("left",), "left"),
+        ],
+    }
+
+    @classmethod
+    def convert(cls, style_dict, num_format_str=None) -> dict[str, Any]:
+        """
+        Flatten a nested style dict into an xlsxwriter format dict.
+
+        Parameters
+        ----------
+        style_dict : dict or None
+            Style dictionary to convert. A top-level "borders" key is handled
+            as "border". None yields only the number format, if any.
+        num_format_str : str, optional
+            Number format string. When given it is stored as "num_format" and
+            is not overridden by a number format found in ``style_dict``.
+
+        Returns
+        -------
+        dict
+            Flat mapping of xlsxwriter property name to value.
+
+        Raises
+        ------
+        ValueError
+            If a string "font_script" value is not a known script name.
+        KeyError
+            If a string "underline" value is not a known underline name.
+        """
+        # Position in this list is the numeric border style; unknown names
+        # fall back to 2.
+        border_styles = [
+            "none",
+            "thin",
+            "medium",
+            "dashed",
+            "dotted",
+            "thick",
+            "double",
+            "hair",
+            "mediumDashed",
+            "dashDot",
+            "mediumDashDot",
+            "dashDotDot",
+            "mediumDashDotDot",
+            "slantDashDot",
+        ]
+        script_styles = ["baseline", "superscript", "subscript"]
+        underline_codes = {
+            "none": 0,
+            "single": 1,
+            "double": 2,
+            "singleAccounting": 33,
+            "doubleAccounting": 34,
+        }
+
+        props = {}
+
+        if num_format_str is not None:
+            props["num_format"] = num_format_str
+
+        if style_dict is None:
+            return props
+
+        if "borders" in style_dict:
+            # Work on a copy so the caller's dict keeps its "borders" key.
+            style_dict = style_dict.copy()
+            style_dict["border"] = style_dict.pop("borders")
+
+        for group_name, group in style_dict.items():
+            for path, flat_key in cls.STYLE_MAPPING.get(group_name, []):
+                # path is a sequence of keys into a nested dict,
+                # flat_key is the flat destination key; first match wins.
+                if flat_key in props:
+                    continue
+                node = group
+                try:
+                    for part in path:
+                        node = node[part]
+                except (KeyError, TypeError):
+                    # Path missing (or a non-container on the way): skip.
+                    continue
+                props[flat_key] = node
+
+        pattern = props.get("pattern")
+        if isinstance(pattern, str):
+            # TODO: support other fill patterns
+            props["pattern"] = 0 if pattern == "none" else 1
+
+        for side in ["border", "top", "right", "bottom", "left"]:
+            style_name = props.get(side)
+            if not isinstance(style_name, str):
+                continue
+            if style_name in border_styles:
+                props[side] = border_styles.index(style_name)
+            else:
+                props[side] = 2
+
+        script = props.get("font_script")
+        if isinstance(script, str):
+            props["font_script"] = script_styles.index(script)
+
+        underline = props.get("underline")
+        if isinstance(underline, str):
+            props["underline"] = underline_codes[underline]
+
+        # GH 30107 - xlsxwriter uses different name
+        if props.get("valign") == "center":
+            props["valign"] = "vcenter"
+
+        return props
+
+
+class XlsxWriter(ExcelWriter):
+    _engine = "xlsxwriter"
+    _supported_extensions = (".xlsx",)
+
+    def __init__(  # pyright: ignore[reportInconsistentConstructor]
+        self,
+        path: FilePath | WriteExcelBuffer | ExcelWriter,
+        engine: str | None = None,
+        date_format: str | None = None,
+        datetime_format: str | None = None,
+        mode: str = "w",
+        storage_options: StorageOptions | None = None,
+        if_sheet_exists: ExcelWriterIfSheetExists | None = None,
+        engine_kwargs: dict[str, Any] | None = None,
+        **kwargs,
+    ) -> None:
+        # Use the xlsxwriter module as the Excel writer.
+        from xlsxwriter import Workbook
+
+        # Fold the deprecated **kwargs into engine_kwargs.
+        engine_kwargs = combine_kwargs(engine_kwargs, kwargs)
+
+        # Reject append mode before the base class touches the path.
+        if mode == "a":
+            raise ValueError("Append mode is not supported with xlsxwriter!")
+
+        super().__init__(
+            path,
+            engine=engine,
+            date_format=date_format,
+            datetime_format=datetime_format,
+            mode=mode,
+            storage_options=storage_options,
+            if_sheet_exists=if_sheet_exists,
+            engine_kwargs=engine_kwargs,
+        )
+
+        handle = self._handles.handle
+        try:
+            self._book = Workbook(handle, **engine_kwargs)
+        except TypeError:
+            # Workbook rejected the arguments: release the handle that the
+            # base class already opened, then propagate the error.
+            handle.close()
+            raise
+
+    @property
+    def book(self):
+        """
+        Book instance of class xlsxwriter.Workbook.
+
+        This attribute can be used to access engine-specific features.
+        """
+        return self._book
+
+    @property
+    def sheets(self) -> dict[str, Any]:
+        return self.book.sheetnames
+
+    def _save(self) -> None:
+        """
+        Save workbook to disk.
+        """
+        self.book.close()
+
+    def _write_cells(
+        self,
+        cells,
+        sheet_name: str | None = None,
+        startrow: int = 0,
+        startcol: int = 0,
+        freeze_panes: tuple[int, int] | None = None,
+        autofilter_range: str | None = None,
+    ) -> None:
+        # Write the frame cells using xlsxwriter.
+        sheet_name = self._get_sheet_name(sheet_name)
+
+        # Reuse the worksheet when it already exists, otherwise create it.
+        wks = self.book.get_worksheet_by_name(sheet_name)
+        if wks is None:
+            wks = self.book.add_worksheet(sheet_name)
+
+        # Cache of format objects keyed by serialized style (+ number format).
+        # json.dumps(None) is "null", so unstyled cells map to no format.
+        format_cache = {"null": None}
+
+        if validate_freeze_panes(freeze_panes):
+            wks.freeze_panes(*freeze_panes)
+
+        for cell in cells:
+            val, fmt = self._value_with_fmt(cell.val)
+
+            cache_key = json.dumps(cell.style)
+            if fmt:
+                cache_key += fmt
+
+            if cache_key not in format_cache:
+                format_cache[cache_key] = self.book.add_format(
+                    _XlsxStyler.convert(cell.style, fmt)
+                )
+            style = format_cache[cache_key]
+
+            first_row = startrow + cell.row
+            first_col = startcol + cell.col
+            if cell.mergestart is None or cell.mergeend is None:
+                wks.write(first_row, first_col, val, style)
+                continue
+
+            wks.merge_range(
+                first_row,
+                first_col,
+                startrow + cell.mergestart,
+                startcol + cell.mergeend,
+                val,
+                style,
+            )
+
+        if autofilter_range:
+            wks.autofilter(autofilter_range)
@@ -0,0 +1,172 @@
+"""feather-format compat"""
+
+from __future__ import annotations
+
+from typing import (
+    TYPE_CHECKING,
+    Any,
+)
+import warnings
+
+from pandas._config import using_string_dtype
+
+from pandas._libs import lib
+from pandas.compat._optional import import_optional_dependency
+from pandas.errors import Pandas4Warning
+from pandas.util._decorators import set_module
+from pandas.util._validators import check_dtype_backend
+
+from pandas.core.api import DataFrame
+
+from pandas.io._util import arrow_table_to_pandas
+from pandas.io.common import get_handle
+
+if TYPE_CHECKING:
+    from collections.abc import (
+        Hashable,
+        Sequence,
+    )
+
+    from pandas._typing import (
+        DtypeBackend,
+        FilePath,
+        ReadBuffer,
+        StorageOptions,
+        WriteBuffer,
+    )
+
+
+def to_feather(
+    df: DataFrame,
+    path: FilePath | WriteBuffer[bytes],
+    storage_options: StorageOptions | None = None,
+    **kwargs: Any,
+) -> None:
+    """
+    Serialize a DataFrame to the binary Feather format.
+
+    Parameters
+    ----------
+    df : DataFrame
+        The frame to write.
+    path : str, path object, or file-like object
+        Destination; it is opened in binary write mode.
+    storage_options : dict, optional
+        Extra options that make sense for a particular storage connection, e.g.
+        host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
+        are forwarded to ``urllib.request.Request`` as header options. For other
+        URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are
+        forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more
+        details, and for more examples on storage options refer `here
+        <https://pandas.pydata.org/docs/user_guide/io.html?
+        highlight=storage_options#reading-writing-remote-files>`_.
+    **kwargs :
+        Additional keywords passed to `pyarrow.feather.write_feather`.
+
+    Raises
+    ------
+    ValueError
+        If ``df`` is not a DataFrame.
+    """
+    # The pyarrow check comes first, so it runs before the input is validated.
+    import_optional_dependency("pyarrow")
+    from pyarrow import feather
+
+    if not isinstance(df, DataFrame):
+        raise ValueError("feather only support IO with DataFrames")
+
+    with get_handle(
+        path, "wb", storage_options=storage_options, is_text=False
+    ) as io_handles:
+        sink = io_handles.handle
+        feather.write_feather(df, sink, **kwargs)
+
+
+@set_module("pandas")
+def read_feather(
+    path: FilePath | ReadBuffer[bytes],
+    columns: Sequence[Hashable] | None = None,
+    use_threads: bool = True,
+    storage_options: StorageOptions | None = None,
+    dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
+) -> DataFrame:
+    """
+    Load a feather-format object from the file path.
+
+    Feather is particularly useful for scenarios that require efficient
+    serialization and deserialization of tabular data. It supports
+    schema preservation, making it a reliable choice for use cases
+    such as sharing data between Python and R, or persisting intermediate
+    results during data processing pipelines. This method provides additional
+    flexibility with options for selective column reading, thread parallelism,
+    and choosing the backend for data types.
+
+    Parameters
+    ----------
+    path : str, path object, or file-like object
+        String, path object (implementing ``os.PathLike[str]``), or file-like
+        object implementing a binary ``read()`` function. The string could be a URL.
+        Valid URL schemes include http, ftp, s3, gs and file. For file URLs, a host is
+        expected. A local file could be: ``file://localhost/path/to/table.feather``.
+    columns : sequence, default None
+        If not provided, all columns are read.
+    use_threads : bool, default True
+        Whether to parallelize reading using multiple threads.
+    storage_options : dict, optional
+        Extra options that make sense for a particular storage connection, e.g.
+        host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
+        are forwarded to ``urllib.request.Request`` as header options. For other
+        URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are
+        forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more
+        details, and for more examples on storage options refer `here
+        <https://pandas.pydata.org/docs/user_guide/io.html?
+        highlight=storage_options#reading-writing-remote-files>`_.
+
+    dtype_backend : {'numpy_nullable', 'pyarrow'}
+        Back-end data type applied to the resultant :class:`DataFrame`
+        (still experimental). If not specified, the default behavior
+        is to not use nullable data types. If specified, the behavior
+        is as follows:
+
+        * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame`.
+        * ``"pyarrow"``: returns pyarrow-backed nullable
+          :class:`ArrowDtype` :class:`DataFrame`
+
+        .. versionadded:: 2.0
+
+    Returns
+    -------
+    type of object stored in file
+        DataFrame object stored in the file.
+
+    See Also
+    --------
+    read_csv : Read a comma-separated values (csv) file into a pandas DataFrame.
+    read_excel : Read an Excel file into a pandas DataFrame.
+    read_spss : Read an SPSS file into a pandas DataFrame.
+    read_orc : Load an ORC object into a pandas DataFrame.
+    read_sas : Read SAS file into a pandas DataFrame.
+
+    Examples
+    --------
+    >>> df = pd.read_feather("path/to/file.feather")  # doctest: +SKIP
+    """
+    import_optional_dependency("pyarrow")
+    from pyarrow import feather
+
+    # import utils to register the pyarrow extension types
+    import pandas.core.arrays.arrow.extension_types  # pyright: ignore[reportUnusedImport] # noqa: F401
+
+    check_dtype_backend(dtype_backend)
+
+    with get_handle(
+        path, "rb", storage_options=storage_options, is_text=False
+    ) as handles:
+        source = handles.handle
+
+        # An explicit dtype backend, or the string dtype option, routes the
+        # read through an Arrow table that pandas converts itself.
+        if dtype_backend is not lib.no_default or using_string_dtype():
+            pa_table = feather.read_table(
+                source, columns=columns, use_threads=bool(use_threads)
+            )
+            return arrow_table_to_pandas(pa_table, dtype_backend=dtype_backend)
+
+        # Otherwise let pyarrow build the DataFrame directly, ignoring the
+        # "make_block is deprecated" Pandas4Warning while it does so.
+        with warnings.catch_warnings():
+            warnings.filterwarnings(
+                "ignore",
+                "make_block is deprecated",
+                Pandas4Warning,
+            )
+
+            return feather.read_feather(
+                source, columns=columns, use_threads=bool(use_threads)
+            )
@@ -0,0 +1,9 @@
+# ruff: noqa: TC004
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    # import modules that have public classes/functions
+    from pandas.io.formats import style
+
+    # and mark only those modules as public
+    __all__ = ["style"]
@@ -0,0 +1,157 @@
+# GH37967: Enable the use of CSS named colors, as defined in
+# matplotlib.colors.CSS4_COLORS, when exporting to Excel.
+# This data has been copied here, instead of being imported from matplotlib,
+# not to have ``to_excel`` methods require matplotlib.
+# source: matplotlib._color_data (3.3.3)
+from __future__ import annotations
+
+CSS4_COLORS = {  # lower-case CSS color name -> upper-case "RRGGBB" hex, no "#"
+    "aliceblue": "F0F8FF",
+    "antiquewhite": "FAEBD7",
+    "aqua": "00FFFF",
+    "aquamarine": "7FFFD4",
+    "azure": "F0FFFF",
+    "beige": "F5F5DC",
+    "bisque": "FFE4C4",
+    "black": "000000",
+    "blanchedalmond": "FFEBCD",
+    "blue": "0000FF",
+    "blueviolet": "8A2BE2",
+    "brown": "A52A2A",
+    "burlywood": "DEB887",
+    "cadetblue": "5F9EA0",
+    "chartreuse": "7FFF00",
+    "chocolate": "D2691E",
+    "coral": "FF7F50",
+    "cornflowerblue": "6495ED",
+    "cornsilk": "FFF8DC",
+    "crimson": "DC143C",
+    "cyan": "00FFFF",
+    "darkblue": "00008B",
+    "darkcyan": "008B8B",
+    "darkgoldenrod": "B8860B",
+    "darkgray": "A9A9A9",
+    "darkgreen": "006400",
+    "darkgrey": "A9A9A9",
+    "darkkhaki": "BDB76B",
+    "darkmagenta": "8B008B",
+    "darkolivegreen": "556B2F",
+    "darkorange": "FF8C00",
+    "darkorchid": "9932CC",
+    "darkred": "8B0000",
+    "darksalmon": "E9967A",
+    "darkseagreen": "8FBC8F",
+    "darkslateblue": "483D8B",
+    "darkslategray": "2F4F4F",
+    "darkslategrey": "2F4F4F",
+    "darkturquoise": "00CED1",
+    "darkviolet": "9400D3",
+    "deeppink": "FF1493",
+    "deepskyblue": "00BFFF",
+    "dimgray": "696969",
+    "dimgrey": "696969",
+    "dodgerblue": "1E90FF",
+    "firebrick": "B22222",
+    "floralwhite": "FFFAF0",
+    "forestgreen": "228B22",
+    "fuchsia": "FF00FF",
+    "gainsboro": "DCDCDC",
+    "ghostwhite": "F8F8FF",
+    "gold": "FFD700",
+    "goldenrod": "DAA520",
+    "gray": "808080",
+    "green": "008000",
+    "greenyellow": "ADFF2F",
+    "grey": "808080",
+    "honeydew": "F0FFF0",
+    "hotpink": "FF69B4",
+    "indianred": "CD5C5C",
+    "indigo": "4B0082",
+    "ivory": "FFFFF0",
+    "khaki": "F0E68C",
+    "lavender": "E6E6FA",
+    "lavenderblush": "FFF0F5",
+    "lawngreen": "7CFC00",
+    "lemonchiffon": "FFFACD",
+    "lightblue": "ADD8E6",
+    "lightcoral": "F08080",
+    "lightcyan": "E0FFFF",
+    "lightgoldenrodyellow": "FAFAD2",
+    "lightgray": "D3D3D3",
+    "lightgreen": "90EE90",
+    "lightgrey": "D3D3D3",
+    "lightpink": "FFB6C1",
+    "lightsalmon": "FFA07A",
+    "lightseagreen": "20B2AA",
+    "lightskyblue": "87CEFA",
+    "lightslategray": "778899",
+    "lightslategrey": "778899",
+    "lightsteelblue": "B0C4DE",
+    "lightyellow": "FFFFE0",
+    "lime": "00FF00",
+    "limegreen": "32CD32",
+    "linen": "FAF0E6",
+    "magenta": "FF00FF",
+    "maroon": "800000",
+    "mediumaquamarine": "66CDAA",
+    "mediumblue": "0000CD",
+    "mediumorchid": "BA55D3",
+    "mediumpurple": "9370DB",
+    "mediumseagreen": "3CB371",
+    "mediumslateblue": "7B68EE",
+    "mediumspringgreen": "00FA9A",
+    "mediumturquoise": "48D1CC",
+    "mediumvioletred": "C71585",
+    "midnightblue": "191970",
+    "mintcream": "F5FFFA",
+    "mistyrose": "FFE4E1",
+    "moccasin": "FFE4B5",
+    "navajowhite": "FFDEAD",
+    "navy": "000080",
+    "oldlace": "FDF5E6",
+    "olive": "808000",
+    "olivedrab": "6B8E23",
+    "orange": "FFA500",
+    "orangered": "FF4500",
+    "orchid": "DA70D6",
+    "palegoldenrod": "EEE8AA",
+    "palegreen": "98FB98",
+    "paleturquoise": "AFEEEE",
+    "palevioletred": "DB7093",
+    "papayawhip": "FFEFD5",
+    "peachpuff": "FFDAB9",
+    "peru": "CD853F",
+    "pink": "FFC0CB",
+    "plum": "DDA0DD",
+    "powderblue": "B0E0E6",
+    "purple": "800080",
+    "rebeccapurple": "663399",
+    "red": "FF0000",
+    "rosybrown": "BC8F8F",
+    "royalblue": "4169E1",
+    "saddlebrown": "8B4513",
+    "salmon": "FA8072",
+    "sandybrown": "F4A460",
+    "seagreen": "2E8B57",
+    "seashell": "FFF5EE",
+    "sienna": "A0522D",
+    "silver": "C0C0C0",
+    "skyblue": "87CEEB",
+    "slateblue": "6A5ACD",
+    "slategray": "708090",
+    "slategrey": "708090",
+    "snow": "FFFAFA",
+    "springgreen": "00FF7F",
+    "steelblue": "4682B4",
+    "tan": "D2B48C",
+    "teal": "008080",
+    "thistle": "D8BFD8",
+    "tomato": "FF6347",
+    "turquoise": "40E0D0",
+    "violet": "EE82EE",
+    "wheat": "F5DEB3",
+    "white": "FFFFFF",
+    "whitesmoke": "F5F5F5",
+    "yellow": "FFFF00",
+    "yellowgreen": "9ACD32",
+}
@@ -0,0 +1,95 @@
+"""
+Internal module for console introspection
+"""
+
+from __future__ import annotations
+
+from shutil import get_terminal_size
+
+
+def get_console_size() -> tuple[int | None, int | None]:
+    """
+    Return the console size to use as a tuple = (width, height).
+
+    The ``display.width`` and ``display.max_rows`` options take precedence.
+    A falsy option (e.g. None) falls back to the detected size, which is
+    (None, None) in a non-interactive session.
+    """
+    from pandas import get_option
+
+    width_option = get_option("display.width")
+    height_option = get_option("display.max_rows")
+
+    # Three situations are distinguished:
+    #   * interactive shell terminal: the terminal size can be detected
+    #   * interactive non-shell terminal (ipnb/ipqtconsole): it cannot
+    #   * non-interactive script: the terminal size is disregarded
+    #
+    # The options have default values; setting one to None asks for
+    # auto-detection, which only happens in an interactive shell terminal.
+    if not in_interactive_session():
+        detected = (None, None)
+    elif in_ipython_frontend():
+        # sane defaults for interactive non-shell terminal
+        # match default for width,height in config_init
+        from pandas._config.config import get_default_val
+
+        detected = (
+            get_default_val("display.width"),
+            get_default_val("display.max_rows"),
+        )
+    else:
+        # pure terminal
+        detected = get_terminal_size()
+
+    detected_width, detected_height = detected
+
+    # If the user set width/height to None (auto-detection) and this is a
+    # script (non-interactive), the result is (None, None); the caller
+    # has to deal with that.
+    return width_option or detected_width, height_option or detected_height
+
+
+# ----------------------------------------------------------------------
+# Detect our environment
+
+
+def in_interactive_session() -> bool:
+    """
+    Check if we're running in an interactive shell.
+
+    The session counts as interactive when ``__IPYTHON__`` is defined and
+    truthy, when the ``__main__`` module has no ``__file__`` attribute, or
+    when the ``mode.sim_interactive`` option is set.
+
+    Returns
+    -------
+    bool
+        True if running under python/ipython interactive shell.
+    """
+    from pandas import get_option
+
+    def check_main() -> bool:
+        try:
+            import __main__ as entry_module
+        except ModuleNotFoundError:
+            # No __main__ to inspect: only the option can decide.
+            has_file = True
+        else:
+            has_file = hasattr(entry_module, "__file__")
+
+        if not has_file:
+            return True
+        return get_option("mode.sim_interactive")
+
+    try:
+        # error: Name '__IPYTHON__' is not defined
+        ipython_flag = __IPYTHON__  # type: ignore[name-defined]
+    except NameError:
+        return check_main()
+    return ipython_flag or check_main()
+
+
+def in_ipython_frontend() -> bool:
+    """
+    Check if we're inside an IPython zmq frontend.
+
+    Returns
+    -------
+    bool
+        True when ``get_ipython`` is defined and the type name of the object
+        it returns contains "zmq" (case-insensitive); False otherwise.
+    """
+    try:
+        # error: Name 'get_ipython' is not defined
+        shell = get_ipython()  # type: ignore[name-defined]
+    except NameError:
+        # Not running under IPython at all.
+        return False
+
+    return "zmq" in str(type(shell)).lower()
@@ -0,0 +1,425 @@
+"""
+Utilities for interpreting CSS from Stylers for formatting non-HTML outputs.
+"""
+
+from __future__ import annotations
+
+import re
+from typing import TYPE_CHECKING
+import warnings
+
+from pandas.errors import CSSWarning
+from pandas.util._exceptions import find_stack_level
+
+if TYPE_CHECKING:
+    from collections.abc import (
+        Callable,
+        Generator,
+        Iterable,
+        Iterator,
+    )
+
+
+def _side_expander(prop_fmt: str) -> Callable:
+    """
+    Build an expander that spreads a shorthand value across the four sides.
+
+    Parameters
+    ----------
+    prop_fmt : str
+        Format string with one placeholder that receives each side name
+        (top, right, bottom, left) to produce the expanded property name,
+        e.g. ``"margin-{:s}"``.
+
+    Returns
+    -------
+    function
+        Generator function taking ``(resolver, prop, value)`` and yielding
+        one ``(property, value)`` pair per side.
+    """
+
+    def expand(self: CSSResolver, prop: str, value: str) -> Generator[tuple[str, str]]:
+        """
+        Expand a shorthand property into its four side-specific properties.
+
+        Parameters
+        ----------
+        prop : str
+            CSS property name; only used in the warning message.
+        value : str
+            Whitespace-separated tokens of the shorthand value.
+
+        Yields
+        ------
+        tuple of (str, str)
+            Expanded property name and the token assigned to it.
+        """
+        parts = value.split()
+        # SIDE_SHORTHANDS maps a token count to the token index used for
+        # each entry of SIDES; an unsupported count has no entry.
+        picks = self.SIDE_SHORTHANDS.get(len(parts))
+        if picks is None:
+            warnings.warn(
+                f'Could not expand "{prop}: {value}"',
+                CSSWarning,
+                stacklevel=find_stack_level(),
+            )
+            return
+        for side_name, position in zip(self.SIDES, picks, strict=True):
+            yield prop_fmt.format(side_name), parts[position]
+
+    return expand
+
+
+def _border_expander(side: str = "") -> Callable:
+    """
+    Build an expander for the ``border`` / ``border-{side}`` shorthand.
+
+    Parameters
+    ----------
+    side : str, default ""
+        The border side to expand into properties, or an empty string for
+        the all-sides ``border`` property.
+
+    Returns
+    -------
+    function
+        Generator function called with ``(resolver, prop, value)`` when a
+        'border(-{side}): {value}' declaration is encountered.
+    """
+    if side != "":
+        side = f"-{side}"
+
+    def is_width(token: str, ratios: dict) -> bool:
+        # A width is a unit or keyword known to ``ratios``, optionally
+        # preceded by a number. Plain substring matching would misread
+        # colors whose names merely contain a unit, e.g. "pink" ("in") or
+        # "aqua" ("q").
+        match = re.match(r"^(\S*?)([a-zA-Z%!].*)", token)
+        if match is None:
+            return False
+        number, unit = match.groups()
+        if unit not in ratios:
+            return False
+        if number == "":
+            return True
+        try:
+            float(number)
+        except ValueError:
+            return False
+        return True
+
+    def expand(self: CSSResolver, prop: str, value: str) -> Generator[tuple[str, str]]:
+        """
+        Expand border into color, style, and width tuples
+
+        Parameters
+        ----------
+        prop : str
+            CSS property name passed to styler
+        value : str
+            Value passed to styler for property
+
+        Yields
+        ------
+        tuple of (str, str)
+            Expanded property, value
+        """
+        tokens = value.split()
+        if len(tokens) == 0 or len(tokens) > 3:
+            # NOTE: this also fires for an empty value, despite the wording.
+            # Processing continues after the warning; every token is still
+            # classified below.
+            warnings.warn(
+                f'Too many tokens provided to "{prop}" (expected 1-3)',
+                CSSWarning,
+                stacklevel=find_stack_level(),
+            )
+
+        # TODO: Can we use current color as initial value to comply with CSS standards?
+        border_declarations = {
+            f"border{side}-color": "black",
+            f"border{side}-style": "none",
+            f"border{side}-width": "medium",
+        }
+        for token in tokens:
+            lowered = token.lower()
+            if lowered in self.BORDER_STYLES:
+                border_declarations[f"border{side}-style"] = token
+            elif is_width(lowered, self.BORDER_WIDTH_RATIOS):
+                border_declarations[f"border{side}-width"] = token
+            else:
+                # Anything that is neither a style nor a width is a color.
+                border_declarations[f"border{side}-color"] = token
+            # TODO: Warn user if item entered more than once (e.g. "border: red green")
+
+        # Per CSS, "border" will reset previous "border-*" definitions
+        yield from self.atomize(border_declarations.items())
+
+    return expand
+
+
+class CSSResolver:
+    """
+    Callable that parses CSS declarations and resolves them to atomic properties.
+    """
+
+    # Each ratio table maps a unit or keyword to ``(next unit, multiplier)``;
+    # size_to_pt follows the entries until the unit becomes "pt".
+    UNIT_RATIOS = {
+        "pt": ("pt", 1),
+        "em": ("em", 1),
+        "rem": ("pt", 12),
+        "ex": ("em", 0.5),
+        # 'ch':
+        "px": ("pt", 0.75),
+        "pc": ("pt", 12),
+        "in": ("pt", 72),
+        "cm": ("in", 1 / 2.54),
+        "mm": ("in", 1 / 25.4),
+        "q": ("mm", 0.25),
+        "!!default": ("em", 0),
+    }
+
+    # Font sizes additionally accept percentages and named sizes.
+    FONT_SIZE_RATIOS = {
+        **UNIT_RATIOS,
+        "%": ("em", 0.01),
+        "xx-small": ("rem", 0.5),
+        "x-small": ("rem", 0.625),
+        "small": ("rem", 0.8),
+        "medium": ("rem", 1),
+        "large": ("rem", 1.125),
+        "x-large": ("rem", 1.5),
+        "xx-large": ("rem", 2),
+        "smaller": ("em", 1 / 1.2),
+        "larger": ("em", 1.2),
+        "!!default": ("em", 1),
+    }
+
+    MARGIN_RATIOS = {**UNIT_RATIOS, "none": ("pt", 0)}
+
+    BORDER_WIDTH_RATIOS = {
+        **UNIT_RATIOS,
+        "none": ("pt", 0),
+        "thick": ("px", 4),
+        "medium": ("px", 2),
+        "thin": ("px", 1),
+        # Default: medium only if solid
+    }
+
+    BORDER_STYLES = [
+        "none",
+        "hidden",
+        "dotted",
+        "dashed",
+        "solid",
+        "double",
+        "groove",
+        "ridge",
+        "inset",
+        "outset",
+        "mediumdashdot",
+        "dashdotdot",
+        "hair",
+        "mediumdashdotdot",
+        "dashdot",
+        "slantdashdot",
+        "mediumdashed",
+    ]
+
+    # Token count -> index of the token used for each entry of SIDES.
+    SIDE_SHORTHANDS = {
+        1: [0, 0, 0, 0],
+        2: [0, 1, 0, 1],
+        3: [0, 1, 2, 1],
+        4: [0, 1, 2, 3],
+    }
+
+    SIDES = ("top", "right", "bottom", "left")
+
+    # Shorthand property name -> expander yielding its atomic declarations.
+    CSS_EXPANSIONS = {
+        "border": _border_expander(""),
+        "border-top": _border_expander("top"),
+        "border-right": _border_expander("right"),
+        "border-bottom": _border_expander("bottom"),
+        "border-left": _border_expander("left"),
+        "border-color": _side_expander("border-{:s}-color"),
+        "border-style": _side_expander("border-{:s}-style"),
+        "border-width": _side_expander("border-{:s}-width"),
+        "margin": _side_expander("margin-{:s}"),
+        "padding": _side_expander("padding-{:s}"),
+    }
+
+    def __call__(
+        self,
+        declarations: str | Iterable[tuple[str, str]],
+        inherited: dict[str, str] | None = None,
+    ) -> dict[str, str]:
+        """
+        Resolve the given declarations to atomic properties.
+
+        Parameters
+        ----------
+        declarations : str | Iterable[tuple[str, str]]
+            A CSS string or set of CSS declaration tuples
+            e.g. "font-weight: bold; background: blue" or
+            {("font-weight", "bold"), ("background", "blue")}
+        inherited : dict, optional
+            Atomic properties indicating the inherited style context in which
+            ``declarations`` is to be resolved. ``inherited`` should already
+            be resolved, i.e. valid output of this method.
+
+        Returns
+        -------
+        dict
+            Atomic CSS 2.2 properties.
+
+        Examples
+        --------
+        >>> resolve = CSSResolver()
+        >>> inherited = {"font-family": "serif", "font-weight": "bold"}
+        >>> out = resolve(
+        ...     '''
+        ...               border-color: BLUE RED;
+        ...               font-size: 1em;
+        ...               font-size: 2em;
+        ...               font-weight: normal;
+        ...               font-weight: inherit;
+        ...               ''',
+        ...     inherited,
+        ... )
+        >>> sorted(out.items())  # doctest: +NORMALIZE_WHITESPACE
+        [('border-bottom-color', 'blue'),
+         ('border-left-color', 'red'),
+         ('border-right-color', 'red'),
+         ('border-top-color', 'blue'),
+         ('font-family', 'serif'),
+         ('font-size', '24pt'),
+         ('font-weight', 'bold')]
+        """
+        pairs = (
+            self.parse(declarations) if isinstance(declarations, str) else declarations
+        )
+        # Later declarations of the same property overwrite earlier ones.
+        resolved = dict(self.atomize(pairs))
+        context = {} if inherited is None else inherited
+
+        resolved = self._update_initial(resolved, context)
+        resolved = self._update_font_size(resolved, context)
+        return self._update_other_units(resolved)
+
+    def _update_initial(
+        self,
+        props: dict[str, str],
+        inherited: dict[str, str],
+    ) -> dict[str, str]:
+        """
+        Step 1: apply inherited values and resolve "inherit" / "initial".
+
+        Inherited properties missing from ``props`` are added to it in place.
+        The returned dict is a new one in which "inherit" is replaced by the
+        inherited value, and properties that end up as "initial" or None
+        are dropped.
+        """
+        for name, inherited_val in inherited.items():
+            props.setdefault(name, inherited_val)
+
+        resolved = {}
+        for name, val in props.items():
+            if val == "inherit":
+                val = inherited.get(name, "initial")
+
+            if val in ("initial", None):
+                # we do not define a complete initial stylesheet
+                continue
+            resolved[name] = val
+        return resolved
+
+    def _update_font_size(
+        self,
+        props: dict[str, str],
+        inherited: dict[str, str],
+    ) -> dict[str, str]:
+        """
+        Step 2: convert a declared font-size to points, in place.
+
+        Relative sizes are resolved against the inherited font size.
+        """
+        declared = props.get("font-size")
+        if not declared:
+            return props
+        props["font-size"] = self.size_to_pt(
+            declared,
+            self._get_font_size(inherited),
+            conversions=self.FONT_SIZE_RATIOS,
+        )
+        return props
+
+    def _get_font_size(self, props: dict[str, str]) -> float | None:
+        """Return the "font-size" of ``props`` as a float, or None if unset."""
+        size = props.get("font-size")
+        if not size:
+            return None
+        return self._get_float_font_size_from_pt(size)
+
+    def _get_float_font_size_from_pt(self, font_size_string: str) -> float:
+        """Convert a size string ending in "pt" to its float value."""
+        assert font_size_string.endswith("pt")
+        number_part = font_size_string.rstrip("pt")
+        return float(number_part)
+
+    def _update_other_units(self, props: dict[str, str]) -> dict[str, str]:
+        """
+        Step 3: convert border widths, margins and paddings to points, in place.
+        """
+        em_size = self._get_font_size(props)
+        # 3. TODO: resolve other font-relative units
+        for side in self.SIDES:
+            # TODO: support % for margin and padding
+            targets = (
+                (f"border-{side}-width", self.BORDER_WIDTH_RATIOS),
+                (f"margin-{side}", self.MARGIN_RATIOS),
+                (f"padding-{side}", self.MARGIN_RATIOS),
+            )
+            for name, table in targets:
+                if name not in props:
+                    continue
+                props[name] = self.size_to_pt(
+                    props[name], em_pt=em_size, conversions=table
+                )
+        return props
+
+    def size_to_pt(
+        self, in_val: str, em_pt: float | None = None, conversions: dict = UNIT_RATIOS
+    ) -> str:
+        """
+        Convert a CSS size to a string in points, e.g. "12pt".
+
+        Parameters
+        ----------
+        in_val : str
+            Size to convert: an optional number followed by a unit or keyword.
+        em_pt : float, optional
+            Size of one "em" in points. When None, "em" is treated as "rem".
+        conversions : dict
+            Table mapping a unit to ``(next unit, multiplier)``.
+
+        Returns
+        -------
+        str
+            The size in points, rounded to 5 decimals. If ``in_val`` cannot
+            be parsed or uses an unknown unit, a CSSWarning is emitted and
+            the conversion of "1!!default" is returned instead.
+        """
+
+        def _error() -> str:
+            warnings.warn(
+                f"Unhandled size: {in_val!r}",
+                CSSWarning,
+                stacklevel=find_stack_level(),
+            )
+            return self.size_to_pt("1!!default", conversions=conversions)
+
+        parsed = re.match(r"^(\S*?)([a-zA-Z%!].*)", in_val)
+        if parsed is None:
+            return _error()
+
+        number, unit = parsed.groups()
+        if number == "":
+            # hack for 'large' etc.
+            val = 1
+        else:
+            try:
+                val = float(number)
+            except ValueError:
+                return _error()
+
+        # Follow the conversion table until the value is expressed in points.
+        while unit != "pt":
+            if unit == "em" and em_pt is not None:
+                val *= em_pt
+                unit = "pt"
+            elif unit == "em":
+                unit = "rem"
+            else:
+                try:
+                    unit, mul = conversions[unit]
+                except KeyError:
+                    return _error()
+                val *= mul
+
+        rounded = round(val, 5)
+        whole = int(rounded)
+        # Integral results are written without a fractional part.
+        return f"{whole:d}pt" if whole == rounded else f"{rounded:f}pt"
+
+    def atomize(self, declarations: Iterable) -> Generator[tuple[str, str]]:
+        """
+        Lower-case each declaration and expand shorthand properties.
+
+        Properties listed in CSS_EXPANSIONS are replaced by the pairs their
+        expander yields; all others are yielded as-is.
+        """
+        for raw_prop, raw_value in declarations:
+            prop, value = raw_prop.lower(), raw_value.lower()
+            expander = self.CSS_EXPANSIONS.get(prop)
+            if expander is None:
+                yield prop, value
+            else:
+                yield from expander(self, prop, value)
+
+    def parse(self, declarations_str: str) -> Iterator[tuple[str, str]]:
+        """
+        Generates (prop, value) pairs from declarations.
+
+        In a future version may generate parsed tokens from tinycss/tinycss2
+
+        Parameters
+        ----------
+        declarations_str : str
+            Declarations separated by ";", each of the form "prop: value".
+
+        Yields
+        ------
+        tuple of (str, str)
+            Stripped, lower-cased property and value. Blank declarations are
+            skipped; one without a colon emits a CSSWarning and is dropped.
+        """
+        for chunk in declarations_str.split(";"):
+            if not chunk.strip():
+                continue
+            name, colon, rest = chunk.partition(":")
+            if not colon:
+                warnings.warn(
+                    f"Ill-formatted attribute: expected a colon in {chunk!r}",
+                    CSSWarning,
+                    stacklevel=find_stack_level(),
+                )
+                continue
+            # TODO: don't lowercase case sensitive parts of values (strings)
+            yield name.strip().lower(), rest.strip().lower()
@@ -0,0 +1,336 @@
+"""
+Module for formatting output data into CSV files.
+"""
+
+from __future__ import annotations
+
+from collections.abc import (
+    Hashable,
+    Iterable,
+    Iterator,
+    Sequence,
+)
+import csv as csvlib
+import os
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    cast,
+)
+
+import numpy as np
+
+from pandas._libs import writers as libwriters
+from pandas._typing import SequenceNotStr
+from pandas.util._decorators import cache_readonly
+
+from pandas.core.dtypes.generic import (
+    ABCDatetimeIndex,
+    ABCIndex,
+    ABCMultiIndex,
+    ABCPeriodIndex,
+)
+from pandas.core.dtypes.missing import notna
+
+from pandas.core.indexes.api import Index
+
+from pandas.io.common import get_handle
+
+if TYPE_CHECKING:
+    from pandas._typing import (
+        CompressionOptions,
+        FilePath,
+        FloatFormatType,
+        IndexLabel,
+        StorageOptions,
+        WriteBuffer,
+        npt,
+    )
+
+    from pandas.io.formats.format import DataFrameFormatter
+
+
+# Target number of cells per written chunk. When no chunksize is given,
+# CSVFormatter._initialize_chunksize divides this by the number of columns
+# to get the rows per chunk.
+_DEFAULT_CHUNKSIZE_CELLS = 100_000
+
+
+class CSVFormatter:
+    """
+    Write the frame held by a DataFrameFormatter to a CSV target in chunks.
+    """
+
+    cols: npt.NDArray[np.object_]
+
+    def __init__(
+        self,
+        formatter: DataFrameFormatter,
+        path_or_buf: FilePath | WriteBuffer[str] | WriteBuffer[bytes] = "",
+        sep: str = ",",
+        cols: Sequence[Hashable] | None = None,
+        index_label: IndexLabel | None = None,
+        mode: str = "w",
+        encoding: str | None = None,
+        errors: str = "strict",
+        compression: CompressionOptions = "infer",
+        quoting: int | None = None,
+        lineterminator: str | None = "\n",
+        chunksize: int | None = None,
+        quotechar: str | None = '"',
+        date_format: str | None = None,
+        doublequote: bool = True,
+        escapechar: str | None = None,
+        storage_options: StorageOptions | None = None,
+    ) -> None:
+        self.fmt = formatter
+        self.obj = formatter.frame
+
+        # Output target and how it is opened.
+        self.filepath_or_buffer = path_or_buf
+        self.mode = mode
+        self.encoding = encoding
+        self.errors = errors
+        self.compression: CompressionOptions = compression
+        self.storage_options = storage_options
+
+        # CSV dialect. quotechar depends on quoting and escapechar, so those
+        # are assigned first.
+        self.sep = sep
+        self.quoting = quoting or csvlib.QUOTE_MINIMAL
+        self.doublequote = doublequote
+        self.escapechar = escapechar
+        self.quotechar = self._initialize_quotechar(quotechar)
+        self.lineterminator = lineterminator or os.linesep
+
+        # Labels and data layout. cols must be resolved before chunksize.
+        self.date_format = date_format
+        self.index_label = self._initialize_index_label(index_label)
+        self.cols = self._initialize_columns(cols)
+        self.chunksize = self._initialize_chunksize(chunksize)
+
+    @property
+    def na_rep(self) -> str:
+        """Missing-value representation, taken from the formatter."""
+        return self.fmt.na_rep
+
+    @property
+    def float_format(self) -> FloatFormatType | None:
+        """Float format, taken from the formatter."""
+        return self.fmt.float_format
+
+    @property
+    def decimal(self) -> str:
+        """Decimal separator, taken from the formatter."""
+        return self.fmt.decimal
+
+    @property
+    def header(self) -> bool | SequenceNotStr[str]:
+        """Header setting (flag or column aliases), taken from the formatter."""
+        return self.fmt.header
+
+    @property
+    def index(self) -> bool:
+        """Whether the index is written, taken from the formatter."""
+        return self.fmt.index
+
+    def _initialize_index_label(self, index_label: IndexLabel | None) -> IndexLabel:
+        """
+        Normalize ``index_label``.
+
+        False is returned unchanged, None is replaced by labels derived from
+        the frame's index, list-likes are returned as-is and any other value
+        is wrapped in a one-element list.
+        """
+        if index_label is False:
+            return index_label
+        if index_label is None:
+            return self._get_index_label_from_obj()
+        if isinstance(index_label, (list, tuple, np.ndarray, ABCIndex)):
+            return index_label
+        # given a string for a DF with Index
+        return [index_label]
+
+    def _get_index_label_from_obj(self) -> Sequence[Hashable]:
+        """Derive index labels from the frame's index names."""
+        if isinstance(self.obj.index, ABCMultiIndex):
+            return self._get_index_label_multiindex()
+        return self._get_index_label_flat()
+
+    def _get_index_label_multiindex(self) -> Sequence[Hashable]:
+        """Level names of the index, with falsy names replaced by ""."""
+        return [name if name else "" for name in self.obj.index.names]
+
+    def _get_index_label_flat(self) -> Sequence[Hashable]:
+        """One-element list with the index name, or "" when it is None."""
+        name = self.obj.index.name
+        if name is None:
+            return [""]
+        return [name]
+
+    def _initialize_quotechar(self, quotechar: str | None) -> str | None:
+        """Return None when quoting is disabled and no escapechar is set."""
+        if self.quoting == csvlib.QUOTE_NONE and self.escapechar is None:
+            return None
+        # prevents crash in _csv
+        return quotechar
+
+    @property
+    def has_mi_columns(self) -> bool:
+        """Whether the frame's columns are a MultiIndex."""
+        columns = self.obj.columns
+        return isinstance(columns, ABCMultiIndex)
+
+    def _initialize_columns(
+        self, cols: Iterable[Hashable] | None
+    ) -> npt.NDArray[np.object_]:
+        """
+        Restrict the frame to ``cols`` (if given) and return its column labels.
+
+        Raises
+        ------
+        TypeError
+            If ``cols`` is given while the columns are a MultiIndex.
+        """
+        if cols is not None:
+            # validate mi options
+            if self.has_mi_columns:
+                msg = "cannot specify cols with a MultiIndex on the columns"
+                raise TypeError(msg)
+
+            if isinstance(cols, ABCIndex):
+                selection = cols._get_values_for_csv(**self._number_format)
+            else:
+                selection = list(cols)
+            self.obj = self.obj.loc[:, selection]
+
+        # update columns to include possible multiplicity of dupes
+        # and make sure cols is just a list of labels
+        return self.obj.columns._get_values_for_csv(**self._number_format)
+
+    def _initialize_chunksize(self, chunksize: int | None) -> int:
+        """Return rows per chunk; defaults to a cell budget split over columns."""
+        if chunksize is not None:
+            return int(chunksize)
+        ncols = len(self.cols) or 1
+        return (_DEFAULT_CHUNKSIZE_CELLS // ncols) or 1
+
+    @property
+    def _number_format(self) -> dict[str, Any]:
+        """Dictionary used for storing number formatting settings."""
+        settings: dict[str, Any] = {
+            "na_rep": self.na_rep,
+            "float_format": self.float_format,
+            "date_format": self.date_format,
+            "quoting": self.quoting,
+            "decimal": self.decimal,
+        }
+        return settings
+
+    @cache_readonly
+    def data_index(self) -> Index:
+        """
+        Index used for the row labels.
+
+        Datetime/period indexes are rendered with ``date_format`` when one
+        is set (missing values become ""); a MultiIndex has its unused
+        levels removed.
+        """
+        idx = self.obj.index
+        if (
+            isinstance(idx, (ABCDatetimeIndex, ABCPeriodIndex))
+            and self.date_format is not None
+        ):
+            return Index(
+                [
+                    stamp.strftime(self.date_format) if notna(stamp) else ""
+                    for stamp in idx
+                ]
+            )
+        if isinstance(idx, ABCMultiIndex):
+            return idx.remove_unused_levels()
+        return idx
+
+    @property
+    def nlevels(self) -> int:
+        """Number of index levels written; 0 when the index is not written."""
+        if not self.index:
+            return 0
+        return getattr(self.data_index, "nlevels", 1)
+
+    @property
+    def _has_aliases(self) -> bool:
+        """Whether ``header`` is a list-like of column aliases."""
+        return isinstance(self.header, (tuple, list, np.ndarray, ABCIndex))
+
+    @property
+    def _need_to_save_header(self) -> bool:
+        """Whether a header row has to be written."""
+        return bool(self._has_aliases or self.header)
+
+    @property
+    def write_cols(self) -> SequenceNotStr[Hashable]:
+        """
+        Column labels for the header: the aliases if given, else ``cols``.
+
+        Raises
+        ------
+        ValueError
+            If the number of aliases differs from the number of columns.
+        """
+        if not self._has_aliases:
+            # self.cols is an ndarray derived from Index._get_values_for_csv,
+            #  so its entries are strings, i.e. hashable
+            return cast(SequenceNotStr[Hashable], self.cols)
+
+        aliases = self.header
+        assert not isinstance(aliases, bool)
+        n_aliases, n_cols = len(aliases), len(self.cols)
+        if n_aliases != n_cols:
+            raise ValueError(f"Writing {n_cols} cols but got {n_aliases} aliases")
+        return aliases
+
+    @property
+    def encoded_labels(self) -> list[Hashable]:
+        """Index labels (if written) followed by the column labels."""
+        labels: list[Hashable] = []
+
+        if self.index and self.index_label:
+            assert isinstance(self.index_label, Sequence)
+            labels.extend(self.index_label)
+
+        # MultiIndex columns get their own header rows unless aliased.
+        if self._has_aliases or not self.has_mi_columns:
+            labels.extend(self.write_cols)
+
+        return labels
+
+    def save(self) -> None:
+        """
+        Create the writer & save.
+        """
+        # apply compression and byte/text conversion
+        with get_handle(
+            self.filepath_or_buffer,
+            self.mode,
+            encoding=self.encoding,
+            errors=self.errors,
+            compression=self.compression,
+            storage_options=self.storage_options,
+        ) as handles:
+            # Note: self.encoding is irrelevant here
+            # error: Argument "quoting" to "writer" has incompatible type "int";
+            # expected "Literal[0, 1, 2, 3]"
+            self.writer = csvlib.writer(
+                handles.handle,
+                delimiter=self.sep,
+                lineterminator=self.lineterminator,
+                quotechar=self.quotechar,
+                quoting=self.quoting,  # type: ignore[arg-type]
+                doublequote=self.doublequote,
+                escapechar=self.escapechar,
+            )
+
+            self._save()
+
+    def _save(self) -> None:
+        """Write the header (when required) and then the body."""
+        if self._need_to_save_header:
+            self._save_header()
+        self._save_body()
+
+    def _save_header(self) -> None:
+        """Write one header row, or one per level for MultiIndex columns."""
+        if self.has_mi_columns and not self._has_aliases:
+            for header_row in self._generate_multiindex_header_rows():
+                self.writer.writerow(header_row)
+            return
+        self.writer.writerow(self.encoded_labels)
+
+    def _generate_multiindex_header_rows(self) -> Iterator[list[Hashable]]:
+        """Yield the header rows for MultiIndex columns, one per level."""
+        columns = self.obj.columns
+        for level in range(columns.nlevels):
+            # we need at least 1 index column to write our col names
+            row: list[Hashable] = []
+            if self.index:
+                # name is the first column
+                row.append(columns.names[level])
+
+                # pad so the values line up after the index columns
+                if isinstance(self.index_label, list) and len(self.index_label) > 1:
+                    row += [""] * (len(self.index_label) - 1)
+
+            row.extend(columns._get_level_values(level))
+            yield row
+
+        # Write out the index line if it's not empty.
+        # Otherwise, we will print out an extraneous
+        # blank line between the mi and the data rows.
+        labels = self.encoded_labels
+        if labels and set(labels) != {""}:
+            yield labels + [""] * len(columns)
+
+    def _save_body(self) -> None:
+        """Write the data rows, ``chunksize`` rows at a time."""
+        nrows = len(self.data_index)
+        step = self.chunksize
+        n_chunks = (nrows // step) + 1
+        for k in range(n_chunks):
+            lo = k * step
+            hi = min(lo + step, nrows)
+            if lo >= hi:
+                # nothing left to write
+                break
+            self._save_chunk(lo, hi)
+
+    def _save_chunk(self, start_i: int, end_i: int) -> None:
+        """Format and write the rows in ``[start_i, end_i)``."""
+        # create the data for a chunk
+        rows = slice(start_i, end_i)
+        formatted = self.obj.iloc[rows]._get_values_for_csv(**self._number_format)
+        data = list(formatted._iter_column_arrays())
+
+        if self.nlevels != 0:
+            ix = self.data_index[rows]._get_values_for_csv(**self._number_format)
+        else:
+            # the index is not written: pass an array of matching length
+            ix = np.empty(end_i - start_i)
+
+        libwriters.write_csv_rows(
+            data,
+            ix,
+            self.nlevels,
+            self.cols,
+            self.writer,
+        )
@@ -0,0 +1,657 @@
+"""
+Module for formatting output data in HTML.
+"""
+
+from __future__ import annotations
+
+from textwrap import dedent
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Final,
+    cast,
+)
+
+from pandas._config import get_option
+
+from pandas._libs import lib
+
+from pandas import (
+    MultiIndex,
+    option_context,
+)
+
+from pandas.io.common import is_url
+from pandas.io.formats.format import (
+    DataFrameFormatter,
+    get_level_lengths,
+)
+from pandas.io.formats.printing import pprint_thing
+
+if TYPE_CHECKING:
+    from collections.abc import (
+        Hashable,
+        Iterable,
+        Mapping,
+    )
+
+
+class HTMLFormatter:
+    """
+    Internal class for formatting output data in html.
+    This class is intended for shared functionality between
+    DataFrame.to_html() and DataFrame._repr_html_().
+    Any logic in common with other output formatting methods
+    should ideally be inherited from classes in format.py
+    and this class responsible for only producing html markup.
+    """
+
+    indent_delta: Final = 2
+
+    def __init__(
+        self,
+        formatter: DataFrameFormatter,
+        classes: str | list[str] | tuple[str, ...] | None = None,
+        border: int | bool | None = None,
+        table_id: str | None = None,
+        render_links: bool = False,
+    ) -> None:
+        """
+        Store the formatter and the table-level HTML settings.
+
+        ``border`` of None or True is replaced by the "display.html.border"
+        option; any other falsy value disables the border attribute.
+        """
+        self.fmt = formatter
+        self.classes = classes
+        self.table_id = table_id
+        self.render_links = render_links
+
+        # Values mirrored from the formatter.
+        self.frame = formatter.frame
+        self.columns = formatter.tr_frame.columns
+        self.bold_rows = formatter.bold_rows
+        self.escape = formatter.escape
+        self.show_dimensions = formatter.show_dimensions
+
+        # Lines of generated markup, appended to by ``write``.
+        self.elements: list[str] = []
+
+        if border is None or border is True:
+            self.border = cast(int, get_option("display.html.border"))
+        elif border:
+            self.border = border
+        else:
+            self.border = None
+
+        self.col_space = {}
+        columns_are_multi = isinstance(self.columns, MultiIndex)
+        for column, value in self.fmt.col_space.items():
+            # Integers are taken as pixel widths; other values are used as-is.
+            css_width = f"{value}px" if isinstance(value, int) else value
+            self.col_space[column] = css_width
+            # GH 53885: Handling case where column is index
+            # Flatten the data in the multi index and add in the map
+            if columns_are_multi and isinstance(column, tuple):
+                for level_label in column:
+                    self.col_space[str(level_label)] = css_width
+
+    def to_string(self) -> str:
+        """Render the table and return its lines joined by newlines."""
+        rendered = self.render()
+        # If at least one entry is a str, coerce every entry to str.
+        if any(isinstance(item, str) for item in rendered):
+            rendered = list(map(str, rendered))
+        return "\n".join(rendered)
+
+    def render(self) -> list[str]:
+        """
+        Write the table, plus the dimensions line when requested.
+
+        Returns
+        -------
+        list of str
+            ``self.elements``, the accumulated lines of markup.
+        """
+        self._write_table()
+
+        if self.should_show_dimensions:
+            times = chr(215)  # ×  # noqa: RUF003
+            n_rows = len(self.frame)
+            n_cols = len(self.frame.columns)
+            self.write(f"<p>{n_rows} rows {times} {n_cols} columns</p>")
+
+        return self.elements
+
+    @property
+    def should_show_dimensions(self) -> bool:
+        """Return the wrapped formatter's ``should_show_dimensions`` value."""
+        return self.fmt.should_show_dimensions
+
+    @property
+    def show_row_idx_names(self) -> bool:
+        """Return the wrapped formatter's ``show_row_idx_names`` value."""
+        return self.fmt.show_row_idx_names
+
+    @property
+    def show_col_idx_names(self) -> bool:
+        """Return the wrapped formatter's ``show_col_idx_names`` value."""
+        return self.fmt.show_col_idx_names
+
+    @property
+    def row_levels(self) -> int:
+        """
+        Number of leading cells per body row reserved for the row index.
+
+        Returns the number of index levels when the index is shown, 1 when
+        only the column index names are shown, and 0 otherwise.
+        """
+        if self.fmt.index:
+            # showing (row) index
+            return self.frame.index.nlevels
+        # see gh-22579
+        # Column misalignment also occurs for
+        # a standard index when the columns index is named.
+        # If the row index is not displayed a column of
+        # blank cells need to be included before the DataFrame values.
+        return 1 if self.show_col_idx_names else 0
+
+    def _get_columns_formatted_values(self) -> Iterable:
+        """Return the column labels to render; here ``self.columns`` unchanged."""
+        return self.columns
+
+    @property
+    def is_truncated(self) -> bool:
+        """Return the wrapped formatter's ``is_truncated`` value."""
+        return self.fmt.is_truncated
+
+    @property
+    def ncols(self) -> int:
+        """Number of columns in the formatter's ``tr_frame``."""
+        return len(self.fmt.tr_frame.columns)
+
+    def write(self, s: Any, indent: int = 0) -> None:
+        """Append ``s``, pretty-printed and indented by ``indent`` spaces."""
+        text = pprint_thing(s)
+        padding = " " * indent
+        self.elements.append(padding + text)
+
+    def write_th(
+        self, s: Any, header: bool = False, indent: int = 0, tags: str | None = None
+    ) -> None:
+        """
+        Method for writing a formatted <th> cell.
+
+        If col_space is set on the formatter then that is used for
+        the value of min-width.
+
+        Parameters
+        ----------
+        s : object
+            The data to be written inside the cell.
+        header : bool, default False
+            Set to True if the <th> is for use inside <thead>.  This will
+            cause min-width to be set if there is one.
+        indent : int, default 0
+            The indentation level of the cell.
+        tags : str, default None
+            Tags to include in the cell.
+
+        Returns
+        -------
+        None
+            The cell is written through ``_write_cell``; nothing is returned.
+        """
+        col_space = self.col_space.get(s, None)
+
+        if header and col_space is not None:
+            style = f'style="min-width: {col_space};"'
+            # Separate the style from any existing attributes with a space;
+            # gluing them together yields malformed markup such as
+            # colspan="2"style="min-width: ...".
+            tags = f"{tags} {style}" if tags else style
+
+        self._write_cell(s, kind="th", indent=indent, tags=tags)
+
+    def write_td(self, s: Any, indent: int = 0, tags: str | None = None) -> None:
+        """Write a <td> cell by delegating to ``_write_cell`` with kind "td"."""
+        self._write_cell(s, kind="td", indent=indent, tags=tags)
+
+    def _write_cell(
+        self, s: Any, kind: str = "td", indent: int = 0, tags: str | None = None
+    ) -> None:
+        """
+        Write a single table cell.
+
+        Parameters
+        ----------
+        s : object
+            The data to be written inside the cell.
+        kind : str, default "td"
+            Element name of the cell, e.g. "td" or "th".
+        indent : int, default 0
+            The indentation level of the cell.
+        tags : str, default None
+            Attribute text placed inside the opening tag.
+        """
+        if tags is not None:
+            start_tag = f"<{kind} {tags}>"
+        else:
+            start_tag = f"<{kind}>"
+
+        if self.escape:
+            # escape & first to prevent double escaping of &
+            esc = {"&": r"&amp;", "<": r"&lt;", ">": r"&gt;"}
+        else:
+            esc = {}
+
+        rs = pprint_thing(s, escape_chars=esc).strip()
+        # replace each pair of consecutive spaces with non-breaking spaces
+        rs = rs.replace("  ", "&nbsp;&nbsp;")
+
+        if self.render_links and is_url(rs):
+            rs_unescaped = pprint_thing(s, escape_chars={}).strip()
+            # A raw double quote would end the href attribute early and let
+            # cell data inject markup, so encode it as a character
+            # reference. All other characters are left untouched.
+            href = rs_unescaped.replace('"', "&quot;")
+            start_tag += f'<a href="{href}" target="_blank">'
+            end_a = "</a>"
+        else:
+            end_a = ""
+
+        self.write(f"{start_tag}{rs}{end_a}</{kind}>", indent)
+
+    def write_tr(
+        self,
+        line: Iterable,
+        indent: int = 0,
+        indent_delta: int = 0,
+        header: bool = False,
+        align: str | None = None,
+        tags: dict[int, str] | None = None,
+        nindex_levels: int = 0,
+    ) -> None:
+        """
+        Write one table row.
+
+        Parameters
+        ----------
+        line : Iterable
+            Cell values of the row, in order.
+        indent : int, default 0
+            Indentation of the <tr> tags.
+        indent_delta : int, default 0
+            Extra indentation of the cells relative to the <tr> tags.
+        header : bool, default False
+            If True, every cell is written as <th>.
+        align : str, optional
+            Value for a ``text-align`` style on the <tr> tag.
+        tags : dict of {int: str}, optional
+            Attribute text for individual cells, keyed by cell position.
+        nindex_levels : int, default 0
+            Number of leading cells written as <th> when ``bold_rows`` is set.
+        """
+        cell_tags = {} if tags is None else tags
+
+        opening = "<tr>" if align is None else f'<tr style="text-align: {align};">'
+        self.write(opening, indent)
+
+        cell_indent = indent + indent_delta
+        for pos, cell in enumerate(line):
+            extra = cell_tags.get(pos, None)
+            as_th = header or (self.bold_rows and pos < nindex_levels)
+            if as_th:
+                self.write_th(cell, indent=cell_indent, header=header, tags=extra)
+            else:
+                self.write_td(cell, cell_indent, tags=extra)
+
+        self.write("</tr>", indent)
+
+    def _write_table(self, indent: int = 0) -> None:
+        """
+        Write the <table> element: opening tag, header, body and closing tag.
+
+        A string ``self.classes`` is split on whitespace and stored back as
+        a list.
+
+        Raises
+        ------
+        TypeError
+            If ``self.classes`` is not a string, list or tuple.
+        """
+        css_classes = ["dataframe"]  # Default class.
+        if not get_option("display.html.use_mathjax"):
+            css_classes += ["tex2jax_ignore", "mathjax_ignore"]
+
+        if self.classes is not None:
+            if isinstance(self.classes, str):
+                self.classes = self.classes.split()
+            if not isinstance(self.classes, (list, tuple)):
+                raise TypeError(
+                    "classes must be a string, list, "
+                    f"or tuple, not {type(self.classes)}"
+                )
+            css_classes.extend(self.classes)
+
+        # Optional attributes are left out of the tag when unset.
+        id_section = "" if self.table_id is None else f' id="{self.table_id}"'
+        border_attr = "" if self.border is None else f' border="{self.border}"'
+        class_list = " ".join(css_classes)
+
+        self.write(f'<table{border_attr} class="{class_list}"{id_section}>', indent)
+
+        inner_indent = indent + self.indent_delta
+        if self.fmt.header or self.show_row_idx_names:
+            self._write_header(inner_indent)
+
+        self._write_body(inner_indent)
+
+        self.write("</table>", indent)
+
+    def _write_col_header(self, indent: int) -> None:
+        """
+        Write the header row(s) that carry the column labels.
+
+        For MultiIndex columns one ``<tr>`` is written per column level, and
+        ``colspan`` attributes are generated from the spans returned by
+        ``get_level_lengths``. Otherwise a single ``<tr>`` is written, aligned
+        according to ``self.fmt.justify``. In both cases a ``"..."`` cell is
+        inserted at the truncation position when the frame is truncated
+        horizontally.
+
+        Parameters
+        ----------
+        indent : int
+            Indentation passed to ``write_tr`` for every row written.
+        """
+        row: list[Hashable]
+        is_truncated_horizontally = self.fmt.is_truncated_horizontally
+        if isinstance(self.columns, MultiIndex):
+            template = 'colspan="{span:d}" halign="left"'
+
+            sentinel: lib.NoDefault | bool
+            if self.fmt.sparsify:
+                # GH3547
+                sentinel = lib.no_default
+            else:
+                sentinel = False
+            levels = self.columns._format_multi(sparsify=sentinel, include_names=False)
+            level_lengths = get_level_lengths(levels, sentinel)
+            inner_lvl = len(level_lengths) - 1
+            # ``records`` is used below as a mapping of start position -> span
+            # for the level; ``values`` holds that level's formatted labels.
+            for lnum, (records, values) in enumerate(
+                zip(level_lengths, levels, strict=True)
+            ):
+                if is_truncated_horizontally:
+                    # modify the header lines
+                    ins_col = self.fmt.tr_col_num
+                    if self.fmt.sparsify:
+                        recs_new = {}
+                        # Increment tags after ... col.
+                        for tag, span in list(records.items()):
+                            if tag >= ins_col:
+                                recs_new[tag + 1] = span
+                            elif tag + span > ins_col:
+                                # the span straddles the insertion point, so
+                                # it grows by one column
+                                recs_new[tag] = span + 1
+                                if lnum == inner_lvl:
+                                    values = (
+                                        *values[:ins_col],
+                                        "...",
+                                        *values[ins_col:],
+                                    )
+                                else:
+                                    # sparse col headers do not receive a ...
+                                    values = (
+                                        *values[:ins_col],
+                                        values[ins_col - 1],
+                                        *values[ins_col:],
+                                    )
+                            else:
+                                recs_new[tag] = span
+                            # if ins_col lies between tags, all col headers
+                            # get ...
+                            if tag + span == ins_col:
+                                recs_new[ins_col] = 1
+                                values = (*values[:ins_col], "...", *values[ins_col:])
+                        records = recs_new
+                        inner_lvl = len(level_lengths) - 1
+                        if lnum == inner_lvl:
+                            records[ins_col] = 1
+                    else:
+                        # not sparsified: shift every start position at or
+                        # after the insertion point and add a span-1 entry
+                        # for the "..." column itself
+                        recs_new = {}
+                        for tag, span in list(records.items()):
+                            if tag >= ins_col:
+                                recs_new[tag + 1] = span
+                            else:
+                                recs_new[tag] = span
+                        recs_new[ins_col] = 1
+                        records = recs_new
+                        values = [*values[:ins_col], "...", *values[ins_col:]]
+
+                # see gh-22579
+                # Column Offset Bug with to_html(index=False) with
+                # MultiIndex Columns and Index.
+                # Initially fill row with blank cells before column names.
+                # TODO: Refactor to remove code duplication with code
+                # block below for standard columns index.
+                row = [""] * (self.row_levels - 1)
+                if self.fmt.index or self.show_col_idx_names:
+                    # see gh-22747
+                    # If to_html(index_names=False) do not show columns
+                    # index names.
+                    # TODO: Refactor to use _get_column_name_list from
+                    # DataFrameFormatter class and create a
+                    # _get_formatted_column_labels function for code
+                    # parity with DataFrameFormatter class.
+                    if self.fmt.show_index_names:
+                        name = self.columns.names[lnum]
+                        row.append(pprint_thing(name or ""))
+                    else:
+                        row.append("")
+
+                # ``tags`` is keyed by the cell's position in ``row`` (``j``),
+                # not by its position in ``values`` (``i``). Positions absent
+                # from ``records`` produce no cell (presumably they are
+                # covered by a preceding cell's colspan).
+                tags = {}
+                j = len(row)
+                for i, v in enumerate(values):
+                    if i in records:
+                        if records[i] > 1:
+                            tags[j] = template.format(span=records[i])
+                    else:
+                        continue
+                    j += 1
+                    row.append(v)
+                self.write_tr(row, indent, self.indent_delta, tags=tags, header=True)
+        else:
+            # see gh-22579
+            # Column misalignment also occurs for
+            # a standard index when the columns index is named.
+            # Initially fill row with blank cells before column names.
+            # TODO: Refactor to remove code duplication with code block
+            # above for columns MultiIndex.
+            row = [""] * (self.row_levels - 1)
+            if self.fmt.index or self.show_col_idx_names:
+                # see gh-22747
+                # If to_html(index_names=False) do not show columns
+                # index names.
+                # TODO: Refactor to use _get_column_name_list from
+                # DataFrameFormatter class.
+                if self.fmt.show_index_names:
+                    row.append(self.columns.name or "")
+                else:
+                    row.append("")
+            row.extend(self._get_columns_formatted_values())
+            align = self.fmt.justify
+
+            if is_truncated_horizontally:
+                # offset by the leading row-level cells already in ``row``
+                ins_col = self.row_levels + self.fmt.tr_col_num
+                row.insert(ins_col, "...")
+
+            self.write_tr(row, indent, self.indent_delta, header=True, align=align)
+
+    def _write_row_header(self, indent: int) -> None:
+        is_truncated_horizontally = self.fmt.is_truncated_horizontally
+        row = [x if x is not None else "" for x in self.frame.index.names] + [""] * (
+            self.ncols + (1 if is_truncated_horizontally else 0)
+        )
+        self.write_tr(row, indent, self.indent_delta, header=True)
+
+    def _write_header(self, indent: int) -> None:
+        self.write("<thead>", indent)
+
+        if self.fmt.header:
+            self._write_col_header(indent + self.indent_delta)
+
+        if self.show_row_idx_names:
+            self._write_row_header(indent + self.indent_delta)
+
+        self.write("</thead>", indent)
+
+    def _get_formatted_values(self) -> dict[int, list[str]]:
+        with option_context("display.max_colwidth", None):
+            fmt_values = {i: self.fmt.format_col(i) for i in range(self.ncols)}
+        return fmt_values
+
+    def _write_body(self, indent: int) -> None:
+        self.write("<tbody>", indent)
+        fmt_values = self._get_formatted_values()
+
+        # write values
+        if self.fmt.index and isinstance(self.frame.index, MultiIndex):
+            self._write_hierarchical_rows(fmt_values, indent + self.indent_delta)
+        else:
+            self._write_regular_rows(fmt_values, indent + self.indent_delta)
+
+        self.write("</tbody>", indent)
+
+    def _write_regular_rows(
+        self, fmt_values: Mapping[int, list[str]], indent: int
+    ) -> None:
+        is_truncated_horizontally = self.fmt.is_truncated_horizontally
+        is_truncated_vertically = self.fmt.is_truncated_vertically
+
+        nrows = len(self.fmt.tr_frame)
+
+        if self.fmt.index:
+            fmt = self.fmt._get_formatter("__index__")
+            if fmt is not None:
+                index_values = self.fmt.tr_frame.index.map(fmt)
+            else:
+                # only reached with non-Multi index
+                index_values = self.fmt.tr_frame.index._format_flat(include_name=False)
+
+        row: list[str] = []
+        for i in range(nrows):
+            if is_truncated_vertically and i == (self.fmt.tr_row_num):
+                str_sep_row = ["..."] * len(row)
+                self.write_tr(
+                    str_sep_row,
+                    indent,
+                    self.indent_delta,
+                    tags=None,
+                    nindex_levels=self.row_levels,
+                )
+
+            row = []
+            if self.fmt.index:
+                row.append(index_values[i])
+            # see gh-22579
+            # Column misalignment also occurs for
+            # a standard index when the columns index is named.
+            # Add blank cell before data cells.
+            elif self.show_col_idx_names:
+                row.append("")
+            row.extend(fmt_values[j][i] for j in range(self.ncols))
+
+            if is_truncated_horizontally:
+                dot_col_ix = self.fmt.tr_col_num + self.row_levels
+                row.insert(dot_col_ix, "...")
+            self.write_tr(
+                row, indent, self.indent_delta, tags=None, nindex_levels=self.row_levels
+            )
+
+    def _write_hierarchical_rows(
+        self, fmt_values: Mapping[int, list[str]], indent: int
+    ) -> None:
+        template = 'rowspan="{span}" valign="top"'
+
+        is_truncated_horizontally = self.fmt.is_truncated_horizontally
+        is_truncated_vertically = self.fmt.is_truncated_vertically
+        frame = self.fmt.tr_frame
+        nrows = len(frame)
+
+        assert isinstance(frame.index, MultiIndex)
+        idx_values = frame.index._format_multi(sparsify=False, include_names=False)
+        idx_values = list(zip(*idx_values, strict=True))
+
+        if self.fmt.sparsify:
+            # GH3547
+            sentinel = lib.no_default
+            levels = frame.index._format_multi(sparsify=sentinel, include_names=False)
+
+            level_lengths = get_level_lengths(levels, sentinel)
+            inner_lvl = len(level_lengths) - 1
+            if is_truncated_vertically:
+                # Insert ... row and adjust idx_values and
+                # level_lengths to take this into account.
+                ins_row = self.fmt.tr_row_num
+                inserted = False
+                for lnum, records in enumerate(level_lengths):
+                    rec_new = {}
+                    for tag, span in list(records.items()):
+                        if tag >= ins_row:
+                            rec_new[tag + 1] = span
+                        elif tag + span > ins_row:
+                            rec_new[tag] = span + 1
+
+                            # GH 14882 - Make sure insertion done once
+                            if not inserted:
+                                dot_row = list(idx_values[ins_row - 1])
+                                dot_row[-1] = "..."
+                                idx_values.insert(ins_row, tuple(dot_row))
+                                inserted = True
+                            else:
+                                dot_row = list(idx_values[ins_row])
+                                dot_row[inner_lvl - lnum] = "..."
+                                idx_values[ins_row] = tuple(dot_row)
+                        else:
+                            rec_new[tag] = span
+                        # If ins_row lies between tags, all cols idx cols
+                        # receive ...
+                        if tag + span == ins_row:
+                            rec_new[ins_row] = 1
+                            if lnum == 0:
+                                idx_values.insert(
+                                    ins_row, tuple(["..."] * len(level_lengths))
+                                )
+
+                            # GH 14882 - Place ... in correct level
+                            elif inserted:
+                                dot_row = list(idx_values[ins_row])
+                                dot_row[inner_lvl - lnum] = "..."
+                                idx_values[ins_row] = tuple(dot_row)
+                    level_lengths[lnum] = rec_new
+
+                level_lengths[inner_lvl][ins_row] = 1
+                for ix_col in fmt_values:
+                    fmt_values[ix_col].insert(ins_row, "...")
+                nrows += 1
+
+            for i in range(nrows):
+                row = []
+                tags = {}
+
+                sparse_offset = 0
+                j = 0
+                for records, v in zip(level_lengths, idx_values[i], strict=True):
+                    if i in records:
+                        if records[i] > 1:
+                            tags[j] = template.format(span=records[i])
+                    else:
+                        sparse_offset += 1
+                        continue
+
+                    j += 1
+                    row.append(v)
+
+                row.extend(fmt_values[j][i] for j in range(self.ncols))
+                if is_truncated_horizontally:
+                    row.insert(
+                        self.row_levels - sparse_offset + self.fmt.tr_col_num, "..."
+                    )
+                self.write_tr(
+                    row,
+                    indent,
+                    self.indent_delta,
+                    tags=tags,
+                    nindex_levels=len(levels) - sparse_offset,
+                )
+        else:
+            row = []
+            for i in range(len(frame)):
+                if is_truncated_vertically and i == (self.fmt.tr_row_num):
+                    str_sep_row = ["..."] * len(row)
+                    self.write_tr(
+                        str_sep_row,
+                        indent,
+                        self.indent_delta,
+                        tags=None,
+                        nindex_levels=self.row_levels,
+                    )
+
+                idx_values = list(
+                    zip(
+                        *frame.index._format_multi(sparsify=False, include_names=False),
+                        strict=True,
+                    )
+                )
+                row = []
+                row.extend(idx_values[i])
+                row.extend(fmt_values[j][i] for j in range(self.ncols))
+                if is_truncated_horizontally:
+                    row.insert(self.row_levels + self.fmt.tr_col_num, "...")
+                self.write_tr(
+                    row,
+                    indent,
+                    self.indent_delta,
+                    tags=None,
+                    nindex_levels=frame.index.nlevels,
+                )
+
+
+class NotebookFormatter(HTMLFormatter):
+    """
+    Internal class for formatting output data in html for display in Jupyter
+    Notebooks. This class is intended for functionality specific to
+    DataFrame._repr_html_() and DataFrame.to_html(notebook=True)
+    """
+
+    def _get_formatted_values(self) -> dict[int, list[str]]:
+        return {i: self.fmt.format_col(i) for i in range(self.ncols)}
+
+    def _get_columns_formatted_values(self) -> list[str]:
+        # only reached with non-Multi Index
+        return self.columns._format_flat(include_name=False)
+
+    def write_style(self) -> None:
+        # We use the "scoped" attribute here so that the desired
+        # style properties for the data frame are not then applied
+        # throughout the entire notebook.
+        template_first = """\
+            <style scoped>"""
+        template_last = """\
+            </style>"""
+        template_select = """\
+                .dataframe %s {
+                    %s: %s;
+                }"""
+        element_props = [
+            ("tbody tr th:only-of-type", "vertical-align", "middle"),
+            ("tbody tr th", "vertical-align", "top"),
+        ]
+        if isinstance(self.columns, MultiIndex):
+            element_props.append(("thead tr th", "text-align", "left"))
+            if self.show_row_idx_names:
+                element_props.append(
+                    ("thead tr:last-of-type th", "text-align", "right")
+                )
+        else:
+            element_props.append(("thead th", "text-align", "right"))
+        template_mid = "\n\n".join(template_select % t for t in element_props)
+        template = dedent(f"{template_first}\n{template_mid}\n{template_last}")
+        self.write(template)
+
+    def render(self) -> list[str]:
+        self.write("<div>")
+        self.write_style()
+        super().render()
+        self.write("</div>")
+        return self.elements
@@ -0,0 +1,943 @@
+from __future__ import annotations
+
+from abc import (
+    ABC,
+    abstractmethod,
+)
+import sys
+from textwrap import dedent
+from typing import TYPE_CHECKING
+
+from pandas._config import get_option
+
+from pandas.io.formats import format as fmt
+from pandas.io.formats.printing import pprint_thing
+
+if TYPE_CHECKING:
+    from collections.abc import (
+        Iterable,
+        Iterator,
+        Mapping,
+        Sequence,
+    )
+
+    from pandas._typing import (
+        Dtype,
+        WriteBuffer,
+    )
+
+    from pandas import (
+        DataFrame,
+        Index,
+        Series,
+    )
+
+# Parameter description for ``show_counts``; referenced below from
+# ``series_sub_kwargs`` under the key "show_counts_sub".
+show_counts_sub = dedent(
+    """\
+    show_counts : bool, optional
+        Whether to show the non-null counts. By default, this is shown
+        only if the DataFrame is smaller than
+        ``pandas.options.display.max_info_rows`` and
+        ``pandas.options.display.max_info_columns``. A value of True always
+        shows the counts, and False never shows the counts."""
+)
+
+# Doctest-style examples for ``Series.info``; referenced below from
+# ``series_sub_kwargs`` under the key "examples_sub".
+series_examples_sub = dedent(
+    """\
+    >>> int_values = [1, 2, 3, 4, 5]
+    >>> text_values = ['alpha', 'beta', 'gamma', 'delta', 'epsilon']
+    >>> s = pd.Series(text_values, index=int_values)
+    >>> s.info()
+    <class 'pandas.Series'>
+    Index: 5 entries, 1 to 5
+    Series name: None
+    Non-Null Count  Dtype
+    --------------  -----
+    5 non-null      object
+    dtypes: object(1)
+    memory usage: 80.0+ bytes
+
+    Prints a summary excluding information about its values:
+
+    >>> s.info(verbose=False)
+    <class 'pandas.Series'>
+    Index: 5 entries, 1 to 5
+    dtypes: object(1)
+    memory usage: 80.0+ bytes
+
+    Pipe output of Series.info to buffer instead of sys.stdout, get
+    buffer content and writes to a text file:
+
+    >>> import io
+    >>> buffer = io.StringIO()
+    >>> s.info(buf=buffer)
+    >>> s = buffer.getvalue()
+    >>> with open("df_info.txt", "w",
+    ...           encoding="utf-8") as f:  # doctest: +SKIP
+    ...     f.write(s)
+    260
+
+    The `memory_usage` parameter allows deep introspection mode, specially
+    useful for big Series and fine-tune memory optimization:
+
+    >>> random_strings_array = np.random.choice(['a', 'b', 'c'], 10 ** 6)
+    >>> s = pd.Series(np.random.choice(['a', 'b', 'c'], 10 ** 6))
+    >>> s.info()
+    <class 'pandas.Series'>
+    RangeIndex: 1000000 entries, 0 to 999999
+    Series name: None
+    Non-Null Count    Dtype
+    --------------    -----
+    1000000 non-null  object
+    dtypes: object(1)
+    memory usage: 7.6+ MB
+
+    >>> s.info(memory_usage='deep')
+    <class 'pandas.Series'>
+    RangeIndex: 1000000 entries, 0 to 999999
+    Series name: None
+    Non-Null Count    Dtype
+    --------------    -----
+    1000000 non-null  object
+    dtypes: object(1)
+    memory usage: 55.3 MB"""
+)
+
+
+# "See Also" entries for the Series variant of the ``info`` documentation.
+series_see_also_sub = dedent(
+    """\
+    Series.describe: Generate descriptive statistics of Series.
+    Series.memory_usage: Memory usage of Series."""
+)
+# Parameter description for ``max_cols`` in the Series variant.
+series_max_cols_sub = dedent(
+    """\
+    max_cols : int, optional
+        Unused, exists only for compatibility with DataFrame.info."""
+)
+
+
+# Bundle of the Series-specific text fragments defined above, keyed by
+# placeholder name. The consumer is not visible in this part of the file;
+# presumably these are substituted into a shared ``info`` docstring template.
+series_sub_kwargs = {
+    "klass": "Series",
+    "type_sub": "",
+    "max_cols_sub": series_max_cols_sub,
+    "show_counts_sub": show_counts_sub,
+    "examples_sub": series_examples_sub,
+    "see_also_sub": series_see_also_sub,
+    "version_added_sub": "\n.. versionadded:: 1.4.0\n",
+}
+
+
+def _put_str(s: str | Dtype, space: int) -> str:
+    """
+    Make string of specified length, padding to the right if necessary.
+
+    Parameters
+    ----------
+    s : Union[str, Dtype]
+        String to be formatted.
+    space : int
+        Length to force string to be of.
+
+    Returns
+    -------
+    str
+        String coerced to given length.
+
+    Examples
+    --------
+    >>> pd.io.formats.info._put_str("panda", 6)
+    'panda '
+    >>> pd.io.formats.info._put_str("panda", 4)
+    'pand'
+    """
+    return str(s)[:space].ljust(space)
+
+
+def _sizeof_fmt(num: float, size_qualifier: str) -> str:
+    """
+    Return size in human readable format.
+
+    Parameters
+    ----------
+    num : int
+        Size in bytes.
+    size_qualifier : str
+        Either empty, or '+' (if lower bound).
+
+    Returns
+    -------
+    str
+        Size in human readable format.
+
+    Examples
+    --------
+    >>> _sizeof_fmt(23028, "")
+    '22.5 KB'
+
+    >>> _sizeof_fmt(23028, "+")
+    '22.5+ KB'
+    """
+    for x in ["bytes", "KB", "MB", "GB", "TB"]:
+        if num < 1024.0:
+            return f"{num:3.1f}{size_qualifier} {x}"
+        num /= 1024.0
+    return f"{num:3.1f}{size_qualifier} PB"
+
+
+def _initialize_memory_usage(
+    memory_usage: bool | str | None = None,
+) -> bool | str:
+    """Get memory usage based on inputs and display options."""
+    if memory_usage is None:
+        memory_usage = get_option("display.memory_usage")
+    return memory_usage
+
+
+class _BaseInfo(ABC):
+    """
+    Base class for DataFrameInfo and SeriesInfo.
+
+    Parameters
+    ----------
+    data : DataFrame or Series
+        Either dataframe or series.
+    memory_usage : bool or str, optional
+        If "deep", introspect the data deeply by interrogating object dtypes
+        for system-level memory consumption, and include it in the returned
+        values.
+    """
+
+    data: DataFrame | Series
+    memory_usage: bool | str
+
+    @property
+    @abstractmethod
+    def dtypes(self) -> Iterable[Dtype]:
+        """
+        Dtypes.
+
+        Returns
+        -------
+        dtypes : sequence
+            Dtype of each of the DataFrame's columns (or one series column).
+        """
+
+    @property
+    @abstractmethod
+    def dtype_counts(self) -> Mapping[str, int]:
+        """Mapping dtype - number of counts."""
+
+    @property
+    @abstractmethod
+    def non_null_counts(self) -> list[int] | Series:
+        """Sequence of non-null counts for all columns or column (if series)."""
+
+    @property
+    @abstractmethod
+    def memory_usage_bytes(self) -> int:
+        """
+        Memory usage in bytes.
+
+        Returns
+        -------
+        memory_usage_bytes : int
+            Object's total memory usage in bytes.
+        """
+
+    @property
+    def memory_usage_string(self) -> str:
+        """Memory usage in a form of human readable string."""
+        return f"{_sizeof_fmt(self.memory_usage_bytes, self.size_qualifier)}\n"
+
+    @property
+    def size_qualifier(self) -> str:
+        size_qualifier = ""
+        if self.memory_usage:
+            if self.memory_usage != "deep":
+                # size_qualifier is just a best effort; not guaranteed to catch
+                # all cases (e.g., it misses categorical data even with object
+                # categories)
+                if (
+                    "object" in self.dtype_counts
+                    or self.data.index._is_memory_usage_qualified
+                ):
+                    size_qualifier = "+"
+        return size_qualifier
+
+    @abstractmethod
+    def render(
+        self,
+        *,
+        buf: WriteBuffer[str] | None,
+        max_cols: int | None,
+        verbose: bool | None,
+        show_counts: bool | None,
+    ) -> None:
+        pass
+
+
+class DataFrameInfo(_BaseInfo):
+    """
+    Class storing dataframe-specific info.
+    """
+
+    def __init__(
+        self,
+        data: DataFrame,
+        memory_usage: bool | str | None = None,
+    ) -> None:
+        self.data: DataFrame = data
+        self.memory_usage = _initialize_memory_usage(memory_usage)
+
+    @property
+    def dtype_counts(self) -> Mapping[str, int]:
+        return _get_dataframe_dtype_counts(self.data)
+
+    @property
+    def dtypes(self) -> Iterable[Dtype]:
+        """
+        Dtypes.
+
+        Returns
+        -------
+        dtypes
+            Dtype of each of the DataFrame's columns.
+        """
+        return self.data.dtypes
+
+    @property
+    def ids(self) -> Index:
+        """
+        Column names.
+
+        Returns
+        -------
+        ids : Index
+            DataFrame's column names.
+        """
+        return self.data.columns
+
+    @property
+    def col_count(self) -> int:
+        """Number of columns to be summarized."""
+        return len(self.ids)
+
+    @property
+    def non_null_counts(self) -> Series:
+        """Sequence of non-null counts for all columns or column (if series)."""
+        return self.data.count()
+
+    @property
+    def memory_usage_bytes(self) -> int:
+        deep = self.memory_usage == "deep"
+        return self.data.memory_usage(index=True, deep=deep).sum()
+
+    def render(
+        self,
+        *,
+        buf: WriteBuffer[str] | None,
+        max_cols: int | None,
+        verbose: bool | None,
+        show_counts: bool | None,
+    ) -> None:
+        printer = _DataFrameInfoPrinter(
+            info=self,
+            max_cols=max_cols,
+            verbose=verbose,
+            show_counts=show_counts,
+        )
+        printer.to_buffer(buf)
+
+
+class SeriesInfo(_BaseInfo):
+    """
+    Class storing series-specific info.
+    """
+
+    def __init__(
+        self,
+        data: Series,
+        memory_usage: bool | str | None = None,
+    ) -> None:
+        self.data: Series = data
+        self.memory_usage = _initialize_memory_usage(memory_usage)
+
+    def render(
+        self,
+        *,
+        buf: WriteBuffer[str] | None = None,
+        max_cols: int | None = None,
+        verbose: bool | None = None,
+        show_counts: bool | None = None,
+    ) -> None:
+        if max_cols is not None:
+            raise ValueError(
+                "Argument `max_cols` can only be passed "
+                "in DataFrame.info, not Series.info"
+            )
+        printer = _SeriesInfoPrinter(
+            info=self,
+            verbose=verbose,
+            show_counts=show_counts,
+        )
+        printer.to_buffer(buf)
+
+    @property
+    def non_null_counts(self) -> list[int]:
+        return [self.data.count()]
+
+    @property
+    def dtypes(self) -> Iterable[Dtype]:
+        return [self.data.dtypes]
+
+    @property
+    def dtype_counts(self) -> Mapping[str, int]:
+        from pandas.core.frame import DataFrame
+
+        return _get_dataframe_dtype_counts(DataFrame(self.data))
+
+    @property
+    def memory_usage_bytes(self) -> int:
+        """Memory usage in bytes.
+
+        Returns
+        -------
+        memory_usage_bytes : int
+            Object's total memory usage in bytes.
+        """
+        deep = self.memory_usage == "deep"
+        return self.data.memory_usage(index=True, deep=deep)
+
+
+class _InfoPrinterAbstract:
+    """
+    Class for printing dataframe or series info.
+    """
+
+    def to_buffer(self, buf: WriteBuffer[str] | None = None) -> None:
+        """Save dataframe info into buffer."""
+        table_builder = self._create_table_builder()
+        lines = table_builder.get_lines()
+        if buf is None:  # pragma: no cover
+            buf = sys.stdout
+        fmt.buffer_put_lines(buf, lines)
+
+    @abstractmethod
+    def _create_table_builder(self) -> _TableBuilderAbstract:
+        """Create instance of table builder."""
+
+
+class _DataFrameInfoPrinter(_InfoPrinterAbstract):
+    """
+    Class for printing dataframe info.
+
+    Parameters
+    ----------
+    info : DataFrameInfo
+        Instance of DataFrameInfo.
+    max_cols : int, optional
+        When to switch from the verbose to the truncated output.
+    verbose : bool, optional
+        Whether to print the full summary.
+    show_counts : bool, optional
+        Whether to show the non-null counts.
+    """
+
+    def __init__(
+        self,
+        info: DataFrameInfo,
+        max_cols: int | None = None,
+        verbose: bool | None = None,
+        show_counts: bool | None = None,
+    ) -> None:
+        self.info = info
+        self.data = info.data
+        self.verbose = verbose
+        self.max_cols = self._initialize_max_cols(max_cols)
+        self.show_counts = self._initialize_show_counts(show_counts)
+
+    @property
+    def max_rows(self) -> int:
+        """Maximum info rows to be displayed."""
+        return get_option("display.max_info_rows")
+
+    @property
+    def exceeds_info_cols(self) -> bool:
+        """Check if number of columns to be summarized does not exceed maximum."""
+        return bool(self.col_count > self.max_cols)
+
+    @property
+    def exceeds_info_rows(self) -> bool:
+        """Check if number of rows to be summarized does not exceed maximum."""
+        return bool(len(self.data) > self.max_rows)
+
+    @property
+    def col_count(self) -> int:
+        """Number of columns to be summarized."""
+        return self.info.col_count
+
+    def _initialize_max_cols(self, max_cols: int | None) -> int:
+        if max_cols is None:
+            return get_option("display.max_info_columns")
+        return max_cols
+
+    def _initialize_show_counts(self, show_counts: bool | None) -> bool:
+        if show_counts is None:
+            return bool(not self.exceeds_info_cols and not self.exceeds_info_rows)
+        else:
+            return show_counts
+
+    def _create_table_builder(self) -> _DataFrameTableBuilder:
+        """
+        Create instance of table builder based on verbosity and display settings.
+        """
+        if self.verbose:
+            return _DataFrameTableBuilderVerbose(
+                info=self.info,
+                with_counts=self.show_counts,
+            )
+        elif self.verbose is False:  # specifically set to False, not necessarily None
+            return _DataFrameTableBuilderNonVerbose(info=self.info)
+        elif self.exceeds_info_cols:
+            return _DataFrameTableBuilderNonVerbose(info=self.info)
+        else:
+            return _DataFrameTableBuilderVerbose(
+                info=self.info,
+                with_counts=self.show_counts,
+            )
+
+
+class _SeriesInfoPrinter(_InfoPrinterAbstract):
+    """
+    Printer of the info summary of a series.
+
+    Parameters
+    ----------
+    info : SeriesInfo
+        Instance of SeriesInfo.
+    verbose : bool, optional
+        Whether to print the full summary.
+    show_counts : bool, optional
+        Whether to show the non-null counts.
+    """
+
+    def __init__(
+        self,
+        info: SeriesInfo,
+        verbose: bool | None = None,
+        show_counts: bool | None = None,
+    ) -> None:
+        self.info = info
+        self.data = info.data
+        self.verbose = verbose
+        self.show_counts = self._initialize_show_counts(show_counts)
+
+    def _create_table_builder(self) -> _SeriesTableBuilder:
+        """
+        Pick the table builder; anything but a falsy, non-None ``verbose``
+        selects the verbose one.
+        """
+        if not (self.verbose or self.verbose is None):
+            return _SeriesTableBuilderNonVerbose(info=self.info)
+        return _SeriesTableBuilderVerbose(
+            info=self.info,
+            with_counts=self.show_counts,
+        )
+
+    def _initialize_show_counts(self, show_counts: bool | None) -> bool:
+        """Return ``show_counts``, treating None as True."""
+        return True if show_counts is None else show_counts
+
+
+class _TableBuilderAbstract(ABC):
+    """
+    Base class of the info table builders.
+
+    Subclasses produce the table through ``get_lines``; the properties below
+    forward to the wrapped ``info`` object.
+    """
+
+    _lines: list[str]
+    info: _BaseInfo
+
+    @abstractmethod
+    def get_lines(self) -> list[str]:
+        """Return the finished table as a list of lines (strings)."""
+
+    @property
+    def data(self) -> DataFrame | Series:
+        """Object held by ``info``."""
+        return self.info.data
+
+    @property
+    def dtypes(self) -> Iterable[Dtype]:
+        """Dtypes as reported by ``info``."""
+        return self.info.dtypes
+
+    @property
+    def dtype_counts(self) -> Mapping[str, int]:
+        """Mapping of dtype to its number of occurrences."""
+        return self.info.dtype_counts
+
+    @property
+    def display_memory_usage(self) -> bool:
+        """Truthiness of ``info.memory_usage``."""
+        wanted = self.info.memory_usage
+        return bool(wanted)
+
+    @property
+    def memory_usage_string(self) -> str:
+        """Memory usage string as reported by ``info``."""
+        return self.info.memory_usage_string
+
+    @property
+    def non_null_counts(self) -> list[int] | Series:
+        """Non-null counts as reported by ``info``."""
+        return self.info.non_null_counts
+
+    def add_object_type_line(self) -> None:
+        """Append ``str(type(data))`` to the table."""
+        type_repr = str(type(self.data))
+        self._lines.append(type_repr)
+
+    def add_index_range_line(self) -> None:
+        """Append the summary of the index of ``data`` to the table."""
+        index_summary = self.data.index._summary()
+        self._lines.append(index_summary)
+
+    def add_dtypes_line(self) -> None:
+        """Append the ``dtypes: ...`` line, listing the dtypes in sorted order."""
+        parts = []
+        for dtype_name, count in sorted(self.dtype_counts.items()):
+            parts.append(f"{dtype_name}({count:d})")
+        summary = ", ".join(parts)
+        self._lines.append(f"dtypes: {summary}")
+
+
+class _DataFrameTableBuilder(_TableBuilderAbstract):
+    """
+    Base class of the dataframe info table builders.
+
+    Parameters
+    ----------
+    info : DataFrameInfo.
+        Instance of DataFrameInfo.
+    """
+
+    def __init__(self, *, info: DataFrameInfo) -> None:
+        self.info: DataFrameInfo = info
+
+    def get_lines(self) -> list[str]:
+        """Build the table from scratch and return its lines."""
+        self._lines = []
+        # A frame without columns gets the short "empty" layout.
+        fill = (
+            self._fill_empty_info
+            if self.col_count == 0
+            else self._fill_non_empty_info
+        )
+        fill()
+        return self._lines
+
+    def _fill_empty_info(self) -> None:
+        """Add the lines describing a dataframe without columns."""
+        self.add_object_type_line()
+        self.add_index_range_line()
+        class_name = type(self.data).__name__
+        self._lines.append(f"Empty {class_name}\n")
+
+    @abstractmethod
+    def _fill_non_empty_info(self) -> None:
+        """Add the lines describing a dataframe that has columns."""
+
+    @property
+    def data(self) -> DataFrame:
+        """DataFrame held by ``info``."""
+        return self.info.data
+
+    @property
+    def ids(self) -> Index:
+        """Column labels as reported by ``info.ids``."""
+        return self.info.ids
+
+    @property
+    def col_count(self) -> int:
+        """Number of dataframe columns to be summarized."""
+        return self.info.col_count
+
+    def add_memory_usage_line(self) -> None:
+        """Append the ``memory usage: ...`` line."""
+        usage = self.memory_usage_string
+        self._lines.append(f"memory usage: {usage}")
+
+
+class _DataFrameTableBuilderNonVerbose(_DataFrameTableBuilder):
+    """
+    Builder of the short (non-verbose) dataframe info table.
+    """
+
+    def _fill_non_empty_info(self) -> None:
+        """Add type, index, columns summary, dtypes and optional memory lines."""
+        for add_line in (
+            self.add_object_type_line,
+            self.add_index_range_line,
+            self.add_columns_summary_line,
+            self.add_dtypes_line,
+        ):
+            add_line()
+        if self.display_memory_usage:
+            self.add_memory_usage_line()
+
+    def add_columns_summary_line(self) -> None:
+        """Append the summary of the column labels, named ``Columns``."""
+        columns_summary = self.ids._summary(name="Columns")
+        self._lines.append(columns_summary)
+
+
+class _TableBuilderVerboseMixin(_TableBuilderAbstract):
+    """
+    Shared pieces of the builders that render the verbose, aligned table.
+    """
+
+    SPACING: str = " " * 2
+    strrows: Sequence[Sequence[str]]
+    gross_column_widths: Sequence[int]
+    with_counts: bool
+
+    @property
+    @abstractmethod
+    def headers(self) -> Sequence[str]:
+        """Titles of the columns of the verbose table."""
+
+    @property
+    def header_column_widths(self) -> Sequence[int]:
+        """Width of every column title."""
+        return [len(title) for title in self.headers]
+
+    def _get_gross_column_widths(self) -> Sequence[int]:
+        """Return, per column, the larger of the title width and the body width."""
+        body_widths = self._get_body_column_widths()
+        paired = zip(self.header_column_widths, body_widths, strict=False)
+        return [max(title_width, body_width) for title_width, body_width in paired]
+
+    def _get_body_column_widths(self) -> Sequence[int]:
+        """Return the width of the widest cell of every body column."""
+        # Transpose the rows so that each tuple holds one column of cells.
+        columns: Sequence[Sequence[str]] = list(zip(*self.strrows, strict=True))
+        widths = []
+        for column in columns:
+            widths.append(max(len(cell) for cell in column))
+        return widths
+
+    def _gen_rows(self) -> Iterator[Sequence[str]]:
+        """
+        Return the iterator over the body rows.
+
+        Each row is a sequence of strings; whether it carries the non-null
+        counts depends on ``with_counts``.
+        """
+        make_rows = (
+            self._gen_rows_with_counts
+            if self.with_counts
+            else self._gen_rows_without_counts
+        )
+        return make_rows()
+
+    @abstractmethod
+    def _gen_rows_with_counts(self) -> Iterator[Sequence[str]]:
+        """Iterate over the body rows, counts included."""
+
+    @abstractmethod
+    def _gen_rows_without_counts(self) -> Iterator[Sequence[str]]:
+        """Iterate over the body rows, counts left out."""
+
+    def add_header_line(self) -> None:
+        """Append the line with the column titles, padded to the column widths."""
+        pairs = zip(self.headers, self.gross_column_widths, strict=True)
+        cells = [_put_str(title, width) for title, width in pairs]
+        self._lines.append(self.SPACING.join(cells))
+
+    def add_separator_line(self) -> None:
+        """Append the line of dashes; each run is as long as its column title."""
+        pairs = zip(self.header_column_widths, self.gross_column_widths, strict=True)
+        dashes = [_put_str("-" * title_width, width) for title_width, width in pairs]
+        self._lines.append(self.SPACING.join(dashes))
+
+    def add_body_lines(self) -> None:
+        """Append one padded line per row of ``strrows``."""
+        for row in self.strrows:
+            cells = [
+                _put_str(cell, width)
+                for cell, width in zip(row, self.gross_column_widths, strict=True)
+            ]
+            self._lines.append(self.SPACING.join(cells))
+
+    def _gen_non_null_counts(self) -> Iterator[str]:
+        """Iterate over the non-null counts rendered as ``"<count> non-null"``."""
+        yield from (f"{count} non-null" for count in self.non_null_counts)
+
+    def _gen_dtypes(self) -> Iterator[str]:
+        """Iterate over the dtypes rendered with ``pprint_thing``."""
+        yield from (pprint_thing(dtype) for dtype in self.dtypes)
+
+
+class _DataFrameTableBuilderVerbose(_DataFrameTableBuilder, _TableBuilderVerboseMixin):
+    """
+    Builder of the full (verbose) dataframe info table.
+    """
+
+    def __init__(
+        self,
+        *,
+        info: DataFrameInfo,
+        with_counts: bool,
+    ) -> None:
+        self.info = info
+        self.with_counts = with_counts
+        # The rows are rendered once up front; the widths depend on them.
+        self.strrows: Sequence[Sequence[str]] = list(self._gen_rows())
+        self.gross_column_widths: Sequence[int] = self._get_gross_column_widths()
+
+    def _fill_non_empty_info(self) -> None:
+        """Add every line of the verbose table, memory usage last if enabled."""
+        for add_line in (
+            self.add_object_type_line,
+            self.add_index_range_line,
+            self.add_columns_summary_line,
+            self.add_header_line,
+            self.add_separator_line,
+            self.add_body_lines,
+            self.add_dtypes_line,
+        ):
+            add_line()
+        if self.display_memory_usage:
+            self.add_memory_usage_line()
+
+    @property
+    def headers(self) -> Sequence[str]:
+        """Column titles; ``Non-Null Count`` is present only with counts."""
+        titles = [" # ", "Column", "Dtype"]
+        if self.with_counts:
+            titles.insert(2, "Non-Null Count")
+        return titles
+
+    def add_columns_summary_line(self) -> None:
+        """Append the ``Data columns (total N columns):`` line."""
+        total = self.col_count
+        self._lines.append(f"Data columns (total {total} columns):")
+
+    def _gen_rows_without_counts(self) -> Iterator[Sequence[str]]:
+        """Iterate over (number, column, dtype) rows."""
+        rows = zip(
+            self._gen_line_numbers(),
+            self._gen_columns(),
+            self._gen_dtypes(),
+            strict=True,
+        )
+        yield from rows
+
+    def _gen_rows_with_counts(self) -> Iterator[Sequence[str]]:
+        """Iterate over (number, column, non-null count, dtype) rows."""
+        rows = zip(
+            self._gen_line_numbers(),
+            self._gen_columns(),
+            self._gen_non_null_counts(),
+            self._gen_dtypes(),
+            strict=True,
+        )
+        yield from rows
+
+    def _gen_line_numbers(self) -> Iterator[str]:
+        """Iterate over the column positions rendered as ``" <i>"``."""
+        yield from (f" {position}" for position, _ in enumerate(self.ids))
+
+    def _gen_columns(self) -> Iterator[str]:
+        """Iterate over the column labels rendered with ``pprint_thing``."""
+        yield from (pprint_thing(label) for label in self.ids)
+
+
+class _SeriesTableBuilder(_TableBuilderAbstract):
+    """
+    Base class of the series info table builders.
+
+    Parameters
+    ----------
+    info : SeriesInfo.
+        Instance of SeriesInfo.
+    """
+
+    def __init__(self, *, info: SeriesInfo) -> None:
+        self.info: SeriesInfo = info
+
+    def get_lines(self) -> list[str]:
+        """Build the table from scratch and return its lines."""
+        self._lines = []
+        self._fill_non_empty_info()
+        return self._lines
+
+    @property
+    def data(self) -> Series:
+        """Series held by ``info``."""
+        return self.info.data
+
+    def add_memory_usage_line(self) -> None:
+        """Append the ``memory usage: ...`` line."""
+        usage = self.memory_usage_string
+        self._lines.append(f"memory usage: {usage}")
+
+    @abstractmethod
+    def _fill_non_empty_info(self) -> None:
+        """Add the lines describing the series."""
+
+
+class _SeriesTableBuilderNonVerbose(_SeriesTableBuilder):
+    """
+    Builder of the short (non-verbose) series info table.
+    """
+
+    def _fill_non_empty_info(self) -> None:
+        """Add type, index, dtypes and optional memory usage lines."""
+        for add_line in (
+            self.add_object_type_line,
+            self.add_index_range_line,
+            self.add_dtypes_line,
+        ):
+            add_line()
+        if self.display_memory_usage:
+            self.add_memory_usage_line()
+
+
+class _SeriesTableBuilderVerbose(_SeriesTableBuilder, _TableBuilderVerboseMixin):
+    """
+    Builder of the full (verbose) series info table.
+    """
+
+    def __init__(
+        self,
+        *,
+        info: SeriesInfo,
+        with_counts: bool,
+    ) -> None:
+        self.info = info
+        self.with_counts = with_counts
+        # The rows are rendered once up front; the widths depend on them.
+        self.strrows: Sequence[Sequence[str]] = list(self._gen_rows())
+        self.gross_column_widths: Sequence[int] = self._get_gross_column_widths()
+
+    def _fill_non_empty_info(self) -> None:
+        """Add every line of the verbose table, memory usage last if enabled."""
+        for add_line in (
+            self.add_object_type_line,
+            self.add_index_range_line,
+            self.add_series_name_line,
+            self.add_header_line,
+            self.add_separator_line,
+            self.add_body_lines,
+            self.add_dtypes_line,
+        ):
+            add_line()
+        if self.display_memory_usage:
+            self.add_memory_usage_line()
+
+    def add_series_name_line(self) -> None:
+        """Append the ``Series name: ...`` line."""
+        series_name = self.data.name
+        self._lines.append(f"Series name: {series_name}")
+
+    @property
+    def headers(self) -> Sequence[str]:
+        """Column titles; ``Non-Null Count`` is present only with counts."""
+        titles = ["Dtype"]
+        if self.with_counts:
+            titles.insert(0, "Non-Null Count")
+        return titles
+
+    def _gen_rows_without_counts(self) -> Iterator[Sequence[str]]:
+        """Iterate over single-cell rows holding the dtype."""
+        for dtype in self._gen_dtypes():
+            yield [dtype]
+
+    def _gen_rows_with_counts(self) -> Iterator[Sequence[str]]:
+        """Iterate over (non-null count, dtype) rows."""
+        rows = zip(self._gen_non_null_counts(), self._gen_dtypes(), strict=True)
+        yield from rows
+
+
+def _get_dataframe_dtype_counts(df: DataFrame) -> Mapping[str, int]:
+    """
+    Count how many columns of ``df`` have each dtype name.
+    """
+    per_dtype = df.dtypes.value_counts()
+    # groupby dtype.name to collect e.g. Categorical columns
+    by_name = per_dtype.groupby(lambda dtype: dtype.name)
+    return by_name.sum()
@@ -0,0 +1,587 @@
+"""
+Printing tools.
+"""
+
+from __future__ import annotations
+
+from collections.abc import (
+    Callable,
+    Iterable,
+    Mapping,
+    Sequence,
+)
+import sys
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    TypeAlias,
+    TypeVar,
+)
+from unicodedata import east_asian_width
+
+from pandas._config import get_option
+
+from pandas.core.dtypes.inference import is_sequence
+
+from pandas.io.formats.console import get_console_size
+
+if TYPE_CHECKING:
+    from pandas._typing import ListLike
+EscapeChars: TypeAlias = Mapping[str, str] | Iterable[str]
+_KT = TypeVar("_KT")
+_VT = TypeVar("_VT")
+
+
+def adjoin(space: int, *lists: list[str], **kwargs: Any) -> str:
+    """
+    Glue several columns of strings together side by side.
+
+    Each positional list is one column. Columns are left-justified, every
+    column but the last is widened by ``space``, and shorter columns get
+    blank rows at the top so that all columns have the same number of rows.
+
+    Parameters
+    ----------
+    space : int
+        number of spaces for padding
+    *lists : list of str
+        lists of str which are being joined
+    strlen : callable
+        function used to calculate the length of each str. Needed for unicode
+        handling.
+    justfunc : callable
+        function used to justify str. Needed for unicode handling.
+
+    Returns
+    -------
+    str
+        The joined rows separated by newlines. An empty string when no
+        columns are given or when every column is empty.
+    """
+    strlen = kwargs.pop("strlen", len)
+    justfunc = kwargs.pop("justfunc", _adj_justify)
+
+    if not lists:
+        return ""
+
+    # ``default=0`` keeps an empty column from raising ValueError in ``max``.
+    lengths = [max(map(strlen, col), default=0) + space for col in lists[:-1]]
+    # The last column gets no trailing padding.
+    # NOTE(review): it is measured with ``len`` rather than ``strlen``; kept
+    # that way so that existing output is unchanged.
+    lengths.append(max(map(len, lists[-1]), default=0))
+    max_rows = max(map(len, lists))
+
+    columns = []
+    for width, col in zip(lengths, lists, strict=True):
+        justified = justfunc(col, width, mode="left")
+        # Blank rows on top bring every column up to ``max_rows`` rows.
+        blank_rows = [" " * width] * (max_rows - len(col))
+        columns.append(blank_rows + justified)
+    return "\n".join("".join(row) for row in zip(*columns, strict=True))
+
+
+def _adj_justify(texts: Iterable[str], max_len: int, mode: str = "right") -> list[str]:
+    """
+    Justify every string of ``texts`` to ``max_len``.
+
+    ``mode`` "left" uses ljust, "center" uses center, anything else rjust.
+    """
+    method_name = {"left": "ljust", "center": "center"}.get(mode, "rjust")
+    return [getattr(text, method_name)(max_len) for text in texts]
+
+
+# Unicode consolidation
+# ---------------------
+#
+# pprinting utility functions for generating Unicode text or
+# bytes(3.x)/str(2.x) representations of objects.
+# Try to use these as much as possible rather than rolling your own.
+#
+# When to use
+# -----------
+#
+# 1) If you're writing code internal to pandas (no I/O directly involved),
+#    use pprint_thing().
+#
+#    It will always return unicode text which can be handled by other
+#    parts of the package without breakage.
+#
+# 2) if you need to write something out to file, use
+#    pprint_thing_encoded(encoding).
+#
+#    If no encoding is specified, it defaults to utf-8. Since encoding pure
+#    ascii with utf-8 is a no-op you can safely use the default utf-8 if you're
+#    working with straight ascii.
+
+
+def _pprint_seq(
+    seq: ListLike, _nest_lvl: int = 0, max_seq_items: int | None = None, **kwds: Any
+) -> str:
+    """
+    internal. pprinter for iterables. you should probably use pprint_thing()
+    rather than calling this directly.
+
+    The number of printed items is bounded by ``max_seq_items``, falling back
+    to the ``max_seq_items`` option and then to ``len(seq)``. Passing
+    ``max_seq_items=False`` removes the bound.
+    """
+    if isinstance(seq, set):
+        opening, closing = "{", "}"
+    elif isinstance(seq, frozenset):
+        opening, closing = "frozenset({", "})"
+    elif hasattr(seq, "__setitem__"):
+        opening, closing = "[", "]"
+    else:
+        opening, closing = "(", ")"
+
+    if max_seq_items is False:
+        limit = None
+    else:
+        limit = max_seq_items or get_option("max_seq_items") or len(seq)
+
+    # iterate instead of slicing, so that sets are handled as well
+    rendered = []
+    truncated = False
+    for position, element in enumerate(iter(seq)):
+        if limit is not None and position >= limit:
+            truncated = True
+            break
+        rendered.append(
+            pprint_thing(element, _nest_lvl + 1, max_seq_items=max_seq_items, **kwds)
+        )
+    body = ", ".join(rendered)
+
+    if truncated:
+        body += ", ..."
+    elif isinstance(seq, tuple) and len(seq) == 1:
+        # a one-element tuple keeps its trailing comma
+        body += ","
+
+    return f"{opening}{body}{closing}"
+
+
+def _pprint_dict(
+    seq: Mapping, _nest_lvl: int = 0, max_seq_items: int | None = None, **kwds: Any
+) -> str:
+    """
+    internal. pprinter for mappings. you should probably use pprint_thing()
+    rather than calling this directly.
+
+    At most ``max_seq_items`` pairs are printed, falling back to the
+    ``max_seq_items`` option and then to ``len(seq)``. Passing
+    ``max_seq_items=False`` prints every pair.
+    """
+    if max_seq_items is False:
+        limit = len(seq)
+    else:
+        limit = max_seq_items or get_option("max_seq_items") or len(seq)
+
+    rendered = []
+    for key, value in list(seq.items())[:limit]:
+        key_text = pprint_thing(key, _nest_lvl + 1, max_seq_items=max_seq_items, **kwds)
+        value_text = pprint_thing(
+            value, _nest_lvl + 1, max_seq_items=max_seq_items, **kwds
+        )
+        rendered.append(f"{key_text}: {value_text}")
+
+    things = ", ".join(rendered)
+    if limit < len(seq):
+        # some pairs were left out
+        things += ", ..."
+    return "{" + things + "}"
+
+
+def pprint_thing(
+    thing: object,
+    _nest_lvl: int = 0,
+    escape_chars: EscapeChars | None = None,
+    default_escapes: bool = False,
+    quote_strings: bool = False,
+    max_seq_items: int | None = None,
+) -> str:
+    """
+    This function is the sanctioned way of converting objects
+    to a string representation and properly handles nested sequences.
+
+    Parameters
+    ----------
+    thing : anything to be formatted
+    _nest_lvl : internal use only. pprint_thing() is mutually-recursive
+        with _pprint_seq and _pprint_dict, this argument is used to keep
+        track of the current nesting level, and limit it.
+    escape_chars : list[str] or Mapping[str, str], optional
+        Characters to escape. If a Mapping is passed the values are the
+        replacements
+    default_escapes : bool, default False
+        Whether the input escape characters replaces or adds to the defaults
+    quote_strings : bool, default False
+        Whether a ``str`` is wrapped in single quotes
+    max_seq_items : int or None, default None
+        Pass through to other pretty printers to limit sequence printing
+
+    Returns
+    -------
+    str
+    """
+
+    def _escaped(value: Any) -> str:
+        # default replacements, used for characters given without a mapping
+        replacements = {"\t": r"\t", "\n": r"\n", "\r": r"\r", "'": r"\'"}
+        if isinstance(escape_chars, Mapping):
+            if default_escapes:
+                replacements.update(escape_chars)
+            else:
+                replacements = escape_chars  # type: ignore[assignment]
+            targets = list(escape_chars.keys())
+        else:
+            targets = escape_chars or ()
+
+        text = str(value)
+        for char in targets:
+            text = text.replace(char, replacements[char])
+        return text
+
+    # objects with __next__ are not consumed, only converted with str()
+    if hasattr(thing, "__next__"):
+        return str(thing)
+
+    if isinstance(thing, Mapping) and _nest_lvl < get_option(
+        "display.pprint_nest_depth"
+    ):
+        return _pprint_dict(
+            thing, _nest_lvl, quote_strings=True, max_seq_items=max_seq_items
+        )
+
+    if is_sequence(thing) and _nest_lvl < get_option("display.pprint_nest_depth"):
+        return _pprint_seq(
+            # error: Argument 1 to "_pprint_seq" has incompatible type "object";
+            # expected "ExtensionArray | ndarray[Any, Any] | Index | Series |
+            # SequenceNotStr[Any] | range"
+            thing,  # type: ignore[arg-type]
+            _nest_lvl,
+            escape_chars=escape_chars,
+            quote_strings=quote_strings,
+            max_seq_items=max_seq_items,
+        )
+
+    if isinstance(thing, str) and quote_strings:
+        return f"'{_escaped(thing)}'"
+    return _escaped(thing)
+
+
+def pprint_thing_encoded(
+    object: object, encoding: str = "utf-8", errors: str = "replace"
+) -> bytes:
+    """Return ``pprint_thing(object)`` encoded with ``encoding`` and ``errors``."""
+    # get unicode representation of object, then encode it
+    return pprint_thing(object).encode(encoding, errors)
+
+
+def enable_data_resource_formatter(enable: bool) -> None:
+    """
+    Enable or disable the ``application/vnd.dataresource+json`` formatter.
+
+    Does nothing unless IPython is already imported and ``get_ipython()``
+    returns a shell. When enabling, the formatter is registered first if it
+    is missing; when disabling, a registered formatter is only switched off.
+    """
+    if "IPython" not in sys.modules:
+        # definitely not in IPython
+        return
+    from IPython import get_ipython
+
+    # error: Call to untyped function "get_ipython" in typed context
+    shell = get_ipython()  # type: ignore[no-untyped-call]
+    if shell is None:
+        # still not in IPython
+        return
+
+    formatters = shell.display_formatter.formatters
+    mimetype = "application/vnd.dataresource+json"
+
+    if not enable:
+        # switch the formatter off if it has been registered
+        if mimetype in formatters:
+            formatters[mimetype].enabled = False
+        return
+
+    if mimetype not in formatters:
+        # define tableschema formatter
+        from IPython.core.formatters import BaseFormatter
+        from traitlets import ObjectName
+
+        class TableSchemaFormatter(BaseFormatter):
+            print_method = ObjectName("_repr_data_resource_")
+            _return_type = (dict,)
+
+        # register it:
+        formatters[mimetype] = TableSchemaFormatter()
+    # enable it if it's been disabled:
+    formatters[mimetype].enabled = True
+
+
+def default_pprint(thing: Any, max_seq_items: int | None = None) -> str:
+    """Call ``pprint_thing`` with tab/CR/LF escaping and quoted strings."""
+    escapes = ("\t", "\r", "\n")
+    return pprint_thing(
+        thing,
+        escape_chars=escapes,
+        quote_strings=True,
+        max_seq_items=max_seq_items,
+    )
+
+
+def format_object_summary(
+    obj: ListLike,
+    formatter: Callable,
+    is_justify: bool = True,
+    name: str | None = None,
+    indent_for_name: bool = True,
+    line_break_each_value: bool = False,
+) -> str:
+    """
+    Return the formatted obj as a unicode string
+
+    Parameters
+    ----------
+    obj : object
+        must be iterable, support ``len`` and support __getitem__
+        (integer positions and slices are used)
+    formatter : callable
+        string formatter for an element
+    is_justify : bool
+        should justify the display
+    name : name, optional
+        defaults to the class name of the obj
+    indent_for_name : bool, default True
+        Whether subsequent lines should be indented to
+        align with the name.
+    line_break_each_value : bool, default False
+        If True, inserts a line break for each value of ``obj``.
+        If False, only break lines when a line of values gets wider
+        than the display width.
+
+    Returns
+    -------
+    summary string
+    """
+    # Fall back to the ``display.width`` option (or 80) when the console
+    # size is not known.
+    display_width, _ = get_console_size()
+    if display_width is None:
+        display_width = get_option("display.width") or 80
+    if name is None:
+        name = type(obj).__name__
+
+    # space1 follows the closing bracket of a multi-line summary; space2
+    # starts every wrapped line of values.
+    if indent_for_name:
+        name_len = len(name)
+        space1 = f"\n{(' ' * (name_len + 1))}"
+        space2 = f"\n{(' ' * (name_len + 2))}"
+    else:
+        space1 = "\n"
+        space2 = "\n "  # space for the opening '['
+
+    n = len(obj)
+    if line_break_each_value:
+        # If we want to vertically align on each value of obj, we need to
+        # separate values by a line break and indent the values
+        sep = ",\n " + " " * len(name)
+    else:
+        sep = ","
+    # a falsy ``display.max_seq_items`` option means "no limit"
+    max_seq_items = get_option("display.max_seq_items") or n
+
+    # are we a truncated display
+    is_truncated = n > max_seq_items
+
+    # adj can optionally handle unicode eastern asian width
+    adj = get_adjustment()
+
+    def _extend_line(
+        s: str, line: str, value: str, display_width: int, next_line_prefix: str
+    ) -> tuple[str, str]:
+        # Append ``value`` to ``line``; when it would not fit, first flush
+        # ``line`` into ``s`` and start a new line with ``next_line_prefix``.
+        if adj.len(line.rstrip()) + adj.len(value.rstrip()) >= display_width:
+            s += line.rstrip()
+            line = next_line_prefix
+        line += value
+        return s, line
+
+    def best_len(values: list[str]) -> int:
+        # Largest display length among ``values``; 0 for an empty list.
+        if values:
+            return max(adj.len(x) for x in values)
+        else:
+            return 0
+
+    close = ", "
+
+    if n == 0:
+        summary = f"[]{close}"
+    elif n == 1 and not line_break_each_value:
+        first = formatter(obj[0])
+        summary = f"[{first}]{close}"
+    elif n == 2 and not line_break_each_value:
+        first = formatter(obj[0])
+        last = formatter(obj[-1])
+        summary = f"[{first}, {last}]{close}"
+    else:
+        if max_seq_items == 1:
+            # If max_seq_items=1 show only last element
+            head = []
+            tail = [formatter(x) for x in obj[-1:]]
+        elif n > max_seq_items:
+            # Truncated: show the same number of items from each end, at
+            # most 10. Note that ``n`` is reused for that number from here on.
+            n = min(max_seq_items // 2, 10)
+            head = [formatter(x) for x in obj[:n]]
+            tail = [formatter(x) for x in obj[-n:]]
+        else:
+            # Everything fits: all values go into ``tail``.
+            head = []
+            tail = [formatter(x) for x in obj]
+
+        # adjust all values to max length if needed
+        if is_justify:
+            if line_break_each_value:
+                # Justify each string in the values of head and tail, so the
+                # strings will right align when head and tail are stacked
+                # vertically.
+                head, tail = _justify(head, tail)
+            elif is_truncated or not (
+                len(", ".join(head)) < display_width
+                and len(", ".join(tail)) < display_width
+            ):
+                # Each string in head and tail should align with each other
+                max_length = max(best_len(head), best_len(tail))
+                head = [x.rjust(max_length) for x in head]
+                tail = [x.rjust(max_length) for x in tail]
+            # If we are not truncated and we are only a single
+            # line, then don't justify
+
+        if line_break_each_value:
+            # Now head and tail are of type List[Tuple[str]]. Below we
+            # convert them into List[str], so there will be one string per
+            # value. Also truncate items horizontally if wider than
+            # max_space
+            max_space = display_width - len(space2)
+            value = tail[0]
+            max_items = 1
+            # Find the largest item count for which the first tail value
+            # still fits; that count is then applied to every value.
+            for num_items in reversed(range(1, len(value) + 1)):
+                pprinted_seq = _pprint_seq(value, max_seq_items=num_items)
+                if len(pprinted_seq) < max_space:
+                    max_items = num_items
+                    break
+            head = [_pprint_seq(x, max_seq_items=max_items) for x in head]
+            tail = [_pprint_seq(x, max_seq_items=max_items) for x in tail]
+
+        # ``summary`` collects the finished lines, ``line`` the current one.
+        summary = ""
+        line = space2
+
+        for head_value in head:
+            word = head_value + sep + " "
+            summary, line = _extend_line(summary, line, word, display_width, space2)
+
+        if is_truncated:
+            # remove trailing space of last line
+            summary += line.rstrip() + space2 + "..."
+            line = space2
+
+        for tail_item in tail[:-1]:
+            word = tail_item + sep + " "
+            summary, line = _extend_line(summary, line, word, display_width, space2)
+
+        # last value: no sep added + 1 space of width used for trailing ','
+        summary, line = _extend_line(summary, line, tail[-1], display_width - 2, space2)
+        summary += line
+
+        # close is still ', ' here
+        # Now we want to include the ']', but not the trailing space.
+        close = "]" + close.rstrip(" ")
+        summary += close
+
+        if len(summary) > (display_width) or line_break_each_value:
+            summary += space1
+        else:  # one row
+            summary += " "
+
+        # remove initial space
+        summary = "[" + summary[len(space2) :]
+
+    return summary
+
+
+def _justify(
+    head: list[Sequence[str]], tail: list[Sequence[str]]
+) -> tuple[list[tuple[str, ...]], list[tuple[str, ...]]]:
+    """
+    Right-align the items of head and tail position by position.
+
+    Parameters
+    ----------
+    head : list-like of list-likes of strings
+    tail : list-like of list-likes of strings
+
+    Returns
+    -------
+    tuple of list of tuples of strings
+        Same as head and tail, but every item is padded on the left to the
+        length of the longest item found at its position.
+
+    Examples
+    --------
+    >>> _justify([["a", "b"]], [["abc", "abcd"]])
+    ([('  a', '   b')], [('abc', 'abcd')])
+    """
+    rows = head + tail
+
+    # Per position, the length of the longest string over all rows.
+    widths = [0] * len(rows[0])
+    for row in rows:
+        row_lengths = [len(item) for item in row]
+        widths = [
+            max(current, candidate)
+            for current, candidate in zip(widths, row_lengths, strict=True)
+        ]
+
+    def _right_align(row: Sequence[str]) -> tuple[str, ...]:
+        return tuple(
+            item.rjust(width) for item, width in zip(row, widths, strict=True)
+        )
+
+    aligned_head = [_right_align(row) for row in head]
+    aligned_tail = [_right_align(row) for row in tail]
+    return aligned_head, aligned_tail
+
+
+class PrettyDict(dict[_KT, _VT]):
+    """Dict whose ``__repr__`` is produced by ``pprint_thing``."""
+
+    def __repr__(self) -> str:
+        rendered = pprint_thing(self)
+        return rendered
+
+
+class _TextAdjustment:
+    """Text measuring and justification based on the plain ``len`` of strings."""
+
+    def __init__(self) -> None:
+        self.encoding = get_option("display.encoding")
+
+    def len(self, text: str) -> int:
+        """Return ``len(text)``."""
+        return len(text)
+
+    def justify(self, texts: Any, max_len: int, mode: str = "right") -> list[str]:
+        """
+        Justify every string of ``texts`` to ``max_len``.
+
+        ``mode`` "left" uses ljust, "center" uses center, anything else rjust.
+        """
+        method_name = {"left": "ljust", "center": "center"}.get(mode, "rjust")
+        return [getattr(text, method_name)(max_len) for text in texts]
+
+    def adjoin(self, space: int, *lists: Any, **kwargs: Any) -> str:
+        """Call ``adjoin`` with this object's ``len`` and ``justify``."""
+        return adjoin(space, *lists, strlen=self.len, justfunc=self.justify, **kwargs)
+
+
+class _EastAsianTextAdjustment(_TextAdjustment):
+    """Text adjustment that measures strings by their East Asian Width."""
+
+    def __init__(self) -> None:
+        super().__init__()
+        ambiguous_as_wide = get_option("display.unicode.ambiguous_as_wide")
+        self.ambiguous_width = 2 if ambiguous_as_wide else 1
+
+        # Definition of East Asian Width
+        # https://unicode.org/reports/tr11/
+        # Ambiguous width can be changed by option
+        self._EAW_MAP = {"Na": 1, "N": 1, "W": 2, "F": 2, "H": 1}
+
+    def len(self, text: str) -> int:
+        """
+        Return the display width of ``text`` based on East Asian Width.
+
+        Characters whose category is not in the width map count as
+        ``ambiguous_width``; a non-str ``text`` is measured with plain ``len``.
+        """
+        if not isinstance(text, str):
+            return len(text)
+
+        width = 0
+        for char in text:
+            width += self._EAW_MAP.get(east_asian_width(char), self.ambiguous_width)
+        return width
+
+    def justify(
+        self, texts: Iterable[str], max_len: int, mode: str = "right"
+    ) -> list[str]:
+        """Justify ``texts`` so that their display width becomes ``max_len``."""
+
+        # re-calculate padding space per str considering East Asian Width
+        def _target_width(text: str) -> int:
+            return max_len - self.len(text) + len(text)
+
+        method_name = {"left": "ljust", "center": "center"}.get(mode, "rjust")
+        return [getattr(text, method_name)(_target_width(text)) for text in texts]
+
+
+def get_adjustment() -> _TextAdjustment:
+    """Return the adjustment selected by ``display.unicode.east_asian_width``."""
+    if get_option("display.unicode.east_asian_width"):
+        return _EastAsianTextAdjustment()
+    return _TextAdjustment()
@@ -0,0 +1,207 @@
+"""
+Module for formatting output data in console (to string).
+"""
+
+from __future__ import annotations
+
+from shutil import get_terminal_size
+from typing import TYPE_CHECKING
+
+import numpy as np
+
+from pandas.io.formats.printing import pprint_thing
+
+if TYPE_CHECKING:
+    from collections.abc import Iterable
+
+    from pandas.io.formats.format import DataFrameFormatter
+
+
+class StringFormatter:
+    """
+    Formatter for string representation of a dataframe.
+
+    Parameters
+    ----------
+    fmt : DataFrameFormatter
+        Formatter that supplies the frame, the formatted string columns, the
+        truncation state and the text adjustment helper (``fmt.adj``).
+    line_width : int, optional
+        Width used to wrap or fit the output. With ``None`` the columns are
+        joined as they are, without wrapping.
+    """
+
+    def __init__(self, fmt: DataFrameFormatter, line_width: int | None = None) -> None:
+        self.fmt = fmt
+        self.adj = fmt.adj
+        self.frame = fmt.frame
+        self.line_width = line_width
+
+    def to_string(self) -> str:
+        """
+        Return the string representation, followed by the dimensions info
+        when ``fmt.should_show_dimensions`` is set.
+        """
+        text = self._get_string_representation()
+        if self.fmt.should_show_dimensions:
+            text = f"{text}{self.fmt.dimensions_info}"
+        return text
+
+    def _get_strcols(self) -> list[list[str]]:
+        """
+        Return the formatted string columns, with dot separators inserted
+        when the frame is truncated.
+        """
+        strcols = self.fmt.get_strcols()
+        if self.fmt.is_truncated:
+            strcols = self._insert_dot_separators(strcols)
+        return strcols
+
+    def _get_string_representation(self) -> str:
+        """
+        Build the text for the frame, choosing between the empty-frame
+        summary, a plain join, a multi-line wrap, or a terminal-width fit.
+        """
+        if self.fmt.frame.empty:
+            return self._empty_info_line
+
+        strcols = self._get_strcols()
+
+        if self.line_width is None:
+            # no need to wrap around just print the whole frame
+            return self.adj.adjoin(1, *strcols)
+
+        if self._need_to_wrap_around:
+            return self._join_multiline(strcols)
+
+        return self._fit_strcols_to_terminal_width(strcols)
+
+    @property
+    def _empty_info_line(self) -> str:
+        """Three-line summary (type name, columns, index) used for empty frames."""
+        return (
+            f"Empty {type(self.frame).__name__}\n"
+            f"Columns: {pprint_thing(self.frame.columns)}\n"
+            f"Index: {pprint_thing(self.frame.index)}"
+        )
+
+    @property
+    def _need_to_wrap_around(self) -> bool:
+        """True when ``fmt.max_cols`` is ``None`` or greater than zero."""
+        return bool(self.fmt.max_cols is None or self.fmt.max_cols > 0)
+
+    def _insert_dot_separators(self, strcols: list[list[str]]) -> list[list[str]]:
+        """
+        Insert the horizontal and/or vertical dot separators that mark where
+        the frame was truncated.
+        """
+        str_index = self.fmt._get_formatted_index(self.fmt.tr_frame)
+        index_length = len(str_index)
+
+        if self.fmt.is_truncated_horizontally:
+            strcols = self._insert_dot_separator_horizontal(strcols, index_length)
+
+        if self.fmt.is_truncated_vertically:
+            strcols = self._insert_dot_separator_vertical(strcols, index_length)
+
+        return strcols
+
+    @property
+    def _adjusted_tr_col_num(self) -> int:
+        """
+        Position of the truncation column within ``strcols``: ``fmt.tr_col_num``,
+        shifted by one when an index column is shown.
+        """
+        return self.fmt.tr_col_num + 1 if self.fmt.index else self.fmt.tr_col_num
+
+    def _insert_dot_separator_horizontal(
+        self, strcols: list[list[str]], index_length: int
+    ) -> list[list[str]]:
+        """
+        Insert a column of ``" ..."`` cells at the truncation position.
+
+        ``strcols`` is modified in place and returned.
+        """
+        strcols.insert(self._adjusted_tr_col_num, [" ..."] * index_length)
+        return strcols
+
+    def _insert_dot_separator_vertical(
+        self, strcols: list[list[str]], index_length: int
+    ) -> list[list[str]]:
+        """
+        Insert a row of dots into every column at the vertical truncation
+        position.
+
+        The inner lists of ``strcols`` are modified in place.
+        """
+        # entries of the formatted index that are not data rows
+        n_header_rows = index_length - len(self.fmt.tr_frame)
+        row_num = self.fmt.tr_row_num
+        for ix, col in enumerate(strcols):
+            cwidth = self.adj.len(col[row_num])
+
+            if self.fmt.is_truncated_horizontally:
+                is_dot_col = ix == self._adjusted_tr_col_num
+            else:
+                is_dot_col = False
+
+            # narrow columns only get two dots
+            if cwidth > 3 or is_dot_col:
+                dots = "..."
+            else:
+                dots = ".."
+
+            if ix == 0 and self.fmt.index:
+                dot_mode = "left"
+            elif is_dot_col:
+                # same width as the " ..." cells of the horizontal separator
+                cwidth = 4
+                dot_mode = "right"
+            else:
+                dot_mode = "right"
+
+            dot_str = self.adj.justify([dots], cwidth, mode=dot_mode)[0]
+            col.insert(row_num + n_header_rows, dot_str)
+        return strcols
+
+    def _join_multiline(self, strcols_input: Iterable[list[str]]) -> str:
+        """
+        Join the columns into several chunks that each fit ``line_width``.
+
+        The index column, when shown, is repeated in front of every chunk.
+        Every chunk except the last gets a trailing column whose first cell
+        is a backslash. Chunks are separated by an empty line.
+        """
+        lwidth = self.line_width
+        # only reached with a line width set; checked before it is used
+        assert lwidth is not None
+        adjoin_width = 1
+        strcols = list(strcols_input)
+
+        if self.fmt.index:
+            idx = strcols.pop(0)
+            # the index is printed in every chunk, so reserve its width
+            lwidth -= np.array([self.adj.len(x) for x in idx]).max() + adjoin_width
+
+        col_widths = [
+            np.array([self.adj.len(x) for x in col]).max() if len(col) > 0 else 0
+            for col in strcols
+        ]
+
+        col_bins = _binify(col_widths, lwidth)
+        nbins = len(col_bins)
+
+        str_lst = []
+        start = 0
+        for i, end in enumerate(col_bins):
+            row = strcols[start:end]
+            if self.fmt.index:
+                row.insert(0, idx)
+            if nbins > 1:
+                nrows = len(row[-1])
+                if end <= len(strcols) and i < nbins - 1:
+                    row.append([" \\"] + ["  "] * (nrows - 1))
+                else:
+                    row.append([" "] * nrows)
+            str_lst.append(self.adj.adjoin(adjoin_width, *row))
+            start = end
+        return "\n\n".join(str_lst)
+
+    def _fit_strcols_to_terminal_width(self, strcols: list[list[str]]) -> str:
+        """
+        Reduce the number of shown columns until the output fits the terminal.
+
+        Sets ``fmt.max_cols_fitted``, asks ``fmt`` to truncate again and
+        returns the joined text of the re-formatted columns.
+        """
+        from pandas import Series
+
+        lines = self.adj.adjoin(1, *strcols).split("\n")
+        max_len = Series(lines).str.len().max()
+        # plus truncate dot col
+        width, _ = get_terminal_size()
+        dif = max_len - width
+        # '+ 1' to avoid too wide repr (GH PR #17023)
+        adj_dif = dif + 1
+        col_lens = Series([Series(ele).str.len().max() for ele in strcols])
+        n_cols = len(col_lens)
+        # drop the middle column from the tally until the excess is gone
+        while adj_dif > 0 and n_cols > 1:
+            mid = round(n_cols / 2)
+            mid_ix = col_lens.index[mid]
+            col_len = col_lens[mid_ix]
+            # adjoin adds one
+            adj_dif -= col_len + 1
+            col_lens = col_lens.drop(mid_ix)
+            n_cols = len(col_lens)
+
+        # subtract index column
+        max_cols_fitted = n_cols - self.fmt.index
+        # GH-21180. Ensure that we print at least two.
+        max_cols_fitted = max(max_cols_fitted, 2)
+        self.fmt.max_cols_fitted = max_cols_fitted
+
+        # Call again _truncate to cut frame appropriately
+        # and then generate string representation
+        self.fmt.truncate()
+        strcols = self._get_strcols()
+        return self.adj.adjoin(1, *strcols)
+
+
+def _binify(cols: list[int], line_width: int) -> list[int]:
+    """
+    Group consecutive column widths into chunks that fit ``line_width``.
+
+    Returns the exclusive end position of every chunk; the last entry is
+    always ``len(cols)``. The first column never starts a new chunk.
+    """
+    separator = 1
+    last_pos = len(cols) - 1
+    breakpoints = []
+    running = 0
+    for pos, width in enumerate(cols):
+        padded = width + separator
+        running += padded
+        # the final column needs one spare character, any other column two
+        spare = 1 if pos == last_pos else 2
+        if pos > 0 and running + spare > line_width:
+            breakpoints.append(pos)
+            running = padded
+
+    breakpoints.append(len(cols))
+    return breakpoints
@@ -0,0 +1,16 @@
+{# Update the html_style/table_structure.html documentation too #}
+{# When `doctype_html` is truthy the style and table sub-templates are wrapped
+   in a complete HTML document; otherwise only the two includes are emitted.
+   The style include is skipped when `exclude_styles` is set.
+   (The trailing "-" keeps this comment from adding a line to the output.) -#}
+{% if doctype_html %}
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="{{encoding}}">
+{% if not exclude_styles %}{% include html_style_tpl %}{% endif %}
+</head>
+<body>
+{% include html_table_tpl %}
+</body>
+</html>
+{% elif not doctype_html %}
+{% if not exclude_styles %}{% include html_style_tpl %}{% endif %}
+{% include html_table_tpl %}
+{% endif %}
@@ -0,0 +1,26 @@
+{# Renders the <style> element. First one CSS rule per entry of `table_styles`
+   (selector prefixed with #T_<uuid>), then one rule per entry of `cellstyle`,
+   `cellstyle_index` and `cellstyle_columns`, whose selectors are joined with
+   ", ". The blocks are extension points for child templates.
+   (The trailing "-" keeps this comment from adding a line to the output.) -#}
+{%- block before_style -%}{%- endblock before_style -%}
+{% block style %}
+<style type="text/css">
+{% block table_styles %}
+{% for s in table_styles %}
+#T_{{uuid}} {{s.selector}} {
+{% for p,val in s.props %}
+  {{p}}: {{val}};
+{% endfor %}
+}
+{% endfor %}
+{% endblock table_styles %}
+{% block before_cellstyle %}{% endblock before_cellstyle %}
+{% block cellstyle %}
+{% for cs in [cellstyle, cellstyle_index, cellstyle_columns] %}
+{% for s in cs %}
+{% for selector in s.selectors %}{% if not loop.first %}, {% endif %}#T_{{uuid}}_{{selector}}{% endfor %} {
+{% for p,val in s.props %}
+  {{p}}: {{val}};
+{% endfor %}
+}
+{% endfor %}
+{% endfor %}
+{% endblock cellstyle %}
+</style>
+{% endblock style %}
@@ -0,0 +1,63 @@
+{# Renders the <table> element: an optional caption (the string itself, or the
+   first item when `caption` is a sequence), a <thead> built from `head` and a
+   <tbody> built from `body`. Cells whose `is_visible` is False are skipped.
+   With `exclude_styles` the table id and the per-cell id/class attributes are
+   left out.
+   (The trailing "-" keeps this comment from adding a line to the output.) -#}
+{% block before_table %}{% endblock before_table %}
+{% block table %}
+{% if exclude_styles %}
+<table>
+{% else %}
+<table id="T_{{uuid}}"{% if table_attributes %} {{table_attributes}}{% endif %}>
+{% endif %}
+{% block caption %}
+{% if caption and caption is string %}
+  <caption>{{caption}}</caption>
+{% elif caption and caption is sequence %}
+  <caption>{{caption[0]}}</caption>
+{% endif %}
+{% endblock caption %}
+{% block thead %}
+  <thead>
+{% block before_head_rows %}{% endblock %}
+{% for r in head %}
+{% block head_tr scoped %}
+    <tr>
+{% if exclude_styles %}
+{% for c in r %}
+{% if c.is_visible != False %}
+      <{{c.type}} {{c.attributes}}>{{c.display_value}}</{{c.type}}>
+{% endif %}
+{% endfor %}
+{% else %}
+{% for c in r %}
+{% if c.is_visible != False %}
+      <{{c.type}} {%- if c.id is defined %} id="T_{{uuid}}_{{c.id}}" {%- endif %} class="{{c.class}}" {{c.attributes}}>{{c.display_value}}</{{c.type}}>
+{% endif %}
+{% endfor %}
+{% endif %}
+    </tr>
+{% endblock head_tr %}
+{% endfor %}
+{% block after_head_rows %}{% endblock %}
+  </thead>
+{% endblock thead %}
+{% block tbody %}
+  <tbody>
+{% block before_rows %}{% endblock before_rows %}
+{% for r in body %}
+{% block tr scoped %}
+    <tr>
+{% if exclude_styles %}
+{% for c in r %}{% if c.is_visible != False %}
+      <{{c.type}} {{c.attributes}}>{{c.display_value}}</{{c.type}}>
+{% endif %}{% endfor %}
+{% else %}
+{% for c in r %}{% if c.is_visible != False %}
+      <{{c.type}} {%- if c.id is defined %} id="T_{{uuid}}_{{c.id}}" {%- endif %} class="{{c.class}}" {{c.attributes}}>{{c.display_value}}</{{c.type}}>
+{% endif %}{% endfor %}
+{% endif %}
+    </tr>
+{% endblock tr %}
+{% endfor %}
+{% block after_rows %}{% endblock after_rows %}
+  </tbody>
+{% endblock tbody %}
+</table>
+{% endblock table %}
+{% block after_table %}{% endblock after_table %}
@@ -0,0 +1,5 @@
+{# Dispatcher: includes the longtable template when `environment` equals
+   "longtable", and the regular table template in every other case.
+   (The trailing "-" keeps this comment from adding a line to the output.) -#}
+{% if environment == "longtable" %}
+{% include "latex_longtable.tpl" %}
+{% else %}
+{% include "latex_table.tpl" %}
+{% endif %}
@@ -0,0 +1,82 @@
+{# Renders a LaTeX longtable. Order of output: optional position and column
+   format, remaining table-style commands, caption/label, the first-page head
+   (\endfirsthead), the repeated head (\endhead), the "Continued on next page"
+   foot (\endfoot), the last foot (\endlastfoot) and finally the body rows with
+   their optional clines.
+   NOTE(review): header cells are passed to parse_header without `convert_css`
+   here, and body `th` cells without the 4th and 5th arguments, whereas the
+   regular table template passes them - confirm whether this is intended.
+   (The trailing "-" keeps this comment from adding a line to the output.) -#}
+\begin{longtable}
+{%- set position = parse_table(table_styles, 'position') %}
+{%- if position is not none %}
+[{{position}}]
+{%- endif %}
+{%- set column_format = parse_table(table_styles, 'column_format') %}
+{% raw %}{{% endraw %}{{column_format}}{% raw %}}{% endraw %}
+
+{% for style in table_styles %}
+{% if style['selector'] not in ['position', 'position_float', 'caption', 'toprule', 'midrule', 'bottomrule', 'column_format', 'label'] %}
+\{{style['selector']}}{{parse_table(table_styles, style['selector'])}}
+{% endif %}
+{% endfor %}
+{% if caption and caption is string %}
+\caption{% raw %}{{% endraw %}{{caption}}{% raw %}}{% endraw %}
+{%- set label = parse_table(table_styles, 'label') %}
+{%- if label is not none %}
+ \label{{label}}
+{%- endif %} \\
+{% elif caption and caption is sequence %}
+\caption[{{caption[1]}}]{% raw %}{{% endraw %}{{caption[0]}}{% raw %}}{% endraw %}
+{%- set label = parse_table(table_styles, 'label') %}
+{%- if label is not none %}
+ \label{{label}}
+{%- endif %} \\
+{% else %}
+{%- set label = parse_table(table_styles, 'label') %}
+{%- if label is not none %}
+\label{{label}} \\
+{% endif %}
+{% endif %}
+{% set toprule = parse_table(table_styles, 'toprule') %}
+{% if toprule is not none %}
+\{{toprule}}
+{% endif %}
+{% for row in head %}
+{% for c in row %}{%- if not loop.first %} & {% endif %}{{parse_header(c, multirow_align, multicol_align, siunitx)}}{% endfor %} \\
+{% endfor %}
+{% set midrule = parse_table(table_styles, 'midrule') %}
+{% if midrule is not none %}
+\{{midrule}}
+{% endif %}
+\endfirsthead
+{% if caption and caption is string %}
+\caption[]{% raw %}{{% endraw %}{{caption}}{% raw %}}{% endraw %} \\
+{% elif caption and caption is sequence %}
+\caption[]{% raw %}{{% endraw %}{{caption[0]}}{% raw %}}{% endraw %} \\
+{% endif %}
+{% if toprule is not none %}
+\{{toprule}}
+{% endif %}
+{% for row in head %}
+{% for c in row %}{%- if not loop.first %} & {% endif %}{{parse_header(c, multirow_align, multicol_align, siunitx)}}{% endfor %} \\
+{% endfor %}
+{% if midrule is not none %}
+\{{midrule}}
+{% endif %}
+\endhead
+{% if midrule is not none %}
+\{{midrule}}
+{% endif %}
+\multicolumn{% raw %}{{% endraw %}{{body[0]|length}}{% raw %}}{% endraw %}{r}{Continued on next page} \\
+{% if midrule is not none %}
+\{{midrule}}
+{% endif %}
+\endfoot
+{% set bottomrule = parse_table(table_styles, 'bottomrule') %}
+{% if bottomrule is not none %}
+\{{bottomrule}}
+{% endif %}
+\endlastfoot
+{% for row in body %}
+{% for c in row %}{% if not loop.first %} & {% endif %}
+  {%- if c.type == 'th' %}{{parse_header(c, multirow_align, multicol_align)}}{% else %}{{parse_cell(c.cellstyle, c.display_value, convert_css)}}{% endif %}
+{%- endfor %} \\
+{% if clines and clines[loop.index] | length > 0 %}
+  {%- for cline in clines[loop.index] %}{% if not loop.first %} {% endif %}{{ cline }}{% endfor %}
+
+{% endif %}
+{% endfor %}
+\end{longtable}
+{% raw %}{% endraw %}
@@ -0,0 +1,57 @@
+{# Renders a LaTeX tabular. It is wrapped in a float environment (the value of
+   `environment`, or "table" when that is empty) only when `environment` is set
+   or parse_wrap(table_styles, caption) is truthy; position, position_float,
+   caption and the remaining table-style commands are emitted inside that
+   wrapper. The tabular itself holds toprule, head rows, midrule, body rows
+   with optional clines, and bottomrule.
+   (The trailing "-" keeps this comment from adding a line to the output.) -#}
+{% if environment or parse_wrap(table_styles, caption) %}
+\begin{% raw %}{{% endraw %}{{environment if environment else "table"}}{% raw %}}{% endraw %}
+{%- set position = parse_table(table_styles, 'position') %}
+{%- if position is not none %}
+[{{position}}]
+{%- endif %}
+
+{% set position_float = parse_table(table_styles, 'position_float') %}
+{% if position_float is not none%}
+\{{position_float}}
+{% endif %}
+{% if caption and caption is string %}
+\caption{% raw %}{{% endraw %}{{caption}}{% raw %}}{% endraw %}
+
+{% elif caption and caption is sequence %}
+\caption[{{caption[1]}}]{% raw %}{{% endraw %}{{caption[0]}}{% raw %}}{% endraw %}
+
+{% endif %}
+{% for style in table_styles %}
+{% if style['selector'] not in ['position', 'position_float', 'caption', 'toprule', 'midrule', 'bottomrule', 'column_format'] %}
+\{{style['selector']}}{{parse_table(table_styles, style['selector'])}}
+{% endif %}
+{% endfor %}
+{% endif %}
+\begin{tabular}
+{%- set column_format = parse_table(table_styles, 'column_format') %}
+{% raw %}{{% endraw %}{{column_format}}{% raw %}}{% endraw %}
+
+{% set toprule = parse_table(table_styles, 'toprule') %}
+{% if toprule is not none %}
+\{{toprule}}
+{% endif %}
+{% for row in head %}
+{% for c in row %}{%- if not loop.first %} & {% endif %}{{parse_header(c, multirow_align, multicol_align, siunitx, convert_css)}}{% endfor %} \\
+{% endfor %}
+{% set midrule = parse_table(table_styles, 'midrule') %}
+{% if midrule is not none %}
+\{{midrule}}
+{% endif %}
+{% for row in body %}
+{% for c in row %}{% if not loop.first %} & {% endif %}
+  {%- if c.type == 'th' %}{{parse_header(c, multirow_align, multicol_align, False, convert_css)}}{% else %}{{parse_cell(c.cellstyle, c.display_value, convert_css)}}{% endif %}
+{%- endfor %} \\
+{% if clines and clines[loop.index] | length > 0 %}
+  {%- for cline in clines[loop.index] %}{% if not loop.first %} {% endif %}{{ cline }}{% endfor %}
+
+{% endif %}
+{% endfor %}
+{% set bottomrule = parse_table(table_styles, 'bottomrule') %}
+{% if bottomrule is not none %}
+\{{bottomrule}}
+{% endif %}
+\end{tabular}
+{% if environment or parse_wrap(table_styles, caption) %}
+\end{% raw %}{{% endraw %}{{environment if environment else "table"}}{% raw %}}{% endraw %}
+
+{% endif %}
@@ -0,0 +1,12 @@
+{# Plain-text rendering: for every row of `head` and then of `body`, the
+   `display_value` of each visible cell is written, with `delimiter` after
+   every cell that is not the last one of its row.
+   (The trailing "-" keeps this comment from adding a line to the output.) -#}
+{% for r in head %}
+{% for c in r %}{% if c["is_visible"] %}
+{{ c["display_value"] }}{% if not loop.last %}{{ delimiter }}{% endif %}
+{% endif %}{% endfor %}
+
+{% endfor %}
+{% for r in body %}
+{% for c in r %}{% if c["is_visible"] %}
+{{ c["display_value"] }}{% if not loop.last %}{{ delimiter }}{% endif %}
+{% endif %}{% endfor %}
+
+{% endfor %}
@@ -0,0 +1,12 @@
+{# Typst rendering: a #table(...) call whose column count is the length of
+   the first head row. Every cell of `head` and `body` becomes one bracketed
+   entry; cells that are not visible are written as empty brackets.
+   (The trailing "-" keeps this comment from adding a line to the output.) -#}
+#table(
+  columns: {{ head[0] | length }},
+{% for r in head %}
+  {% for c in r %}[{% if c["is_visible"] %}{{ c["display_value"] }}{% endif %}],{% if not loop.last %} {% endif%}{% endfor %}
+
+{% endfor %}
+
+{% for r in body %}
+  {% for c in r %}[{% if c["is_visible"] %}{{ c["display_value"] }}{% endif %}],{% if not loop.last %} {% endif%}{% endfor %}
+
+{% endfor %}
+)
@@ -0,0 +1,566 @@
+"""
+:mod:`pandas.io.formats.xml` is a module for formatting data in XML.
+"""
+
+from __future__ import annotations
+
+import codecs
+import io
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    final,
+)
+
+from pandas.errors import AbstractMethodError
+from pandas.util._decorators import cache_readonly
+
+from pandas.core.dtypes.common import is_list_like
+from pandas.core.dtypes.missing import isna
+
+from pandas.io.common import get_handle
+from pandas.io.xml import get_data_from_filepath
+
+if TYPE_CHECKING:
+    from pandas._typing import (
+        CompressionOptions,
+        FilePath,
+        ReadBuffer,
+        StorageOptions,
+        WriteBuffer,
+    )
+
+    from pandas import DataFrame
+
+
+class _BaseXMLFormatter:
+    """
+    Base class for formatting data in XML.
+
+    Parameters
+    ----------
+    frame : DataFrame
+        The data to write.
+
+    path_or_buffer : str or file-like
+        Path or file-like object the document is written to. When ``None``
+        the document is returned as a string by ``write_output``.
+
+    index : bool
+        Whether to include index in xml document.
+
+    root_name : str
+        Name for root of xml document. Default is 'data'.
+
+    row_name : str
+        Name for row elements of xml document. Default is 'row'.
+
+    na_rep : str
+        Missing data representation.
+
+    attr_cols : list
+        List of columns to write as attributes in row element.
+
+    elem_cols : list
+        List of columns to write as children in row element.
+
+    namespaces : dict
+        The namespaces to define in XML document as dicts with key
+        being namespace and value the URI.
+
+    prefix : str
+        The prefix for each element in XML document including root.
+
+    encoding : str
+        Encoding of xml object or document.
+
+    xml_declaration : bool
+        Whether to include xml declaration at top line item in xml.
+
+    pretty_print : bool
+        Whether to write xml document with line breaks and indentation.
+
+    stylesheet : str or file-like
+        A URL, file, file-like object, or a raw string containing XSLT.
+
+    compression : str or dict, default 'infer'
+        For on-the-fly compression of the output data. If 'infer' and 'path_or_buffer'
+        is path-like, then detect compression from the following extensions: '.gz',
+        '.bz2', '.zip', '.xz', '.zst', '.tar', '.tar.gz', '.tar.xz' or '.tar.bz2'
+        (otherwise no compression).
+        Set to ``None`` for no compression.
+        Can also be a dict with key ``'method'`` set
+        to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'xz'``, ``'tar'``}
+        and other key-value pairs are forwarded to
+        ``zipfile.ZipFile``, ``gzip.GzipFile``,
+        ``bz2.BZ2File``, ``zstandard.ZstdCompressor``, ``lzma.LZMAFile`` or
+        ``tarfile.TarFile``, respectively.
+        As an example, the following could be passed for faster compression and to
+        create a reproducible gzip archive:
+        ``compression={'method': 'gzip', 'compresslevel': 1, 'mtime': 1}``.
+
+    storage_options : dict, optional
+        Extra options that make sense for a particular storage connection, e.g.
+        host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
+        are forwarded to ``urllib.request.Request`` as header options. For other
+        URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are
+        forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more
+        details, and for more examples on storage options refer `here
+        <https://pandas.pydata.org/docs/user_guide/io.html?
+        highlight=storage_options#reading-writing-remote-files>`_.
+
+    See Also
+    --------
+    pandas.io.formats.xml.EtreeXMLFormatter
+    pandas.io.formats.xml.LxmlXMLFormatter
+
+    """
+
+    def __init__(
+        self,
+        frame: DataFrame,
+        path_or_buffer: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None,
+        index: bool = True,
+        root_name: str | None = "data",
+        row_name: str | None = "row",
+        na_rep: str | None = None,
+        attr_cols: list[str] | None = None,
+        elem_cols: list[str] | None = None,
+        namespaces: dict[str | None, str] | None = None,
+        prefix: str | None = None,
+        encoding: str = "utf-8",
+        xml_declaration: bool | None = True,
+        pretty_print: bool | None = True,
+        stylesheet: FilePath | ReadBuffer[str] | ReadBuffer[bytes] | None = None,
+        compression: CompressionOptions = "infer",
+        storage_options: StorageOptions | None = None,
+    ) -> None:
+        self.frame = frame
+        self.path_or_buffer = path_or_buffer
+        self.index = index
+        self.root_name = root_name
+        self.row_name = row_name
+        self.na_rep = na_rep
+        self.attr_cols = attr_cols
+        self.elem_cols = elem_cols
+        self.namespaces = namespaces
+        self.prefix = prefix
+        self.encoding = encoding
+        self.xml_declaration = xml_declaration
+        self.pretty_print = pretty_print
+        self.stylesheet = stylesheet
+        self.compression: CompressionOptions = compression
+        self.storage_options = storage_options
+
+        # column labels before the index is (optionally) reset into columns
+        self.orig_cols = self.frame.columns.tolist()
+        self.frame_dicts = self._process_dataframe()
+
+        self._validate_columns()
+        self._validate_encoding()
+        self.prefix_uri = self._get_prefix_uri()
+        self._handle_indexes()
+
+    def _build_tree(self) -> bytes:
+        """
+        Build tree from  data.
+
+        This method initializes the root and builds attributes and elements
+        with optional namespaces.
+        """
+        raise AbstractMethodError(self)
+
+    @final
+    def _validate_columns(self) -> None:
+        """
+        Validate elem_cols and attr_cols.
+
+        This method will check if columns is list-like.
+
+        Raises
+        ------
+        TypeError
+            * If attr_cols or elem_cols is given and is not list-like.
+        """
+        if self.attr_cols and not is_list_like(self.attr_cols):
+            raise TypeError(
+                f"{type(self.attr_cols).__name__} is not a valid type for attr_cols"
+            )
+
+        if self.elem_cols and not is_list_like(self.elem_cols):
+            raise TypeError(
+                f"{type(self.elem_cols).__name__} is not a valid type for elem_cols"
+            )
+
+    @final
+    def _validate_encoding(self) -> None:
+        """
+        Validate encoding.
+
+        This method will check if encoding is among listed under codecs.
+
+        Raises
+        ------
+        LookupError
+            * If encoding is not available in codecs.
+        """
+
+        codecs.lookup(self.encoding)
+
+    @final
+    def _process_dataframe(self) -> dict[int | str, dict[str, Any]]:
+        """
+        Adjust Data Frame to fit xml output.
+
+        This method will adjust underlying data frame for xml output,
+        including optionally replacing missing values and including indexes.
+
+        Returns
+        -------
+        dict
+            One entry per row, as produced by ``to_dict(orient="index")``.
+        """
+
+        df = self.frame
+
+        if self.index:
+            df = df.reset_index()
+
+        if self.na_rep is not None:
+            df = df.fillna(self.na_rep)
+
+        return df.to_dict(orient="index")
+
+    @final
+    def _handle_indexes(self) -> None:
+        """
+        Handle indexes.
+
+        This method will add indexes into attr_cols or elem_cols.
+        """
+
+        if not self.index:
+            return
+
+        if not self.frame_dicts:
+            # An empty frame has no row to read the index names from; calling
+            # next() on the empty iterator below would raise StopIteration.
+            return
+
+        # every key of a row that was not an original column came from the index
+        first_key = next(iter(self.frame_dicts))
+        indexes: list[str] = [
+            x for x in self.frame_dicts[first_key].keys() if x not in self.orig_cols
+        ]
+
+        if self.attr_cols:
+            self.attr_cols = indexes + self.attr_cols
+
+        if self.elem_cols:
+            self.elem_cols = indexes + self.elem_cols
+
+    def _get_prefix_uri(self) -> str:
+        """
+        Get uri of namespace prefix.
+
+        This method retrieves corresponding URI to prefix in namespaces.
+
+        Raises
+        ------
+        KeyError
+            *If prefix is not included in namespace dict.
+        """
+
+        raise AbstractMethodError(self)
+
+    @final
+    def _other_namespaces(self) -> dict:
+        """
+        Define other namespaces.
+
+        This method will build dictionary of namespaces attributes
+        for root element, conditionally with optional namespaces and
+        prefix.
+        """
+
+        nmsp_dict: dict[str, str] = {}
+        if self.namespaces:
+            # prefix_uri[1:-1] drops the braces the subclasses put around the
+            # URI, so the namespace already used as prefix is left out
+            nmsp_dict = {
+                f"xmlns{p if p == '' else f':{p}'}": n
+                for p, n in self.namespaces.items()
+                if n != self.prefix_uri[1:-1]
+            }
+
+        return nmsp_dict
+
+    @final
+    def _build_attribs(self, d: dict[str, Any], elem_row: Any) -> Any:
+        """
+        Create attributes of row.
+
+        This method adds attributes using attr_cols to row element and
+        works with tuples for multindex or hierarchical columns.
+        Missing values are not written.
+
+        Raises
+        ------
+        KeyError
+            * If a name in attr_cols is not a key of the row.
+        """
+
+        if not self.attr_cols:
+            return elem_row
+
+        for col in self.attr_cols:
+            attr_name = self._get_flat_col_name(col)
+            try:
+                if not isna(d[col]):
+                    elem_row.attrib[attr_name] = str(d[col])
+            except KeyError as err:
+                raise KeyError(f"no valid column, {col}") from err
+        return elem_row
+
+    @final
+    def _get_flat_col_name(self, col: str | tuple) -> str:
+        """
+        Return the element or attribute name for a column label.
+
+        Tuple labels are joined with "_", or with nothing when one of the
+        levels is an empty string; the result is prefixed with prefix_uri.
+        """
+        flat_col = col
+        if isinstance(col, tuple):
+            flat_col = (
+                "".join([str(c) for c in col]).strip()
+                if "" in col
+                else "_".join([str(c) for c in col]).strip()
+            )
+        return f"{self.prefix_uri}{flat_col}"
+
+    @cache_readonly
+    def _sub_element_cls(self):
+        """Callable used to create child elements; provided by subclasses."""
+        raise AbstractMethodError(self)
+
+    @final
+    def _build_elems(self, d: dict[str, Any], elem_row: Any) -> None:
+        """
+        Create child elements of row.
+
+        This method adds child elements using elem_cols to row element and
+        works with tuples for multindex or hierarchical columns.
+        Missing values and empty strings give an element without text.
+
+        Raises
+        ------
+        KeyError
+            * If a name in elem_cols is not a key of the row.
+        """
+        sub_element_cls = self._sub_element_cls
+
+        if not self.elem_cols:
+            return
+
+        for col in self.elem_cols:
+            elem_name = self._get_flat_col_name(col)
+            try:
+                val = None if isna(d[col]) or d[col] == "" else str(d[col])
+                sub_element_cls(elem_row, elem_name).text = val
+            except KeyError as err:
+                raise KeyError(f"no valid column, {col}") from err
+
+    @final
+    def write_output(self) -> str | None:
+        """
+        Build the document and write it out.
+
+        Returns
+        -------
+        str or None
+            ``None`` when the document was written to ``path_or_buffer``,
+            otherwise the decoded document with trailing whitespace removed.
+        """
+        xml_doc = self._build_tree()
+
+        if self.path_or_buffer is not None:
+            with get_handle(
+                self.path_or_buffer,
+                "wb",
+                compression=self.compression,
+                storage_options=self.storage_options,
+                is_text=False,
+            ) as handles:
+                handles.handle.write(xml_doc)
+            return None
+
+        else:
+            return xml_doc.decode(self.encoding).rstrip()
+
+
+class EtreeXMLFormatter(_BaseXMLFormatter):
+    """
+    Class for formatting data in xml using Python standard library
+    modules: `xml.etree.ElementTree` and `xml.dom.minidom`.
+    """
+
+    def _build_tree(self) -> bytes:
+        """
+        Build the document with ``xml.etree.ElementTree`` and return it as bytes.
+
+        Raises
+        ------
+        ValueError
+            * If a stylesheet was given; this formatter does not support it.
+        """
+        from xml.etree.ElementTree import (
+            Element,
+            SubElement,
+            tostring,
+        )
+
+        self.root = Element(
+            f"{self.prefix_uri}{self.root_name}", attrib=self._other_namespaces()
+        )
+
+        for d in self.frame_dicts.values():
+            elem_row = SubElement(self.root, f"{self.prefix_uri}{self.row_name}")
+
+            if not self.attr_cols and not self.elem_cols:
+                # neither list was given: write every column as child element
+                self.elem_cols = list(d.keys())
+                self._build_elems(d, elem_row)
+
+            else:
+                elem_row = self._build_attribs(d, elem_row)
+                self._build_elems(d, elem_row)
+
+        self.out_xml = tostring(
+            self.root,
+            method="xml",
+            encoding=self.encoding,
+            xml_declaration=self.xml_declaration,
+        )
+
+        if self.pretty_print:
+            self.out_xml = self._prettify_tree()
+
+        if self.stylesheet is not None:
+            raise ValueError(
+                "To use stylesheet, you need lxml installed and selected as parser."
+            )
+
+        return self.out_xml
+
+    def _get_prefix_uri(self) -> str:
+        """
+        Register the namespaces and return the URI of the prefix in braces.
+
+        Returns an empty string when no namespaces are given, or when neither
+        a prefix nor a default ('') namespace is available.
+
+        Raises
+        ------
+        KeyError
+            * If prefix is not included in namespaces.
+        """
+        from xml.etree.ElementTree import register_namespace
+
+        uri = ""
+        if self.namespaces:
+            for p, n in self.namespaces.items():
+                if isinstance(p, str) and isinstance(n, str):
+                    register_namespace(p, n)
+            if self.prefix:
+                try:
+                    uri = f"{{{self.namespaces[self.prefix]}}}"
+                except KeyError as err:
+                    raise KeyError(
+                        f"{self.prefix} is not included in namespaces"
+                    ) from err
+            elif "" in self.namespaces:
+                uri = f"{{{self.namespaces['']}}}"
+            else:
+                uri = ""
+
+        return uri
+
+    @cache_readonly
+    def _sub_element_cls(self):
+        """``xml.etree.ElementTree.SubElement``, imported on first use."""
+        from xml.etree.ElementTree import SubElement
+
+        return SubElement
+
+    def _prettify_tree(self) -> bytes:
+        """
+        Output tree for pretty print format.
+
+        This method will pretty print xml with line breaks and indentation.
+        The XML declaration is left out when ``xml_declaration`` is False.
+        """
+
+        from xml.dom.minidom import parseString
+
+        dom = parseString(self.out_xml)
+
+        if self.xml_declaration is not None and not self.xml_declaration:
+            # A minidom document always writes an XML declaration, so the
+            # root element is serialized on its own to honor the setting.
+            return dom.documentElement.toprettyxml(
+                indent="  ", encoding=self.encoding
+            )
+
+        return dom.toprettyxml(indent="  ", encoding=self.encoding)
+
+
+class LxmlXMLFormatter(_BaseXMLFormatter):
+    """
+    Class for formatting data in xml using the third-party module `lxml`,
+    with optional transformation of the output by an XSLT stylesheet.
+    """
+
+    def __init__(self, *args, **kwargs) -> None:
+        super().__init__(*args, **kwargs)
+
+        # must run after the base initializer: _get_prefix_uri, which it
+        # calls, still looks up the default namespace under the '' key
+        self._convert_empty_str_key()
+
+    def _build_tree(self) -> bytes:
+        """
+        Build tree from  data.
+
+        This method initializes the root and builds attributes and elements
+        with optional namespaces. When a stylesheet is given, the result of
+        the XSLT transformation is returned instead.
+        """
+        from lxml.etree import (
+            Element,
+            SubElement,
+            tostring,
+        )
+
+        self.root = Element(f"{self.prefix_uri}{self.root_name}", nsmap=self.namespaces)
+
+        for d in self.frame_dicts.values():
+            elem_row = SubElement(self.root, f"{self.prefix_uri}{self.row_name}")
+
+            if not self.attr_cols and not self.elem_cols:
+                # neither list was given: write every column as child element
+                self.elem_cols = list(d.keys())
+                self._build_elems(d, elem_row)
+
+            else:
+                elem_row = self._build_attribs(d, elem_row)
+                self._build_elems(d, elem_row)
+
+        self.out_xml = tostring(
+            self.root,
+            pretty_print=self.pretty_print,
+            method="xml",
+            encoding=self.encoding,
+            xml_declaration=self.xml_declaration,
+        )
+
+        if self.stylesheet is not None:
+            self.out_xml = self._transform_doc()
+
+        return self.out_xml
+
+    def _convert_empty_str_key(self) -> None:
+        """
+        Replace zero-length string in `namespaces`.
+
+        This method will replace '' with None to align to `lxml`
+        requirement that empty string prefixes are not allowed.
+        The replacement is done on a copy, so the dict passed in by the
+        caller is left untouched.
+        """
+
+        if self.namespaces and "" in self.namespaces.keys():
+            namespaces = dict(self.namespaces)
+            namespaces[None] = namespaces.pop("", "default")
+            self.namespaces = namespaces
+
+    def _get_prefix_uri(self) -> str:
+        """
+        Return the URI of the prefix in braces.
+
+        Returns an empty string when no namespaces are given, or when neither
+        a prefix nor a default ('') namespace is available.
+
+        Raises
+        ------
+        KeyError
+            * If prefix is not included in namespaces.
+        """
+        uri = ""
+        if self.namespaces:
+            if self.prefix:
+                try:
+                    uri = f"{{{self.namespaces[self.prefix]}}}"
+                except KeyError as err:
+                    raise KeyError(
+                        f"{self.prefix} is not included in namespaces"
+                    ) from err
+            elif "" in self.namespaces:
+                uri = f"{{{self.namespaces['']}}}"
+            else:
+                uri = ""
+
+        return uri
+
+    @cache_readonly
+    def _sub_element_cls(self):
+        """``lxml.etree.SubElement``, imported on first use."""
+        from lxml.etree import SubElement
+
+        return SubElement
+
+    def _transform_doc(self) -> bytes:
+        """
+        Parse stylesheet from file or buffer and run it.
+
+        This method will parse stylesheet object into tree for parsing
+        conditionally by its specific object type, then transforms
+        original tree with XSLT script.
+        """
+        from lxml.etree import (
+            XSLT,
+            XMLParser,
+            fromstring,
+            parse,
+        )
+
+        style_doc = self.stylesheet
+        assert style_doc is not None  # is ensured by caller
+
+        handle_data = get_data_from_filepath(
+            filepath_or_buffer=style_doc,
+            encoding=self.encoding,
+            compression=self.compression,
+            storage_options=self.storage_options,
+        )
+
+        with handle_data as xml_data:
+            curr_parser = XMLParser(encoding=self.encoding)
+
+            # in-memory text is encoded and parsed from bytes; anything else
+            # is handed to the parser as it is
+            if isinstance(xml_data, io.StringIO):
+                xsl_doc = fromstring(
+                    xml_data.getvalue().encode(self.encoding), parser=curr_parser
+                )
+            else:
+                xsl_doc = parse(xml_data, parser=curr_parser)
+
+        transformer = XSLT(xsl_doc)
+        new_doc = transformer(self.root)
+
+        return bytes(new_doc)
@@ -0,0 +1,155 @@
+from typing import (
+    Any,
+)
+
+from pandas.compat._optional import import_optional_dependency
+from pandas.util._decorators import set_module
+
+from pandas import DataFrame
+
+
+@set_module("pandas")
+def read_iceberg(
+    table_identifier: str,
+    catalog_name: str | None = None,
+    *,
+    catalog_properties: dict[str, Any] | None = None,
+    columns: list[str] | None = None,
+    row_filter: str | None = None,
+    case_sensitive: bool = True,
+    snapshot_id: int | None = None,
+    limit: int | None = None,
+    scan_properties: dict[str, Any] | None = None,
+) -> DataFrame:
+    """
+    Read an Apache Iceberg table into a pandas DataFrame.
+
+    .. versionadded:: 3.0.0
+
+    .. warning::
+
+       read_iceberg is experimental and may change without warning.
+
+    Parameters
+    ----------
+    table_identifier : str
+        Table identifier.
+    catalog_name : str, optional
+        The name of the catalog.
+    catalog_properties : dict of {str: str}, optional
+        The properties that are used next to the catalog configuration.
+        They are forwarded as keyword arguments when the catalog is loaded.
+    columns : list of str, optional
+        A list of strings representing the column names to return in the output
+        dataframe. By default all columns (``"*"``) are selected.
+    row_filter : str, optional
+        A string that describes the desired rows. By default no rows are
+        filtered out.
+    case_sensitive : bool, default True
+        If True column matching is case sensitive.
+    snapshot_id : int, optional
+        Snapshot ID to time travel to. By default the table will be scanned as of the
+        current snapshot ID.
+    limit : int, optional
+        An integer representing the number of rows to return in the scan result.
+        By default all matching rows will be fetched.
+    scan_properties : dict of {str: obj}, optional
+        Additional Table properties as a dictionary of string key value pairs to use
+        for this scan.
+
+    Returns
+    -------
+    DataFrame
+        DataFrame based on the Iceberg table.
+
+    See Also
+    --------
+    read_parquet : Read a Parquet file.
+
+    Examples
+    --------
+    >>> df = pd.read_iceberg(
+    ...     table_identifier="my_table",
+    ...     catalog_name="my_catalog",
+    ...     catalog_properties={"s3.secret-access-key": "my-secret"},
+    ...     row_filter="trip_distance >= 10.0",
+    ...     columns=["VendorID", "tpep_pickup_datetime"],
+    ... )  # doctest: +SKIP
+    """
+    catalog_mod = import_optional_dependency("pyiceberg.catalog")
+    expressions_mod = import_optional_dependency("pyiceberg.expressions")
+
+    catalog = catalog_mod.load_catalog(
+        catalog_name,
+        **({} if catalog_properties is None else catalog_properties),
+    )
+    iceberg_table = catalog.load_table(table_identifier)
+
+    # Resolve the "not given" defaults before starting the scan.
+    effective_filter = (
+        expressions_mod.AlwaysTrue() if row_filter is None else row_filter
+    )
+    selected_fields = ("*",) if columns is None else tuple(columns)
+    scan_options = {} if scan_properties is None else scan_properties
+
+    scan = iceberg_table.scan(
+        row_filter=effective_filter,
+        selected_fields=selected_fields,
+        case_sensitive=case_sensitive,
+        snapshot_id=snapshot_id,
+        options=scan_options,
+        limit=limit,
+    )
+    return scan.to_pandas()
+
+
+def to_iceberg(
+    df: DataFrame,
+    table_identifier: str,
+    catalog_name: str | None = None,
+    *,
+    catalog_properties: dict[str, Any] | None = None,
+    location: str | None = None,
+    append: bool = False,
+    snapshot_properties: dict[str, str] | None = None,
+) -> None:
+    """
+    Write a DataFrame to an Apache Iceberg table.
+
+    The table is created from the DataFrame's Arrow schema if it does not
+    exist yet.
+
+    .. versionadded:: 3.0.0
+
+    Parameters
+    ----------
+    df : DataFrame
+        The data to write. It is converted to a PyArrow table first.
+    table_identifier : str
+        Table identifier.
+    catalog_name : str, optional
+        The name of the catalog.
+    catalog_properties : dict of {str: str}, optional
+        The properties that are used next to the catalog configuration.
+    location : str, optional
+        Location for the table.
+    append : bool, default False
+        If ``True``, append data to the table, instead of replacing the content.
+    snapshot_properties : dict of {str: str}, optional
+        Custom properties to be added to the snapshot summary.
+
+    See Also
+    --------
+    read_iceberg : Read an Apache Iceberg table.
+    DataFrame.to_parquet : Write a DataFrame in Parquet format.
+    """
+    pa = import_optional_dependency("pyarrow")
+    catalog_mod = import_optional_dependency("pyiceberg.catalog")
+
+    catalog = catalog_mod.load_catalog(
+        catalog_name,
+        **({} if catalog_properties is None else catalog_properties),
+    )
+    arrow_data = pa.Table.from_pandas(df)
+    iceberg_table = catalog.create_table_if_not_exists(
+        identifier=table_identifier,
+        schema=arrow_data.schema,
+        location=location,
+        # we could add `partition_spec`, `sort_order` and `properties` in the
+        # future, but it may not be trivial without exposing PyIceberg objects
+    )
+
+    summary_props = {} if snapshot_properties is None else snapshot_properties
+    # ``append`` only selects which write method of the table is used.
+    write = iceberg_table.append if append else iceberg_table.overwrite
+    write(arrow_data, snapshot_properties=summary_props)
@@ -0,0 +1,15 @@
+from pandas.io.json._json import (
+    read_json,
+    to_json,
+    ujson_dumps,
+    ujson_loads,
+)
+from pandas.io.json._table_schema import build_table_schema
+
+# Names re-exported from the private submodules imported above.
+__all__ = [
+    "build_table_schema",
+    "read_json",
+    "to_json",
+    "ujson_dumps",
+    "ujson_loads",
+]
@@ -0,0 +1,648 @@
+# ---------------------------------------------------------------------
+# JSON normalization routines
+from __future__ import annotations
+
+from collections import (
+    abc,
+    defaultdict,
+)
+import copy
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    DefaultDict,
+    overload,
+)
+
+import numpy as np
+
+from pandas._libs.writers import convert_json_to_lines
+from pandas.util._decorators import set_module
+
+import pandas as pd
+from pandas import (
+    DataFrame,
+    Series,
+)
+
+if TYPE_CHECKING:
+    from collections.abc import Iterable
+
+    from pandas._typing import (
+        IgnoreRaise,
+        Scalar,
+    )
+
+
+def convert_to_line_delimits(s: str) -> str:
+    """
+    Helper function that converts JSON lists to line delimited JSON.
+
+    Parameters
+    ----------
+    s : str
+        A serialized JSON document.
+
+    Returns
+    -------
+    str
+        ``s`` unchanged unless it is wrapped in ``[`` ... ``]``; in that case
+        the enclosing brackets are dropped and the contents are passed
+        through ``convert_json_to_lines``.
+    """
+    # Only a JSON list can be turned into lines; anything else (including an
+    # empty string) is returned untouched. Both bracket checks have to be
+    # negated together: ``not a and b`` would negate only the first one.
+    if not (s.startswith("[") and s.endswith("]")):
+        return s
+
+    return convert_json_to_lines(s[1:-1])
+
+
+@overload
+def nested_to_record(
+    ds: dict,
+    prefix: str = ...,
+    sep: str = ...,
+    level: int = ...,
+    max_level: int | None = ...,
+) -> dict[str, Any]: ...
+
+
+@overload
+def nested_to_record(
+    ds: list[dict],
+    prefix: str = ...,
+    sep: str = ...,
+    level: int = ...,
+    max_level: int | None = ...,
+) -> list[dict[str, Any]]: ...
+
+
+def nested_to_record(
+    ds: dict | list[dict],
+    prefix: str = "",
+    sep: str = ".",
+    level: int = 0,
+    max_level: int | None = None,
+) -> dict[str, Any] | list[dict[str, Any]]:
+    """
+    A simplified json_normalize
+
+    Converts a nested dict into a flat dict ("record"), unlike json_normalize,
+    it does not attempt to extract a subset of the data.
+
+    Parameters
+    ----------
+    ds : dict or list of dicts
+        The record(s) to flatten. The input is not modified; every record is
+        deep-copied first.
+    prefix : str, default ""
+        Prefix joined (with ``sep``) in front of every key when ``level`` is
+        not 0. Used by the recursive calls.
+    sep : str, default '.'
+        Nested records will generate names separated by sep,
+        e.g., for sep='.', { 'foo' : { 'bar' : 0 } } -> foo.bar
+    level : int, default 0
+        Current nesting depth; 0 for the outermost call.
+    max_level : int, optional, default None
+        The max depth to normalize. Dicts found at this depth or deeper are
+        kept as values instead of being flattened.
+
+    Returns
+    -------
+    d - dict or list of dicts, matching `ds`
+
+    Examples
+    --------
+    >>> nested_to_record(
+    ...     dict(flat1=1, dict1=dict(c=1, d=2), nested=dict(e=dict(c=1, d=2), d=2))
+    ... )
+    {\
+'flat1': 1, \
+'dict1.c': 1, \
+'dict1.d': 2, \
+'nested.e.c': 1, \
+'nested.e.d': 2, \
+'nested.d': 2\
+}
+    """
+    singleton = False
+    if isinstance(ds, dict):
+        ds = [ds]
+        singleton = True
+    new_ds = []
+    for d in ds:
+        new_d = copy.deepcopy(d)
+        for k, v in d.items():
+            # Only the generated name is coerced to ``str``. ``k`` itself is
+            # kept as-is because it is still the key under which the value
+            # lives in ``new_d``; popping the coerced name would raise
+            # KeyError for non-string keys.
+            str_k = k if isinstance(k, str) else str(k)
+            newkey = str_k if level == 0 else prefix + sep + str_k
+
+            # flatten if type is dict and
+            # current dict level  < maximum level provided and
+            # only dicts gets recurse-flattened
+            # only at level>1 do we rename the rest of the keys
+            if not isinstance(v, dict) or (
+                max_level is not None and level >= max_level
+            ):
+                if level != 0:  # so we skip copying for top level, common case
+                    new_d[newkey] = new_d.pop(k)
+                continue
+
+            v = new_d.pop(k)
+            new_d.update(nested_to_record(v, newkey, sep, level + 1, max_level))
+        new_ds.append(new_d)
+
+    if singleton:
+        return new_ds[0]
+    return new_ds
+
+
+def _normalize_json(
+    data: Any,
+    key_string: str,
+    normalized_dict: dict[str, Any],
+    separator: str,
+) -> dict[str, Any]:
+    """
+    Recursively flatten ``data`` into ``normalized_dict``.
+
+    Designed for the most basic use case of pd.json_normalize(data);
+    intended as a performance improvement, see #15621
+
+    Parameters
+    ----------
+    data : Any
+        Type dependent on types contained within nested Json
+    key_string : str
+        New key (with separator(s) in) for data
+    normalized_dict : dict
+        The new normalized/flattened Json dict; filled in place
+    separator : str
+        Nested records will generate names separated by sep,
+        e.g., for sep='.', { 'foo' : { 'bar' : 0 } } -> foo.bar
+
+    Returns
+    -------
+    dict
+        The same ``normalized_dict`` object that was passed in.
+    """
+    if not isinstance(data, dict):
+        # leaf value: store it under the key accumulated so far
+        normalized_dict[key_string] = data
+        return normalized_dict
+
+    for key, value in data.items():
+        # keys directly below an empty ``key_string`` get no leading separator
+        if key_string:
+            child_key = f"{key_string}{separator}{key}"
+        else:
+            child_key = f"{key}"
+
+        _normalize_json(
+            data=value,
+            key_string=child_key,
+            normalized_dict=normalized_dict,
+            separator=separator,
+        )
+    return normalized_dict
+
+
+def _normalize_json_ordered(data: dict[str, Any], separator: str) -> dict[str, Any]:
+    """
+    Flatten one record, keeping its non-dict top level entries first.
+
+    Parameters
+    ----------
+    data : dict
+        The record to flatten.
+    separator : str
+        Nested records will generate names separated by sep,
+        e.g., for sep='.', { 'foo' : { 'bar' : 0 } } -> foo.bar
+
+    Returns
+    -------
+    dict
+        The top level entries whose value is not a dict, followed by the
+        flattened entries of the dict-valued ones.
+    """
+    scalar_part: dict[str, Any] = {}
+    nested_part: dict[str, Any] = {}
+    for key, value in data.items():
+        if isinstance(value, dict):
+            nested_part[key] = value
+        else:
+            scalar_part[key] = value
+
+    flattened = _normalize_json(
+        data=nested_part,
+        key_string="",
+        normalized_dict={},
+        separator=separator,
+    )
+    return {**scalar_part, **flattened}
+
+
+def _simple_json_normalize(
+    ds: dict | list[dict],
+    sep: str = ".",
+) -> dict | list[dict] | Any:
+    """
+    An optimized basic json_normalize
+
+    Converts a nested dict into a flat dict ("record"), unlike
+    json_normalize and nested_to_record it doesn't do anything clever.
+    But for the most basic use cases it enhances performance.
+    E.g. pd.json_normalize(data)
+
+    Parameters
+    ----------
+    ds : dict or list of dicts
+    sep : str, default '.'
+        Nested records will generate names separated by sep,
+        e.g., for sep='.', { 'foo' : { 'bar' : 0 } } -> foo.bar
+
+    Returns
+    -------
+    dict or list of dicts
+        A flat dict for a dict input and a list with one result per element
+        for a list input. Any other input yields an empty dict.
+
+    Examples
+    --------
+    >>> _simple_json_normalize(
+    ...     {
+    ...         "flat1": 1,
+    ...         "dict1": {"c": 1, "d": 2},
+    ...         "nested": {"e": {"c": 1, "d": 2}, "d": 2},
+    ...     }
+    ... )
+    {\
+'flat1': 1, \
+'dict1.c': 1, \
+'dict1.d': 2, \
+'nested.e.c': 1, \
+'nested.e.d': 2, \
+'nested.d': 2\
+}
+
+    """
+    # expect a dictionary, as most jsons are. However, lists are perfectly valid
+    if isinstance(ds, dict):
+        return _normalize_json_ordered(data=ds, separator=sep)
+    if isinstance(ds, list):
+        return [_simple_json_normalize(row, sep=sep) for row in ds]
+    return {}
+
+
+def _validate_meta(meta: str | list[str | list[str]] | None) -> None:
+    """
+    Validate that meta parameter contains only strings or lists of strings.
+
+    Parameters
+    ----------
+    meta : str or list of str or list of list of str or None
+        The meta parameter to validate. ``None`` and a plain string are
+        accepted without further checks.
+
+    Raises
+    ------
+    TypeError
+        If meta contains elements that are not strings or lists of strings.
+    """
+    if meta is None or isinstance(meta, str):
+        return
+
+    for entry in meta:
+        if isinstance(entry, str):
+            continue
+        if not isinstance(entry, list):
+            raise TypeError(
+                "All elements in 'meta' must be strings or lists of strings. "
+                f"Found {type(entry).__name__}: {entry!r}"
+            )
+        # a nested path: every component has to be a string
+        for part in entry:
+            if not isinstance(part, str):
+                raise TypeError(
+                    "All elements in nested meta paths must be strings. "
+                    f"Found {type(part).__name__}: {part!r}"
+                )
+
+
+@set_module("pandas")
+def json_normalize(
+    data: dict | list[dict] | Series,
+    record_path: str | list | None = None,
+    meta: str | list[str | list[str]] | None = None,
+    meta_prefix: str | None = None,
+    record_prefix: str | None = None,
+    errors: IgnoreRaise = "raise",
+    sep: str = ".",
+    max_level: int | None = None,
+) -> DataFrame:
+    """
+    Normalize semi-structured JSON data into a flat table.
+
+    This method is designed to transform semi-structured JSON data, such as nested
+    dictionaries or lists, into a flat table. This is particularly useful when
+    handling JSON-like data structures that contain deeply nested fields.
+
+    Parameters
+    ----------
+    data : dict, list of dicts, or Series of dicts
+        Unserialized JSON objects.
+    record_path : str or list of str, default None
+        Path in each object to list of records. If not passed, data will be
+        assumed to be an array of records.
+    meta : list of paths (str or list of str), default None
+        Fields to use as metadata for each record in resulting table.
+    meta_prefix : str, default None
+        String to prefix records with dotted path, e.g. foo.bar.field if
+        meta is ['foo', 'bar'].
+    record_prefix : str, default None
+        String to prefix records with dotted path, e.g. foo.bar.field if
+        path to records is ['foo', 'bar'].
+    errors : {'raise', 'ignore'}, default 'raise'
+        Configures error handling.
+
+        * 'ignore' : will ignore KeyError if keys listed in meta are not
+          always present.
+        * 'raise' : will raise KeyError if keys listed in meta are not
+          always present.
+    sep : str, default '.'
+        Nested records will generate names separated by sep.
+        e.g., for sep='.', {'foo': {'bar': 0}} -> foo.bar.
+    max_level : int, default None
+        Max number of levels(depth of dict) to normalize.
+        if None, normalizes all levels.
+
+    Returns
+    -------
+    DataFrame
+        The normalized data, represented as a pandas DataFrame.
+
+    Raises
+    ------
+    TypeError
+        If ``meta`` contains elements that are not strings or lists of
+        strings, if an item of ``data`` is not a dict, or if the value found
+        at ``record_path`` is neither a list nor null.
+    KeyError
+        If a key of ``record_path`` is missing, or if a key of ``meta`` is
+        missing and ``errors='raise'``.
+    ValueError
+        If a metadata column name conflicts with a record column name.
+    NotImplementedError
+        If ``data`` is neither a dict nor a non-string iterable.
+
+    See Also
+    --------
+    DataFrame : Two-dimensional, size-mutable, potentially heterogeneous tabular data.
+    Series : One-dimensional ndarray with axis labels (including time series).
+
+    Examples
+    --------
+    >>> data = [
+    ...     {"id": 1, "name": {"first": "Coleen", "last": "Volk"}},
+    ...     {"name": {"given": "Mark", "family": "Regner"}},
+    ...     {"id": 2, "name": "Faye Raker"},
+    ... ]
+    >>> pd.json_normalize(data)
+        id name.first name.last name.given name.family        name
+    0  1.0     Coleen      Volk        NaN         NaN         NaN
+    1  NaN        NaN       NaN       Mark      Regner         NaN
+    2  2.0        NaN       NaN        NaN         NaN  Faye Raker
+
+    >>> data = [
+    ...     {
+    ...         "id": 1,
+    ...         "name": "Cole Volk",
+    ...         "fitness": {"height": 130, "weight": 60},
+    ...     },
+    ...     {"name": "Mark Reg", "fitness": {"height": 130, "weight": 60}},
+    ...     {
+    ...         "id": 2,
+    ...         "name": "Faye Raker",
+    ...         "fitness": {"height": 130, "weight": 60},
+    ...     },
+    ... ]
+    >>> pd.json_normalize(data, max_level=0)
+        id        name                        fitness
+    0  1.0   Cole Volk  {'height': 130, 'weight': 60}
+    1  NaN    Mark Reg  {'height': 130, 'weight': 60}
+    2  2.0  Faye Raker  {'height': 130, 'weight': 60}
+
+    Normalizes nested data up to level 1.
+
+    >>> data = [
+    ...     {
+    ...         "id": 1,
+    ...         "name": "Cole Volk",
+    ...         "fitness": {"height": 130, "weight": 60},
+    ...     },
+    ...     {"name": "Mark Reg", "fitness": {"height": 130, "weight": 60}},
+    ...     {
+    ...         "id": 2,
+    ...         "name": "Faye Raker",
+    ...         "fitness": {"height": 130, "weight": 60},
+    ...     },
+    ... ]
+    >>> pd.json_normalize(data, max_level=1)
+        id        name  fitness.height  fitness.weight
+    0  1.0   Cole Volk             130              60
+    1  NaN    Mark Reg             130              60
+    2  2.0  Faye Raker             130              60
+
+    >>> data = [
+    ...     {
+    ...         "id": 1,
+    ...         "name": "Cole Volk",
+    ...         "fitness": {"height": 130, "weight": 60},
+    ...     },
+    ...     {"name": "Mark Reg", "fitness": {"height": 130, "weight": 60}},
+    ...     {
+    ...         "id": 2,
+    ...         "name": "Faye Raker",
+    ...         "fitness": {"height": 130, "weight": 60},
+    ...     },
+    ... ]
+    >>> series = pd.Series(data, index=pd.Index(["a", "b", "c"]))
+    >>> pd.json_normalize(series)
+        id        name  fitness.height  fitness.weight
+    a  1.0   Cole Volk             130              60
+    b  NaN    Mark Reg             130              60
+    c  2.0  Faye Raker             130              60
+
+    >>> data = [
+    ...     {
+    ...         "state": "Florida",
+    ...         "shortname": "FL",
+    ...         "info": {"governor": "Rick Scott"},
+    ...         "counties": [
+    ...             {"name": "Dade", "population": 12345},
+    ...             {"name": "Broward", "population": 40000},
+    ...             {"name": "Palm Beach", "population": 60000},
+    ...         ],
+    ...     },
+    ...     {
+    ...         "state": "Ohio",
+    ...         "shortname": "OH",
+    ...         "info": {"governor": "John Kasich"},
+    ...         "counties": [
+    ...             {"name": "Summit", "population": 1234},
+    ...             {"name": "Cuyahoga", "population": 1337},
+    ...         ],
+    ...     },
+    ... ]
+    >>> result = pd.json_normalize(
+    ...     data, "counties", ["state", "shortname", ["info", "governor"]]
+    ... )
+    >>> result
+             name  population    state shortname info.governor
+    0        Dade       12345   Florida    FL    Rick Scott
+    1     Broward       40000   Florida    FL    Rick Scott
+    2  Palm Beach       60000   Florida    FL    Rick Scott
+    3      Summit        1234   Ohio       OH    John Kasich
+    4    Cuyahoga        1337   Ohio       OH    John Kasich
+
+    >>> data = {"A": [1, 2]}
+    >>> pd.json_normalize(data, "A", record_prefix="Prefix.")
+        Prefix.0
+    0          1
+    1          2
+
+    Returns normalized data with columns prefixed with the given string.
+    """
+    _validate_meta(meta)
+
+    def _pull_field(
+        js: dict[str, Any], spec: list | str, extract_record: bool = False
+    ) -> Scalar | Iterable:
+        """Internal function to pull field"""
+        result = js
+        try:
+            if isinstance(spec, list):
+                for field in spec:
+                    if result is None:
+                        raise KeyError(field)
+                    result = result[field]
+            else:
+                result = result[spec]
+        except KeyError as e:
+            if extract_record:
+                raise KeyError(
+                    f"Key {e} not found. If specifying a record_path, all elements of "
+                    f"data should have the path."
+                ) from e
+            if errors == "ignore":
+                return np.nan
+            else:
+                raise KeyError(
+                    f"Key {e} not found. To replace missing values of {e} with "
+                    f"np.nan, pass in errors='ignore'"
+                ) from e
+
+        return result
+
+    def _pull_records(js: dict[str, Any], spec: list | str) -> list:
+        """
+        Internal function to pull field for records, and similar to
+        _pull_field, but require to return list. And will raise error
+        if has non iterable value.
+        """
+        result = _pull_field(js, spec, extract_record=True)
+
+        # GH 31507 GH 30145, GH 26284 if result is not list, raise TypeError if not
+        # null, otherwise return an empty list
+        if not isinstance(result, list):
+            if pd.isnull(result):
+                result = []
+            else:
+                raise TypeError(
+                    f"Path must contain list or null, "
+                    f"but got {type(result).__name__} at {spec!r}"
+                )
+        return result
+
+    if isinstance(data, Series):
+        index = data.index
+    else:
+        index = None
+
+    if isinstance(data, list) and not data:
+        return DataFrame()
+    elif isinstance(data, dict):
+        # A bit of a hackjob
+        data = [data]
+    elif isinstance(data, abc.Iterable) and not isinstance(data, str):
+        # GH35923 Fix pd.json_normalize to not skip the first element of a
+        # generator input
+        data = list(data)
+        for item in data:
+            if not isinstance(item, dict):
+                msg = (
+                    "All items in data must be of type dict, "
+                    f"found {type(item).__name__}"
+                )
+                raise TypeError(msg)
+    else:
+        raise NotImplementedError
+
+    # check to see if a simple recursive function is possible to
+    # improve performance (see #15621) but only for cases such
+    # as pd.json_normalize(data) or pd.json_normalize(data, sep=sep)
+    if (
+        record_path is None
+        and meta is None
+        and meta_prefix is None
+        and record_prefix is None
+        and max_level is None
+    ):
+        return DataFrame(_simple_json_normalize(data, sep=sep), index=index)
+
+    if record_path is None:
+        # Only go through nested_to_record (which deep-copies every record)
+        # when at least one record really holds a dict value. The check must
+        # look at the individual values; testing a list of booleans for
+        # truthiness would be True for every non-empty record.
+        if any(isinstance(x, dict) for y in data for x in y.values()):
+            # naive normalization, this is idempotent for flat records
+            # and potentially will inflate the data considerably for
+            # deeply nested structures:
+            #  {VeryLong: { b: 1,c:2}} -> {VeryLong.b:1 ,VeryLong.c:2}
+            #
+            # TODO: handle record value which are lists, at least error
+            #       reasonably
+            data = nested_to_record(data, sep=sep, max_level=max_level)
+        result = DataFrame(data, index=index)
+        if record_prefix is not None:
+            result = result.rename(columns=lambda x: f"{record_prefix}{x}")
+        return result
+    elif not isinstance(record_path, list):
+        record_path = [record_path]
+
+    if meta is None:
+        meta = []
+    elif not isinstance(meta, list):
+        meta = [meta]
+
+    # every meta entry as a path (list of keys)
+    _meta = [m if isinstance(m, list) else [m] for m in meta]
+
+    # Disastrously inefficient for now
+    records: list = []
+    # number of records pulled from each object, used to repeat the metadata
+    lengths = []
+
+    meta_vals: DefaultDict = defaultdict(list)
+    meta_keys = [sep.join(val) for val in _meta]
+
+    def _recursive_extract(data, path, seen_meta, level: int = 0) -> None:
+        if isinstance(data, dict):
+            data = [data]
+        if len(path) > 1:
+            for obj in data:
+                for val, key in zip(_meta, meta_keys, strict=True):
+                    if level + 1 == len(val):
+                        seen_meta[key] = _pull_field(obj, val[-1])
+
+                _recursive_extract(obj[path[0]], path[1:], seen_meta, level=level + 1)
+        else:
+            for obj in data:
+                recs = _pull_records(obj, path[0])
+                recs = [
+                    nested_to_record(r, sep=sep, max_level=max_level)
+                    if isinstance(r, dict)
+                    else r
+                    for r in recs
+                ]
+
+                # For repeating the metadata later
+                lengths.append(len(recs))
+                for val, key in zip(_meta, meta_keys, strict=True):
+                    if level + 1 > len(val):
+                        meta_val = seen_meta[key]
+                    else:
+                        meta_val = _pull_field(obj, val[level:])
+                    meta_vals[key].append(meta_val)
+                records.extend(recs)
+
+    _recursive_extract(data, record_path, {}, level=0)
+
+    result = DataFrame(records)
+
+    if record_prefix is not None:
+        result = result.rename(columns=lambda x: f"{record_prefix}{x}")
+
+    # Data types, a problem
+    for k, v in meta_vals.items():
+        if meta_prefix is not None:
+            k = meta_prefix + k
+
+        if k in result:
+            raise ValueError(
+                f"Conflicting metadata name {k}, need distinguishing prefix "
+            )
+        # GH 37782
+
+        values = np.array(v, dtype=object)
+
+        if values.ndim > 1:
+            # GH 37782
+            # list-like metadata values became extra array dimensions; rebuild
+            # a 1-D object array holding one value per source object
+            values = np.empty((len(v),), dtype=object)
+            for i, val in enumerate(v):
+                values[i] = val
+
+        result[k] = values.repeat(lengths)
+    if index is not None:
+        result.index = index.repeat(lengths)
+    return result
@@ -0,0 +1,402 @@
+"""
+Table Schema builders
+
+https://specs.frictionlessdata.io/table-schema/
+"""
+
+from __future__ import annotations
+
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    cast,
+)
+import warnings
+
+from pandas._config import option_context
+
+from pandas._libs import lib
+from pandas._libs.json import ujson_loads
+from pandas._libs.tslibs import timezones
+from pandas.util._exceptions import find_stack_level
+
+from pandas.core.dtypes.base import _registry as registry
+from pandas.core.dtypes.common import (
+    is_bool_dtype,
+    is_integer_dtype,
+    is_numeric_dtype,
+    is_string_dtype,
+)
+from pandas.core.dtypes.dtypes import (
+    CategoricalDtype,
+    DatetimeTZDtype,
+    ExtensionDtype,
+    PeriodDtype,
+)
+
+from pandas import DataFrame
+import pandas.core.common as com
+
+from pandas.tseries.frequencies import to_offset
+
+if TYPE_CHECKING:
+    from pandas._typing import (
+        DtypeObj,
+        JSONSerializable,
+    )
+
+    from pandas import Series
+    from pandas.core.indexes.multi import MultiIndex
+
+
+TABLE_SCHEMA_VERSION = "1.4.0"
+
+
+def as_json_table_type(x: DtypeObj) -> str:
+    """
+    Convert a NumPy / pandas type to its corresponding json_table.
+
+    Parameters
+    ----------
+    x : np.dtype or ExtensionDtype
+
+    Returns
+    -------
+    str
+        the Table Schema data types
+
+    Notes
+    -----
+    This table shows the relationship between NumPy / pandas dtypes,
+    and Table Schema dtypes.
+
+    ==============  =================
+    Pandas type     Table Schema type
+    ==============  =================
+    int64           integer
+    float64         number
+    bool            boolean
+    datetime64[ns]  datetime
+    timedelta64[ns] duration
+    object          str
+    categorical     any
+    =============== =================
+    """
+    if is_integer_dtype(x):
+        return "integer"
+    elif is_bool_dtype(x):
+        return "boolean"
+    elif is_numeric_dtype(x):
+        return "number"
+    elif lib.is_np_dtype(x, "M") or isinstance(x, (DatetimeTZDtype, PeriodDtype)):
+        return "datetime"
+    elif lib.is_np_dtype(x, "m"):
+        return "duration"
+    elif is_string_dtype(x):
+        return "string"
+    else:
+        return "any"
+
+
+def set_default_names(data):
+    """Sets index names to 'index' for regular, or 'level_x' for Multi"""
+    if com.all_not_none(*data.index.names):
+        nms = data.index.names
+        if len(nms) == 1 and data.index.name == "index":
+            warnings.warn(
+                "Index name of 'index' is not round-trippable.",
+                stacklevel=find_stack_level(),
+            )
+        elif len(nms) > 1 and any(x.startswith("level_") for x in nms):
+            warnings.warn(
+                "Index names beginning with 'level_' are not round-trippable.",
+                stacklevel=find_stack_level(),
+            )
+        return data
+
+    data = data.copy(deep=False)
+    if data.index.nlevels > 1:
+        data.index.names = com.fill_missing_names(data.index.names)
+    else:
+        data.index.name = data.index.name or "index"
+    return data
+
+
+def convert_pandas_type_to_json_field(arr) -> dict[str, JSONSerializable]:
+    dtype = arr.dtype
+    name: JSONSerializable
+    if arr.name is None:
+        name = "values"
+    else:
+        name = arr.name
+    field: dict[str, JSONSerializable] = {
+        "name": name,
+        "type": as_json_table_type(dtype),
+    }
+
+    if isinstance(dtype, CategoricalDtype):
+        cats = dtype.categories
+        ordered = dtype.ordered
+
+        field["constraints"] = {"enum": list(cats)}
+        field["ordered"] = ordered
+    elif isinstance(dtype, PeriodDtype):
+        field["freq"] = dtype.freq.freqstr
+    elif isinstance(dtype, DatetimeTZDtype):
+        if timezones.is_utc(dtype.tz):
+            field["tz"] = "UTC"
+        else:
+            zone = timezones.get_timezone(dtype.tz)
+            if isinstance(zone, str):
+                field["tz"] = zone
+    elif isinstance(dtype, ExtensionDtype):
+        field["extDtype"] = dtype.name
+    return field
+
+
+def convert_json_field_to_pandas_type(field) -> str | CategoricalDtype:
+    """
+    Converts a JSON field descriptor into its corresponding NumPy / pandas type
+
+    Parameters
+    ----------
+    field
+        A JSON field descriptor
+
+    Returns
+    -------
+    dtype
+
+    Raises
+    ------
+    ValueError
+        If the type of the provided field is unknown or currently unsupported
+
+    Examples
+    --------
+    >>> convert_json_field_to_pandas_type({"name": "an_int", "type": "integer"})
+    'int64'
+
+    >>> convert_json_field_to_pandas_type(
+    ...     {
+    ...         "name": "a_categorical",
+    ...         "type": "any",
+    ...         "constraints": {"enum": ["a", "b", "c"]},
+    ...         "ordered": True,
+    ...     }
+    ... )
+    CategoricalDtype(categories=['a', 'b', 'c'], ordered=True, categories_dtype=str)
+
+    >>> convert_json_field_to_pandas_type({"name": "a_datetime", "type": "datetime"})
+    'datetime64[ns]'
+
+    >>> convert_json_field_to_pandas_type(
+    ...     {"name": "a_datetime_with_tz", "type": "datetime", "tz": "US/Central"}
+    ... )
+    'datetime64[ns, US/Central]'
+    """
+    typ = field["type"]
+    if typ == "string":
+        return field.get("extDtype", None)
+    elif typ == "integer":
+        return field.get("extDtype", "int64")
+    elif typ == "number":
+        return field.get("extDtype", "float64")
+    elif typ == "boolean":
+        return field.get("extDtype", "bool")
+    elif typ == "duration":
+        return "timedelta64"
+    elif typ == "datetime":
+        if field.get("tz"):
+            return f"datetime64[ns, {field['tz']}]"
+        elif field.get("freq"):
+            # GH#9586 rename frequency M to ME for offsets
+            offset = to_offset(field["freq"])
+            freq = PeriodDtype(offset)._freqstr
+            # GH#47747 using datetime over period to minimize the change surface
+            return f"period[{freq}]"
+        else:
+            return "datetime64[ns]"
+    elif typ == "any":
+        if "constraints" in field and "ordered" in field:
+            return CategoricalDtype(
+                categories=field["constraints"]["enum"], ordered=field["ordered"]
+            )
+        elif "extDtype" in field:
+            return registry.find(field["extDtype"])
+        else:
+            return "object"
+
+    raise ValueError(f"Unsupported or invalid field type: {typ}")
+
+
+def build_table_schema(
+    data: DataFrame | Series,
+    index: bool = True,
+    primary_key: bool | None = None,
+    version: bool = True,
+) -> dict[str, JSONSerializable]:
+    """
+    Create a Table schema from ``data``.
+
+    This method is a utility to generate a JSON-serializable schema
+    representation of a pandas Series or DataFrame, compatible with the
+    Table Schema specification. It enables structured data to be shared
+    and validated in various applications, ensuring consistency and
+    interoperability.
+
+    Parameters
+    ----------
+    data : Series or DataFrame
+        The input data for which the table schema is to be created.
+    index : bool, default True
+        Whether to include ``data.index`` in the schema.
+    primary_key : bool or None, default True
+        Column names to designate as the primary key.
+        The default `None` will set `'primaryKey'` to the index
+        level or levels if the index is unique.
+    version : bool, default True
+        Whether to include a field `pandas_version` with the version
+        of pandas that last revised the table schema. This version
+        can be different from the installed pandas version.
+
+    Returns
+    -------
+    dict
+        A dictionary representing the Table schema.
+
+    See Also
+    --------
+    DataFrame.to_json : Convert the object to a JSON string.
+    read_json : Convert a JSON string to pandas object.
+
+    Notes
+    -----
+    See `Table Schema
+    <https://pandas.pydata.org/docs/user_guide/io.html#table-schema>`__ for
+    conversion types.
+    Timedeltas as converted to ISO8601 duration format with
+    9 decimal places after the seconds field for nanosecond precision.
+
+    Categoricals are converted to the `any` dtype, and use the `enum` field
+    constraint to list the allowed values. The `ordered` attribute is included
+    in an `ordered` field.
+
+    Examples
+    --------
+    >>> from pandas.io.json._table_schema import build_table_schema
+    >>> df = pd.DataFrame(
+    ...     {'A': [1, 2, 3],
+    ...      'B': ['a', 'b', 'c'],
+    ...      'C': pd.date_range('2016-01-01', freq='D', periods=3),
+    ...      }, index=pd.Index(range(3), name='idx'))
+    >>> build_table_schema(df)
+    {'fields': \
+[{'name': 'idx', 'type': 'integer'}, \
+{'name': 'A', 'type': 'integer'}, \
+{'name': 'B', 'type': 'string', 'extDtype': 'str'}, \
+{'name': 'C', 'type': 'datetime'}], \
+'primaryKey': ['idx'], \
+'pandas_version': '1.4.0'}
+    """
+    if index is True:
+        data = set_default_names(data)
+
+    schema: dict[str, Any] = {}
+    fields = []
+
+    if index:
+        if data.index.nlevels > 1:
+            data.index = cast("MultiIndex", data.index)
+            for level, name in zip(data.index.levels, data.index.names, strict=True):
+                new_field = convert_pandas_type_to_json_field(level)
+                new_field["name"] = name
+                fields.append(new_field)
+        else:
+            fields.append(convert_pandas_type_to_json_field(data.index))
+
+    if data.ndim > 1:
+        for column, s in data.items():
+            fields.append(convert_pandas_type_to_json_field(s))
+    else:
+        fields.append(convert_pandas_type_to_json_field(data))
+
+    schema["fields"] = fields
+    if index and data.index.is_unique and primary_key is None:
+        if data.index.nlevels == 1:
+            schema["primaryKey"] = [data.index.name]
+        else:
+            schema["primaryKey"] = data.index.names
+    elif primary_key is not None:
+        schema["primaryKey"] = primary_key
+
+    if version:
+        schema["pandas_version"] = TABLE_SCHEMA_VERSION
+    return schema
+
+
+def parse_table_schema(json, precise_float: bool) -> DataFrame:
+    """
+    Builds a DataFrame from a given schema
+
+    Parameters
+    ----------
+    json :
+        A JSON table schema
+    precise_float : bool
+        Flag controlling precision when decoding string to double values, as
+        dictated by ``read_json``
+
+    Returns
+    -------
+    df : DataFrame
+
+    Raises
+    ------
+    NotImplementedError
+        If the JSON table schema contains either timezone or timedelta data
+
+    Notes
+    -----
+        Because :func:`DataFrame.to_json` uses the string 'index' to denote a
+        name-less :class:`Index`, this function sets the name of the returned
+        :class:`DataFrame` to ``None`` when said string is encountered with a
+        normal :class:`Index`. For a :class:`MultiIndex`, the same limitation
+        applies to any strings beginning with 'level_'. Therefore, an
+        :class:`Index` name of 'index'  and :class:`MultiIndex` names starting
+        with 'level_' are not supported.
+
+    See Also
+    --------
+    build_table_schema : Inverse function.
+    pandas.read_json
+    """
+    table = ujson_loads(json, precise_float=precise_float)
+    col_order = [field["name"] for field in table["schema"]["fields"]]
+    df = DataFrame(table["data"], columns=col_order)[col_order]
+
+    dtypes = {
+        field["name"]: convert_json_field_to_pandas_type(field)
+        for field in table["schema"]["fields"]
+    }
+
+    # No ISO constructor for Timedelta as of yet, so need to raise
+    if "timedelta64" in dtypes.values():
+        raise NotImplementedError(
+            'table="orient" can not yet read ISO-formatted Timedelta data'
+        )
+
+    with option_context("future.distinguish_nan_and_na", False):
+        df = df.astype(dtypes)
+
+    if "primaryKey" in table["schema"]:
+        df = df.set_index(table["schema"]["primaryKey"])
+        if len(df.index.names) == 1:
+            if df.index.name == "index":
+                df.index.name = None
+        else:
+            df.index.names = [
+                None if x.startswith("level_") else x for x in df.index.names
+            ]
+
+    return df
@@ -0,0 +1,243 @@
+"""orc compat"""
+
+from __future__ import annotations
+
+import io
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Literal,
+)
+
+from pandas._libs import lib
+from pandas.compat._optional import import_optional_dependency
+from pandas.util._decorators import set_module
+from pandas.util._validators import check_dtype_backend
+
+from pandas.core.indexes.api import default_index
+
+from pandas.io._util import arrow_table_to_pandas
+from pandas.io.common import (
+    get_handle,
+    is_fsspec_url,
+)
+
+if TYPE_CHECKING:
+    import fsspec
+    import pyarrow.fs
+
+    from pandas._typing import (
+        DtypeBackend,
+        FilePath,
+        ReadBuffer,
+        WriteBuffer,
+    )
+
+    from pandas.core.frame import DataFrame
+
+
+@set_module("pandas")
+def read_orc(
+    path: FilePath | ReadBuffer[bytes],
+    columns: list[str] | None = None,
+    dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
+    filesystem: pyarrow.fs.FileSystem | fsspec.spec.AbstractFileSystem | None = None,
+    **kwargs: Any,
+) -> DataFrame:
+    """
+    Load an ORC object from the file path, returning a DataFrame.
+
+    This method reads an ORC (Optimized Row Columnar) file into a pandas
+    DataFrame using the `pyarrow.orc` library. ORC is a columnar storage format
+    that provides efficient compression and fast retrieval for analytical workloads.
+    It allows reading specific columns, handling different filesystem
+    types (such as local storage, cloud storage via fsspec, or pyarrow filesystem),
+    and supports different data type backends, including `numpy_nullable` and `pyarrow`.
+
+    Parameters
+    ----------
+    path : str, path object, or file-like object
+        String, path object (implementing ``os.PathLike[str]``), or file-like
+        object implementing a binary ``read()`` function. The string could be a URL.
+        Valid URL schemes include http, ftp, s3, and file. For file URLs, a host is
+        expected. A local file could be:
+        ``file://localhost/path/to/table.orc``.
+    columns : list, default None
+        If not None, only these columns will be read from the file.
+        Output always follows the ordering of the file and not the columns list.
+        This mirrors the original behaviour of
+        :external+pyarrow:py:meth:`pyarrow.orc.ORCFile.read`.
+    dtype_backend : {'numpy_nullable', 'pyarrow'}
+        Back-end data type applied to the resultant :class:`DataFrame`
+        (still experimental). If not specified, the default behavior
+        is to not use nullable data types. If specified, the behavior
+        is as follows:
+
+        * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame`
+        * ``"pyarrow"``: returns pyarrow-backed nullable
+          :class:`ArrowDtype` :class:`DataFrame`
+
+        .. versionadded:: 2.0
+
+    filesystem : fsspec or pyarrow filesystem, default None
+        Filesystem object to use when reading the orc file.
+
+        .. versionadded:: 2.1.0
+
+    **kwargs
+        Any additional kwargs are passed to pyarrow.
+
+    Returns
+    -------
+    DataFrame
+        DataFrame based on the ORC file.
+
+    See Also
+    --------
+    read_csv : Read a comma-separated values (csv) file into a pandas DataFrame.
+    read_excel : Read an Excel file into a pandas DataFrame.
+    read_spss : Read an SPSS file into a pandas DataFrame.
+    read_sas : Load a SAS file into a pandas DataFrame.
+    read_feather : Load a feather-format object into a pandas DataFrame.
+
+    Notes
+    -----
+    Before using this function you should read the :ref:`user guide about ORC <io.orc>`
+    and :ref:`install optional dependencies <install.warn_orc>`.
+
+    If ``path`` is a URI scheme pointing to a local or remote file (e.g. "s3://"),
+    a ``pyarrow.fs`` filesystem will be attempted to read the file. You can also pass a
+    pyarrow or fsspec filesystem object into the filesystem keyword to override this
+    behavior.
+
+    Examples
+    --------
+    >>> result = pd.read_orc("example_pa.orc")  # doctest: +SKIP
+    """
+    # pyarrow.orc is an optional dependency, so it is imported at call time
+
+    orc = import_optional_dependency("pyarrow.orc")
+
+    check_dtype_backend(dtype_backend)
+
+    # The handle stays open for the whole read; the table is fully loaded
+    # before the ``with`` block exits.
+    with get_handle(path, "rb", is_text=False) as handles:
+        source = handles.handle
+        if is_fsspec_url(path) and filesystem is None:
+            # No filesystem was supplied for an fsspec-style URL: ask pyarrow
+            # to resolve a filesystem and path from the URI itself.
+            pa = import_optional_dependency("pyarrow")
+            pa_fs = import_optional_dependency("pyarrow.fs")
+            try:
+                filesystem, source = pa_fs.FileSystem.from_uri(path)
+            except (TypeError, pa.ArrowInvalid):
+                # Deliberate best-effort: when pyarrow cannot handle the URI,
+                # keep reading from the handle opened above.
+                pass
+
+        pa_table = orc.read_table(
+            source=source, columns=columns, filesystem=filesystem, **kwargs
+        )
+    return arrow_table_to_pandas(pa_table, dtype_backend=dtype_backend)
+
+
+def to_orc(
+    df: DataFrame,
+    path: FilePath | WriteBuffer[bytes] | None = None,
+    *,
+    engine: Literal["pyarrow"] = "pyarrow",
+    index: bool | None = None,
+    engine_kwargs: dict[str, Any] | None = None,
+) -> bytes | None:
+    """
+    Write a DataFrame to the ORC format.
+
+    Parameters
+    ----------
+    df : DataFrame
+        The dataframe to be written to ORC. Raises NotImplementedError
+        if dtype of one or more columns is category, unsigned integers,
+        intervals, periods or sparse.
+    path : str, file-like object or None, default None
+        If a string, it will be used as Root Directory path
+        when writing a partitioned dataset. By file-like object,
+        we refer to objects with a write() method, such as a file handle
+        (e.g. via builtin open function). If path is None,
+        a bytes object is returned.
+    engine : str, default 'pyarrow'
+        ORC library to use.
+    index : bool, optional
+        If ``True``, include the dataframe's index(es) in the file output. If
+        ``False``, they will not be written to the file.
+        If ``None``, similar to ``infer`` the dataframe's index(es)
+        will be saved. However, instead of being saved as values,
+        the RangeIndex will be stored as a range in the metadata so it
+        doesn't require much space and is faster. Other indexes will
+        be included as columns in the file output.
+    engine_kwargs : dict[str, Any] or None, default None
+        Additional keyword arguments passed to :func:`pyarrow.orc.write_table`.
+
+    Returns
+    -------
+    bytes if no path argument is provided else None
+
+    Raises
+    ------
+    NotImplementedError
+        Dtype of one or more columns is category, unsigned integers, interval,
+        period or sparse.
+    ValueError
+        engine is not pyarrow.
+
+    Notes
+    -----
+    * Before using this function you should read the
+      :ref:`user guide about ORC <io.orc>` and
+      :ref:`install optional dependencies <install.warn_orc>`.
+    * This function requires `pyarrow <https://arrow.apache.org/docs/python/>`_
+      library.
+    * For supported dtypes please refer to `supported ORC features in Arrow
+      <https://arrow.apache.org/docs/cpp/orc.html#data-types>`__.
+    * Currently timezones in datetime columns are not preserved when a
+      dataframe is converted into ORC files.
+    """
+    if index is None:
+        index = df.index.names[0] is not None
+    if engine_kwargs is None:
+        engine_kwargs = {}
+
+    # validate index
+    # --------------
+
+    # validate that we have only a default index
+    # raise on anything else as we don't serialize the index
+
+    if not df.index.equals(default_index(len(df))):
+        raise ValueError(
+            "orc does not support serializing a non-default index for the index; "
+            "you can .reset_index() to make the index into column(s)"
+        )
+
+    if df.index.name is not None:
+        raise ValueError("orc does not serialize index meta-data on a default index")
+
+    if engine != "pyarrow":
+        raise ValueError("engine must be 'pyarrow'")
+    pa = import_optional_dependency("pyarrow")
+    orc = import_optional_dependency("pyarrow.orc")
+
+    was_none = path is None
+    if was_none:
+        path = io.BytesIO()
+    assert path is not None  # For mypy
+    with get_handle(path, "wb", is_text=False) as handles:
+        try:
+            orc.write_table(
+                pa.Table.from_pandas(df, preserve_index=index),
+                handles.handle,
+                **engine_kwargs,
+            )
+        except (TypeError, pa.ArrowNotImplementedError) as e:
+            raise NotImplementedError(
+                "The dtype of one or more columns is not supported yet."
+            ) from e
+
+    if was_none:
+        assert isinstance(path, io.BytesIO)  # For mypy
+        return path.getvalue()
+    return None
@@ -0,0 +1,680 @@
+"""parquet compat"""
+
+from __future__ import annotations
+
+import io
+import json
+import os
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Literal,
+)
+from warnings import (
+    catch_warnings,
+    filterwarnings,
+)
+
+from pandas._libs import lib
+from pandas.compat._optional import import_optional_dependency
+from pandas.errors import (
+    AbstractMethodError,
+    Pandas4Warning,
+)
+from pandas.util._decorators import set_module
+from pandas.util._validators import check_dtype_backend
+
+from pandas import (
+    DataFrame,
+    get_option,
+)
+
+from pandas.io._util import arrow_table_to_pandas
+from pandas.io.common import (
+    IOHandles,
+    get_handle,
+    is_fsspec_url,
+    is_url,
+    stringify_path,
+)
+
+if TYPE_CHECKING:
+    from pandas._typing import (
+        DtypeBackend,
+        FilePath,
+        ParquetCompressionOptions,
+        ReadBuffer,
+        StorageOptions,
+        WriteBuffer,
+    )
+
+
+def get_engine(engine: str) -> BaseImpl:
+    """return our implementation"""
+    if engine == "auto":
+        engine = get_option("io.parquet.engine")
+
+    if engine == "auto":
+        # try engines in this order
+        engine_classes = [PyArrowImpl, FastParquetImpl]
+
+        error_msgs = ""
+        for engine_class in engine_classes:
+            try:
+                return engine_class()
+            except ImportError as err:
+                error_msgs += "\n - " + str(err)
+
+        raise ImportError(
+            "Unable to find a usable engine; "
+            "tried using: 'pyarrow', 'fastparquet'.\n"
+            "A suitable version of "
+            "pyarrow or fastparquet is required for parquet "
+            "support.\n"
+            "Trying to import the above resulted in these errors:"
+            f"{error_msgs}"
+        )
+
+    if engine == "pyarrow":
+        return PyArrowImpl()
+    elif engine == "fastparquet":
+        return FastParquetImpl()
+
+    raise ValueError("engine must be one of 'pyarrow', 'fastparquet'")
+
+
+def _get_path_or_handle(
+    path: FilePath | ReadBuffer[bytes] | WriteBuffer[bytes],
+    fs: Any,
+    storage_options: StorageOptions | None = None,
+    mode: str = "rb",
+    is_dir: bool = False,
+) -> tuple[
+    FilePath | ReadBuffer[bytes] | WriteBuffer[bytes], IOHandles[bytes] | None, Any
+]:
+    """File handling for PyArrow."""
+    path_or_handle = stringify_path(path)
+    if fs is not None:
+        pa_fs = import_optional_dependency("pyarrow.fs", errors="ignore")
+        fsspec = import_optional_dependency("fsspec", errors="ignore")
+        if pa_fs is not None and isinstance(fs, pa_fs.FileSystem):
+            if storage_options:
+                raise NotImplementedError(
+                    "storage_options not supported with a pyarrow FileSystem."
+                )
+        elif fsspec is not None and isinstance(fs, fsspec.spec.AbstractFileSystem):
+            pass
+        else:
+            raise ValueError(
+                f"filesystem must be a pyarrow or fsspec FileSystem, "
+                f"not a {type(fs).__name__}"
+            )
+    if is_fsspec_url(path_or_handle) and fs is None:
+        if storage_options is None:
+            pa = import_optional_dependency("pyarrow")
+            pa_fs = import_optional_dependency("pyarrow.fs")
+
+            try:
+                fs, path_or_handle = pa_fs.FileSystem.from_uri(path)
+            except (TypeError, pa.ArrowInvalid):
+                pass
+        if fs is None:
+            fsspec = import_optional_dependency("fsspec")
+            fs, path_or_handle = fsspec.core.url_to_fs(
+                path_or_handle, **(storage_options or {})
+            )
+    elif storage_options and (not is_url(path_or_handle) or mode != "rb"):
+        # can't write to a remote url
+        # without making use of fsspec at the moment
+        raise ValueError("storage_options passed with buffer, or non-supported URL")
+
+    handles = None
+    if (
+        not fs
+        and not is_dir
+        and isinstance(path_or_handle, str)
+        and not os.path.isdir(path_or_handle)
+    ):
+        # use get_handle only when we are very certain that it is not a directory
+        # fsspec resources can also point to directories
+        # this branch is used for example when reading from non-fsspec URLs
+        handles = get_handle(
+            path_or_handle, mode, is_text=False, storage_options=storage_options
+        )
+        fs = None
+        path_or_handle = handles.handle
+    return path_or_handle, handles, fs
+
+
+class BaseImpl:
+    @staticmethod
+    def validate_dataframe(df: DataFrame) -> None:
+        if not isinstance(df, DataFrame):
+            raise ValueError("to_parquet only supports IO with DataFrames")
+
+    def write(self, df: DataFrame, path, compression, **kwargs) -> None:
+        raise AbstractMethodError(self)
+
+    def read(self, path, columns=None, **kwargs) -> DataFrame:
+        raise AbstractMethodError(self)
+
+
+class PyArrowImpl(BaseImpl):
+    """Parquet reader/writer backed by ``pyarrow.parquet``."""
+
+    def __init__(self) -> None:
+        # Go through import_optional_dependency first so that its `extra`
+        # message is attached if pyarrow cannot be used.
+        import_optional_dependency(
+            "pyarrow", extra="pyarrow is required for parquet support."
+        )
+        # Importing the submodule binds the top-level ``pyarrow`` name here;
+        # the methods below reach the submodule as ``self.api.parquet``.
+        import pyarrow.parquet
+
+        # import utils to register the pyarrow extension types
+        import pandas.core.arrays.arrow.extension_types  # pyright: ignore[reportUnusedImport] # noqa: F401
+
+        self.api = pyarrow
+
+    def write(
+        self,
+        df: DataFrame,
+        path: FilePath | WriteBuffer[bytes],
+        compression: ParquetCompressionOptions = "snappy",
+        index: bool | None = None,
+        storage_options: StorageOptions | None = None,
+        partition_cols: list[str] | None = None,
+        filesystem=None,
+        **kwargs,
+    ) -> None:
+        """
+        Convert ``df`` to an Arrow table and write it as parquet.
+
+        With ``partition_cols`` the table is written through
+        ``write_to_dataset``; otherwise a single file is written through
+        ``write_table``. Remaining ``kwargs`` (apart from ``schema``, which
+        is used for the pandas -> Arrow conversion) are forwarded to
+        whichever of the two is called.
+        """
+        self.validate_dataframe(df)
+
+        # `schema` is consumed here so it is not forwarded to the writer.
+        from_pandas_kwargs: dict[str, Any] = {"schema": kwargs.pop("schema", None)}
+        # Only pass preserve_index when the caller chose a value explicitly.
+        if index is not None:
+            from_pandas_kwargs["preserve_index"] = index
+
+        table = self.api.Table.from_pandas(df, **from_pandas_kwargs)
+
+        if df.attrs:
+            # Store DataFrame.attrs as JSON under the "PANDAS_ATTRS" key of
+            # the schema metadata, keeping the metadata already present.
+            df_metadata = {"PANDAS_ATTRS": json.dumps(df.attrs)}
+            existing_metadata = table.schema.metadata
+            merged_metadata = {**existing_metadata, **df_metadata}
+            table = table.replace_schema_metadata(merged_metadata)
+
+        path_or_handle, handles, filesystem = _get_path_or_handle(
+            path,
+            filesystem,
+            storage_options=storage_options,
+            mode="wb",
+            # a partitioned write targets a directory
+            is_dir=partition_cols is not None,
+        )
+        # For a BufferedWriter with a str/bytes name, give pyarrow that name
+        # (decoded if bytes) instead of the writer object itself.
+        if (
+            isinstance(path_or_handle, io.BufferedWriter)
+            and hasattr(path_or_handle, "name")
+            and isinstance(path_or_handle.name, (str, bytes))
+        ):
+            if isinstance(path_or_handle.name, bytes):
+                path_or_handle = path_or_handle.name.decode()
+            else:
+                path_or_handle = path_or_handle.name
+
+        try:
+            if partition_cols is not None:
+                # writes to multiple files under the given path
+                self.api.parquet.write_to_dataset(
+                    table,
+                    path_or_handle,
+                    compression=compression,
+                    partition_cols=partition_cols,
+                    filesystem=filesystem,
+                    **kwargs,
+                )
+            else:
+                # write to single output file
+                self.api.parquet.write_table(
+                    table,
+                    path_or_handle,
+                    compression=compression,
+                    filesystem=filesystem,
+                    **kwargs,
+                )
+        finally:
+            # Close whatever _get_path_or_handle opened, even on failure.
+            if handles is not None:
+                handles.close()
+
+    def read(
+        self,
+        path,
+        columns=None,
+        filters=None,
+        dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
+        storage_options: StorageOptions | None = None,
+        filesystem=None,
+        to_pandas_kwargs: dict[str, Any] | None = None,
+        **kwargs,
+    ) -> DataFrame:
+        """
+        Read parquet data into a DataFrame through ``pyarrow.parquet``.
+
+        The Arrow table is converted with ``arrow_table_to_pandas``. If the
+        schema metadata carries a ``PANDAS_ATTRS`` entry, it is decoded from
+        JSON and assigned to ``DataFrame.attrs``.
+        """
+        # Always forced on; a caller-supplied value is overwritten.
+        kwargs["use_pandas_metadata"] = True
+
+        path_or_handle, handles, filesystem = _get_path_or_handle(
+            path,
+            filesystem,
+            storage_options=storage_options,
+            mode="rb",
+        )
+        try:
+            pa_table = self.api.parquet.read_table(
+                path_or_handle,
+                columns=columns,
+                filesystem=filesystem,
+                filters=filters,
+                **kwargs,
+            )
+            # Suppress the "make_block is deprecated" Pandas4Warning for the
+            # duration of the conversion only.
+            with catch_warnings():
+                filterwarnings(
+                    "ignore",
+                    "make_block is deprecated",
+                    Pandas4Warning,
+                )
+                result = arrow_table_to_pandas(
+                    pa_table,
+                    dtype_backend=dtype_backend,
+                    to_pandas_kwargs=to_pandas_kwargs,
+                )
+
+            # Restore DataFrame.attrs; the entry is looked up under the bytes
+            # key b"PANDAS_ATTRS".
+            if pa_table.schema.metadata:
+                if b"PANDAS_ATTRS" in pa_table.schema.metadata:
+                    df_metadata = pa_table.schema.metadata[b"PANDAS_ATTRS"]
+                    result.attrs = json.loads(df_metadata)
+            return result
+        finally:
+            # Close whatever _get_path_or_handle opened, even on failure.
+            if handles is not None:
+                handles.close()
+
+
+class FastParquetImpl(BaseImpl):
+    """
+    Parquet reader/writer that delegates to the optional ``fastparquet`` package.
+    """
+
+    def __init__(self) -> None:
+        # fastparquet depends on pandas itself, so the import has to be
+        # deferred until an engine instance is actually created
+        self.api = import_optional_dependency(
+            "fastparquet", extra="fastparquet is required for parquet support."
+        )
+
+    def write(
+        self,
+        df: DataFrame,
+        path,
+        compression: Literal["snappy", "gzip", "brotli"] | None = "snappy",
+        index=None,
+        partition_cols=None,
+        storage_options: StorageOptions | None = None,
+        filesystem=None,
+        **kwargs,
+    ) -> None:
+        """
+        Write ``df`` to ``path`` via ``fastparquet.write``.
+
+        Parameters
+        ----------
+        df : DataFrame
+            Frame to serialize; checked with ``validate_dataframe`` first.
+        path : str or path object
+            Destination. It is passed through ``stringify_path``; fsspec URLs
+            are opened in ``"wb"`` mode using ``storage_options``.
+        compression : {"snappy", "gzip", "brotli"} or None, default "snappy"
+            Forwarded unchanged to the fastparquet writer.
+        index : bool or None, default None
+            Forwarded to the fastparquet writer as ``write_index``.
+        partition_cols : list or None, default None
+            Forwarded to the fastparquet writer as ``partition_on``. When set,
+            ``file_scheme="hive"`` is requested as well.
+        storage_options : dict or None, default None
+            Options for ``fsspec.open``; only accepted for fsspec URLs.
+        filesystem : None
+            Not supported by this engine; must be left as None.
+        **kwargs
+            Extra keyword arguments for the fastparquet writer.
+            ``partition_on`` is accepted as an alias of ``partition_cols``.
+
+        Raises
+        ------
+        ValueError
+            If both ``partition_on`` and ``partition_cols`` are given, or if
+            ``storage_options`` is passed for a non-fsspec path.
+        NotImplementedError
+            If ``filesystem`` is not None.
+        """
+        self.validate_dataframe(df)
+
+        # "partition_on" is fastparquet's own spelling of partition_cols
+        if "partition_on" in kwargs:
+            if partition_cols is not None:
+                raise ValueError(
+                    "Cannot use both partition_on and "
+                    "partition_cols. Use partition_cols for partitioning data"
+                )
+            partition_cols = kwargs.pop("partition_on")
+
+        if partition_cols is not None:
+            kwargs["file_scheme"] = "hive"
+
+        if filesystem is not None:
+            raise NotImplementedError(
+                "filesystem is not implemented for the fastparquet engine."
+            )
+
+        # get_handle cannot be used here: write() does not accept file buffers
+        path = stringify_path(path)
+        if is_fsspec_url(path):
+            fsspec = import_optional_dependency("fsspec")
+
+            def open_with(path, _):
+                # files on an fsspec filesystem must be opened in 'wb' mode,
+                # whatever mode the writer asks for
+                return fsspec.open(path, "wb", **(storage_options or {})).open()
+
+            kwargs["open_with"] = open_with
+        elif storage_options:
+            raise ValueError(
+                "storage_options passed with file object or non-fsspec file path"
+            )
+
+        # record=True captures, rather than emits, warnings raised while writing
+        with catch_warnings(record=True):
+            self.api.write(
+                path,
+                df,
+                compression=compression,
+                write_index=index,
+                partition_on=partition_cols,
+                **kwargs,
+            )
+
+    def read(
+        self,
+        path,
+        columns=None,
+        filters=None,
+        storage_options: StorageOptions | None = None,
+        filesystem=None,
+        to_pandas_kwargs: dict | None = None,
+        **kwargs,
+    ) -> DataFrame:
+        """
+        Read a parquet source into a DataFrame via ``fastparquet.ParquetFile``.
+
+        Parameters
+        ----------
+        path : str or path object
+            Source location. fsspec URLs are resolved to an fsspec filesystem;
+            any other string that is not a local directory is opened with
+            ``get_handle``.
+        columns : list or None, default None
+            Forwarded to ``ParquetFile.to_pandas``.
+        filters : list or None, default None
+            Forwarded to ``ParquetFile.to_pandas``.
+        storage_options : dict or None, default None
+            Passed to ``fsspec.open`` or ``get_handle``, depending on ``path``.
+        filesystem : None
+            Not supported by this engine; must be left as None.
+        to_pandas_kwargs : None
+            Not supported by this engine; must be left as None.
+        **kwargs
+            Extra keyword arguments for ``ParquetFile.to_pandas``. A
+            ``dtype_backend`` entry is removed and must be ``lib.no_default``.
+
+        Returns
+        -------
+        DataFrame
+
+        Raises
+        ------
+        ValueError
+            If ``dtype_backend`` is specified.
+        NotImplementedError
+            If ``filesystem`` or ``to_pandas_kwargs`` is not None.
+        """
+        # We are disabling nullable dtypes for fastparquet pending discussion
+        parquet_kwargs: dict[str, Any] = {"pandas_nulls": False}
+
+        if kwargs.pop("dtype_backend", lib.no_default) is not lib.no_default:
+            raise ValueError(
+                "The 'dtype_backend' argument is not supported for the "
+                "fastparquet engine"
+            )
+        if filesystem is not None:
+            raise NotImplementedError(
+                "filesystem is not implemented for the fastparquet engine."
+            )
+        if to_pandas_kwargs is not None:
+            raise NotImplementedError(
+                "to_pandas_kwargs is not implemented for the fastparquet engine."
+            )
+
+        path = stringify_path(path)
+        handles = None
+        if is_fsspec_url(path):
+            fsspec = import_optional_dependency("fsspec")
+
+            opened = fsspec.open(path, "rb", **(storage_options or {}))
+            parquet_kwargs["fs"] = opened.fs
+        elif isinstance(path, str) and not os.path.isdir(path):
+            # Only go through get_handle when the path is certainly not a
+            # directory (fsspec resources may point to directories too);
+            # this covers, for example, non-fsspec URLs.
+            handles = get_handle(
+                path, "rb", is_text=False, storage_options=storage_options
+            )
+            path = handles.handle
+
+        try:
+            parquet_file = self.api.ParquetFile(path, **parquet_kwargs)
+            with catch_warnings():
+                filterwarnings(
+                    "ignore",
+                    "make_block is deprecated",
+                    Pandas4Warning,
+                )
+                return parquet_file.to_pandas(
+                    columns=columns, filters=filters, **kwargs
+                )
+        finally:
+            # release the handle opened above, if any, even when reading fails
+            if handles is not None:
+                handles.close()
+
+
+def to_parquet(
+    df: DataFrame,
+    path: FilePath | WriteBuffer[bytes] | None = None,
+    engine: str = "auto",
+    compression: ParquetCompressionOptions = "snappy",
+    index: bool | None = None,
+    storage_options: StorageOptions | None = None,
+    partition_cols: list[str] | None = None,
+    filesystem: Any = None,
+    **kwargs,
+) -> bytes | None:
+    """
+    Write a DataFrame to the parquet format.
+
+    Parameters
+    ----------
+    df : DataFrame
+        The frame to serialize.
+    path : str, path object, file-like object, or None, default None
+        String, path object (implementing ``os.PathLike[str]``), or file-like
+        object implementing a binary ``write()`` function. If None, the data
+        is written to an in-memory buffer and returned as bytes. If a string,
+        it is used as the root directory path when writing a partitioned
+        dataset. The fastparquet engine does not accept file-like objects.
+    engine : {'auto', 'pyarrow', 'fastparquet'}, default 'auto'
+        Parquet library to use. If 'auto', then the option
+        ``io.parquet.engine`` is used. The default ``io.parquet.engine``
+        behavior is to try 'pyarrow', falling back to 'fastparquet' if
+        'pyarrow' is unavailable.
+
+        When using the ``'pyarrow'`` engine and no storage options are provided
+        and a filesystem is implemented by both ``pyarrow.fs`` and ``fsspec``
+        (e.g. "s3://"), then the ``pyarrow.fs`` filesystem is attempted first.
+        Use the filesystem keyword with an instantiated fsspec filesystem
+        if you wish to use its implementation.
+    compression : {'snappy', 'gzip', 'brotli', 'lz4', 'zstd', None}, default 'snappy'
+        Name of the compression to use. Use ``None`` for no compression.
+    index : bool, default None
+        If ``True``, include the dataframe's index(es) in the file output. If
+        ``False``, they will not be written to the file.
+        If ``None``, similar to ``True`` the dataframe's index(es)
+        will be saved. However, instead of being saved as values,
+        the RangeIndex will be stored as a range in the metadata so it
+        doesn't require much space and is faster. Other indexes will
+        be included as columns in the file output.
+    storage_options : dict, optional
+        Extra options that make sense for a particular storage connection, e.g.
+        host, port, username, password, etc. For HTTP(S) URLs the key-value
+        pairs are forwarded to ``urllib.request.Request`` as header options.
+        For other URLs (e.g. starting with "s3://", and "gcs://") the
+        key-value pairs are forwarded to ``fsspec.open``. Please see ``fsspec``
+        and ``urllib`` for more details, and for more examples on storage
+        options refer `here <https://pandas.pydata.org/docs/user_guide/io.html?
+        highlight=storage_options#reading-writing-remote-files>`_.
+    partition_cols : str or list, optional, default None
+        Column names by which to partition the dataset.
+        Columns are partitioned in the order they are given. A single string
+        is treated as a one-element list.
+        Must be None if path is not a string.
+    filesystem : fsspec or pyarrow filesystem, default None
+        Filesystem object to use when writing the parquet file. Only implemented
+        for ``engine="pyarrow"``.
+
+        .. versionadded:: 2.1.0
+
+    **kwargs
+        Additional keyword arguments passed to the engine:
+
+        * For ``engine="pyarrow"``: passed to :func:`pyarrow.parquet.write_table`
+          or :func:`pyarrow.parquet.write_to_dataset` (when using partition_cols)
+        * For ``engine="fastparquet"``: passed to :func:`fastparquet.write`
+
+    Returns
+    -------
+    bytes if no path argument is provided else None
+    """
+    # a lone column name is shorthand for a one-element list
+    if isinstance(partition_cols, str):
+        partition_cols = [partition_cols]
+    writer = get_engine(engine)
+
+    # without a destination, serialize into memory and hand back the bytes
+    in_memory = path is None
+    target: FilePath | WriteBuffer[bytes]
+    if in_memory:
+        target = io.BytesIO()
+    else:
+        target = path
+
+    writer.write(
+        df,
+        target,
+        compression=compression,
+        index=index,
+        partition_cols=partition_cols,
+        storage_options=storage_options,
+        filesystem=filesystem,
+        **kwargs,
+    )
+
+    if not in_memory:
+        return None
+
+    assert isinstance(target, io.BytesIO)
+    return target.getvalue()
+
+
+@set_module("pandas")
+def read_parquet(
+    path: FilePath | ReadBuffer[bytes],
+    engine: str = "auto",
+    columns: list[str] | None = None,
+    storage_options: StorageOptions | None = None,
+    dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
+    filesystem: Any = None,
+    filters: list[tuple] | list[list[tuple]] | None = None,
+    to_pandas_kwargs: dict | None = None,
+    **kwargs,
+) -> DataFrame:
+    """
+    Load a parquet object from the file path, returning a DataFrame.
+
+    The engine is selected from ``engine``, ``dtype_backend`` is validated,
+    and the read itself is delegated to that engine.
+
+    Parameters
+    ----------
+    path : str, path object or file-like object
+        String, path object (implementing ``os.PathLike[str]``), or file-like
+        object implementing a binary ``read()`` function.
+        The string could be a URL. Valid URL schemes include http, ftp, s3,
+        gs, and file. For file URLs, a host is expected. A local file could be:
+        ``file://localhost/path/to/table.parquet``.
+        A file URL can also be a path to a directory that contains multiple
+        partitioned parquet files. Both pyarrow and fastparquet support
+        paths to directories as well as file URLs. A directory path could be:
+        ``file://localhost/path/to/tables`` or ``s3://bucket/partition_dir``.
+    engine : {'auto', 'pyarrow', 'fastparquet'}, default 'auto'
+        Parquet library to use. If 'auto', then the option
+        ``io.parquet.engine`` is used. The default ``io.parquet.engine``
+        behavior is to try 'pyarrow', falling back to 'fastparquet' if
+        'pyarrow' is unavailable.
+
+        When using the ``'pyarrow'`` engine and no storage options are provided
+        and a filesystem is implemented by both ``pyarrow.fs`` and ``fsspec``
+        (e.g. "s3://"), then the ``pyarrow.fs`` filesystem is attempted first.
+        Use the filesystem keyword with an instantiated fsspec filesystem
+        if you wish to use its implementation.
+    columns : list, default=None
+        If not None, only these columns will be read from the file.
+    storage_options : dict, optional
+        Extra options that make sense for a particular storage connection, e.g.
+        host, port, username, password, etc. For HTTP(S) URLs the key-value
+        pairs are forwarded to ``urllib.request.Request`` as header options.
+        For other URLs (e.g. starting with "s3://", and "gcs://") the
+        key-value pairs are forwarded to ``fsspec.open``. Please see ``fsspec``
+        and ``urllib`` for more details, and for more examples on storage
+        options refer `here <https://pandas.pydata.org/docs/user_guide/io.html?
+        highlight=storage_options#reading-writing-remote-files>`_.
+    dtype_backend : {'numpy_nullable', 'pyarrow'}
+        Back-end data type applied to the resultant :class:`DataFrame`
+        (still experimental). If not specified, the default behavior
+        is to not use nullable data types. If specified, the behavior
+        is as follows:
+
+        * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame`
+        * ``"pyarrow"``: returns pyarrow-backed nullable
+          :class:`ArrowDtype` :class:`DataFrame`
+
+        .. versionadded:: 2.0
+
+    filesystem : fsspec or pyarrow filesystem, default None
+        Filesystem object to use when reading the parquet file. Only implemented
+        for ``engine="pyarrow"``.
+
+        .. versionadded:: 2.1.0
+
+    filters : List[Tuple] or List[List[Tuple]], default None
+        To filter out data.
+        Filter syntax: [[(column, op, val), ...],...]
+        where op is [==, =, >, >=, <, <=, !=, in, not in]
+        The innermost tuples are transposed into a set of filters applied
+        through an `AND` operation.
+        The outer list combines these sets of filters through an `OR`
+        operation.
+        A single list of tuples can also be used, meaning that no `OR`
+        operation between set of filters is to be conducted.
+
+        Using this argument will NOT result in row-wise filtering of the final
+        partitions unless ``engine="pyarrow"`` is also specified.  For
+        other engines, filtering is only performed at the partition level, that is,
+        to prevent the loading of some row-groups and/or files.
+
+        .. versionadded:: 2.1.0
+
+    to_pandas_kwargs : dict | None, default None
+        Keyword arguments to pass through to :func:`pyarrow.Table.to_pandas`
+        when ``engine="pyarrow"``.
+
+        .. versionadded:: 3.0.0
+
+    **kwargs
+        Additional keyword arguments passed to the engine:
+
+        * For ``engine="pyarrow"``: passed to :func:`pyarrow.parquet.read_table`
+        * For ``engine="fastparquet"``: passed to
+          :meth:`fastparquet.ParquetFile.to_pandas`
+
+    Returns
+    -------
+    DataFrame
+        DataFrame based on parquet file.
+
+    See Also
+    --------
+    DataFrame.to_parquet : Create a parquet object that serializes a DataFrame.
+
+    Examples
+    --------
+    >>> original_df = pd.DataFrame({"foo": range(5), "bar": range(5, 10)})
+    >>> original_df
+       foo  bar
+    0    0    5
+    1    1    6
+    2    2    7
+    3    3    8
+    4    4    9
+    >>> df_parquet_bytes = original_df.to_parquet()
+    >>> from io import BytesIO
+    >>> restored_df = pd.read_parquet(BytesIO(df_parquet_bytes))
+    >>> restored_df
+       foo  bar
+    0    0    5
+    1    1    6
+    2    2    7
+    3    3    8
+    4    4    9
+    >>> restored_df.equals(original_df)
+    True
+    >>> restored_bar = pd.read_parquet(BytesIO(df_parquet_bytes), columns=["bar"])
+    >>> restored_bar
+        bar
+    0    5
+    1    6
+    2    7
+    3    8
+    4    9
+    >>> restored_bar.equals(original_df[["bar"]])
+    True
+
+    The `filters` argument restricts which rows are loaded. In the following
+    example it is handled by the `pyarrow` engine, which filters row-wise.
+
+    Since `pyarrow` is the default engine, we can omit the `engine` argument.
+    Note that the `filters` argument is implemented by the `pyarrow` engine,
+    which can benefit from multithreading and also potentially be more
+    economical in terms of memory.
+
+    >>> sel = [("foo", ">", 2)]
+    >>> restored_part = pd.read_parquet(BytesIO(df_parquet_bytes), filters=sel)
+    >>> restored_part
+        foo  bar
+    0    3    8
+    1    4    9
+    """
+
+    # resolve the engine first, then validate dtype_backend
+    reader = get_engine(engine)
+    check_dtype_backend(dtype_backend)
+
+    frame = reader.read(
+        path,
+        columns=columns,
+        filters=filters,
+        storage_options=storage_options,
+        dtype_backend=dtype_backend,
+        filesystem=filesystem,
+        to_pandas_kwargs=to_pandas_kwargs,
+        **kwargs,
+    )
+    return frame
@@ -0,0 +1,9 @@
+from pandas.io.parsers.readers import (
+    TextFileReader,
+    TextParser,
+    read_csv,
+    read_fwf,
+    read_table,
+)
+
+__all__ = ["TextFileReader", "TextParser", "read_csv", "read_fwf", "read_table"]
@@ -0,0 +1,328 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+import warnings
+
+from pandas._libs import lib
+from pandas.compat._optional import import_optional_dependency
+from pandas.errors import (
+    Pandas4Warning,
+    ParserError,
+    ParserWarning,
+)
+from pandas.util._exceptions import (
+    find_stack_level,
+)
+
+from pandas.core.dtypes.common import (
+    pandas_dtype,
+)
+from pandas.core.dtypes.inference import is_integer
+
+from pandas.io._util import arrow_table_to_pandas
+from pandas.io.parsers.base_parser import ParserBase
+
+if TYPE_CHECKING:
+    import pyarrow as pa
+
+    from pandas._typing import ReadBuffer
+
+    from pandas import DataFrame
+
+
+class ArrowParserWrapper(ParserBase):
+    """
+    Wrapper for the pyarrow engine for read_csv()
+    """
+
+    def __init__(self, src: ReadBuffer[bytes], **kwds) -> None:
+        """
+        Store the source buffer and the read_csv keywords, then validate them.
+
+        Parameters
+        ----------
+        src : ReadBuffer[bytes]
+            Binary source handed to ``pyarrow.csv.read_csv`` in :meth:`read`.
+        **kwds
+            Parser keywords; also forwarded to ``ParserBase.__init__``.
+        """
+        super().__init__(kwds)
+        self.kwds = kwds
+        self.src = src
+
+        self._parse_kwds()
+
+    def _parse_kwds(self) -> None:
+        """
+        Validates keywords before passing to pyarrow.
+
+        Sets ``self.encoding`` (defaulting to ``"utf-8"``) and ``self.na_values``.
+
+        Raises
+        ------
+        ValueError
+            If ``na_values`` is a dict.
+        """
+        encoding: str | None = self.kwds.get("encoding")
+        self.encoding = "utf-8" if encoding is None else encoding
+
+        na_values = self.kwds["na_values"]
+        if isinstance(na_values, dict):
+            raise ValueError(
+                "The pyarrow engine doesn't support passing a dict for na_values"
+            )
+        self.na_values = list(na_values)
+
+    def _get_pyarrow_options(self) -> None:
+        """
+        Rename some arguments to pass to pyarrow
+
+        Populates ``self.parse_options``, ``self.convert_options`` and
+        ``self.read_options``, the keyword dicts later used to build the
+        pyarrow ``ParseOptions``, ``ConvertOptions`` and ``ReadOptions``.
+        """
+        mapping = {
+            "usecols": "include_columns",
+            "na_values": "null_values",
+            "escapechar": "escape_char",
+            "skip_blank_lines": "ignore_empty_lines",
+            "decimal": "decimal_point",
+            "quotechar": "quote_char",
+        }
+        # move every non-None pandas keyword over to its pyarrow name
+        for pandas_name, pyarrow_name in mapping.items():
+            if pandas_name in self.kwds and self.kwds.get(pandas_name) is not None:
+                self.kwds[pyarrow_name] = self.kwds.pop(pandas_name)
+
+        # Date format handling
+        # If we get a string, we need to convert it into a list for pyarrow
+        # If we get a dict, we want to parse those separately
+        date_format = self.date_format
+        if isinstance(date_format, str):
+            date_format = [date_format]
+        else:
+            # In case of dict, we don't want to propagate through, so
+            # just set to pyarrow default of None
+
+            # Ideally, in future we disable pyarrow dtype inference (read in as string)
+            # to prevent misreads.
+            date_format = None
+        self.kwds["timestamp_parsers"] = date_format
+
+        self.parse_options = {
+            option_name: option_value
+            for option_name, option_value in self.kwds.items()
+            if option_value is not None
+            and option_name
+            in ("delimiter", "quote_char", "escape_char", "ignore_empty_lines")
+        }
+
+        # translate on_bad_lines into a pyarrow invalid_row_handler
+        on_bad_lines = self.kwds.get("on_bad_lines")
+        if on_bad_lines is not None:
+            if callable(on_bad_lines):
+                self.parse_options["invalid_row_handler"] = on_bad_lines
+            elif on_bad_lines == ParserBase.BadLineHandleMethod.ERROR:
+                self.parse_options["invalid_row_handler"] = (
+                    None  # PyArrow raises an exception by default
+                )
+            elif on_bad_lines == ParserBase.BadLineHandleMethod.WARN:
+
+                def handle_warning(invalid_row) -> str:
+                    # warn about the malformed row, then tell pyarrow to skip it
+                    warnings.warn(
+                        f"Expected {invalid_row.expected_columns} columns, but found "
+                        f"{invalid_row.actual_columns}: {invalid_row.text}",
+                        ParserWarning,
+                        stacklevel=find_stack_level(),
+                    )
+                    return "skip"
+
+                self.parse_options["invalid_row_handler"] = handle_warning
+            elif on_bad_lines == ParserBase.BadLineHandleMethod.SKIP:
+                self.parse_options["invalid_row_handler"] = lambda _: "skip"
+
+        self.convert_options = {
+            option_name: option_value
+            for option_name, option_value in self.kwds.items()
+            if option_value is not None
+            and option_name
+            in (
+                "include_columns",
+                "null_values",
+                "true_values",
+                "false_values",
+                "decimal_point",
+                "timestamp_parsers",
+            )
+        }
+        # strings may only be null when "" is one of the null markers
+        self.convert_options["strings_can_be_null"] = "" in self.kwds["null_values"]
+        # autogenerated column names are prefixed with 'f' in pyarrow.csv
+        if self.header is None and "include_columns" in self.convert_options:
+            self.convert_options["include_columns"] = [
+                f"f{n}" for n in self.convert_options["include_columns"]
+            ]
+
+        self.read_options = {
+            "autogenerate_column_names": self.header is None,
+            "skip_rows": self.header
+            if self.header is not None
+            else self.kwds["skiprows"],
+            "encoding": self.encoding,
+        }
+
+    def _get_convert_options(self):
+        """
+        Build ``pyarrow.csv.ConvertOptions`` from ``self.convert_options``.
+
+        Returns
+        -------
+        pyarrow.csv.ConvertOptions
+
+        Raises
+        ------
+        ValueError
+            If pyarrow rejects the options and ``include_columns`` fails
+            :meth:`_validate_usecols`.
+        TypeError
+            If pyarrow rejects the options; the message is specific when the
+            null values are not all strings, otherwise pyarrow's own error
+            is re-raised.
+        """
+        pyarrow_csv = import_optional_dependency("pyarrow.csv")
+
+        try:
+            convert_options = pyarrow_csv.ConvertOptions(**self.convert_options)
+        except TypeError as err:
+            # try to replace pyarrow's generic TypeError with a clearer message
+            include = self.convert_options.get("include_columns", None)
+            if include is not None:
+                self._validate_usecols(include)
+
+            nulls = self.convert_options.get("null_values", set())
+            if not lib.is_list_like(nulls) or not all(
+                isinstance(x, str) for x in nulls
+            ):
+                raise TypeError(
+                    "The 'pyarrow' engine requires all na_values to be strings"
+                ) from err
+
+            raise
+
+        return convert_options
+
+    def _adjust_column_names(self, table: pa.Table) -> bool:
+        """
+        Reconcile ``self.names`` with the table's column count when headerless.
+
+        Parameters
+        ----------
+        table : pa.Table
+            Table returned by pyarrow.
+
+        Returns
+        -------
+        bool
+            False if ``self.names`` had to be padded, True otherwise.
+        """
+        num_cols = len(table.columns)
+        multi_index_named = True
+        if self.header is None:
+            if self.names is None:
+                self.names = range(num_cols)
+            if len(self.names) != num_cols:
+                # usecols is passed through to pyarrow, we only handle index col here
+                # The only way self.names is not the same length as number of cols is
+                # if we have int index_col. We should just pad the names(they will get
+                # removed anyways) to expected length then.
+                columns_prefix = [str(x) for x in range(num_cols - len(self.names))]
+                self.names = columns_prefix + self.names
+                multi_index_named = False
+        return multi_index_named
+
+    def _finalize_index(self, frame: DataFrame, multi_index_named: bool) -> DataFrame:
+        """
+        Move the ``index_col`` columns of ``frame`` into its index.
+
+        Integer entries of ``index_col`` are resolved to column labels by
+        position. If ``self.dtype`` is a dict with an entry for an index
+        column (keyed by the ``index_col`` entry or by the resolved label),
+        the column is cast first and the entry is removed from ``self.dtype``.
+
+        Parameters
+        ----------
+        frame : DataFrame
+            Frame to modify; the index is set in place.
+        multi_index_named : bool
+            Value returned by :meth:`_adjust_column_names`. When False and
+            there is no header, the index names are cleared.
+
+        Returns
+        -------
+        DataFrame
+
+        Raises
+        ------
+        ValueError
+            If a non-integer ``index_col`` entry is not a column of ``frame``.
+        """
+        if self.index_col is not None:
+            index_to_set = self.index_col.copy()
+            for i, item in enumerate(self.index_col):
+                if is_integer(item):
+                    index_to_set[i] = frame.columns[item]
+                # String case
+                elif item not in frame.columns:
+                    raise ValueError(f"Index {item} invalid")
+
+                # Process dtype for index_col and drop from dtypes.
+                # Only a dict has per-column entries; a scalar dtype has
+                # nothing to look up here (same check as _finalize_dtype).
+                if isinstance(self.dtype, dict):
+                    # Prefer the entry keyed exactly as given in index_col,
+                    # else fall back to the resolved column label. Positional
+                    # lookup with ``item`` is not safe here: it may be a name.
+                    key = (
+                        item if self.dtype.get(item) is not None else index_to_set[i]
+                    )
+                    new_dtype = self.dtype.get(key)
+                    if new_dtype is not None:
+                        frame[key] = frame[key].astype(new_dtype)
+                        del self.dtype[key]
+
+            frame.set_index(index_to_set, drop=True, inplace=True)
+            # Clear names if headerless and no name given
+            if self.header is None and not multi_index_named:
+                frame.index.names = [None] * len(frame.index.names)
+
+        return frame
+
+    def _finalize_dtype(self, frame: DataFrame) -> DataFrame:
+        """
+        Cast ``frame`` to the requested ``dtype``, if any.
+
+        Parameters
+        ----------
+        frame : DataFrame
+
+        Returns
+        -------
+        DataFrame
+
+        Raises
+        ------
+        ValueError
+            If the cast raises ``TypeError`` (re-raised for API consistency).
+        """
+        if self.dtype is not None:
+            # Ignore non-existent columns from dtype mapping
+            # like other parsers do
+            if isinstance(self.dtype, dict):
+                self.dtype = {
+                    k: pandas_dtype(v)
+                    for k, v in self.dtype.items()
+                    if k in frame.columns
+                }
+            else:
+                self.dtype = pandas_dtype(self.dtype)
+            try:
+                frame = frame.astype(self.dtype)
+            except TypeError as err:
+                # GH#44901 reraise to keep api consistent
+                raise ValueError(str(err)) from err
+        return frame
+
+    def _finalize_pandas_output(
+        self, frame: DataFrame, multi_index_named: bool
+    ) -> DataFrame:
+        """
+        Processes data read in based on kwargs.
+
+        Date conversion runs first, then the index is set, then dtypes
+        are applied.
+
+        Parameters
+        ----------
+        frame : DataFrame
+            The DataFrame to process.
+        multi_index_named : bool
+            Value returned by :meth:`_adjust_column_names`.
+
+        Returns
+        -------
+        DataFrame
+            The processed DataFrame.
+        """
+        frame = self._do_date_conversions(frame.columns, frame)
+        frame = self._finalize_index(frame, multi_index_named)
+        frame = self._finalize_dtype(frame)
+        return frame
+
+    def _validate_usecols(self, usecols) -> None:
+        """
+        Reject ``usecols`` values the pyarrow engine cannot handle.
+
+        Raises
+        ------
+        ValueError
+            If ``usecols`` is list-like with a non-string element, or callable.
+        """
+        if lib.is_list_like(usecols) and not all(isinstance(x, str) for x in usecols):
+            raise ValueError(
+                "The pyarrow engine does not allow 'usecols' to be integer "
+                "column positions. Pass a list of string column names instead."
+            )
+        elif callable(usecols):
+            raise ValueError(
+                "The pyarrow engine does not allow 'usecols' to be a callable."
+            )
+
+    def read(self) -> DataFrame:
+        """
+        Reads the contents of a CSV file into a DataFrame and
+        processes it according to the kwargs passed in the
+        constructor.
+
+        Returns
+        -------
+        DataFrame
+            The DataFrame created from the CSV file.
+
+        Raises
+        ------
+        ParserError
+            If pyarrow raises ``ArrowInvalid`` while reading.
+        """
+        pa = import_optional_dependency("pyarrow")
+        pyarrow_csv = import_optional_dependency("pyarrow.csv")
+        self._get_pyarrow_options()
+        convert_options = self._get_convert_options()
+
+        try:
+            table = pyarrow_csv.read_csv(
+                self.src,
+                read_options=pyarrow_csv.ReadOptions(**self.read_options),
+                parse_options=pyarrow_csv.ParseOptions(**self.parse_options),
+                convert_options=convert_options,
+            )
+        except pa.ArrowInvalid as e:
+            raise ParserError(e) from e
+
+        dtype_backend = self.kwds["dtype_backend"]
+
+        # Convert all pa.null() cols -> float64 (non nullable)
+        # else Int64 (nullable case, see below)
+        if dtype_backend is lib.no_default:
+            new_schema = table.schema
+            new_type = pa.float64()
+            for i, arrow_type in enumerate(table.schema.types):
+                if pa.types.is_null(arrow_type):
+                    new_schema = new_schema.set(
+                        i, new_schema.field(i).with_type(new_type)
+                    )
+
+            table = table.cast(new_schema)
+
+        multi_index_named = self._adjust_column_names(table)
+
+        with warnings.catch_warnings():
+            warnings.filterwarnings(
+                "ignore",
+                "make_block is deprecated",
+                Pandas4Warning,
+            )
+            frame = arrow_table_to_pandas(
+                table,
+                dtype_backend=dtype_backend,
+                null_to_int64=True,
+                dtype=self.dtype,
+                names=self.names,
+            )
+
+        # headerless input: replace pyarrow's autogenerated column names
+        if self.header is None:
+            frame.columns = self.names
+
+        return self._finalize_pandas_output(frame, multi_index_named)
@@ -0,0 +1,997 @@
+from __future__ import annotations
+
+from collections import defaultdict
+from copy import copy
+import csv
+from enum import Enum
+import itertools
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    cast,
+    final,
+    overload,
+)
+import warnings
+
+import numpy as np
+
+from pandas._libs import (
+    lib,
+    parsers,
+)
+import pandas._libs.ops as libops
+from pandas._libs.parsers import STR_NA_VALUES
+from pandas.compat._optional import import_optional_dependency
+from pandas.errors import (
+    ParserError,
+    ParserWarning,
+)
+from pandas.util._exceptions import find_stack_level
+
+from pandas.core.dtypes.common import (
+    is_bool_dtype,
+    is_dict_like,
+    is_float_dtype,
+    is_integer,
+    is_integer_dtype,
+    is_list_like,
+    is_object_dtype,
+    is_string_dtype,
+)
+from pandas.core.dtypes.missing import isna
+
+from pandas import (
+    DataFrame,
+    DatetimeIndex,
+    StringDtype,
+)
+from pandas.core import algorithms
+from pandas.core.arrays import (
+    ArrowExtensionArray,
+    BaseMaskedArray,
+    BooleanArray,
+    FloatingArray,
+    IntegerArray,
+)
+from pandas.core.indexes.api import (
+    Index,
+    MultiIndex,
+    default_index,
+    ensure_index_from_sequences,
+)
+from pandas.core.series import Series
+from pandas.core.tools import datetimes as tools
+
+from pandas.io.common import is_potential_multi_index
+
+if TYPE_CHECKING:
+    from collections.abc import (
+        Callable,
+        Iterable,
+        Mapping,
+        Sequence,
+    )
+
+    from pandas._typing import (
+        ArrayLike,
+        DtypeArg,
+        Hashable,
+        HashableT,
+        Scalar,
+        SequenceT,
+    )
+
+
+class ParserBase:
+    class BadLineHandleMethod(Enum):
+        # Closed set of policies selectable through the ``on_bad_lines`` option;
+        # ``ERROR`` is the fallback used when the option is absent.
+        # NOTE(review): the names suggest raise / warn / silently drop for a
+        # malformed line -- the code that acts on them is not in this class.
+        ERROR = 0
+        WARN = 1
+        SKIP = 2
+
+    _implicit_index: bool
+    _first_chunk: bool
+    keep_default_na: bool
+    dayfirst: bool
+    cache_dates: bool
+    usecols_dtype: str | None
+
+    def __init__(self, kwds) -> None:
+        """
+        Pull the options shared by all parser engines out of ``kwds``.
+
+        Parameters
+        ----------
+        kwds : dict-like
+            Parser options. The date-related entries (``parse_dates``,
+            ``date_parser``, ``date_format``, ``dayfirst``, ``cache_dates``)
+            are popped, so ``kwds`` is mutated; every other entry is only
+            read. ``usecols`` is accessed by subscript and must be present.
+
+        Raises
+        ------
+        TypeError
+            If ``parse_dates`` is not None, a boolean or a list.
+        ValueError
+            If ``header`` is list-like and a truthy ``usecols`` or ``names``
+            is also given, or ``index_col`` is not an integer / list-like of
+            integers; also raised by ``_validate_usecols_arg`` for an
+            invalid ``usecols``.
+        """
+        self._implicit_index = False
+
+        self.names = kwds.get("names")
+        self.orig_names: Sequence[Hashable] | None = None
+
+        self.index_col = kwds.get("index_col", None)
+        self.unnamed_cols: set = set()
+        self.index_names: Sequence[Hashable] | None = None
+        self.col_names: Sequence[Hashable] | None = None
+
+        # parse_dates is normalised to a plain bool (None counts as False)
+        # or kept as a list; any other type is rejected.
+        parse_dates = kwds.pop("parse_dates", False)
+        if parse_dates is None or lib.is_bool(parse_dates):
+            parse_dates = bool(parse_dates)
+        elif not isinstance(parse_dates, list):
+            raise TypeError(
+                "Only booleans and lists are accepted for the 'parse_dates' parameter"
+            )
+        self.parse_dates: bool | list = parse_dates
+        self.date_parser = kwds.pop("date_parser", lib.no_default)
+        self.date_format = kwds.pop("date_format", None)
+        self.dayfirst = kwds.pop("dayfirst", False)
+
+        self.na_values = kwds.get("na_values")
+        self.na_fvalues = kwds.get("na_fvalues")
+        self.na_filter = kwds.get("na_filter", False)
+        self.keep_default_na = kwds.get("keep_default_na", True)
+
+        # shallow copy, so rebinding entries on self.dtype does not touch the
+        # object the caller passed in
+        self.dtype = copy(kwds.get("dtype", None))
+        self.converters = kwds.get("converters")
+        self.dtype_backend = kwds.get("dtype_backend")
+
+        self.true_values = kwds.get("true_values")
+        self.false_values = kwds.get("false_values")
+        self.cache_dates = kwds.pop("cache_dates", True)
+
+        # validate header options for mi
+        self.header = kwds.get("header")
+        if is_list_like(self.header, allow_sets=False):
+            if kwds.get("usecols"):
+                raise ValueError(
+                    "cannot specify usecols when specifying a multi-index header"
+                )
+            if kwds.get("names"):
+                raise ValueError(
+                    "cannot specify names when specifying a multi-index header"
+                )
+
+            # validate index_col that only contains integers
+            if self.index_col is not None:
+                # In this case we can pin down index_col as list[int]
+                if is_integer(self.index_col):
+                    self.index_col = [self.index_col]
+                elif not (
+                    is_list_like(self.index_col, allow_sets=False)
+                    and all(map(is_integer, self.index_col))
+                ):
+                    raise ValueError(
+                        "index_col must only contain integers of column positions "
+                        "when specifying a multi-index header"
+                    )
+                else:
+                    self.index_col = list(self.index_col)
+
+        self._first_chunk = True
+
+        # usecols becomes a set (or stays a callable / None) together with
+        # its inferred dtype label
+        self.usecols, self.usecols_dtype = _validate_usecols_arg(kwds["usecols"])
+
+        # Fallback to error to pass a sketchy test(test_override_set_noconvert_columns)
+        # Normally, this arg would get pre-processed earlier on
+        self.on_bad_lines = kwds.get("on_bad_lines", self.BadLineHandleMethod.ERROR)
+
+    def close(self) -> None:
+        """Do nothing; the base parser holds no resources of its own."""
+        return None
+
+    @final
+    def _should_parse_dates(self, i: int) -> bool:
+        """
+        Tell whether the ``i``-th index level has to be parsed as dates.
+
+        A boolean ``parse_dates`` is returned unchanged. For a list, the level
+        matches when either its column position (``index_col[i]``, or ``i``
+        itself when ``index_col`` is None) or its non-None name is listed.
+        """
+        requested = self.parse_dates
+        if isinstance(requested, bool):
+            return requested
+
+        label = None if self.index_names is None else self.index_names[i]
+        position = self.index_col[i] if self.index_col is not None else i
+
+        if position in requested:
+            return True
+        return label is not None and label in requested
+
+    @final
+    def _extract_multi_indexer_columns(
+        self,
+        header,
+        index_names: Sequence[Hashable] | None,
+        passed_names: bool = False,
+    ) -> tuple[
+        Sequence[Hashable], Sequence[Hashable] | None, Sequence[Hashable] | None, bool
+    ]:
+        """
+        Extract and return the names, index_names, col_names if the column
+        names are a MultiIndex.
+
+        Parameters
+        ----------
+        header: list of lists
+            The header rows. When there are two or more rows, the last one
+            is popped off this list in place.
+        index_names: list, optional
+            The names of the future index. Only returned as-is in the
+            single-row case; otherwise it is replaced by the cleaned last
+            header row.
+        passed_names: bool, default False
+            A flag specifying if names were passed
+
+        Returns
+        -------
+        tuple
+            ``(names, index_names, col_names, passed_names)``. With fewer than
+            two header rows this is ``(header[0], index_names, None,
+            passed_names)`` built from the arguments as given; otherwise
+            ``passed_names`` is always True.
+
+        Raises
+        ------
+        ParserError
+            If the header rows left after popping the last one do not all
+            have the same number of fields.
+        """
+        if len(header) < 2:
+            return header[0], index_names, None, passed_names
+
+        # the names are the tuples of the header that are not the index cols
+        # 0 is the name of the index, assuming index_col is a list of column
+        # numbers
+        ic = self.index_col
+        if ic is None:
+            ic = []
+
+        if not isinstance(ic, (list, tuple, np.ndarray)):
+            ic = [ic]
+        sic = set(ic)
+
+        # clean the index_names
+        # (the last header row is consumed here; this mutates ``header``)
+        index_names = header.pop(-1)
+        index_names, _, _ = self._clean_index_names(index_names, self.index_col)
+
+        # extract the columns
+        field_count = len(header[0])
+
+        # check if header lengths are equal
+        if not all(len(header_iter) == field_count for header_iter in header[1:]):
+            raise ParserError("Header rows must have an equal number of columns.")
+
+        def extract(r):
+            # one header row with the index-column positions dropped
+            return tuple(r[i] for i in range(field_count) if i not in sic)
+
+        # transpose: one tuple per data column, one entry per header row
+        columns = list(zip(*(extract(r) for r in header), strict=True))
+        names = columns.copy()
+        # put a placeholder (the position itself) back at every index column
+        for single_ic in sorted(ic):
+            names.insert(single_ic, single_ic)
+
+        # Clean the column names (if we have an index_col).
+        if ic:
+            # one level name per header row, read from the first index column;
+            # None and entries recorded in self.unnamed_cols become None
+            col_names = [
+                r[ic[0]]
+                if ((r[ic[0]] is not None) and r[ic[0]] not in self.unnamed_cols)
+                else None
+                for r in header
+            ]
+        else:
+            col_names = [None] * len(header)
+
+        passed_names = True
+
+        return names, index_names, col_names, passed_names
+
+    @final
+    def _maybe_make_multi_index_columns(
+        self,
+        columns: SequenceT,
+        col_names: Sequence[Hashable] | None = None,
+    ) -> SequenceT | MultiIndex:
+        """
+        Turn ``columns`` into a MultiIndex when ``is_potential_multi_index``
+        accepts it; otherwise hand the very same object back.
+
+        ``col_names`` supplies the level names of the MultiIndex.
+        """
+        if not is_potential_multi_index(columns):
+            return columns
+
+        as_tuples = cast("Sequence[tuple[Hashable, ...]]", columns)
+        return MultiIndex.from_tuples(as_tuples, names=col_names)
+
+    @final
+    def _make_index(
+        self, alldata, columns, indexnamerow: list[Scalar] | None = None
+    ) -> tuple[Index | None, Sequence[Hashable] | MultiIndex]:
+        """
+        Split the index columns off ``alldata`` and build the row index.
+
+        When ``self.index_col`` is a non-empty list, the referenced entries are
+        popped from ``alldata`` in place (and from ``columns`` too unless the
+        index is implicit) and aggregated through ``_agg_index``; otherwise
+        the returned index is None. ``columns`` is always passed through
+        ``_maybe_make_multi_index_columns`` before being returned.
+
+        Raises
+        ------
+        ValueError
+            If an entry of ``self.index_col`` is a string.
+        """
+        index: Index | None
+        index_col = self.index_col
+        if not (isinstance(index_col, list) and index_col):
+            index = None
+        else:
+            level_data = []
+            for position in index_col:
+                if isinstance(position, str):
+                    raise ValueError(f"Index {position} invalid")
+                level_data.append(alldata[position])
+
+            # pop from the back so the remaining positions stay valid
+            for position in sorted(index_col, reverse=True):
+                alldata.pop(position)
+                if not self._implicit_index:
+                    columns.pop(position)
+
+            index = self._agg_index(level_data)
+
+            if indexnamerow:
+                # the leading entries of the name row belong to the index
+                n_index_names = len(indexnamerow) - len(columns)
+                index = index.set_names(indexnamerow[:n_index_names])
+
+        return index, self._maybe_make_multi_index_columns(columns, self.col_names)
+
+    @final
+    def _clean_mapping(self, mapping):
+        """
+        Re-key a per-column dict by column name instead of column position.
+
+        Anything that is not a dict is returned untouched. An integer key that
+        is not itself one of ``self.orig_names`` is treated as a position into
+        ``self.orig_names``. For a defaultdict, every original column that is
+        still missing is filled in by looking it up on ``mapping``.
+        """
+        if not isinstance(mapping, dict):
+            return mapping
+
+        orig_names = self.orig_names
+        # for mypy
+        assert orig_names is not None
+
+        by_name = {}
+        for key, value in mapping.items():
+            is_position = isinstance(key, int) and key not in orig_names
+            by_name[orig_names[key] if is_position else key] = value
+
+        if isinstance(mapping, defaultdict):
+            for absent in set(orig_names) - set(by_name):
+                by_name[absent] = mapping[absent]
+        return by_name
+
+    @final
+    def _agg_index(self, index) -> Index:
+        """
+        Convert the extracted index columns into a single Index object.
+
+        Each level is optionally date-parsed, run through ``_infer_types``
+        with the NA values that apply to it, and wrapped in an Index.
+
+        Parameters
+        ----------
+        index : sequence of arrays
+            One array per index level, paired positionally with
+            ``self.index_names`` when that is set.
+
+        Returns
+        -------
+        Index
+            The single converted level, or a MultiIndex built from all
+            levels when there is more than one.
+        """
+        arrays = []
+        converters = self._clean_mapping(self.converters)
+        clean_dtypes = self._clean_mapping(self.dtype)
+
+        if self.index_names is not None:
+            names: Iterable = self.index_names
+            # strict zip: exactly one name is required per index array
+            zip_strict = True
+        else:
+            # no names known: pair every array with None
+            names = itertools.cycle([None])
+            zip_strict = False
+        for i, (arr, name) in enumerate(zip(index, names, strict=zip_strict)):
+            if self._should_parse_dates(i):
+                arr = date_converter(
+                    arr,
+                    col=self.index_names[i] if self.index_names is not None else None,
+                    dayfirst=self.dayfirst,
+                    cache_dates=self.cache_dates,
+                    date_format=self.date_format,
+                )
+
+            if self.na_filter:
+                col_na_values = self.na_values
+                col_na_fvalues = self.na_fvalues
+            else:
+                # NA filtering is off: no value is treated as missing
+                col_na_values = set()
+                col_na_fvalues = set()
+
+            # a per-column na_values dict overrides the choice made above,
+            # whatever na_filter says
+            if isinstance(self.na_values, dict):
+                assert self.index_names is not None
+                col_name = self.index_names[i]
+                if col_name is not None:
+                    col_na_values, col_na_fvalues = get_na_values(
+                        col_name, self.na_values, self.na_fvalues, self.keep_default_na
+                    )
+                else:
+                    col_na_values, col_na_fvalues = set(), set()
+
+            # explicit dtype / converter for this level, looked up by name
+            cast_type = None
+            index_converter = False
+            if self.index_names is not None:
+                if isinstance(clean_dtypes, dict):
+                    cast_type = clean_dtypes.get(self.index_names[i], None)
+
+                if isinstance(converters, dict):
+                    index_converter = converters.get(self.index_names[i]) is not None
+
+            # skip numeric/bool inference when the level was given a string
+            # dtype or has its own converter
+            try_num_bool = not (
+                (cast_type and is_string_dtype(cast_type)) or index_converter
+            )
+
+            # third argument is ``no_dtype_specified``
+            arr, _ = self._infer_types(
+                arr, col_na_values | col_na_fvalues, cast_type is None, try_num_bool
+            )
+            if cast_type is not None:
+                # Don't perform RangeIndex inference
+                idx = Index(arr, name=name, dtype=cast_type, copy=False)
+            else:
+                idx = ensure_index_from_sequences([arr], [name])
+            arrays.append(idx)
+
+        if len(arrays) == 1:
+            return arrays[0]
+        else:
+            return MultiIndex.from_arrays(arrays)
+
+    @final
+    def _set_noconvert_dtype_columns(
+        self, col_indices: list[int], names: Sequence[Hashable]
+    ) -> set[int]:
+        """
+        Work out which column positions must be left out of dtype conversion.
+
+        Columns that take part in date parsing are the ones excluded: the
+        entries of a ``parse_dates`` list, or the index column(s) when
+        ``parse_dates`` is truthy but not a list. With ``usecols`` given,
+        integer specifications are positions within ``usecols`` rather than
+        within the full set of columns.
+
+        Parameters
+        ----------
+        col_indices: The indices specifying order and positions of the columns
+        names: The column names, in the same order as col_indices
+
+        Returns
+        -------
+        A set of integers containing the positions of the columns not to convert.
+        """
+        ordered_usecols: list[int] | list[str] | None
+        if self.usecols_dtype == "integer":
+            # Sorting the integer set yields the same ordered list every time.
+            ordered_usecols = sorted(self.usecols)
+        elif callable(self.usecols) or self.usecols_dtype not in ("empty", None):
+            # col_indices already lists the selected columns in order.
+            ordered_usecols = col_indices
+        else:
+            # No usecols in effect.
+            ordered_usecols = None
+
+        def _position(spec) -> int:
+            # integer spec: translate through usecols when there is one
+            if ordered_usecols is not None and is_integer(spec):
+                spec = ordered_usecols[spec]
+            if is_integer(spec):
+                return spec
+            # label spec: look its position up by name
+            return col_indices[names.index(spec)]
+
+        parse_dates = self.parse_dates
+        if isinstance(parse_dates, list):
+            validate_parse_dates_presence(parse_dates, names)
+            specs = parse_dates
+        elif not parse_dates:
+            specs = []
+        elif isinstance(self.index_col, list):
+            specs = self.index_col
+        elif self.index_col is not None:
+            specs = [self.index_col]
+        else:
+            specs = []
+
+        return {_position(spec) for spec in specs}
+
+    @final
+    def _infer_types(
+        self, values, na_values, no_dtype_specified, try_num_bool: bool = True
+    ) -> tuple[ArrayLike, int]:
+        """
+        Infer types of values, possibly casting
+
+        Parameters
+        ----------
+        values : ndarray
+        na_values : set
+            Values to treat as missing.
+        no_dtype_specified: Specifies if we want to cast explicitly
+            A ``dtype_backend`` is only honoured when this is true.
+        try_num_bool : bool, default True
+           try to cast values to numeric (first preference) or boolean
+
+        Returns
+        -------
+        converted : ndarray or ExtensionArray
+        na_count : int
+            Number of missing values counted during conversion.
+        """
+        na_count = 0
+        # Fast path: already numeric/bool, so only the NA values need masking.
+        if issubclass(values.dtype.type, (np.number, np.bool_)):
+            # If our array has numeric dtype, we don't have to check for strings in isin
+            na_values = np.array([val for val in na_values if not isinstance(val, str)])
+            mask = algorithms.isin(values, na_values)
+            na_count = mask.astype("uint8", copy=False).sum()
+            if na_count > 0:
+                # integers are upcast to float64 before NaN is written in
+                if is_integer_dtype(values):
+                    values = values.astype(np.float64)
+                np.putmask(values, mask, np.nan)
+            return values, na_count
+
+        dtype_backend = self.dtype_backend
+        # True when the caller gave no dtype and asked for a dtype backend
+        non_default_dtype_backend = (
+            no_dtype_specified and dtype_backend is not lib.no_default
+        )
+        result: ArrayLike
+
+        # Step 1: numeric conversion of object arrays.
+        if try_num_bool and is_object_dtype(values.dtype):
+            # exclude e.g DatetimeIndex here
+            try:
+                result, result_mask = lib.maybe_convert_numeric(
+                    values,
+                    na_values,
+                    False,
+                    convert_to_masked_nullable=non_default_dtype_backend,  # type: ignore[arg-type]
+                )
+            except (ValueError, TypeError):
+                # e.g. encountering datetime string gets ValueError
+                #  TypeError can be raised in floatify
+                na_count = parsers.sanitize_objects(values, na_values)
+                result = values
+            else:
+                if non_default_dtype_backend:
+                    if result_mask is None:
+                        result_mask = np.zeros(result.shape, dtype=np.bool_)
+
+                    if result_mask.all():
+                        # everything is masked: use an IntegerArray over a
+                        # placeholder int64 buffer of ones
+                        result = IntegerArray(
+                            np.ones(result_mask.shape, dtype=np.int64), result_mask
+                        )
+                    elif is_integer_dtype(result):
+                        result = IntegerArray(result, result_mask)
+                    elif is_bool_dtype(result):
+                        result = BooleanArray(result, result_mask)
+                    elif is_float_dtype(result):
+                        result = FloatingArray(result, result_mask)
+
+                    na_count = result_mask.sum()
+                else:
+                    na_count = isna(result).sum()
+        else:
+            result = values
+            if values.dtype == np.object_:
+                na_count = parsers.sanitize_objects(values, na_values)
+
+        # Step 2: still object dtype -> try booleans, unless the first
+        # element is an int. Note the conversion starts again from ``values``.
+        if (
+            result.dtype == np.object_
+            and try_num_bool
+            and (len(result) == 0 or not isinstance(result[0], int))
+        ):
+            result, bool_mask = libops.maybe_convert_bool(
+                np.asarray(values),
+                true_values=self.true_values,
+                false_values=self.false_values,
+                convert_to_masked_nullable=non_default_dtype_backend,  # type: ignore[arg-type]
+            )
+            if result.dtype == np.bool_ and non_default_dtype_backend:
+                if bool_mask is None:
+                    bool_mask = np.zeros(result.shape, dtype=np.bool_)
+                result = BooleanArray(result, bool_mask)
+            elif result.dtype == np.object_ and non_default_dtype_backend:
+                # read_excel sends array of datetime objects
+                if not lib.is_datetime_array(result, skipna=True):
+                    dtype = StringDtype()
+                    cls = dtype.construct_array_type()
+                    result = cls._from_sequence(values, dtype=dtype)
+
+        # Step 3: wrap whatever we have in an ArrowExtensionArray when the
+        # pyarrow backend was requested.
+        if dtype_backend == "pyarrow":
+            pa = import_optional_dependency("pyarrow")
+            if isinstance(result, np.ndarray):
+                result = ArrowExtensionArray(pa.array(result, from_pandas=True))
+            elif isinstance(result, BaseMaskedArray):
+                if result._mask.all():
+                    # We want an arrow null array here
+                    result = ArrowExtensionArray(pa.array([None] * len(result)))
+                else:
+                    result = ArrowExtensionArray(
+                        pa.array(result._data, mask=result._mask)
+                    )
+            else:
+                result = ArrowExtensionArray(
+                    pa.array(result.to_numpy(), from_pandas=True)
+                )
+
+        return result, na_count
+
+    @overload
+    def _do_date_conversions(
+        self,
+        names: Index,
+        data: DataFrame,
+    ) -> DataFrame: ...
+
+    @overload
+    def _do_date_conversions(
+        self,
+        names: Sequence[Hashable],
+        data: Mapping[Hashable, ArrayLike],
+    ) -> Mapping[Hashable, ArrayLike]: ...
+
+    @final
+    def _do_date_conversions(
+        self,
+        names: Sequence[Hashable] | Index,
+        data: Mapping[Hashable, ArrayLike] | DataFrame,
+    ) -> Mapping[Hashable, ArrayLike] | DataFrame:
+        """
+        Date-parse, in place, the data columns listed in ``self.parse_dates``.
+
+        Nothing happens unless ``parse_dates`` is a list. An int entry that is
+        not a key of ``data`` is a position into ``names``. Entries that
+        refer to an index column (found in a list ``index_col`` or a list
+        ``index_names``) are skipped here. ``data`` itself is returned.
+        """
+        wanted = self.parse_dates
+        if not isinstance(wanted, list):
+            return data
+
+        for spec in wanted:
+            by_position = isinstance(spec, int) and spec not in data
+            key = names[spec] if by_position else spec
+
+            if isinstance(self.index_col, list) and key in self.index_col:
+                continue
+            if isinstance(self.index_names, list) and key in self.index_names:
+                continue
+
+            # error: Unsupported target for indexed assignment
+            # ("Mapping[Hashable, ExtensionArray | ndarray[Any, Any]] | DataFrame")
+            data[key] = date_converter(  # type: ignore[index]
+                data[key],
+                col=key,
+                dayfirst=self.dayfirst,
+                cache_dates=self.cache_dates,
+                date_format=self.date_format,
+            )
+
+        return data
+
+    @final
+    def _check_data_length(
+        self,
+        columns: Sequence[Hashable],
+        data: Sequence[ArrayLike],
+    ) -> None:
+        """Warn when the number of data columns and of column names differ.
+
+        The check only runs when ``self.index_col`` is falsy and ``columns``
+        is non-empty. A single surplus data column made up entirely of empty
+        strings / missing values (one set of trailing commas) is tolerated.
+
+        Parameters
+        ----------
+        columns: list of column names
+        data: list of array-likes containing the data column-wise.
+        """
+        if self.index_col or len(columns) == len(data) or not columns:
+            return
+
+        trailing = data[-1]
+        is_blank = is_object_dtype(trailing) and trailing == ""
+        # error: No overload variant of "__ror__" of "ndarray" matches
+        # argument type "ExtensionArray"
+        blank_or_na = is_blank | isna(trailing)  # type: ignore[operator]
+        single_surplus = len(columns) == len(data) - 1
+        if single_surplus and np.all(blank_or_na):
+            return
+
+        warnings.warn(
+            "Length of header or names does not match length of data. This leads "
+            "to a loss of data with index_col=False.",
+            ParserWarning,
+            stacklevel=find_stack_level(),
+        )
+
+    @final
+    def _validate_usecols_names(self, usecols: SequenceT, names: Sequence) -> SequenceT:
+        """
+        Validates that all usecols are present in a given
+        list of names. If not, raise a ValueError that
+        shows what usecols are missing.
+
+        Parameters
+        ----------
+        usecols : iterable of usecols
+            The columns to validate are present in names.
+        names : iterable of names
+            The column names to check against.
+
+        Returns
+        -------
+        usecols : iterable of usecols
+            The `usecols` parameter if the validation succeeds.
+
+        Raises
+        ------
+        ValueError : Columns were missing. Error message will list them.
+        """
+        missing = [c for c in usecols if c not in names]
+        if missing:
+            # only the second fragment interpolates, so only it is an f-string
+            raise ValueError(
+                "Usecols do not match columns, columns expected but not found: "
+                f"{missing}"
+            )
+
+        return usecols
+
+    @final
+    def _clean_index_names(self, columns, index_col) -> tuple[list | None, list, list]:
+        """
+        Separate the index column names from the remaining column names.
+
+        Returns ``(index_names, columns, index_col)``: the names of the index
+        levels (None for placeholder names recorded in ``self.unnamed_cols``),
+        the columns with the index columns removed, and ``index_col`` with
+        string entries replaced by their position when found. The inputs are
+        not mutated. Without a usable ``index_col`` the arguments are
+        returned unchanged with None as the index names.
+        """
+        if not is_index_col(index_col):
+            return None, columns, index_col
+
+        remaining = list(columns)
+
+        # In case of no rows and multiindex columns we have to set index_names to
+        # list of Nones GH#38292
+        if not remaining:
+            return [None] * len(index_col), remaining, index_col
+
+        all_cols = list(remaining)
+        # work on a copy: don't mutate the caller's index_col
+        positions = list(index_col)
+        raw_names: list[str | int | None] = []
+
+        for slot, spec in enumerate(positions):
+            if not isinstance(spec, str):
+                found = all_cols[spec]
+                remaining.remove(found)
+                raw_names.append(found)
+                continue
+
+            raw_names.append(spec)
+            hit = next(
+                (pos for pos, label in enumerate(all_cols) if label == spec), None
+            )
+            if hit is not None:
+                positions[slot] = hit
+                remaining.remove(all_cols[hit])
+
+        # Only clean index names that were placeholders.
+        index_names = [
+            None if isinstance(label, str) and label in self.unnamed_cols else label
+            for label in raw_names
+        ]
+
+        return index_names, remaining, positions
+
+    @final
+    def _get_empty_meta(
+        self, columns: Sequence[HashableT], dtype: DtypeArg | None = None
+    ) -> tuple[Index, list[HashableT], dict[HashableT, Series]]:
+        """
+        Build the index, column list and empty column data for an empty result.
+
+        Parameters
+        ----------
+        columns : sequence of hashable
+            All column names, index columns included. The sequence is copied,
+            never modified.
+        dtype : dtype or dict-like, optional
+            A single dtype applied to everything, or a mapping from column
+            name / integer position to dtype.
+
+        Returns
+        -------
+        index : Index
+            A default empty index when ``self.index_col`` is None/False or
+            ``self.index_names`` is None; otherwise an empty Index (or
+            MultiIndex) with one level per entry of ``self.index_names``.
+        columns : list
+            The column names with the index columns removed.
+        col_dict : dict
+            One empty Series per remaining column, with its dtype applied.
+        """
+        columns = list(columns)
+
+        index_col = self.index_col
+        index_names = self.index_names
+
+        # Convert `dtype` to a defaultdict of some kind.
+        # This will enable us to write `dtype[col_name]`
+        # without worrying about KeyError issues later on.
+        dtype_dict: defaultdict[Hashable, Any]
+        if not is_dict_like(dtype):
+            # if dtype == None, default will be object.
+            dtype_dict = defaultdict(lambda: dtype)
+        else:
+            dtype = cast(dict, dtype)
+            dtype_dict = defaultdict(
+                lambda: None,
+                {columns[k] if is_integer(k) else k: v for k, v in dtype.items()},
+            )
+
+        # Even though we have no data, the "index" of the empty DataFrame
+        # could for example still be an empty MultiIndex. Thus, we need to
+        # check whether we have any index columns specified, via either:
+        #
+        # 1) index_col (column indices)
+        # 2) index_names (column names)
+        #
+        # Both must be non-null to ensure a successful construction. Otherwise,
+        # we have to create a generic empty Index.
+        index: Index
+        if (index_col is None or index_col is False) or index_names is None:
+            index = default_index(0)
+        else:
+            # TODO: We could return default_index(0) if dtype_dict[name] is None
+            data = [
+                Index([], name=name, dtype=dtype_dict[name]) for name in index_names
+            ]
+            if len(data) == 1:
+                index = data[0]
+            else:
+                index = MultiIndex.from_arrays(data)
+
+            # Walk the positions in ascending order so ``n - i`` compensates
+            # for the entries already popped. Sort a copy: sorting
+            # ``self.index_col`` in place would reorder instance state and
+            # break its positional pairing with ``self.index_names``.
+            for i, n in enumerate(sorted(index_col)):
+                columns.pop(n - i)
+
+        col_dict = {
+            col_name: Series([], dtype=dtype_dict[col_name]) for col_name in columns
+        }
+
+        return index, columns, col_dict
+
+
+def date_converter(
+    date_col,
+    col: Hashable,
+    dayfirst: bool = False,
+    cache_dates: bool = True,
+    date_format: dict[Hashable, str] | str | None = None,
+):
+    """
+    Parse one column of values as datetimes.
+
+    Input that already has a datetime64/timedelta64 dtype is returned as is.
+    Otherwise the values are turned into strings and handed to
+    ``to_datetime``; if that raises ValueError or TypeError the string array
+    is returned unparsed. ``date_format`` may be a single format or a dict
+    looked up by ``col``.
+    """
+    if date_col.dtype.kind in "Mm":
+        return date_col
+
+    if isinstance(date_format, dict):
+        fmt = date_format.get(col)
+    else:
+        fmt = date_format
+
+    as_strings = lib.ensure_string_array(np.asarray(date_col))
+    try:
+        parsed = tools.to_datetime(
+            as_strings,
+            format=fmt,
+            utc=False,
+            dayfirst=dayfirst,
+            cache=cache_dates,
+        )
+    except (ValueError, TypeError):
+        # test_usecols_with_parse_dates4
+        # test_multi_index_parse_dates
+        return as_strings
+
+    if not isinstance(parsed, DatetimeIndex):
+        return parsed._values
+
+    # hand back a writeable ndarray
+    out = parsed.to_numpy()
+    out.flags.writeable = True
+    return out
+
+
+# Default value for each parser keyword option, keyed by option name.
+# The commented-out entries ('engine', 'iterator') are deliberately not
+# part of this mapping.
+parser_defaults = {
+    "delimiter": None,
+    "escapechar": None,
+    "quotechar": '"',
+    "quoting": csv.QUOTE_MINIMAL,
+    "doublequote": True,
+    "skipinitialspace": False,
+    "lineterminator": None,
+    "header": "infer",
+    "index_col": None,
+    "names": None,
+    "skiprows": None,
+    "skipfooter": 0,
+    "nrows": None,
+    "na_values": None,
+    "keep_default_na": True,
+    "true_values": None,
+    "false_values": None,
+    "converters": None,
+    "dtype": None,
+    "cache_dates": True,
+    "thousands": None,
+    "comment": None,
+    "decimal": ".",
+    # 'engine': 'c',
+    "parse_dates": False,
+    "dayfirst": False,
+    "date_format": None,
+    "usecols": None,
+    # 'iterator': False,
+    "chunksize": None,
+    "encoding": None,
+    "compression": None,
+    "skip_blank_lines": True,
+    "encoding_errors": "strict",
+    "on_bad_lines": ParserBase.BadLineHandleMethod.ERROR,
+    "dtype_backend": lib.no_default,
+}
+
+
+def get_na_values(col, na_values, na_fvalues, keep_default_na: bool):
+    """
+    Look up the NaN markers that apply to one column.
+
+    Parameters
+    ----------
+    col : str
+        The name of the column.
+    na_values : array-like, dict
+        The NaN values as strings, either shared or keyed by column.
+    na_fvalues : array-like, dict
+        The NaN values as floats, in the same layout as `na_values`.
+    keep_default_na : bool
+        Only consulted when `na_values` is a dict that has no entry for
+        `col`: True selects the default string NaN values, False the
+        empty set.
+
+    Returns
+    -------
+    nan_tuple : A length-two tuple composed of
+
+        1) na_values : the string NaN values for that column.
+        2) na_fvalues : the float NaN values for that column.
+    """
+    if not isinstance(na_values, dict):
+        # shared across all columns
+        return na_values, na_fvalues
+
+    if col in na_values:
+        return na_values[col], na_fvalues[col]
+
+    if keep_default_na:
+        return STR_NA_VALUES, set()
+    return set(), set()
+
+
+def is_index_col(col) -> bool:
+    """Return True unless ``col`` is the None or False singleton."""
+    return not (col is None or col is False)
+
+
+def validate_parse_dates_presence(
+    parse_dates: bool | list, columns: Sequence[Hashable]
+) -> set:
+    """
+    Check that every entry of parse_dates refers to an existing column.
+
+    Parameters
+    ----------
+    parse_dates : bool or list
+        The date-parsing specification. Anything but a list yields an
+        empty set.
+    columns : list
+        List of names of the dataframe.
+
+    Returns
+    -------
+    The names of the columns which will get parsed later if a list
+    is given as specification. An entry that is not a column name and not
+    a string is used as a position into `columns`.
+
+    Raises
+    ------
+    ValueError
+        If a string entry of parse_dates is not in columns.
+
+    """
+    if not isinstance(parse_dates, list):
+        return set()
+
+    absent = set()
+    resolved = set()
+    for spec in parse_dates:
+        if spec in columns:
+            resolved.add(spec)
+        elif isinstance(spec, str):
+            absent.add(spec)
+        else:
+            resolved.add(columns[spec])
+
+    if absent:
+        missing_cols = ", ".join(sorted(absent))
+        raise ValueError(f"Missing column provided to 'parse_dates': '{missing_cols}'")
+    return resolved
+
+
+def _validate_usecols_arg(usecols):
+    """
+    Validate the 'usecols' parameter.
+
+    A list-like 'usecols' has to be empty, all integers (selection by
+    position) or all strings (selection by name); a callable or None is
+    accepted without further checks.
+
+    Parameters
+    ----------
+    usecols : list-like, callable, or None
+        List of columns to use when parsing or a callable that can be used
+        to filter a list of table columns.
+
+    Returns
+    -------
+    usecols_tuple : tuple
+        A tuple of (verified_usecols, usecols_dtype).
+
+        'verified_usecols' is a set when a list-like is passed in, and
+        'usecols' itself for a callable or None.
+
+        'usecols_dtype' is the inferred dtype of 'usecols' when a list-like
+        is passed in, and None for a callable or None.
+
+    Raises
+    ------
+    ValueError
+        If 'usecols' is not list-like, or its inferred dtype is not one of
+        "empty", "integer" or "string".
+    """
+    if usecols is None or callable(usecols):
+        return usecols, None
+
+    msg = (
+        "'usecols' must either be list-like of all strings, all unicode, "
+        "all integers or a callable."
+    )
+
+    # see gh-20529
+    #
+    # Ensure it is iterable container but not string.
+    if not is_list_like(usecols):
+        raise ValueError(msg)
+
+    inferred = lib.infer_dtype(usecols, skipna=False)
+    if inferred not in ("empty", "integer", "string"):
+        raise ValueError(msg)
+
+    return set(usecols), inferred
+
+
+@overload
+def evaluate_callable_usecols(
+    usecols: Callable[[Hashable], object],
+    names: Iterable[Hashable],
+) -> set[int]: ...
+
+
+@overload
+def evaluate_callable_usecols(
+    usecols: SequenceT, names: Iterable[Hashable]
+) -> SequenceT: ...
+
+
+def evaluate_callable_usecols(
+    usecols: Callable[[Hashable], object] | SequenceT,
+    names: Iterable[Hashable],
+) -> SequenceT | set[int]:
+    """
+    Resolve a callable 'usecols' into column positions.
+
+    A callable is applied to every entry of 'names'; the positions for
+    which it returns a truthy value are collected into a set. Anything
+    that is not callable is returned unchanged.
+    """
+    if not callable(usecols):
+        return usecols
+
+    selected = set()
+    for position, name in enumerate(names):
+        if usecols(name):
+            selected.add(position)
+    return selected
@@ -0,0 +1,395 @@
+from __future__ import annotations
+
+from collections import defaultdict
+from typing import TYPE_CHECKING
+import warnings
+
+import numpy as np
+
+from pandas._libs import (
+    lib,
+    parsers,
+)
+from pandas.compat._optional import import_optional_dependency
+from pandas.errors import DtypeWarning
+from pandas.util._exceptions import find_stack_level
+
+from pandas.core.dtypes.common import pandas_dtype
+from pandas.core.dtypes.concat import (
+    concat_compat,
+    union_categoricals,
+)
+from pandas.core.dtypes.dtypes import CategoricalDtype
+
+from pandas.core.indexes.api import ensure_index_from_sequences
+
+from pandas.io.common import (
+    dedup_names,
+    is_potential_multi_index,
+)
+from pandas.io.parsers.base_parser import (
+    ParserBase,
+    ParserError,
+    date_converter,
+    evaluate_callable_usecols,
+    is_index_col,
+    validate_parse_dates_presence,
+)
+
+if TYPE_CHECKING:
+    from collections.abc import (
+        Hashable,
+        Mapping,
+        Sequence,
+    )
+
+    from pandas._typing import (
+        AnyArrayLike,
+        ArrayLike,
+        DtypeArg,
+        DtypeObj,
+        ReadCsvBuffer,
+        SequenceT,
+    )
+
+    from pandas import (
+        Index,
+        MultiIndex,
+    )
+
+
+class CParserWrapper(ParserBase):
+    """ParserBase implementation that reads through ``parsers.TextReader``."""
+
+    low_memory: bool
+    _reader: parsers.TextReader
+
+    def __init__(self, src: ReadCsvBuffer[str], **kwds) -> None:
+        """Prepare the keyword arguments, build the TextReader and resolve names."""
+        super().__init__(kwds)
+        self.kwds = kwds
+        kwds = kwds.copy()
+
+        self.low_memory = kwds.pop("low_memory", False)
+
+        # #2442
+        kwds["allow_leading_cols"] = self.index_col is not False
+
+        # GH20529, validate usecol arg before TextReader
+        kwds["usecols"] = self.usecols
+
+        # Have to pass int, would break tests using TextReader directly otherwise :(
+        kwds["on_bad_lines"] = self.on_bad_lines.value
+
+        for key in (
+            "storage_options",
+            "encoding",
+            "memory_map",
+            "compression",
+        ):
+            kwds.pop(key, None)  # not forwarded to TextReader
+
+        kwds["dtype"] = ensure_dtype_objs(kwds.get("dtype", None))
+        if "dtype_backend" not in kwds or kwds["dtype_backend"] is lib.no_default:
+            kwds["dtype_backend"] = "numpy"
+        if kwds["dtype_backend"] == "pyarrow":
+            # Fail here loudly instead of in cython after reading
+            import_optional_dependency("pyarrow")
+        self._reader = parsers.TextReader(src, **kwds)
+
+        self.unnamed_cols = self._reader.unnamed_cols
+
+        passed_names = self.names is None  # NOTE(review): True when names is None
+
+        if self._reader.header is None:
+            self.names = None
+        else:
+            (
+                self.names,
+                self.index_names,
+                self.col_names,
+                passed_names,
+            ) = self._extract_multi_indexer_columns(
+                self._reader.header,
+                self.index_names,
+                passed_names,
+            )
+
+        if self.names is None:
+            self.names = list(range(self._reader.table_width))
+
+        # gh-9755: set orig_names before names is filtered so that
+        # _set_noconvert_columns can index against the unfiltered list;
+        # orig_names is set again to the filtered names further down.
+        self.orig_names = self.names[:]
+
+        if self.usecols:
+            usecols = evaluate_callable_usecols(self.usecols, self.orig_names)
+
+            # GH 14671
+            # assert for mypy, orig_names is List or None, None would error in issubset
+            assert self.orig_names is not None
+            if self.usecols_dtype == "string" and not set(usecols).issubset(
+                self.orig_names
+            ):
+                self._validate_usecols_names(usecols, self.orig_names)
+
+            if len(self.names) > len(usecols):
+                self.names = [
+                    n
+                    for i, n in enumerate(self.names)
+                    if (i in usecols or n in usecols)
+                ]
+
+            if len(self.names) < len(usecols):
+                self._validate_usecols_names(
+                    usecols,
+                    self.names,
+                )
+
+        validate_parse_dates_presence(self.parse_dates, self.names)
+        self._set_noconvert_columns()
+
+        self.orig_names = self.names
+
+        if self._reader.leading_cols == 0 and is_index_col(self.index_col):
+            (
+                index_names,
+                self.names,
+                self.index_col,
+            ) = self._clean_index_names(
+                self.names,
+                self.index_col,
+            )
+
+            if self.index_names is None:
+                self.index_names = index_names
+
+        if self._reader.header is None and not passed_names:
+            assert self.index_names is not None
+            self.index_names = [None] * len(self.index_names)
+
+        self._implicit_index = self._reader.leading_cols > 0
+
+    def close(self) -> None:
+        """Close handles opened by the C parser."""
+        try:
+            self._reader.close()
+        except ValueError:
+            # NOTE(review): presumably raised if already closed — confirm
+            pass
+
+    def _set_noconvert_columns(self) -> None:
+        """
+        Set the columns that should not undergo dtype conversions.
+
+        Currently, any column that is involved with date parsing will not
+        undergo such conversions.
+        """
+        assert self.orig_names is not None
+        # much faster than using orig_names.index(x) xref GH#44106
+        names_dict = {x: i for i, x in enumerate(self.orig_names)}
+        col_indices = [names_dict[x] for x in self.names]
+        noconvert_columns = self._set_noconvert_dtype_columns(
+            col_indices,
+            self.names,
+        )
+        for col in noconvert_columns:
+            self._reader.set_noconvert(col)
+
+    def read(
+        self,
+        nrows: int | None = None,
+    ) -> tuple[
+        Index | MultiIndex | None,
+        Sequence[Hashable] | MultiIndex,
+        Mapping[Hashable, AnyArrayLike],
+    ]:
+        """
+        Read ``nrows`` rows and return ``(index, column_names, data)``.
+
+        If the reader raises StopIteration on the first call, empty metadata
+        is returned; on later calls the reader is closed and it is re-raised.
+        """
+        index: Index | MultiIndex | None
+        column_names: Sequence[Hashable] | MultiIndex
+        try:
+            if self.low_memory:
+                chunks = self._reader.read_low_memory(nrows)
+                # destructive to chunks
+                data = _concatenate_chunks(chunks, self.names)
+            else:
+                data = self._reader.read(nrows)
+        except StopIteration:
+            if self._first_chunk:
+                self._first_chunk = False
+                # assert for mypy: orig_names may be None, which dedup_names rejects
+                assert self.orig_names is not None
+                names = dedup_names(
+                    self.orig_names,
+                    is_potential_multi_index(self.orig_names, self.index_col),
+                )
+                index, columns, col_dict = self._get_empty_meta(
+                    names,
+                    dtype=self.dtype,
+                )
+                # error: Incompatible types in assignment (expression has type
+                # "list[Hashable] | MultiIndex", variable has type "list[Hashable]")
+                columns = self._maybe_make_multi_index_columns(  # type: ignore[assignment]
+                    columns, self.col_names
+                )
+
+                columns = _filter_usecols(self.usecols, columns)
+                columns_set = set(columns)
+
+                col_dict = {k: v for k, v in col_dict.items() if k in columns_set}
+
+                return index, columns, col_dict
+
+            else:
+                self.close()
+                raise
+
+        # Done with first read, next time raise StopIteration
+        self._first_chunk = False
+
+        names = self.names
+
+        if self._reader.leading_cols:
+            # implicit index, no index names
+            arrays = []
+
+            if self.index_col and self._reader.leading_cols != len(self.index_col):
+                raise ParserError(
+                    "Could not construct index. Requested to use "
+                    f"{len(self.index_col)} number of columns, but "
+                    f"{self._reader.leading_cols} left to parse."
+                )
+
+            for i in range(self._reader.leading_cols):
+                if self.index_col is None:
+                    values = data.pop(i)
+                else:
+                    values = data.pop(self.index_col[i])
+
+                if self._should_parse_dates(i):
+                    values = date_converter(
+                        values,
+                        col=(
+                            self.index_names[i]
+                            if self.index_names is not None
+                            else None
+                        ),
+                        dayfirst=self.dayfirst,
+                        cache_dates=self.cache_dates,
+                        date_format=self.date_format,
+                    )
+                arrays.append(values)
+
+            index = ensure_index_from_sequences(arrays)
+
+            names = _filter_usecols(self.usecols, names)
+
+            names = dedup_names(names, is_potential_multi_index(names, self.index_col))
+
+            # rename dict keys
+            data_tups = sorted(data.items())
+            data = {k: v for k, (i, v) in zip(names, data_tups, strict=True)}
+
+            date_data = self._do_date_conversions(names, data)
+
+            # maybe create a mi on the columns
+            column_names = self._maybe_make_multi_index_columns(names, self.col_names)
+
+        else:
+            # rename dict keys
+            data_tups = sorted(data.items())
+
+            # assert for mypy, orig_names is List or None, None would error in list(...)
+            assert self.orig_names is not None
+            names = list(self.orig_names)
+            names = dedup_names(names, is_potential_multi_index(names, self.index_col))
+
+            names = _filter_usecols(self.usecols, names)
+
+            # columns as list
+            alldata = [x[1] for x in data_tups]
+            if self.usecols is None:
+                self._check_data_length(names, alldata)
+
+            data = {k: v for k, (i, v) in zip(names, data_tups, strict=False)}
+
+            date_data = self._do_date_conversions(names, data)
+            index, column_names = self._make_index(alldata, names)
+
+        return index, column_names, date_data
+
+
+def _filter_usecols(usecols, names: SequenceT) -> SequenceT | list[Hashable]:
+    # A callable is resolved to positions; names are filtered only if counts differ.
+    usecols = evaluate_callable_usecols(usecols, names)
+    if usecols is None or len(usecols) == len(names):
+        return names
+    return [col for pos, col in enumerate(names) if pos in usecols or col in usecols]
+
+
+def _concatenate_chunks(
+    chunks: list[dict[int, ArrayLike]], column_names: list[str]
+) -> dict:
+    """
+    Concatenate chunks of data read with low_memory=True.
+
+    The tricky part is handling Categoricals, where different chunks
+    may have different inferred categories.
+
+    Emits a DtypeWarning naming every column (as ``position: name``) whose
+    chunks had differing dtypes and were combined into an object array.
+    """
+    # (position, name) pairs of the columns to report as mixed-type
+    warning_columns = []
+
+    result: dict = {}
+    for name in list(chunks[0]):
+        arrs = [chunk.pop(name) for chunk in chunks]
+        # Check each arr for consistent types.
+        dtypes = {a.dtype for a in arrs}
+        non_cat_dtypes = {x for x in dtypes if not isinstance(x, CategoricalDtype)}
+
+        dtype = dtypes.pop()
+        if isinstance(dtype, CategoricalDtype):
+            result[name] = union_categoricals(arrs, sort_categories=False)
+        else:
+            result[name] = concat_compat(arrs)
+            if len(non_cat_dtypes) > 1 and result[name].dtype == np.dtype(object):
+                # Report the column's own position, not its rank in this list.
+                warning_columns.append((name, column_names[name]))
+
+    if warning_columns:
+        warning_names = ", ".join(f"{pos}: {col}" for pos, col in warning_columns)
+        warning_message = (
+            f"Columns ({warning_names}) have mixed types. "
+            "Specify dtype option on import or set low_memory=False."
+        )
+        warnings.warn(warning_message, DtypeWarning, stacklevel=find_stack_level())
+    return result
+
+
+def ensure_dtype_objs(
+    dtype: DtypeArg | dict[Hashable, DtypeArg] | None,
+) -> DtypeObj | dict[Hashable, DtypeObj] | None:
+    """
+    Convert a dtype spec, or every value of a dict of specs, via pandas_dtype.
+
+    None is passed through; a defaultdict keeps a (converted) default.
+    """
+    if dtype is None:
+        return dtype
+    if isinstance(dtype, defaultdict):
+        # "None" not callable  [misc]
+        fallback = pandas_dtype(dtype.default_factory())  # type: ignore[misc]
+        converted: defaultdict = defaultdict(lambda: fallback)
+        converted.update((key, pandas_dtype(spec)) for key, spec in dtype.items())
+        return converted
+    if isinstance(dtype, dict):
+        return {key: pandas_dtype(spec) for key, spec in dtype.items()}
+    return pandas_dtype(dtype)
@@ -0,0 +1,239 @@
+"""pickle compat"""
+
+from __future__ import annotations
+
+import pickle
+from typing import (
+    TYPE_CHECKING,
+    Any,
+)
+import warnings
+
+from pandas.compat import pickle_compat
+from pandas.util._decorators import set_module
+
+from pandas.io.common import get_handle
+
+if TYPE_CHECKING:
+    from pandas._typing import (
+        CompressionOptions,
+        FilePath,
+        ReadPickleBuffer,
+        StorageOptions,
+        WriteBuffer,
+    )
+
+    from pandas import (
+        DataFrame,
+        Series,
+    )
+
+
+@set_module("pandas")
+def to_pickle(
+    obj: Any,
+    filepath_or_buffer: FilePath | WriteBuffer[bytes],
+    compression: CompressionOptions = "infer",
+    protocol: int = pickle.HIGHEST_PROTOCOL,
+    storage_options: StorageOptions | None = None,
+) -> None:
+    """
+    Pickle (serialize) object to file.
+
+    Parameters
+    ----------
+    obj : any object
+        Any python object.
+    filepath_or_buffer : str, path object, or file-like object
+        String, path object (implementing ``os.PathLike[str]``), or file-like
+        object implementing a binary ``write()`` function.
+        Also accepts URL. URL has to be of S3 or GCS.
+    compression : str or dict, default 'infer'
+        For on-the-fly compression of the output data. If 'infer' and
+        'filepath_or_buffer' is path-like, then detect compression from the
+        following extensions: '.gz', '.bz2', '.zip', '.xz', '.zst', '.tar',
+        '.tar.gz', '.tar.xz' or '.tar.bz2' (otherwise no compression).
+        Set to ``None`` for no compression.
+        Can also be a dict with key ``'method'`` set
+        to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'xz'``,
+        ``'tar'``} and other key-value pairs are forwarded to
+        ``zipfile.ZipFile``, ``gzip.GzipFile``,
+        ``bz2.BZ2File``, ``zstandard.ZstdCompressor``, ``lzma.LZMAFile`` or
+        ``tarfile.TarFile``, respectively.
+        As an example, the following could be passed for faster compression
+        and to create a reproducible gzip archive:
+        ``compression={'method': 'gzip', 'compresslevel': 1, 'mtime': 1}``.
+    protocol : int
+        Int which indicates which protocol should be used by the pickler,
+        default HIGHEST_PROTOCOL (see [1], paragraph 12.1.2). The possible
+        values for this parameter depend on the version of Python. For Python
+        2.x, possible values are 0, 1, 2. For Python>=3.0, 3 is a valid value.
+        For Python >= 3.4, 4 is a valid value. A negative value for the
+        protocol parameter is equivalent to setting its value to
+        HIGHEST_PROTOCOL.
+    storage_options : dict, optional
+        Extra options that make sense for a particular storage connection, e.g.
+        host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
+        are forwarded to ``urllib.request.Request`` as header options. For other
+        URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are
+        forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more
+        details, and for more examples on storage options refer `here
+        <https://pandas.pydata.org/docs/user_guide/io.html?
+        highlight=storage_options#reading-writing-remote-files>`_.
+
+        .. [1] https://docs.python.org/3/library/pickle.html
+
+    See Also
+    --------
+    read_pickle : Load pickled pandas object (or any object) from file.
+    DataFrame.to_hdf : Write DataFrame to an HDF5 file.
+    DataFrame.to_sql : Write DataFrame to a SQL database.
+    DataFrame.to_parquet : Write a DataFrame to the binary parquet format.
+
+    Examples
+    --------
+    >>> original_df = pd.DataFrame(
+    ...     {"foo": range(5), "bar": range(5, 10)}
+    ... )  # doctest: +SKIP
+    >>> original_df  # doctest: +SKIP
+       foo  bar
+    0    0    5
+    1    1    6
+    2    2    7
+    3    3    8
+    4    4    9
+    >>> pd.to_pickle(original_df, "./dummy.pkl")  # doctest: +SKIP
+
+    >>> unpickled_df = pd.read_pickle("./dummy.pkl")  # doctest: +SKIP
+    >>> unpickled_df  # doctest: +SKIP
+       foo  bar
+    0    0    5
+    1    1    6
+    2    2    7
+    3    3    8
+    4    4    9
+    """
+    if protocol < 0:
+        protocol = pickle.HIGHEST_PROTOCOL
+
+    with get_handle(
+        filepath_or_buffer,
+        "wb",
+        compression=compression,
+        is_text=False,
+        storage_options=storage_options,
+    ) as handles:
+        # letting pickle write directly to the buffer is more memory-efficient
+        pickle.dump(obj, handles.handle, protocol=protocol)
+
+
+@set_module("pandas")
+def read_pickle(
+    filepath_or_buffer: FilePath | ReadPickleBuffer,
+    compression: CompressionOptions = "infer",
+    storage_options: StorageOptions | None = None,
+) -> DataFrame | Series:
+    """
+    Load pickled pandas object (or any object) from file and return unpickled object.
+
+    .. warning::
+
+       Loading pickled data received from untrusted sources can be
+       unsafe. See `here <https://docs.python.org/3/library/pickle.html>`__.
+
+    Parameters
+    ----------
+    filepath_or_buffer : str, path object, or file-like object
+        String, path object (implementing ``os.PathLike[str]``), or file-like
+        object implementing a binary ``readlines()`` function.
+        Also accepts URL. URL is not limited to S3 and GCS.
+    compression : str or dict, default 'infer'
+        For on-the-fly decompression of on-disk data. If 'infer' and
+        'filepath_or_buffer' is path-like, then detect compression from the
+        following extensions: '.gz', '.bz2', '.zip', '.xz', '.zst', '.tar',
+        '.tar.gz', '.tar.xz' or '.tar.bz2' (otherwise no compression).
+        If using 'zip' or 'tar', the ZIP file must contain only one data file
+        to be read in.
+        Set to ``None`` for no decompression.
+        Can also be a dict with key ``'method'`` set
+        to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'xz'``,
+        ``'tar'``} and other key-value pairs are forwarded to
+        ``zipfile.ZipFile``, ``gzip.GzipFile``,
+        ``bz2.BZ2File``, ``zstandard.ZstdDecompressor``, ``lzma.LZMAFile`` or
+        ``tarfile.TarFile``, respectively.
+        As an example, the following could be passed for Zstandard decompression
+        using a custom compression dictionary:
+        ``compression={'method': 'zstd', 'dict_data': my_compression_dict}``.
+    storage_options : dict, optional
+        Extra options that make sense for a particular storage connection, e.g.
+        host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
+        are forwarded to ``urllib.request.Request`` as header options. For other
+        URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are
+        forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more
+        details, and for more examples on storage options refer `here
+        <https://pandas.pydata.org/docs/user_guide/io.html?
+        highlight=storage_options#reading-writing-remote-files>`_.
+
+    Returns
+    -------
+    object
+        The unpickled pandas object (or any object) that was stored in file.
+
+    See Also
+    --------
+    DataFrame.to_pickle : Pickle (serialize) DataFrame object to file.
+    Series.to_pickle : Pickle (serialize) Series object to file.
+    read_hdf : Read HDF5 file into a DataFrame.
+    read_sql : Read SQL query or database table into a DataFrame.
+    read_parquet : Load a parquet object, returning a DataFrame.
+
+    Notes
+    -----
+    read_pickle is only guaranteed to be backwards compatible to pandas 1.0
+    provided the object was serialized with to_pickle.
+
+    Examples
+    --------
+    >>> original_df = pd.DataFrame(
+    ...     {"foo": range(5), "bar": range(5, 10)}
+    ... )  # doctest: +SKIP
+    >>> original_df  # doctest: +SKIP
+       foo  bar
+    0    0    5
+    1    1    6
+    2    2    7
+    3    3    8
+    4    4    9
+    >>> pd.to_pickle(original_df, "./dummy.pkl")  # doctest: +SKIP
+
+    >>> unpickled_df = pd.read_pickle("./dummy.pkl")  # doctest: +SKIP
+    >>> unpickled_df  # doctest: +SKIP
+       foo  bar
+    0    0    5
+    1    1    6
+    2    2    7
+    3    3    8
+    4    4    9
+    """
+    # TypeError for Cython complaints about object.__new__ vs Tick.__new__
+    excs_to_catch = (AttributeError, ImportError, ModuleNotFoundError, TypeError)
+    with get_handle(
+        filepath_or_buffer,
+        "rb",
+        compression=compression,
+        is_text=False,
+        storage_options=storage_options,
+    ) as handles:
+        # 1) try standard library Pickle
+        # 2) try pickle_compat (older pandas version) to handle subclass changes
+        try:
+            with warnings.catch_warnings(record=True):
+                # We want to silence any warnings about, e.g. moved modules.
+                warnings.simplefilter("ignore", Warning)
+                return pickle.load(handles.handle)
+        except excs_to_catch:
+            # e.g.
+            #  "No module named 'pandas.core.sparse.series'"
+            #  "Can't get attribute '_nat_unpickle' on <module 'pandas._libs.tslib"
+            handles.handle.seek(0)
+            return pickle_compat.Unpickler(handles.handle).load()
@@ -0,0 +1,3 @@
+from pandas.io.sas.sasreader import read_sas
+
+__all__ = ["read_sas"]
@@ -0,0 +1,738 @@
+"""
+Read SAS7BDAT files
+
+Based on code written by Jared Hobbs:
+  https://bitbucket.org/jaredhobbs/sas7bdat
+
+See also:
+  https://github.com/BioStatMatt/sas7bdat
+
+Partial documentation of the file format:
+  https://cran.r-project.org/package=sas7bdat/vignettes/sas7bdat.pdf
+
+Reference for binary data compression:
+  http://collaboration.cmc.ec.gc.ca/science/rpn/biblio/ddj/Website/articles/CUJ/1992/9210/ross/ross.htm
+"""
+
+from __future__ import annotations
+
+from datetime import datetime
+import sys
+from typing import TYPE_CHECKING
+
+import numpy as np
+
+from pandas._config import using_string_dtype
+
+from pandas._libs.byteswap import (
+    read_double_with_byteswap,
+    read_float_with_byteswap,
+    read_uint16_with_byteswap,
+    read_uint32_with_byteswap,
+    read_uint64_with_byteswap,
+)
+from pandas._libs.sas import (
+    Parser,
+    get_subheader_index,
+)
+from pandas._libs.tslibs.conversion import cast_from_unit_vectorized
+from pandas.errors import EmptyDataError
+
+import pandas as pd
+from pandas import (
+    DataFrame,
+    Timestamp,
+)
+
+from pandas.io.common import get_handle
+import pandas.io.sas.sas_constants as const
+from pandas.io.sas.sasreader import SASReader
+
+if TYPE_CHECKING:
+    from pandas._typing import (
+        CompressionOptions,
+        FilePath,
+        ReadBuffer,
+    )
+
+
+_unix_origin = Timestamp("1970-01-01")  # Unix epoch
+_sas_origin = Timestamp("1960-01-01")  # epoch of SAS date/datetime values
+
+
+def _convert_datetimes(sas_datetimes: pd.Series, unit: str) -> pd.Series:
+    """
+    Shift SAS date or datetime values onto the Unix epoch as datetime64.
+
+    Seconds are converted at millisecond resolution; SAS float64 lacks
+    precision for anything finer.
+
+    Parameters
+    ----------
+    sas_datetimes : Series
+       Dates or datetimes as stored by SAS.
+    unit : {'d', 's'}
+       "s" if the values are seconds (datetimes); any other value is
+       handled as whole days (dates).
+
+    Returns
+    -------
+    Series
+       Series built from the shifted values, on the index of the input.
+    """
+    epoch_shift = (_sas_origin - _unix_origin).as_unit("s")
+    if unit != "s":
+        days = np.array(sas_datetimes, dtype="M8[D]") + epoch_shift
+        return pd.Series(days, dtype="M8[s]", index=sas_datetimes.index, copy=False)
+    raw = sas_datetimes._values
+    as_millis = cast_from_unit_vectorized(raw, unit="s", out_unit="ms")
+    shifted = as_millis.view("M8[ms]") + epoch_shift
+    return pd.Series(shifted, index=sas_datetimes.index, copy=False)
+
+
+class _Column:
+    """Metadata describing a single column of a SAS7BDAT file."""
+
+    col_id: int
+    name: str | bytes
+    label: str | bytes
+    format: str | bytes
+    ctype: bytes
+    length: int
+
+    def __init__(
+        self,
+        col_id: int,
+        # These can be bytes when convert_header_text is False
+        name: str | bytes,
+        label: str | bytes,
+        format: str | bytes,
+        ctype: bytes,
+        length: int,
+    ) -> None:
+        self.col_id, self.length = col_id, length
+        self.name, self.label, self.format = name, label, format
+        self.ctype = ctype
+
+
+# SAS7BDAT represents a SAS data file in SAS7BDAT format.
+class SAS7BDATReader(SASReader):
+    """
+    Read SAS files in SAS7BDAT format.
+
+    Parameters
+    ----------
+    path_or_buf : path name or buffer
+        Name of SAS file or file-like object pointing to SAS file
+        contents.
+    index : column identifier, defaults to None
+        Column to use as index.
+    convert_dates : bool, defaults to True
+        Attempt to convert dates to Pandas datetime values.  Note that
+        some rarely used SAS date formats may be unsupported.
+    blank_missing : bool, defaults to True
+        Convert empty strings to missing values (SAS uses blanks to
+        indicate missing character variables).
+    chunksize : int, defaults to None
+        Return SAS7BDATReader object for iterations, returns chunks
+        with given number of lines.
+    encoding : str, 'infer', defaults to None
+        String encoding acc. to Python standard encodings,
+        encoding='infer' tries to detect the encoding from the file header,
+        encoding=None will leave the data in binary format.
+    convert_text : bool, defaults to True
+        If False, text variables are left as raw bytes.
+    convert_header_text : bool, defaults to True
+        If False, header text, including column names, are left as raw
+        bytes.
+    """
+
+    _int_length: int
+    _cached_page: bytes | None
+
+    def __init__(
+        self,
+        path_or_buf: FilePath | ReadBuffer[bytes],
+        index=None,
+        convert_dates: bool = True,
+        blank_missing: bool = True,
+        chunksize: int | None = None,
+        encoding: str | None = None,
+        convert_text: bool = True,
+        convert_header_text: bool = True,
+        compression: CompressionOptions = "infer",
+    ) -> None:
+        """Store the reader options, open the source and parse header metadata."""
+        self.index = index
+        self.chunksize = chunksize
+        self.encoding = encoding
+        self.convert_dates = convert_dates
+        self.convert_text = convert_text
+        self.convert_header_text = convert_header_text
+        self.blank_missing = blank_missing
+
+        # Defaults and per-column containers, empty until metadata is parsed
+        self.default_encoding = "latin-1"
+        self.compression = b""
+        self.columns: list[_Column] = []
+        self.column_names_raw: list[bytes] = []
+        self.column_names: list[str | bytes] = []
+        self.column_formats: list[str | bytes] = []
+        self._column_types: list[bytes] = []
+        self._column_data_lengths: list[int] = []
+        self._column_data_offsets: list[int] = []
+
+        # Page cache and read position
+        self._cached_page = None
+        self._current_page_data_subheader_pointers: list[tuple[int, int]] = []
+        self._current_row_in_file_index = 0
+        self._current_row_on_page_index = 0
+
+        self.handles = get_handle(
+            path_or_buf, "rb", is_text=False, compression=compression
+        )
+        self._path_or_buf = self.handles.handle
+
+        # Same order as const.SASIndex
+        self._subheader_processors = [
+            self._process_rowsize_subheader,
+            self._process_columnsize_subheader,
+            self._process_subheader_counts,
+            self._process_columntext_subheader,
+            self._process_columnname_subheader,
+            self._process_columnattributes_subheader,
+            self._process_format_subheader,
+            self._process_columnlist_subheader,
+            None,  # Data
+        ]
+
+        try:
+            self._get_properties()
+            self._parse_metadata()
+        except Exception:
+            self.close()
+            raise
+
+    def column_data_lengths(self) -> np.ndarray:
+        """Column data lengths as a numpy int64 array."""
+        return np.asarray(self._column_data_lengths, np.int64)
+
+    def column_data_offsets(self) -> np.ndarray:
+        """Column offsets as a numpy int64 array."""
+        return np.asarray(self._column_data_offsets, np.int64)
+
+    def column_types(self) -> np.ndarray:
+        """
+        Column types as a numpy array of single bytes:
+           s (string) or d (double)
+        """
+        return np.asarray(self._column_types, np.dtype("S1"))
+
+    def close(self) -> None:
+        self.handles.close()  # release what get_handle() returned in __init__
+
+    def _get_properties(self) -> None:
+        """Validate the magic number and read layout fields from the file header."""
+        self._path_or_buf.seek(0)
+        self._cached_page = self._path_or_buf.read(288)  # first 288 header bytes
+        if self._cached_page[0 : len(const.magic)] != const.magic:
+            raise ValueError("magic number mismatch (not a SAS file?)")
+
+        # Get alignment information: U64 files use 8-byte integers, others 4-byte
+        buf = self._read_bytes(const.align_1_offset, const.align_1_length)
+        if buf == const.u64_byte_checker_value:
+            self.U64 = True
+            self._int_length = 8
+            self._page_bit_offset = const.page_bit_offset_x64
+            self._subheader_pointer_length = const.subheader_pointer_length_x64
+        else:
+            self.U64 = False
+            self._page_bit_offset = const.page_bit_offset_x86
+            self._subheader_pointer_length = const.subheader_pointer_length_x86
+            self._int_length = 4
+        buf = self._read_bytes(const.align_2_offset, const.align_2_length)
+        if buf == const.align_1_checker_value:
+            align1 = const.align_2_value
+        else:
+            align1 = 0
+
+        # Get endianness information; byteswap is needed when it differs from the host
+        buf = self._read_bytes(const.endianness_offset, const.endianness_length)
+        if buf == b"\x01":
+            self.byte_order = "<"
+            self.need_byteswap = sys.byteorder == "big"
+        else:
+            self.byte_order = ">"
+            self.need_byteswap = sys.byteorder == "little"
+
+        # Get encoding information (only applied when encoding == "infer")
+        buf = self._read_bytes(const.encoding_offset, const.encoding_length)[0]
+        if buf in const.encoding_names:
+            self.inferred_encoding = const.encoding_names[buf]
+            if self.encoding == "infer":
+                self.encoding = self.inferred_encoding
+        else:
+            self.inferred_encoding = f"unknown (code={buf})"
+
+        # Created/modified times are stored as seconds since the 1960-01-01 epoch
+        epoch = datetime(1960, 1, 1)
+        x = self._read_float(
+            const.date_created_offset + align1, const.date_created_length
+        )
+        self.date_created = epoch + pd.to_timedelta(x, unit="s")
+        x = self._read_float(
+            const.date_modified_offset + align1, const.date_modified_length
+        )
+        self.date_modified = epoch + pd.to_timedelta(x, unit="s")
+
+        self.header_length = self._read_uint(
+            const.header_size_offset + align1, const.header_size_length
+        )
+
+        # Read the rest of the header into cached_page.
+        buf = self._path_or_buf.read(self.header_length - 288)
+        self._cached_page += buf
+        if len(self._cached_page) != self.header_length:
+            raise ValueError("The SAS7BDAT file appears to be truncated.")
+
+        self._page_length = self._read_uint(
+            const.page_size_offset + align1, const.page_size_length
+        )
+
+    def __next__(self) -> DataFrame:
+        da = self.read(nrows=self.chunksize or 1)
+        if da.empty:
+            self.close()
+            raise StopIteration
+        return da
+
+    # Read a single float of the given width (4 or 8).
+    def _read_float(self, offset: int, width: int) -> float:
+        assert self._cached_page is not None
+        if width == 4:
+            return read_float_with_byteswap(
+                self._cached_page, offset, self.need_byteswap
+            )
+        elif width == 8:
+            return read_double_with_byteswap(
+                self._cached_page, offset, self.need_byteswap
+            )
+        else:
+            self.close()
+            raise ValueError("invalid float width")
+
+    # Read a single unsigned integer of the given width (1, 2, 4 or 8).
+    def _read_uint(self, offset: int, width: int) -> int:
+        assert self._cached_page is not None
+        if width == 1:
+            return self._read_bytes(offset, 1)[0]
+        elif width == 2:
+            return read_uint16_with_byteswap(
+                self._cached_page, offset, self.need_byteswap
+            )
+        elif width == 4:
+            return read_uint32_with_byteswap(
+                self._cached_page, offset, self.need_byteswap
+            )
+        elif width == 8:
+            return read_uint64_with_byteswap(
+                self._cached_page, offset, self.need_byteswap
+            )
+        else:
+            self.close()
+            raise ValueError("invalid int width")
+
+    def _read_bytes(self, offset: int, length: int):
+        assert self._cached_page is not None
+        if offset + length > len(self._cached_page):
+            self.close()
+            raise ValueError("The cached page is too small.")
+        return self._cached_page[offset : offset + length]
+
+    def _parse_metadata(self) -> None:
+        done = False
+        while not done:
+            self._cached_page = self._path_or_buf.read(self._page_length)
+            if len(self._cached_page) <= 0:
+                break
+            if len(self._cached_page) != self._page_length:
+                raise ValueError("Failed to read a meta data page from the SAS file.")
+            done = self._process_page_meta()
+
+    def _process_page_meta(self) -> bool:
+        self._read_page_header()
+        pt = [*const.page_meta_types, const.page_amd_type, const.page_mix_type]
+        if self._current_page_type in pt:
+            self._process_page_metadata()
+        is_data_page = self._current_page_type == const.page_data_type
+        is_mix_page = self._current_page_type == const.page_mix_type
+        return bool(
+            is_data_page
+            or is_mix_page
+            or self._current_page_data_subheader_pointers != []
+        )
+
+    def _read_page_header(self) -> None:
+        bit_offset = self._page_bit_offset
+        tx = const.page_type_offset + bit_offset
+        self._current_page_type = (
+            self._read_uint(tx, const.page_type_length) & const.page_type_mask2
+        )
+        tx = const.block_count_offset + bit_offset
+        self._current_page_block_count = self._read_uint(tx, const.block_count_length)
+        tx = const.subheader_count_offset + bit_offset
+        self._current_page_subheaders_count = self._read_uint(
+            tx, const.subheader_count_length
+        )
+
+    def _process_page_metadata(self) -> None:
+        bit_offset = self._page_bit_offset
+
+        for i in range(self._current_page_subheaders_count):
+            offset = const.subheader_pointers_offset + bit_offset
+            total_offset = offset + self._subheader_pointer_length * i
+
+            subheader_offset = self._read_uint(total_offset, self._int_length)
+            total_offset += self._int_length
+
+            subheader_length = self._read_uint(total_offset, self._int_length)
+            total_offset += self._int_length
+
+            subheader_compression = self._read_uint(total_offset, 1)
+            total_offset += 1
+
+            subheader_type = self._read_uint(total_offset, 1)
+
+            if (
+                subheader_length == 0
+                or subheader_compression == const.truncated_subheader_id
+            ):
+                continue
+
+            subheader_signature = self._read_bytes(subheader_offset, self._int_length)
+            subheader_index = get_subheader_index(subheader_signature)
+            subheader_processor = self._subheader_processors[subheader_index]
+
+            if subheader_processor is None:
+                f1 = subheader_compression in (const.compressed_subheader_id, 0)
+                f2 = subheader_type == const.compressed_subheader_type
+                if self.compression and f1 and f2:
+                    self._current_page_data_subheader_pointers.append(
+                        (subheader_offset, subheader_length)
+                    )
+                else:
+                    self.close()
+                    raise ValueError(
+                        f"Unknown subheader signature {subheader_signature}"
+                    )
+            else:
+                subheader_processor(subheader_offset, subheader_length)
+
+    def _process_rowsize_subheader(self, offset: int, length: int) -> None:
+        int_len = self._int_length
+        lcs_offset = offset
+        lcp_offset = offset
+        if self.U64:
+            lcs_offset += 682
+            lcp_offset += 706
+        else:
+            lcs_offset += 354
+            lcp_offset += 378
+
+        self.row_length = self._read_uint(
+            offset + const.row_length_offset_multiplier * int_len,
+            int_len,
+        )
+        self.row_count = self._read_uint(
+            offset + const.row_count_offset_multiplier * int_len,
+            int_len,
+        )
+        self.col_count_p1 = self._read_uint(
+            offset + const.col_count_p1_multiplier * int_len, int_len
+        )
+        self.col_count_p2 = self._read_uint(
+            offset + const.col_count_p2_multiplier * int_len, int_len
+        )
+        mx = const.row_count_on_mix_page_offset_multiplier * int_len
+        self._mix_page_row_count = self._read_uint(offset + mx, int_len)
+        self._lcs = self._read_uint(lcs_offset, 2)
+        self._lcp = self._read_uint(lcp_offset, 2)
+
+    def _process_columnsize_subheader(self, offset: int, length: int) -> None:
+        int_len = self._int_length
+        offset += int_len
+        self.column_count = self._read_uint(offset, int_len)
+        if self.col_count_p1 + self.col_count_p2 != self.column_count:
+            print(
+                f"Warning: column count mismatch ({self.col_count_p1} + "
+                f"{self.col_count_p2} != {self.column_count})\n"
+            )
+
+    # Unknown purpose
+    def _process_subheader_counts(self, offset: int, length: int) -> None:
+        pass
+
+    def _process_columntext_subheader(self, offset: int, length: int) -> None:
+        """
+        Store the raw text block of a column-text subheader.
+
+        The block is appended to ``column_names_raw``. For the first block
+        only, the compression literal is detected and ``creator_proc`` is
+        extracted from a position that depends on that literal and on
+        ``_lcs``/``_lcp``. ``length`` is unused.
+        """
+        offset += self._int_length
+        text_block_size = self._read_uint(offset, const.text_block_size_length)
+
+        buf = self._read_bytes(offset, text_block_size)
+        # Trailing NUL and space bytes are stripped from the stored block.
+        cname_raw = buf[0:text_block_size].rstrip(b"\x00 ")
+        self.column_names_raw.append(cname_raw)
+
+        if len(self.column_names_raw) == 1:
+            # If several literals occur in the block, the last one listed in
+            # ``const.compression_literals`` wins.
+            compression_literal = b""
+            for cl in const.compression_literals:
+                if cl in cname_raw:
+                    compression_literal = cl
+            self.compression = compression_literal
+            # Undo the earlier shift so the offsets below are relative to the
+            # start of the subheader.
+            offset -= self._int_length
+
+            offset1 = offset + 16
+            if self.U64:
+                offset1 += 4
+
+            buf = self._read_bytes(offset1, self._lcp)
+            compression_literal = buf.rstrip(b"\x00")
+            if compression_literal == b"":
+                self._lcs = 0
+                offset1 = offset + 32
+                if self.U64:
+                    offset1 += 4
+                buf = self._read_bytes(offset1, self._lcp)
+                self.creator_proc = buf[0 : self._lcp]
+            elif compression_literal == const.rle_compression:
+                offset1 = offset + 40
+                if self.U64:
+                    offset1 += 4
+                buf = self._read_bytes(offset1, self._lcp)
+                self.creator_proc = buf[0 : self._lcp]
+            elif self._lcs > 0:
+                self._lcp = 0
+                offset1 = offset + 16
+                if self.U64:
+                    offset1 += 4
+                buf = self._read_bytes(offset1, self._lcs)
+                # NOTE(review): ``_lcp`` was just set to 0, so this slice is
+                # always empty even though ``_lcs`` bytes were read; confirm
+                # whether ``_lcs`` was intended here.
+                self.creator_proc = buf[0 : self._lcp]
+            # ``creator_proc`` is only set by one of the branches above.
+            if hasattr(self, "creator_proc"):
+                self.creator_proc = self._convert_header_text(self.creator_proc)  # pyright: ignore[reportArgumentType]
+
+    def _process_columnname_subheader(self, offset: int, length: int) -> None:
+        int_len = self._int_length
+        offset += int_len
+        column_name_pointers_count = (length - 2 * int_len - 12) // 8
+        for i in range(column_name_pointers_count):
+            text_subheader = (
+                offset
+                + const.column_name_pointer_length * (i + 1)
+                + const.column_name_text_subheader_offset
+            )
+            col_name_offset = (
+                offset
+                + const.column_name_pointer_length * (i + 1)
+                + const.column_name_offset_offset
+            )
+            col_name_length = (
+                offset
+                + const.column_name_pointer_length * (i + 1)
+                + const.column_name_length_offset
+            )
+
+            idx = self._read_uint(
+                text_subheader, const.column_name_text_subheader_length
+            )
+            col_offset = self._read_uint(
+                col_name_offset, const.column_name_offset_length
+            )
+            col_len = self._read_uint(col_name_length, const.column_name_length_length)
+
+            name_raw = self.column_names_raw[idx]
+            cname = name_raw[col_offset : col_offset + col_len]
+            self.column_names.append(self._convert_header_text(cname))
+
+    def _process_columnattributes_subheader(self, offset: int, length: int) -> None:
+        int_len = self._int_length
+        column_attributes_vectors_count = (length - 2 * int_len - 12) // (int_len + 8)
+        for i in range(column_attributes_vectors_count):
+            col_data_offset = (
+                offset + int_len + const.column_data_offset_offset + i * (int_len + 8)
+            )
+            col_data_len = (
+                offset
+                + 2 * int_len
+                + const.column_data_length_offset
+                + i * (int_len + 8)
+            )
+            col_types = (
+                offset + 2 * int_len + const.column_type_offset + i * (int_len + 8)
+            )
+
+            x = self._read_uint(col_data_offset, int_len)
+            self._column_data_offsets.append(x)
+
+            x = self._read_uint(col_data_len, const.column_data_length_length)
+            self._column_data_lengths.append(x)
+
+            x = self._read_uint(col_types, const.column_type_length)
+            self._column_types.append(b"d" if x == 1 else b"s")
+
+    def _process_columnlist_subheader(self, offset: int, length: int) -> None:
+        # unknown purpose
+        pass
+
+    def _process_format_subheader(self, offset: int, length: int) -> None:
+        int_len = self._int_length
+        text_subheader_format = (
+            offset + const.column_format_text_subheader_index_offset + 3 * int_len
+        )
+        col_format_offset = offset + const.column_format_offset_offset + 3 * int_len
+        col_format_len = offset + const.column_format_length_offset + 3 * int_len
+        text_subheader_label = (
+            offset + const.column_label_text_subheader_index_offset + 3 * int_len
+        )
+        col_label_offset = offset + const.column_label_offset_offset + 3 * int_len
+        col_label_len = offset + const.column_label_length_offset + 3 * int_len
+
+        x = self._read_uint(
+            text_subheader_format, const.column_format_text_subheader_index_length
+        )
+        format_idx = min(x, len(self.column_names_raw) - 1)
+
+        format_start = self._read_uint(
+            col_format_offset, const.column_format_offset_length
+        )
+        format_len = self._read_uint(col_format_len, const.column_format_length_length)
+
+        label_idx = self._read_uint(
+            text_subheader_label, const.column_label_text_subheader_index_length
+        )
+        label_idx = min(label_idx, len(self.column_names_raw) - 1)
+
+        label_start = self._read_uint(
+            col_label_offset, const.column_label_offset_length
+        )
+        label_len = self._read_uint(col_label_len, const.column_label_length_length)
+
+        label_names = self.column_names_raw[label_idx]
+        column_label = self._convert_header_text(
+            label_names[label_start : label_start + label_len]
+        )
+        format_names = self.column_names_raw[format_idx]
+        column_format = self._convert_header_text(
+            format_names[format_start : format_start + format_len]
+        )
+        current_column_number = len(self.columns)
+
+        col = _Column(
+            current_column_number,
+            self.column_names[current_column_number],
+            column_label,
+            column_format,
+            self._column_types[current_column_number],
+            self._column_data_lengths[current_column_number],
+        )
+
+        self.column_formats.append(column_format)
+        self.columns.append(col)
+
+    def read(self, nrows: int | None = None) -> DataFrame:
+        if (nrows is None) and (self.chunksize is not None):
+            nrows = self.chunksize
+        elif nrows is None:
+            nrows = self.row_count
+
+        if len(self._column_types) == 0:
+            self.close()
+            raise EmptyDataError("No columns to parse from file")
+
+        if nrows > 0 and self._current_row_in_file_index >= self.row_count:
+            return DataFrame()
+
+        nrows = min(nrows, self.row_count - self._current_row_in_file_index)
+
+        nd = self._column_types.count(b"d")
+        ns = self._column_types.count(b"s")
+
+        self._string_chunk = np.empty((ns, nrows), dtype=object)
+        self._byte_chunk = np.zeros((nd, 8 * nrows), dtype=np.uint8)
+
+        self._current_row_in_chunk_index = 0
+        p = Parser(self)
+        p.read(nrows)
+
+        rslt = self._chunk_to_dataframe()
+        if self.index is not None:
+            rslt = rslt.set_index(self.index)
+
+        return rslt
+
+    def _read_next_page(self):
+        self._current_page_data_subheader_pointers = []
+        self._cached_page = self._path_or_buf.read(self._page_length)
+        if len(self._cached_page) <= 0:
+            return True
+        elif len(self._cached_page) != self._page_length:
+            self.close()
+            msg = (
+                "failed to read complete page from file (read "
+                f"{len(self._cached_page):d} of {self._page_length:d} bytes)"
+            )
+            raise ValueError(msg)
+
+        self._read_page_header()
+        if self._current_page_type in const.page_meta_types:
+            self._process_page_metadata()
+
+        if self._current_page_type not in [
+            *const.page_meta_types,
+            const.page_data_type,
+            const.page_mix_type,
+        ]:
+            return self._read_next_page()
+
+        return False
+
+    def _chunk_to_dataframe(self) -> DataFrame:
+        n = self._current_row_in_chunk_index
+        m = self._current_row_in_file_index
+        ix = range(m - n, m)
+        rslt = {}
+
+        js, jb = 0, 0
+        infer_string = using_string_dtype()
+        for j in range(self.column_count):
+            name = self.column_names[j]
+
+            if self._column_types[j] == b"d":
+                col_arr = self._byte_chunk[jb, :].view(dtype=self.byte_order + "d")
+                rslt[name] = pd.Series(col_arr, dtype=np.float64, index=ix, copy=False)
+                if self.convert_dates:
+                    if self.column_formats[j] in const.sas_date_formats:
+                        rslt[name] = _convert_datetimes(rslt[name], "d")
+                    elif self.column_formats[j] in const.sas_datetime_formats:
+                        rslt[name] = _convert_datetimes(rslt[name], "s")
+                jb += 1
+            elif self._column_types[j] == b"s":
+                rslt[name] = pd.Series(self._string_chunk[js, :], index=ix, copy=False)
+                if self.convert_text and (self.encoding is not None):
+                    rslt[name] = self._decode_string(rslt[name].str)
+                    if infer_string:
+                        rslt[name] = rslt[name].astype("str")
+
+                js += 1
+            else:
+                self.close()
+                raise ValueError(f"unknown column type {self._column_types[j]!r}")
+
+        df = DataFrame(rslt, columns=self.column_names, index=ix, copy=False)
+        return df
+
+    def _decode_string(self, b):
+        return b.decode(self.encoding or self.default_encoding)
+
+    def _convert_header_text(self, b: bytes) -> str | bytes:
+        if self.convert_header_text:
+            return self._decode_string(b)
+        else:
+            return b
@@ -0,0 +1,310 @@
+from __future__ import annotations
+
+from typing import Final
+
+# Byte sequence expected at the very start of a SAS7BDAT file; the reader
+# compares the first ``len(magic)`` bytes of the file against it.
+magic: Final = (
+    b"\x00\x00\x00\x00\x00\x00\x00\x00"
+    b"\x00\x00\x00\x00\xc2\xea\x81\x60"
+    b"\xb3\x14\x11\xcf\xbd\x92\x08\x00"
+    b"\x09\xc7\x31\x8c\x18\x1f\x10\x11"
+)
+
+# Header flag bytes. The sas7bdat reader compares the byte at
+# ``align_1_offset`` with ``u64_byte_checker_value`` (selecting the 64-bit
+# layout) and the byte at ``align_2_offset`` with ``align_1_checker_value``
+# (shifting later header offsets by ``align_2_value``).
+align_1_checker_value: Final = b"3"
+align_1_offset: Final = 32
+align_1_length: Final = 1
+align_1_value: Final = 4
+u64_byte_checker_value: Final = b"3"
+align_2_offset: Final = 35
+align_2_length: Final = 1
+align_2_value: Final = 4
+# Offsets and lengths, in bytes, of fields within the file header.
+endianness_offset: Final = 37
+endianness_length: Final = 1
+platform_offset: Final = 39
+platform_length: Final = 1
+encoding_offset: Final = 70
+encoding_length: Final = 1
+dataset_offset: Final = 92
+dataset_length: Final = 64
+file_type_offset: Final = 156
+file_type_length: Final = 8
+date_created_offset: Final = 164
+date_created_length: Final = 8
+date_modified_offset: Final = 172
+date_modified_length: Final = 8
+header_size_offset: Final = 196
+header_size_length: Final = 4
+page_size_offset: Final = 200
+page_size_length: Final = 4
+page_count_offset: Final = 204
+page_count_length: Final = 4
+sas_release_offset: Final = 216
+sas_release_length: Final = 8
+sas_server_type_offset: Final = 224
+sas_server_type_length: Final = 16
+os_version_number_offset: Final = 240
+os_version_number_length: Final = 16
+os_maker_offset: Final = 256
+os_maker_length: Final = 16
+os_name_offset: Final = 272
+os_name_length: Final = 16
+# Page layout. The ``*_x86`` / ``*_x64`` variants are chosen by the reader
+# according to whether the file uses the 64-bit layout.
+page_bit_offset_x86: Final = 16
+page_bit_offset_x64: Final = 32
+subheader_pointer_length_x86: Final = 12
+subheader_pointer_length_x64: Final = 24
+# Page header fields; the reader adds the page bit offset to each offset.
+page_type_offset: Final = 0
+page_type_length: Final = 2
+block_count_offset: Final = 2
+block_count_length: Final = 2
+subheader_count_offset: Final = 4
+subheader_count_length: Final = 2
+page_type_mask: Final = 0x0F00
+# Keep "page_comp_type" bits
+page_type_mask2: Final = 0xF000 | page_type_mask
+# Page type codes, compared against the masked page type field.
+page_meta_type: Final = 0x0000
+page_data_type: Final = 0x0100
+page_mix_type: Final = 0x0200
+page_amd_type: Final = 0x0400
+page_meta2_type: Final = 0x4000
+page_comp_type: Final = 0x9000
+page_meta_types: Final = [page_meta_type, page_meta2_type]
+# Subheader pointer table: start offset within a page (before the page bit
+# offset is added) and the compression / type codes the reader checks.
+subheader_pointers_offset: Final = 8
+truncated_subheader_id: Final = 1
+compressed_subheader_id: Final = 4
+compressed_subheader_type: Final = 1
+text_block_size_length: Final = 2
+# Row-size subheader fields. Each multiplier is multiplied by the integer
+# length (4 or 8) to obtain the field's offset within the subheader.
+row_length_offset_multiplier: Final = 5
+row_count_offset_multiplier: Final = 6
+col_count_p1_multiplier: Final = 9
+col_count_p2_multiplier: Final = 10
+row_count_on_mix_page_offset_multiplier: Final = 15
+# Column-name pointer layout.
+column_name_pointer_length: Final = 8
+column_name_text_subheader_offset: Final = 0
+column_name_text_subheader_length: Final = 2
+column_name_offset_offset: Final = 2
+column_name_offset_length: Final = 2
+column_name_length_offset: Final = 4
+column_name_length_length: Final = 2
+# Column-attributes vector layout.
+column_data_offset_offset: Final = 8
+column_data_length_offset: Final = 8
+column_data_length_length: Final = 4
+column_type_offset: Final = 14
+column_type_length: Final = 1
+# Format-and-label subheader layout.
+column_format_text_subheader_index_offset: Final = 22
+column_format_text_subheader_index_length: Final = 2
+column_format_offset_offset: Final = 24
+column_format_offset_length: Final = 2
+column_format_length_offset: Final = 26
+column_format_length_length: Final = 2
+column_label_text_subheader_index_offset: Final = 28
+column_label_text_subheader_index_length: Final = 2
+column_label_offset_offset: Final = 30
+column_label_offset_length: Final = 2
+column_label_length_offset: Final = 32
+column_label_length_length: Final = 2
+# Compression signatures the reader searches for in the first column-text
+# block.
+rle_compression: Final = b"SASYZCRL"
+rdc_compression: Final = b"SASYZCR2"
+
+compression_literals: Final = [rle_compression, rdc_compression]
+
+# Incomplete list of encodings, using SAS nomenclature:
+# https://support.sas.com/documentation/onlinedoc/dfdmstudio/2.6/dmpdmsug/Content/dfU_Encodings_SAS.html
+# corresponding to the Python documentation of standard encodings
+# https://docs.python.org/3/library/codecs.html#standard-encodings
+#
+# Keys are the one-byte encoding codes read from the file header at
+# ``encoding_offset``; values are Python codec names. Codes missing from
+# this mapping are reported by the reader as "unknown (code=N)".
+encoding_names: Final = {
+    20: "utf-8",
+    29: "latin1",
+    30: "latin2",
+    31: "latin3",
+    32: "latin4",
+    33: "cyrillic",
+    34: "arabic",
+    35: "greek",
+    36: "hebrew",
+    37: "latin5",
+    38: "latin6",
+    39: "cp874",
+    40: "latin9",
+    41: "cp437",
+    42: "cp850",
+    43: "cp852",
+    44: "cp857",
+    45: "cp858",
+    46: "cp862",
+    47: "cp864",
+    48: "cp865",
+    49: "cp866",
+    50: "cp869",
+    51: "cp874",
+    # 52: "",  # not found
+    # 53: "",  # not found
+    # 54: "",  # not found
+    55: "cp720",
+    56: "cp737",
+    57: "cp775",
+    58: "cp860",
+    59: "cp863",
+    60: "cp1250",
+    61: "cp1251",
+    62: "cp1252",
+    63: "cp1253",
+    64: "cp1254",
+    65: "cp1255",
+    66: "cp1256",
+    67: "cp1257",
+    68: "cp1258",
+    118: "cp950",
+    # 119: "",  # not found
+    123: "big5",
+    125: "gb2312",
+    126: "cp936",
+    134: "euc_jp",
+    136: "cp932",
+    138: "shift_jis",
+    140: "euc-kr",
+    141: "cp949",
+    227: "latin8",
+    # 228: "", # not found
+    # 229: ""  # not found
+}
+
+
+class SASIndex:
+    """Integer identifiers for the known kinds of subheader."""
+
+    # These are the values of ``subheader_signature_to_index`` below.
+    row_size_index: Final = 0
+    column_size_index: Final = 1
+    subheader_counts_index: Final = 2
+    column_text_index: Final = 3
+    column_name_index: Final = 4
+    column_attributes_index: Final = 5
+    format_and_label_index: Final = 6
+    column_list_index: Final = 7
+    data_subheader_index: Final = 8
+
+
+# Maps a subheader's leading signature bytes (4-byte and 8-byte forms) to
+# its ``SASIndex`` identifier. Most kinds appear in several byte layouts.
+# NOTE(review): presumably the 4/8-byte forms match the reader's integer
+# length and the mirrored forms cover both byte orders; confirm against the
+# lookup code, which is not part of this module.
+subheader_signature_to_index: Final = {
+    b"\xf7\xf7\xf7\xf7": SASIndex.row_size_index,
+    b"\x00\x00\x00\x00\xf7\xf7\xf7\xf7": SASIndex.row_size_index,
+    b"\xf7\xf7\xf7\xf7\x00\x00\x00\x00": SASIndex.row_size_index,
+    b"\xf7\xf7\xf7\xf7\xff\xff\xfb\xfe": SASIndex.row_size_index,
+    b"\xf6\xf6\xf6\xf6": SASIndex.column_size_index,
+    b"\x00\x00\x00\x00\xf6\xf6\xf6\xf6": SASIndex.column_size_index,
+    b"\xf6\xf6\xf6\xf6\x00\x00\x00\x00": SASIndex.column_size_index,
+    b"\xf6\xf6\xf6\xf6\xff\xff\xfb\xfe": SASIndex.column_size_index,
+    b"\x00\xfc\xff\xff": SASIndex.subheader_counts_index,
+    b"\xff\xff\xfc\x00": SASIndex.subheader_counts_index,
+    b"\x00\xfc\xff\xff\xff\xff\xff\xff": SASIndex.subheader_counts_index,
+    b"\xff\xff\xff\xff\xff\xff\xfc\x00": SASIndex.subheader_counts_index,
+    b"\xfd\xff\xff\xff": SASIndex.column_text_index,
+    b"\xff\xff\xff\xfd": SASIndex.column_text_index,
+    b"\xfd\xff\xff\xff\xff\xff\xff\xff": SASIndex.column_text_index,
+    b"\xff\xff\xff\xff\xff\xff\xff\xfd": SASIndex.column_text_index,
+    b"\xff\xff\xff\xff": SASIndex.column_name_index,
+    b"\xff\xff\xff\xff\xff\xff\xff\xff": SASIndex.column_name_index,
+    b"\xfc\xff\xff\xff": SASIndex.column_attributes_index,
+    b"\xff\xff\xff\xfc": SASIndex.column_attributes_index,
+    b"\xfc\xff\xff\xff\xff\xff\xff\xff": SASIndex.column_attributes_index,
+    b"\xff\xff\xff\xff\xff\xff\xff\xfc": SASIndex.column_attributes_index,
+    b"\xfe\xfb\xff\xff": SASIndex.format_and_label_index,
+    b"\xff\xff\xfb\xfe": SASIndex.format_and_label_index,
+    b"\xfe\xfb\xff\xff\xff\xff\xff\xff": SASIndex.format_and_label_index,
+    b"\xff\xff\xff\xff\xff\xff\xfb\xfe": SASIndex.format_and_label_index,
+    b"\xfe\xff\xff\xff": SASIndex.column_list_index,
+    b"\xff\xff\xff\xfe": SASIndex.column_list_index,
+    b"\xfe\xff\xff\xff\xff\xff\xff\xff": SASIndex.column_list_index,
+    b"\xff\xff\xff\xff\xff\xff\xff\xfe": SASIndex.column_list_index,
+}
+
+
+# List of frequently used SAS date and datetime formats
+# http://support.sas.com/documentation/cdl/en/etsug/60372/HTML/default/viewer.htm#etsug_intervals_sect009.htm
+# https://github.com/epam/parso/blob/master/src/main/java/com/epam/parso/impl/SasFileConstants.java
+#
+# When date conversion is enabled, the sas7bdat reader converts numeric
+# columns whose format is listed here using the "d" unit.
+sas_date_formats: Final = (
+    "DATE",
+    "DAY",
+    "DDMMYY",
+    "DOWNAME",
+    "JULDAY",
+    "JULIAN",
+    "MMDDYY",
+    "MMYY",
+    "MMYYC",
+    "MMYYD",
+    "MMYYP",
+    "MMYYS",
+    "MMYYN",
+    "MONNAME",
+    "MONTH",
+    "MONYY",
+    "QTR",
+    "QTRR",
+    "NENGO",
+    "WEEKDATE",
+    "WEEKDATX",
+    "WEEKDAY",
+    "WEEKV",
+    "WORDDATE",
+    "WORDDATX",
+    "YEAR",
+    "YYMM",
+    "YYMMC",
+    "YYMMD",
+    "YYMMP",
+    "YYMMS",
+    "YYMMN",
+    "YYMON",
+    "YYMMDD",
+    "YYQ",
+    "YYQC",
+    "YYQD",
+    "YYQP",
+    "YYQS",
+    "YYQN",
+    "YYQR",
+    "YYQRC",
+    "YYQRD",
+    "YYQRP",
+    "YYQRS",
+    "YYQRN",
+    "YYMMDDP",
+    "YYMMDDC",
+    "E8601DA",
+    "YYMMDDN",
+    "MMDDYYC",
+    "MMDDYYS",
+    "MMDDYYD",
+    "YYMMDDS",
+    "B8601DA",
+    "DDMMYYN",
+    "YYMMDDD",
+    "DDMMYYB",
+    "DDMMYYP",
+    "MMDDYYP",
+    "YYMMDDB",
+    "MMDDYYN",
+    "DDMMYYC",
+    "DDMMYYD",
+    "DDMMYYS",
+    "MINGUO",
+)
+
+# When date conversion is enabled, the sas7bdat reader converts numeric
+# columns whose format is listed here using the "s" unit.
+# NOTE(review): "DTMONYY" and "DTWKDATX" each appear twice; this is harmless
+# for the membership tests the reader performs.
+sas_datetime_formats: Final = (
+    "DATETIME",
+    "DTWKDATX",
+    "B8601DN",
+    "B8601DT",
+    "B8601DX",
+    "B8601DZ",
+    "B8601LX",
+    "E8601DN",
+    "E8601DT",
+    "E8601DX",
+    "E8601DZ",
+    "E8601LX",
+    "DATEAMPM",
+    "DTDATE",
+    "DTMONYY",
+    "DTMONYY",
+    "DTWKDATX",
+    "DTYEAR",
+    "TOD",
+    "MDYAMPM",
+)
@@ -0,0 +1,501 @@
+"""
+Read a SAS XPort format file into a Pandas DataFrame.
+
+Based on code from Jack Cushman (github.com/jcushman/xport).
+
+The file format is defined here:
+
+https://support.sas.com/content/dam/SAS/support/en/technical-papers/record-layout-of-a-sas-version-5-or-6-data-set-in-sas-transport-xport-format.pdf
+"""
+
+from __future__ import annotations
+
+from datetime import datetime
+import struct
+from typing import TYPE_CHECKING
+import warnings
+
+import numpy as np
+
+from pandas.util._exceptions import find_stack_level
+
+import pandas as pd
+
+from pandas.io.common import get_handle
+from pandas.io.sas.sasreader import SASReader
+
+if TYPE_CHECKING:
+    from pandas._typing import (
+        CompressionOptions,
+        DatetimeNaTType,
+        FilePath,
+        ReadBuffer,
+    )
+# Expected text of the first 80-byte record of an XPORT file (library header).
+# The reader compares the decoded record against it for equality.
+_correct_line1 = (
+    "HEADER RECORD*******LIBRARY HEADER RECORD!!!!!!!000000000000000000000000000000  "
+)
+# Expected start of the member header record.  Only a prefix is listed: the
+# reader matches it with ``startswith`` and takes the field-descriptor length
+# from the characters that follow in the same record.
+_correct_header1 = (
+    "HEADER RECORD*******MEMBER  HEADER RECORD!!!!!!!000000000000000001600000000"
+)
+# Expected text of the descriptor header record read right after the member
+# header; compared for equality.
+_correct_header2 = (
+    "HEADER RECORD*******DSCRPTR HEADER RECORD!!!!!!!000000000000000000000000000000  "
+)
+# Expected text of the record that precedes the observation data; compared
+# for equality.
+_correct_obs_header = (
+    "HEADER RECORD*******OBS     HEADER RECORD!!!!!!!000000000000000000000000000000  "
+)
+# Names assigned, in order, to the values unpacked from one field (variable)
+# descriptor.  The entry named "_" is deleted by the reader after unpacking.
+_fieldkeys = [
+    "ntype",
+    "nhfun",
+    "field_length",
+    "nvar0",
+    "name",
+    "label",
+    "nform",
+    "nfl",
+    "num_decimals",
+    "nfj",
+    "nfill",
+    "niform",
+    "nifl",
+    "nifd",
+    "npos",
+    "_",
+]
+
+
+# Docstring fragments.  They are spliced into the two templates further down
+# (``_read_sas_doc`` and ``_xport_reader_doc``) with f-string interpolation.
+
+# Opening of a "Parameters" section plus the file argument.
+_base_params_doc = """\
+Parameters
+----------
+filepath_or_buffer : str or file-like object
+    Path to SAS file or object implementing binary read method."""
+
+# Parameter entries for ``index``, ``encoding`` and ``chunksize``.
+_params2_doc = """\
+index : identifier of index column
+    Identifier of column that should be used as index of the DataFrame.
+encoding : str
+    Encoding for text data.
+chunksize : int
+    Read file `chunksize` lines at a time, returns iterator."""
+
+# Parameter entry for ``format``.
+_format_params_doc = """\
+format : str
+    File format, only `xport` is currently supported."""
+
+# Parameter entry for ``iterator``.
+_iterator_doc = """\
+iterator : bool, default False
+    Return XportReader object for reading file incrementally."""
+
+
+# Assembled docstring text for a ``read_sas`` style function.
+# NOTE(review): nothing in the visible part of this module references this
+# name -- confirm whether it is still needed.
+# NOTE(review): the loop body in the example below starts with ">>>" where
+# doctest syntax expects the "..." continuation prompt.
+_read_sas_doc = f"""Read a SAS file into a DataFrame.
+
+{_base_params_doc}
+{_format_params_doc}
+{_params2_doc}
+{_iterator_doc}
+
+Returns
+-------
+DataFrame or XportReader
+
+Examples
+--------
+Read a SAS Xport file:
+
+>>> df = pd.read_sas('filename.XPT')
+
+Read a Xport file in 10,000 line chunks:
+
+>>> itr = pd.read_sas('filename.XPT', chunksize=10000)
+>>> for chunk in itr:
+>>>     do_something(chunk)
+
+"""
+
+# Assembled docstring text that ``XportReader`` installs as its ``__doc__``.
+_xport_reader_doc = f"""\
+Class for reading SAS Xport files.
+
+{_base_params_doc}
+{_params2_doc}
+
+Attributes
+----------
+member_info : list
+    Contains information about the file
+fields : list
+    Contains information about the variables in the file
+"""
+
+
+def _parse_date(datestr: str) -> DatetimeNaTType:
+    """Given a date in xport format, return Python date."""
+    try:
+        # e.g. "16FEB11:10:07:55"
+        return datetime.strptime(datestr, "%d%b%y:%H:%M:%S")
+    except ValueError:
+        return pd.NaT
+
+
+def _split_line(s: str, parts):
+    """
+    Parameters
+    ----------
+    s: str
+        Fixed-length string to split
+    parts: list of (name, length) pairs
+        Used to break up string, name '_' will be filtered from output.
+
+    Returns
+    -------
+    Dict of name:contents of string at given location.
+    """
+    out = {}
+    start = 0
+    for name, length in parts:
+        out[name] = s[start : start + length].strip()
+        start += length
+    del out["_"]
+    return out
+
+
+def _handle_truncated_float_vec(vec, nbytes):
+    # This feature is not well documented, but some SAS XPORT files
+    # have 2-7 byte "truncated" floats.  To read these truncated
+    # floats, pad them with zeros on the right to make 8 byte floats.
+    #
+    # References:
+    # https://github.com/jcushman/xport/pull/3
+    # The R "foreign" library
+
+    if nbytes != 8:
+        vec1 = np.zeros(len(vec), np.dtype("S8"))
+        dtype = np.dtype(f"S{nbytes},S{8 - nbytes}")
+        vec2 = vec1.view(dtype=dtype)
+        vec2["f0"] = vec
+        return vec2
+
+    return vec
+
+
+def _parse_float_vec(vec):
+    """
+    Parse a vector of float values representing IBM 8 byte floats into
+    native 8 byte floats.
+    """
+    dtype = np.dtype(">u4,>u4")
+    vec1 = vec.view(dtype=dtype)
+    xport1 = vec1["f0"]
+    xport2 = vec1["f1"]
+
+    # Start by setting first half of ieee number to first half of IBM
+    # number sans exponent
+    ieee1 = xport1 & 0x00FFFFFF
+
+    # The fraction bit to the left of the binary point in the ieee
+    # format was set and the number was shifted 0, 1, 2, or 3
+    # places. This will tell us how to adjust the ibm exponent to be a
+    # power of 2 ieee exponent and how to shift the fraction bits to
+    # restore the correct magnitude.
+    shift = np.zeros(len(vec), dtype=np.uint8)
+    shift[np.where(xport1 & 0x00200000)] = 1
+    shift[np.where(xport1 & 0x00400000)] = 2
+    shift[np.where(xport1 & 0x00800000)] = 3
+
+    # shift the ieee number down the correct number of places then
+    # set the second half of the ieee number to be the second half
+    # of the ibm number shifted appropriately, ored with the bits
+    # from the first half that would have been shifted in if we
+    # could shift a double. All we are worried about are the low
+    # order 3 bits of the first half since we're only shifting by
+    # 1, 2, or 3.
+    ieee1 >>= shift
+    ieee2 = (xport2 >> shift) | ((xport1 & 0x00000007) << (29 + (3 - shift)))
+
+    # clear the 1 bit to the left of the binary point
+    ieee1 &= 0xFFEFFFFF
+
+    # set the exponent of the ieee number to be the actual exponent
+    # plus the shift count + 1023. Or this into the first half of the
+    # ieee number. The ibm exponent is excess 64 but is adjusted by 65
+    # since during conversion to ibm format the exponent is
+    # incremented by 1 and the fraction bits left 4 positions to the
+    # right of the radix point.  (had to add >> 24 because C treats &
+    # 0x7f as 0x7f000000 and Python doesn't)
+    ieee1 |= ((((((xport1 >> 24) & 0x7F) - 65) << 2) + shift + 1023) << 20) | (
+        xport1 & 0x80000000
+    )
+
+    ieee = np.empty((len(ieee1),), dtype=">u4,>u4")
+    ieee["f0"] = ieee1
+    ieee["f1"] = ieee2
+    ieee = ieee.view(dtype=">f8")
+    ieee = ieee.astype("f8")
+
+    return ieee
+
+
+class XportReader(SASReader):
+    # The class docstring is the text assembled in ``_xport_reader_doc``.
+    __doc__ = _xport_reader_doc
+
+    def __init__(
+        self,
+        filepath_or_buffer: FilePath | ReadBuffer[bytes],
+        index=None,
+        encoding: str | None = "ISO-8859-1",
+        chunksize: int | None = None,
+        compression: CompressionOptions = "infer",
+    ) -> None:
+        """
+        Open the source in binary mode and parse the xport headers.
+
+        If header parsing raises, the opened handles are closed before the
+        exception is re-raised.
+        """
+        self._encoding = encoding
+        # Number of observations already handed out by ``read``.
+        self._lines_read = 0
+        self._index = index
+        self._chunksize = chunksize
+
+        self.handles = get_handle(
+            filepath_or_buffer,
+            "rb",
+            encoding=encoding,
+            is_text=False,
+            compression=compression,
+        )
+        self.filepath_or_buffer = self.handles.handle
+
+        try:
+            self._read_header()
+        except Exception:
+            # Do not leak the handle when the file turns out to be unreadable.
+            self.close()
+            raise
+
+    def close(self) -> None:
+        """Close the handles opened by ``__init__``."""
+        self.handles.close()
+
+    def _get_row(self):
+        """
+        Read the next 80 bytes and return them decoded as text.
+
+        ``bytes.decode()`` is called without arguments, so the codec is
+        Python's default rather than ``self._encoding``.
+        """
+        return self.filepath_or_buffer.read(80).decode()
+
+    def _read_header(self) -> None:
+        """
+        Parse the library, member and field headers from the start of the file.
+
+        Sets ``file_info``, ``member_info``, ``fields``, ``record_length``,
+        ``record_start``, ``nobs``, ``columns`` and ``_dtype``, and leaves the
+        file positioned at the first observation.
+
+        Raises
+        ------
+        ValueError
+            If one of the expected header records is missing or malformed.
+        TypeError
+            If a numeric field declares a width outside 2..8 bytes.
+        """
+        self.filepath_or_buffer.seek(0)
+
+        # read file header
+        line1 = self._get_row()
+        if line1 != _correct_line1:
+            if "**COMPRESSED**" in line1:
+                # this was created with the PROC CPORT method and can't be read
+                # https://documentation.sas.com/doc/en/pgmsascdc/9.4_3.5/movefile/p1bm6aqp3fw4uin1hucwh718f6kp.htm
+                raise ValueError(
+                    "Header record indicates a CPORT file, which is not readable."
+                )
+            raise ValueError("Header record is not an XPORT file.")
+
+        line2 = self._get_row()
+        # (name, width) layout of the second record; "_" marks filler.
+        fif = [["prefix", 24], ["version", 8], ["OS", 8], ["_", 24], ["created", 16]]
+        file_info = _split_line(line2, fif)
+        if file_info["prefix"] != "SAS     SAS     SASLIB":
+            raise ValueError("Header record has invalid prefix.")
+        file_info["created"] = _parse_date(file_info["created"])
+        self.file_info = file_info
+
+        # The third record starts with the modification timestamp.
+        line3 = self._get_row()
+        file_info["modified"] = _parse_date(line3[:16])
+
+        # read member header
+        header1 = self._get_row()
+        header2 = self._get_row()
+        headflag1 = header1.startswith(_correct_header1)
+        headflag2 = header2 == _correct_header2
+        if not (headflag1 and headflag2):
+            raise ValueError("Member header not found")
+        # usually 140, could be 135
+        fieldnamelength = int(header1[-5:-2])
+
+        # member info
+        mem = [
+            ["prefix", 8],
+            ["set_name", 8],
+            ["sasdata", 8],
+            ["version", 8],
+            ["OS", 8],
+            ["_", 24],
+            ["created", 16],
+        ]
+        member_info = _split_line(self._get_row(), mem)
+        # Layout of the following record, merged into the same dict.
+        mem = [["modified", 16], ["_", 16], ["label", 40], ["type", 8]]
+        member_info.update(_split_line(self._get_row(), mem))
+        member_info["modified"] = _parse_date(member_info["modified"])
+        member_info["created"] = _parse_date(member_info["created"])
+        self.member_info = member_info
+
+        # read field names
+        types = {1: "numeric", 2: "char"}
+        # The next record carries the number of fields in columns 54..57.
+        fieldcount = int(self._get_row()[54:58])
+        datalength = fieldnamelength * fieldcount
+        # round up to nearest 80
+        if datalength % 80:
+            datalength += 80 - datalength % 80
+        fielddata = self.filepath_or_buffer.read(datalength)
+        fields = []
+        obs_length = 0
+        while len(fielddata) >= fieldnamelength:
+            # pull data for one field
+            fieldbytes, fielddata = (
+                fielddata[:fieldnamelength],
+                fielddata[fieldnamelength:],
+            )
+
+            # rest at end gets ignored, so if field is short, pad out
+            # to match struct pattern below
+            fieldbytes = fieldbytes.ljust(140)
+
+            fieldstruct = struct.unpack(">hhhh8s40s8shhh2s8shhl52s", fieldbytes)
+            field = dict(zip(_fieldkeys, fieldstruct, strict=True))
+            del field["_"]
+            # An ntype code other than 1 or 2 raises KeyError here.
+            field["ntype"] = types[field["ntype"]]
+            fl = field["field_length"]
+            if field["ntype"] == "numeric" and ((fl < 2) or (fl > 8)):
+                msg = f"Floating field width {fl} is not between 2 and 8."
+                raise TypeError(msg)
+
+            # Strip the byte-string entries; the integer entries have no
+            # ``strip`` and are left as they are.
+            for k, v in field.items():
+                try:
+                    field[k] = v.strip()
+                except AttributeError:
+                    pass
+
+            obs_length += field["field_length"]
+            fields += [field]
+
+        header = self._get_row()
+        if not header == _correct_obs_header:
+            raise ValueError("Observation header not found.")
+
+        self.fields = fields
+        # Bytes per observation: the sum of all field widths.
+        self.record_length = obs_length
+        # Offset of the first observation; ``_record_count`` seeks back here.
+        self.record_start = self.filepath_or_buffer.tell()
+
+        self.nobs = self._record_count()
+        self.columns = [x["name"].decode() for x in self.fields]
+
+        # Setup the dtype.
+        # One fixed-width byte-string field ("s0", "s1", ...) per column, so a
+        # block of observations can be viewed as a structured array.
+        dtypel = [
+            ("s" + str(i), "S" + str(field["field_length"]))
+            for i, field in enumerate(self.fields)
+        ]
+        dtype = np.dtype(dtypel)
+        self._dtype = dtype
+
+    def __next__(self) -> pd.DataFrame:
+        """
+        Return the next chunk of ``chunksize`` rows.
+
+        One row is returned per call when no chunksize was given.
+        """
+        return self.read(nrows=self._chunksize or 1)
+
+    def _record_count(self) -> int:
+        """
+        Get number of records in file.
+
+        This is maybe suboptimal because we have to seek to the end of
+        the file.
+
+        Side effect: returns file position to record_start.
+        """
+        self.filepath_or_buffer.seek(0, 2)
+        total_records_length = self.filepath_or_buffer.tell() - self.record_start
+
+        if total_records_length % 80 != 0:
+            warnings.warn(
+                "xport file may be corrupted.",
+                stacklevel=find_stack_level(),
+            )
+
+        # Records longer than one 80-byte card: the count is the plain
+        # quotient, without inspecting the last card for padding.
+        if self.record_length > 80:
+            self.filepath_or_buffer.seek(self.record_start)
+            return total_records_length // self.record_length
+
+        # Otherwise look at the last 80-byte card as ten 8-byte words.
+        self.filepath_or_buffer.seek(-80, 2)
+        last_card_bytes = self.filepath_or_buffer.read(80)
+        last_card = np.frombuffer(last_card_bytes, dtype=np.uint64)
+
+        # 8 byte blank
+        # (2314885530818453536 == 0x2020202020202020, eight ASCII spaces)
+        ix = np.flatnonzero(last_card == 2314885530818453536)
+
+        # Every all-blank word found is counted as 8 bytes of tail padding.
+        if len(ix) == 0:
+            tail_pad = 0
+        else:
+            tail_pad = 8 * len(ix)
+
+        self.filepath_or_buffer.seek(self.record_start)
+
+        return (total_records_length - tail_pad) // self.record_length
+
+    def get_chunk(self, size: int | None = None) -> pd.DataFrame:
+        """
+        Reads lines from Xport file and returns as dataframe
+
+        Parameters
+        ----------
+        size : int, defaults to None
+            Number of lines to read.  If None, the ``chunksize`` given at
+            construction is used; if that is None as well, all remaining
+            lines are read.
+
+        Returns
+        -------
+        DataFrame
+        """
+        if size is None:
+            size = self._chunksize
+        return self.read(nrows=size)
+
+    def _missing_double(self, vec):
+        """
+        Return a boolean mask of the items in ``vec`` that encode a missing value.
+
+        An item is flagged when bytes 1..7 are all zero and byte 0 is an
+        uppercase ASCII letter (0x41..0x5A), an underscore (0x5F) or a
+        period (0x2E).
+        """
+        # Fields: f0 = byte 0, f1 = byte 1, f2 = bytes 2-3, f3 = bytes 4-7.
+        v = vec.view(dtype="u1,u1,u2,u4")
+        miss = (v["f1"] == 0) & (v["f2"] == 0) & (v["f3"] == 0)
+        miss1 = (
+            ((v["f0"] >= 0x41) & (v["f0"] <= 0x5A))
+            | (v["f0"] == 0x5F)
+            | (v["f0"] == 0x2E)
+        )
+        miss &= miss1
+        return miss
+
+    def read(self, nrows: int | None = None) -> pd.DataFrame:
+        """Read observations from SAS Xport file, returning as data frame.
+
+        Parameters
+        ----------
+        nrows : int
+            Number of rows to read from data file; if None, read all
+            rows that have not been read yet.
+
+        Returns
+        -------
+        A DataFrame.
+
+        Raises
+        ------
+        StopIteration
+            If there is nothing left to read; the reader is closed first.
+        """
+        if nrows is None:
+            nrows = self.nobs
+
+        # Never read past the last observation.
+        read_lines = min(nrows, self.nobs - self._lines_read)
+        read_len = read_lines * self.record_length
+        if read_len <= 0:
+            self.close()
+            raise StopIteration
+        raw = self.filepath_or_buffer.read(read_len)
+        data = np.frombuffer(raw, dtype=self._dtype, count=read_lines)
+
+        df_data = {}
+        for j, x in enumerate(self.columns):
+            vec = data["s" + str(j)]
+            ntype = self.fields[j]["ntype"]
+            if ntype == "numeric":
+                vec = _handle_truncated_float_vec(vec, self.fields[j]["field_length"])
+                # Compute the missing mask on the raw bytes before conversion,
+                # then overwrite those positions with NaN.
+                miss = self._missing_double(vec)
+                v = _parse_float_vec(vec)
+                v[miss] = np.nan
+            elif self.fields[j]["ntype"] == "char":
+                v = [y.rstrip() for y in vec]
+
+                # With encoding=None the values stay as bytes.
+                if self._encoding is not None:
+                    v = [y.decode(self._encoding) for y in v]
+
+            df_data.update({x: v})
+        df = pd.DataFrame(df_data)
+
+        if self._index is None:
+            # Continue the row numbering across successive chunks.
+            df.index = pd.Index(range(self._lines_read, self._lines_read + read_lines))
+        else:
+            df = df.set_index(self._index)
+
+        self._lines_read += read_lines
+
+        return df
@@ -0,0 +1,197 @@
+"""
+Read SAS sas7bdat or xport files.
+"""
+
+from __future__ import annotations
+
+from abc import (
+    ABC,
+    abstractmethod,
+)
+from collections.abc import Iterator
+from typing import (
+    TYPE_CHECKING,
+    Self,
+    overload,
+)
+
+from pandas.util._decorators import set_module
+
+from pandas.io.common import stringify_path
+
+if TYPE_CHECKING:
+    from collections.abc import Hashable
+    from types import TracebackType
+
+    from pandas._typing import (
+        CompressionOptions,
+        FilePath,
+        ReadBuffer,
+    )
+
+    from pandas import DataFrame
+
+
+@set_module("pandas.api.typing")
+class SASReader(Iterator["DataFrame"], ABC):
+    """
+    Abstract class for XportReader and SAS7BDATReader.
+    """
+
+    @abstractmethod
+    def read(self, nrows: int | None = None) -> DataFrame: ...
+
+    @abstractmethod
+    def close(self) -> None: ...
+
+    def __enter__(self) -> Self:
+        return self
+
+    def __exit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc_value: BaseException | None,
+        traceback: TracebackType | None,
+    ) -> None:
+        self.close()
+
+
+# Typing overload: with an integer ``chunksize`` the result is a reader object.
+@overload
+def read_sas(
+    filepath_or_buffer: FilePath | ReadBuffer[bytes],
+    *,
+    format: str | None = ...,
+    index: Hashable | None = ...,
+    encoding: str | None = ...,
+    chunksize: int = ...,
+    iterator: bool = ...,
+    compression: CompressionOptions = ...,
+) -> SASReader: ...
+
+
+# Typing overload: without ``chunksize`` the result is a DataFrame, or a
+# reader object when ``iterator`` is true.
+@overload
+def read_sas(
+    filepath_or_buffer: FilePath | ReadBuffer[bytes],
+    *,
+    format: str | None = ...,
+    index: Hashable | None = ...,
+    encoding: str | None = ...,
+    chunksize: None = ...,
+    iterator: bool = ...,
+    compression: CompressionOptions = ...,
+) -> DataFrame | SASReader: ...
+
+
+@set_module("pandas")
+def read_sas(
+    filepath_or_buffer: FilePath | ReadBuffer[bytes],
+    *,
+    format: str | None = None,
+    index: Hashable | None = None,
+    encoding: str | None = None,
+    chunksize: int | None = None,
+    iterator: bool = False,
+    compression: CompressionOptions = "infer",
+) -> DataFrame | SASReader:
+    """
+    Read SAS files stored as either XPORT or SAS7BDAT format files.
+
+    Parameters
+    ----------
+    filepath_or_buffer : str, path object, or file-like object
+        String, path object (implementing ``os.PathLike[str]``), or file-like
+        object implementing a binary ``read()`` function. The string could be
+        a URL. Valid URL schemes include http, ftp, s3, and file. For file
+        URLs, a host is expected. A local file could be:
+        ``file://localhost/path/to/table.sas7bdat``.
+    format : str {{'xport', 'sas7bdat'}} or None
+        If None, file format is inferred from file extension. If 'xport' or
+        'sas7bdat', uses the corresponding format.
+    index : identifier of index column, defaults to None
+        Identifier of column that should be used as index of the DataFrame.
+    encoding : str, default is None
+        Encoding for text data.  If None, text data are stored as raw bytes.
+    chunksize : int
+        Read file `chunksize` lines at a time, returns iterator.
+    iterator : bool, defaults to False
+        If True, returns an iterator for reading the file incrementally.
+    compression : str or dict, default 'infer'
+        For on-the-fly decompression of on-disk data. If 'infer' and
+        'filepath_or_buffer' is path-like, then detect compression from the
+        following extensions: '.gz', '.bz2', '.zip', '.xz', '.zst', '.tar',
+        '.tar.gz', '.tar.xz' or '.tar.bz2' (otherwise no compression).
+        Set to ``None`` for no decompression.
+        Can also be a dict with key ``'method'`` set to one of {``'zip'``,
+        ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'xz'``, ``'tar'``} and other
+        key-value pairs are forwarded to ``zipfile.ZipFile``,
+        ``gzip.GzipFile``, ``bz2.BZ2File``, ``zstandard.ZstdCompressor``,
+        ``lzma.LZMAFile`` or ``tarfile.TarFile``, respectively.
+        As an example, the following could be passed for faster compression
+        and to create a reproducible gzip archive:
+        ``compression={'method': 'gzip', 'compresslevel': 1, 'mtime': 1}``.
+
+    Returns
+    -------
+    DataFrame, SAS7BDATReader, or XportReader
+        DataFrame if iterator=False and chunksize=None, else SAS7BDATReader
+        or XportReader, file format is inferred from file extension.
+
+    See Also
+    --------
+    read_csv : Read a comma-separated values (csv) file into a DataFrame.
+    read_excel : Read an Excel file into a pandas DataFrame.
+    read_spss : Read an SPSS file into a pandas DataFrame.
+    read_orc : Load an ORC object into a pandas DataFrame.
+    read_feather : Load a feather-format object into a pandas DataFrame.
+
+    Examples
+    --------
+    >>> df = pd.read_sas("sas_data.sas7bdat")  # doctest: +SKIP
+    """
+    if format is None:
+        buffer_error_msg = (
+            "If this is a buffer object rather "
+            "than a string name, you must specify a format string"
+        )
+        filepath_or_buffer = stringify_path(filepath_or_buffer)
+        if not isinstance(filepath_or_buffer, str):
+            raise ValueError(buffer_error_msg)
+        fname = filepath_or_buffer.lower()
+        if ".xpt" in fname:
+            format = "xport"
+        elif ".sas7bdat" in fname:
+            format = "sas7bdat"
+        else:
+            raise ValueError(
+                f"unable to infer format of SAS file from filename: {fname!r}"
+            )
+
+    reader: SASReader
+    if format.lower() == "xport":
+        from pandas.io.sas.sas_xport import XportReader
+
+        reader = XportReader(
+            filepath_or_buffer,
+            index=index,
+            encoding=encoding,
+            chunksize=chunksize,
+            compression=compression,
+        )
+    elif format.lower() == "sas7bdat":
+        from pandas.io.sas.sas7bdat import SAS7BDATReader
+
+        reader = SAS7BDATReader(
+            filepath_or_buffer,
+            index=index,
+            encoding=encoding,
+            chunksize=chunksize,
+            compression=compression,
+        )
+    else:
+        raise ValueError("unknown SAS format")
+
+    if iterator or chunksize:
+        return reader
+
+    with reader:
+        return reader.read()
@@ -0,0 +1,95 @@
+from __future__ import annotations
+
+from typing import (
+    TYPE_CHECKING,
+    Any,
+)
+
+from pandas._libs import lib
+from pandas.compat._optional import import_optional_dependency
+from pandas.util._decorators import set_module
+from pandas.util._validators import check_dtype_backend
+
+from pandas.core.dtypes.inference import is_list_like
+
+from pandas.io.common import stringify_path
+
+if TYPE_CHECKING:
+    from collections.abc import Sequence
+    from pathlib import Path
+
+    from pandas._typing import DtypeBackend
+
+    from pandas import DataFrame
+
+
+@set_module("pandas")
+def read_spss(
+    path: str | Path,
+    usecols: Sequence[str] | None = None,
+    convert_categoricals: bool = True,
+    dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
+    **kwargs: Any,
+) -> DataFrame:
+    """
+    Load an SPSS file from the file path, returning a DataFrame.
+
+    Parameters
+    ----------
+    path : str or Path
+        File path.
+    usecols : list-like, optional
+        Return a subset of the columns. If None, return all columns.
+    convert_categoricals : bool, default is True
+        Convert categorical columns into pd.Categorical.
+    dtype_backend : {'numpy_nullable', 'pyarrow'}
+        Back-end data type applied to the resultant :class:`DataFrame`
+        (still experimental). If not specified, the default behavior
+        is to not use nullable data types. If specified, the behavior
+        is as follows:
+
+        * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame`
+        * ``"pyarrow"``: returns pyarrow-backed
+          nullable :class:`ArrowDtype` :class:`DataFrame`
+
+        .. versionadded:: 2.0
+    **kwargs
+        Additional keyword arguments that can be passed to :func:`pyreadstat.read_sav`.
+
+        .. versionadded:: 3.0
+
+    Returns
+    -------
+    DataFrame
+        DataFrame based on the SPSS file.
+
+    See Also
+    --------
+    read_csv : Read a comma-separated values (csv) file into a pandas DataFrame.
+    read_excel : Read an Excel file into a pandas DataFrame.
+    read_sas : Read an SAS file into a pandas DataFrame.
+    read_orc : Load an ORC object into a pandas DataFrame.
+    read_feather : Load a feather-format object into a pandas DataFrame.
+
+    Examples
+    --------
+    >>> df = pd.read_spss("spss_data.sav")  # doctest: +SKIP
+    """
+    pyreadstat = import_optional_dependency("pyreadstat")
+    check_dtype_backend(dtype_backend)
+
+    if usecols is not None:
+        if not is_list_like(usecols):
+            raise TypeError("usecols must be list-like.")
+        usecols = list(usecols)  # pyreadstat requires a list
+
+    df, metadata = pyreadstat.read_sav(
+        stringify_path(path),
+        usecols=usecols,
+        apply_value_formats=convert_categoricals,
+        **kwargs,
+    )
+    df.attrs = metadata.__dict__
+    if dtype_backend is not lib.no_default:
+        df = df.convert_dtypes(dtype_backend=dtype_backend)
+    return df