localization.py 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172
  1. """
  2. Helpers for configuring locale settings.
  3. Name `localization` is chosen to avoid overlap with builtin `locale` module.
  4. """
  5. from __future__ import annotations
  6. from contextlib import contextmanager
  7. import locale
  8. import platform
  9. import re
  10. import subprocess
  11. from typing import TYPE_CHECKING
  12. from pandas._config.config import options
  13. if TYPE_CHECKING:
  14. from collections.abc import Generator
  15. @contextmanager
  16. def set_locale(
  17. new_locale: str | tuple[str, str], lc_var: int = locale.LC_ALL
  18. ) -> Generator[str | tuple[str, str], None, None]:
  19. """
  20. Context manager for temporarily setting a locale.
  21. Parameters
  22. ----------
  23. new_locale : str or tuple
  24. A string of the form <language_country>.<encoding>. For example to set
  25. the current locale to US English with a UTF8 encoding, you would pass
  26. "en_US.UTF-8".
  27. lc_var : int, default `locale.LC_ALL`
  28. The category of the locale being set.
  29. Notes
  30. -----
  31. This is useful when you want to run a particular block of code under a
  32. particular locale, without globally setting the locale. This probably isn't
  33. thread-safe.
  34. """
  35. # getlocale is not always compliant with setlocale, use setlocale. GH#46595
  36. current_locale = locale.setlocale(lc_var)
  37. try:
  38. locale.setlocale(lc_var, new_locale)
  39. normalized_code, normalized_encoding = locale.getlocale()
  40. if normalized_code is not None and normalized_encoding is not None:
  41. yield f"{normalized_code}.{normalized_encoding}"
  42. else:
  43. yield new_locale
  44. finally:
  45. locale.setlocale(lc_var, current_locale)
  46. def can_set_locale(lc: str, lc_var: int = locale.LC_ALL) -> bool:
  47. """
  48. Check to see if we can set a locale, and subsequently get the locale,
  49. without raising an Exception.
  50. Parameters
  51. ----------
  52. lc : str
  53. The locale to attempt to set.
  54. lc_var : int, default `locale.LC_ALL`
  55. The category of the locale being set.
  56. Returns
  57. -------
  58. bool
  59. Whether the passed locale can be set
  60. """
  61. try:
  62. with set_locale(lc, lc_var=lc_var):
  63. pass
  64. except (ValueError, locale.Error):
  65. # horrible name for a Exception subclass
  66. return False
  67. else:
  68. return True
  69. def _valid_locales(locales: list[str] | str, normalize: bool) -> list[str]:
  70. """
  71. Return a list of normalized locales that do not throw an ``Exception``
  72. when set.
  73. Parameters
  74. ----------
  75. locales : str
  76. A string where each locale is separated by a newline.
  77. normalize : bool
  78. Whether to call ``locale.normalize`` on each locale.
  79. Returns
  80. -------
  81. valid_locales : list
  82. A list of valid locales.
  83. """
  84. return [
  85. loc
  86. for loc in (
  87. locale.normalize(loc.strip()) if normalize else loc.strip()
  88. for loc in locales
  89. )
  90. if can_set_locale(loc)
  91. ]
  92. def get_locales(
  93. prefix: str | None = None,
  94. normalize: bool = True,
  95. ) -> list[str]:
  96. """
  97. Get all the locales that are available on the system.
  98. Parameters
  99. ----------
  100. prefix : str
  101. If not ``None`` then return only those locales with the prefix
  102. provided. For example to get all English language locales (those that
  103. start with ``"en"``), pass ``prefix="en"``.
  104. normalize : bool
  105. Call ``locale.normalize`` on the resulting list of available locales.
  106. If ``True``, only locales that can be set without throwing an
  107. ``Exception`` are returned.
  108. Returns
  109. -------
  110. locales : list of strings
  111. A list of locale strings that can be set with ``locale.setlocale()``.
  112. For example::
  113. locale.setlocale(locale.LC_ALL, locale_string)
  114. On error will return an empty list (no locale available, e.g. Windows)
  115. """
  116. if platform.system() in ("Linux", "Darwin"):
  117. raw_locales = subprocess.check_output(["locale", "-a"])
  118. else:
  119. # Other platforms e.g. windows platforms don't define "locale -a"
  120. # Note: is_platform_windows causes circular import here
  121. return []
  122. try:
  123. # raw_locales is "\n" separated list of locales
  124. # it may contain non-decodable parts, so split
  125. # extract what we can and then rejoin.
  126. split_raw_locales = raw_locales.split(b"\n")
  127. out_locales = []
  128. for x in split_raw_locales:
  129. try:
  130. out_locales.append(str(x, encoding=options.display.encoding))
  131. except UnicodeError:
  132. # 'locale -a' is used to populated 'raw_locales' and on
  133. # Redhat 7 Linux (and maybe others) prints locale names
  134. # using windows-1252 encoding. Bug only triggered by
  135. # a few special characters and when there is an
  136. # extensive list of installed locales.
  137. out_locales.append(str(x, encoding="windows-1252"))
  138. except TypeError:
  139. pass
  140. if prefix is None:
  141. return _valid_locales(out_locales, normalize)
  142. pattern = re.compile(f"{prefix}.*")
  143. found = pattern.findall("\n".join(out_locales))
  144. return _valid_locales(found, normalize)