conftest.py 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225
  1. import shlex
  2. import subprocess
  3. import time
  4. import uuid
  5. import pytest
  6. from pandas.compat import (
  7. is_ci_environment,
  8. is_platform_arm,
  9. is_platform_mac,
  10. is_platform_windows,
  11. )
  12. import pandas.util._test_decorators as td
  13. import pandas.io.common as icom
  14. from pandas.io.parsers import read_csv
  15. @pytest.fixture
  16. def compression_to_extension():
  17. return {value: key for key, value in icom.extension_to_compression.items()}
  18. @pytest.fixture
  19. def tips_file(datapath):
  20. """Path to the tips dataset"""
  21. return datapath("io", "data", "csv", "tips.csv")
  22. @pytest.fixture
  23. def jsonl_file(datapath):
  24. """Path to a JSONL dataset"""
  25. return datapath("io", "parser", "data", "items.jsonl")
  26. @pytest.fixture
  27. def salaries_table(datapath):
  28. """DataFrame with the salaries dataset"""
  29. return read_csv(datapath("io", "parser", "data", "salaries.csv"), sep="\t")
  30. @pytest.fixture
  31. def feather_file(datapath):
  32. return datapath("io", "data", "feather", "feather-0_3_1.feather")
  33. @pytest.fixture
  34. def xml_file(datapath):
  35. return datapath("io", "data", "xml", "books.xml")
  36. @pytest.fixture
  37. def s3_base(worker_id, monkeypatch):
  38. """
  39. Fixture for mocking S3 interaction.
  40. Sets up moto server in separate process locally
  41. Return url for motoserver/moto CI service
  42. """
  43. pytest.importorskip("s3fs")
  44. pytest.importorskip("boto3")
  45. # temporary workaround as moto fails for botocore >= 1.11 otherwise,
  46. # see https://github.com/spulec/moto/issues/1924 & 1952
  47. monkeypatch.setenv("AWS_ACCESS_KEY_ID", "foobar_key")
  48. monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "foobar_secret")
  49. if is_ci_environment():
  50. if is_platform_arm() or is_platform_mac() or is_platform_windows():
  51. # NOT RUN on Windows/macOS, only Ubuntu
  52. # - subprocess in CI can cause timeouts
  53. # - GitHub Actions do not support
  54. # container services for the above OSs
  55. pytest.skip(
  56. "S3 tests do not have a corresponding service on "
  57. "Windows or macOS platforms"
  58. )
  59. else:
  60. # set in .github/workflows/unit-tests.yml
  61. yield "http://localhost:5000"
  62. else:
  63. requests = pytest.importorskip("requests")
  64. pytest.importorskip("moto")
  65. pytest.importorskip("flask") # server mode needs flask too
  66. # Launching moto in server mode, i.e., as a separate process
  67. # with an S3 endpoint on localhost
  68. worker_id = "5" if worker_id == "master" else worker_id.lstrip("gw")
  69. endpoint_port = f"555{worker_id}"
  70. endpoint_uri = f"http://127.0.0.1:{endpoint_port}/"
  71. # pipe to null to avoid logging in terminal
  72. with subprocess.Popen(
  73. shlex.split(f"moto_server s3 -p {endpoint_port}"),
  74. stdout=subprocess.DEVNULL,
  75. stderr=subprocess.DEVNULL,
  76. ) as proc:
  77. timeout = 5
  78. while timeout > 0:
  79. try:
  80. # OK to go once server is accepting connections
  81. r = requests.get(endpoint_uri)
  82. if r.ok:
  83. break
  84. except Exception:
  85. pass
  86. timeout -= 0.1
  87. time.sleep(0.1)
  88. yield endpoint_uri
  89. proc.terminate()
  90. @pytest.fixture
  91. def s3so(s3_base):
  92. return {"client_kwargs": {"endpoint_url": s3_base}}
  93. @pytest.fixture
  94. def s3_resource(s3_base):
  95. import boto3
  96. s3 = boto3.resource("s3", endpoint_url=s3_base)
  97. return s3
  98. @pytest.fixture
  99. def s3_public_bucket(s3_resource):
  100. bucket = s3_resource.Bucket(f"pandas-test-{uuid.uuid4()}")
  101. bucket.create()
  102. yield bucket
  103. bucket.objects.delete()
  104. bucket.delete()
  105. @pytest.fixture
  106. def s3_public_bucket_with_data(
  107. s3_public_bucket, tips_file, jsonl_file, feather_file, xml_file
  108. ):
  109. """
  110. The following datasets
  111. are loaded.
  112. - tips.csv
  113. - tips.csv.gz
  114. - tips.csv.bz2
  115. - items.jsonl
  116. """
  117. test_s3_files = [
  118. ("tips#1.csv", tips_file),
  119. ("tips.csv", tips_file),
  120. ("tips.csv.gz", tips_file + ".gz"),
  121. ("tips.csv.bz2", tips_file + ".bz2"),
  122. ("items.jsonl", jsonl_file),
  123. ("simple_dataset.feather", feather_file),
  124. ("books.xml", xml_file),
  125. ]
  126. for s3_key, file_name in test_s3_files:
  127. with open(file_name, "rb") as f:
  128. s3_public_bucket.put_object(Key=s3_key, Body=f)
  129. return s3_public_bucket
  130. @pytest.fixture
  131. def s3_private_bucket(s3_resource):
  132. bucket = s3_resource.Bucket(f"cant_get_it-{uuid.uuid4()}")
  133. bucket.create(ACL="private")
  134. yield bucket
  135. bucket.objects.delete()
  136. bucket.delete()
  137. @pytest.fixture
  138. def s3_private_bucket_with_data(
  139. s3_private_bucket, tips_file, jsonl_file, feather_file, xml_file
  140. ):
  141. """
  142. The following datasets
  143. are loaded.
  144. - tips.csv
  145. - tips.csv.gz
  146. - tips.csv.bz2
  147. - items.jsonl
  148. """
  149. test_s3_files = [
  150. ("tips#1.csv", tips_file),
  151. ("tips.csv", tips_file),
  152. ("tips.csv.gz", tips_file + ".gz"),
  153. ("tips.csv.bz2", tips_file + ".bz2"),
  154. ("items.jsonl", jsonl_file),
  155. ("simple_dataset.feather", feather_file),
  156. ("books.xml", xml_file),
  157. ]
  158. for s3_key, file_name in test_s3_files:
  159. with open(file_name, "rb") as f:
  160. s3_private_bucket.put_object(Key=s3_key, Body=f)
  161. return s3_private_bucket
  162. _compression_formats_params = [
  163. (".no_compress", None),
  164. ("", None),
  165. (".gz", "gzip"),
  166. (".GZ", "gzip"),
  167. (".bz2", "bz2"),
  168. (".BZ2", "bz2"),
  169. (".zip", "zip"),
  170. (".ZIP", "zip"),
  171. (".xz", "xz"),
  172. (".XZ", "xz"),
  173. pytest.param((".zst", "zstd"), marks=td.skip_if_no("zstandard")),
  174. pytest.param((".ZST", "zstd"), marks=td.skip_if_no("zstandard")),
  175. ]
  176. @pytest.fixture(params=_compression_formats_params[1:])
  177. def compression_format(request):
  178. return request.param
  179. @pytest.fixture(params=_compression_formats_params)
  180. def compression_ext(request):
  181. return request.param[0]