conftest.py 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197
  1. import uuid
  2. import pytest
  3. from pandas.compat import (
  4. is_ci_environment,
  5. is_platform_arm,
  6. is_platform_mac,
  7. is_platform_windows,
  8. )
  9. import pandas.util._test_decorators as td
  10. import pandas.io.common as icom
  11. from pandas.io.parsers import read_csv
  12. @pytest.fixture
  13. def compression_to_extension():
  14. return {value: key for key, value in icom.extension_to_compression.items()}
  15. @pytest.fixture
  16. def tips_file(datapath):
  17. """Path to the tips dataset"""
  18. return datapath("io", "data", "csv", "tips.csv")
  19. @pytest.fixture
  20. def jsonl_file(datapath):
  21. """Path to a JSONL dataset"""
  22. return datapath("io", "parser", "data", "items.jsonl")
  23. @pytest.fixture
  24. def salaries_table(datapath):
  25. """DataFrame with the salaries dataset"""
  26. return read_csv(datapath("io", "parser", "data", "salaries.csv"), sep="\t")
  27. @pytest.fixture
  28. def feather_file(datapath):
  29. return datapath("io", "data", "feather", "feather-0_3_1.feather")
  30. @pytest.fixture
  31. def xml_file(datapath):
  32. return datapath("io", "data", "xml", "books.xml")
  33. @pytest.fixture(scope="session")
  34. def aws_credentials(monkeysession):
  35. """Mocked AWS Credentials for moto."""
  36. monkeysession.setenv("AWS_ACCESS_KEY_ID", "testing")
  37. monkeysession.setenv("AWS_SECRET_ACCESS_KEY", "testing")
  38. monkeysession.setenv("AWS_SECURITY_TOKEN", "testing")
  39. monkeysession.setenv("AWS_SESSION_AWS_SESSION_TOKEN", "testing")
  40. monkeysession.setenv("AWS_DEFAULT_REGION", "us-east-1")
  41. @pytest.fixture(scope="session")
  42. def moto_server(aws_credentials):
  43. # use service container for Linux on GitHub Actions
  44. if is_ci_environment() and not (
  45. is_platform_mac() or is_platform_arm() or is_platform_windows()
  46. ):
  47. yield "http://localhost:5000"
  48. else:
  49. moto_server = pytest.importorskip("moto.server")
  50. server = moto_server.ThreadedMotoServer(port=0)
  51. server.start()
  52. host, port = server.get_host_and_port()
  53. yield f"http://{host}:{port}"
  54. server.stop()
  55. @pytest.fixture
  56. def moto_s3_resource(moto_server):
  57. boto3 = pytest.importorskip("boto3")
  58. s3 = boto3.resource("s3", endpoint_url=moto_server)
  59. return s3
  60. @pytest.fixture(scope="session")
  61. def s3so(moto_server):
  62. return {
  63. "client_kwargs": {
  64. "endpoint_url": moto_server,
  65. }
  66. }
  67. @pytest.fixture
  68. def s3_bucket_public(moto_s3_resource):
  69. """
  70. Create a public S3 bucket using moto.
  71. """
  72. bucket_name = f"pandas-test-{uuid.uuid4()}"
  73. bucket = moto_s3_resource.Bucket(bucket_name)
  74. bucket.create(ACL="public-read")
  75. yield bucket
  76. bucket.objects.delete()
  77. bucket.delete()
  78. @pytest.fixture
  79. def s3_bucket_private(moto_s3_resource):
  80. """
  81. Create a private S3 bucket using moto.
  82. """
  83. bucket_name = f"cant_get_it-{uuid.uuid4()}"
  84. bucket = moto_s3_resource.Bucket(bucket_name)
  85. bucket.create(ACL="private")
  86. yield bucket
  87. bucket.objects.delete()
  88. bucket.delete()
  89. @pytest.fixture
  90. def s3_bucket_public_with_data(
  91. s3_bucket_public, tips_file, jsonl_file, feather_file, xml_file
  92. ):
  93. """
  94. The following datasets
  95. are loaded.
  96. - tips.csv
  97. - tips.csv.gz
  98. - tips.csv.bz2
  99. - items.jsonl
  100. """
  101. test_s3_files = [
  102. ("tips#1.csv", tips_file),
  103. ("tips.csv", tips_file),
  104. ("tips.csv.gz", tips_file + ".gz"),
  105. ("tips.csv.bz2", tips_file + ".bz2"),
  106. ("items.jsonl", jsonl_file),
  107. ("simple_dataset.feather", feather_file),
  108. ("books.xml", xml_file),
  109. ]
  110. for s3_key, file_name in test_s3_files:
  111. with open(file_name, "rb") as f:
  112. s3_bucket_public.put_object(Key=s3_key, Body=f)
  113. return s3_bucket_public
  114. @pytest.fixture
  115. def s3_bucket_private_with_data(
  116. s3_bucket_private, tips_file, jsonl_file, feather_file, xml_file
  117. ):
  118. """
  119. The following datasets
  120. are loaded.
  121. - tips.csv
  122. - tips.csv.gz
  123. - tips.csv.bz2
  124. - items.jsonl
  125. """
  126. test_s3_files = [
  127. ("tips#1.csv", tips_file),
  128. ("tips.csv", tips_file),
  129. ("tips.csv.gz", tips_file + ".gz"),
  130. ("tips.csv.bz2", tips_file + ".bz2"),
  131. ("items.jsonl", jsonl_file),
  132. ("simple_dataset.feather", feather_file),
  133. ("books.xml", xml_file),
  134. ]
  135. for s3_key, file_name in test_s3_files:
  136. with open(file_name, "rb") as f:
  137. s3_bucket_private.put_object(Key=s3_key, Body=f)
  138. return s3_bucket_private
  139. _compression_formats_params = [
  140. (".no_compress", None),
  141. ("", None),
  142. (".gz", "gzip"),
  143. (".GZ", "gzip"),
  144. (".bz2", "bz2"),
  145. (".BZ2", "bz2"),
  146. (".zip", "zip"),
  147. (".ZIP", "zip"),
  148. (".xz", "xz"),
  149. (".XZ", "xz"),
  150. pytest.param((".zst", "zstd"), marks=td.skip_if_no("zstandard")),
  151. pytest.param((".ZST", "zstd"), marks=td.skip_if_no("zstandard")),
  152. ]
  153. @pytest.fixture(params=_compression_formats_params[1:])
  154. def compression_format(request):
  155. return request.param
  156. @pytest.fixture(params=_compression_formats_params)
  157. def compression_ext(request):
  158. return request.param[0]