| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197 |
- import uuid
- import pytest
- from pandas.compat import (
- is_ci_environment,
- is_platform_arm,
- is_platform_mac,
- is_platform_windows,
- )
- import pandas.util._test_decorators as td
- import pandas.io.common as icom
- from pandas.io.parsers import read_csv
- @pytest.fixture
- def compression_to_extension():
- return {value: key for key, value in icom.extension_to_compression.items()}
- @pytest.fixture
- def tips_file(datapath):
- """Path to the tips dataset"""
- return datapath("io", "data", "csv", "tips.csv")
- @pytest.fixture
- def jsonl_file(datapath):
- """Path to a JSONL dataset"""
- return datapath("io", "parser", "data", "items.jsonl")
- @pytest.fixture
- def salaries_table(datapath):
- """DataFrame with the salaries dataset"""
- return read_csv(datapath("io", "parser", "data", "salaries.csv"), sep="\t")
- @pytest.fixture
- def feather_file(datapath):
- return datapath("io", "data", "feather", "feather-0_3_1.feather")
- @pytest.fixture
- def xml_file(datapath):
- return datapath("io", "data", "xml", "books.xml")
- @pytest.fixture(scope="session")
- def aws_credentials(monkeysession):
- """Mocked AWS Credentials for moto."""
- monkeysession.setenv("AWS_ACCESS_KEY_ID", "testing")
- monkeysession.setenv("AWS_SECRET_ACCESS_KEY", "testing")
- monkeysession.setenv("AWS_SECURITY_TOKEN", "testing")
- monkeysession.setenv("AWS_SESSION_AWS_SESSION_TOKEN", "testing")
- monkeysession.setenv("AWS_DEFAULT_REGION", "us-east-1")
- @pytest.fixture(scope="session")
- def moto_server(aws_credentials):
- # use service container for Linux on GitHub Actions
- if is_ci_environment() and not (
- is_platform_mac() or is_platform_arm() or is_platform_windows()
- ):
- yield "http://localhost:5000"
- else:
- moto_server = pytest.importorskip("moto.server")
- server = moto_server.ThreadedMotoServer(port=0)
- server.start()
- host, port = server.get_host_and_port()
- yield f"http://{host}:{port}"
- server.stop()
- @pytest.fixture
- def moto_s3_resource(moto_server):
- boto3 = pytest.importorskip("boto3")
- s3 = boto3.resource("s3", endpoint_url=moto_server)
- return s3
- @pytest.fixture(scope="session")
- def s3so(moto_server):
- return {
- "client_kwargs": {
- "endpoint_url": moto_server,
- }
- }
- @pytest.fixture
- def s3_bucket_public(moto_s3_resource):
- """
- Create a public S3 bucket using moto.
- """
- bucket_name = f"pandas-test-{uuid.uuid4()}"
- bucket = moto_s3_resource.Bucket(bucket_name)
- bucket.create(ACL="public-read")
- yield bucket
- bucket.objects.delete()
- bucket.delete()
- @pytest.fixture
- def s3_bucket_private(moto_s3_resource):
- """
- Create a private S3 bucket using moto.
- """
- bucket_name = f"cant_get_it-{uuid.uuid4()}"
- bucket = moto_s3_resource.Bucket(bucket_name)
- bucket.create(ACL="private")
- yield bucket
- bucket.objects.delete()
- bucket.delete()
- @pytest.fixture
- def s3_bucket_public_with_data(
- s3_bucket_public, tips_file, jsonl_file, feather_file, xml_file
- ):
- """
- The following datasets
- are loaded.
- - tips.csv
- - tips.csv.gz
- - tips.csv.bz2
- - items.jsonl
- """
- test_s3_files = [
- ("tips#1.csv", tips_file),
- ("tips.csv", tips_file),
- ("tips.csv.gz", tips_file + ".gz"),
- ("tips.csv.bz2", tips_file + ".bz2"),
- ("items.jsonl", jsonl_file),
- ("simple_dataset.feather", feather_file),
- ("books.xml", xml_file),
- ]
- for s3_key, file_name in test_s3_files:
- with open(file_name, "rb") as f:
- s3_bucket_public.put_object(Key=s3_key, Body=f)
- return s3_bucket_public
- @pytest.fixture
- def s3_bucket_private_with_data(
- s3_bucket_private, tips_file, jsonl_file, feather_file, xml_file
- ):
- """
- The following datasets
- are loaded.
- - tips.csv
- - tips.csv.gz
- - tips.csv.bz2
- - items.jsonl
- """
- test_s3_files = [
- ("tips#1.csv", tips_file),
- ("tips.csv", tips_file),
- ("tips.csv.gz", tips_file + ".gz"),
- ("tips.csv.bz2", tips_file + ".bz2"),
- ("items.jsonl", jsonl_file),
- ("simple_dataset.feather", feather_file),
- ("books.xml", xml_file),
- ]
- for s3_key, file_name in test_s3_files:
- with open(file_name, "rb") as f:
- s3_bucket_private.put_object(Key=s3_key, Body=f)
- return s3_bucket_private
- _compression_formats_params = [
- (".no_compress", None),
- ("", None),
- (".gz", "gzip"),
- (".GZ", "gzip"),
- (".bz2", "bz2"),
- (".BZ2", "bz2"),
- (".zip", "zip"),
- (".ZIP", "zip"),
- (".xz", "xz"),
- (".XZ", "xz"),
- pytest.param((".zst", "zstd"), marks=td.skip_if_no("zstandard")),
- pytest.param((".ZST", "zstd"), marks=td.skip_if_no("zstandard")),
- ]
- @pytest.fixture(params=_compression_formats_params[1:])
- def compression_format(request):
- return request.param
- @pytest.fixture(params=_compression_formats_params)
- def compression_ext(request):
- return request.param[0]
|