| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164216521662167216821692170217121722173217421752176217721782179218021812182218321842185218621872188 |
- import datetime
- from datetime import timedelta
- from decimal import Decimal
- from io import (
- BytesIO,
- StringIO,
- )
- import json
- import os
- import sys
- import time
- import numpy as np
- import pytest
- from pandas._config import using_string_dtype
- from pandas.compat import IS64
- import pandas.util._test_decorators as td
- import pandas as pd
- from pandas import (
- NA,
- DataFrame,
- DatetimeIndex,
- Index,
- RangeIndex,
- Series,
- Timestamp,
- date_range,
- read_json,
- )
- import pandas._testing as tm
- from pandas.io.json import ujson_dumps
- def test_literal_json_deprecation():
- # PR 53409
- expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
- jsonl = """{"a": 1, "b": 2}
- {"a": 3, "b": 4}
- {"a": 5, "b": 6}
- {"a": 7, "b": 8}"""
- msg = (
- "Passing literal json to 'read_json' is deprecated and "
- "will be removed in a future version. To read from a "
- "literal string, wrap it in a 'StringIO' object."
- )
- with tm.assert_produces_warning(FutureWarning, match=msg):
- try:
- read_json(jsonl, lines=False)
- except ValueError:
- pass
- with tm.assert_produces_warning(FutureWarning, match=msg):
- read_json(expected.to_json(), lines=False)
- with tm.assert_produces_warning(FutureWarning, match=msg):
- result = read_json('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n', lines=True)
- tm.assert_frame_equal(result, expected)
- with tm.assert_produces_warning(FutureWarning, match=msg):
- try:
- result = read_json(
- '{"a\\\\":"foo\\\\","b":"bar"}\n{"a\\\\":"foo\\"","b":"bar"}\n',
- lines=False,
- )
- except ValueError:
- pass
- with tm.assert_produces_warning(FutureWarning, match=msg):
- try:
- result = read_json('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n', lines=False)
- except ValueError:
- pass
- tm.assert_frame_equal(result, expected)
- def assert_json_roundtrip_equal(result, expected, orient):
- if orient in ("records", "values"):
- expected = expected.reset_index(drop=True)
- if orient == "values":
- expected.columns = range(len(expected.columns))
- tm.assert_frame_equal(result, expected)
- class TestPandasContainer:
- @pytest.fixture
- def categorical_frame(self):
- data = {
- c: np.random.default_rng(i).standard_normal(30)
- for i, c in enumerate(list("ABCD"))
- }
- cat = ["bah"] * 5 + ["bar"] * 5 + ["baz"] * 5 + ["foo"] * 15
- data["E"] = list(reversed(cat))
- data["sort"] = np.arange(30, dtype="int64")
- return DataFrame(data, index=pd.CategoricalIndex(cat, name="E"))
- @pytest.fixture
- def datetime_series(self):
- # Same as usual datetime_series, but with index freq set to None,
- # since that doesn't round-trip, see GH#33711
- ser = Series(
- 1.1 * np.arange(10, dtype=np.float64),
- index=date_range("2020-01-01", periods=10),
- name="ts",
- )
- ser.index = ser.index._with_freq(None)
- return ser
- @pytest.fixture
- def datetime_frame(self):
- # Same as usual datetime_frame, but with index freq set to None,
- # since that doesn't round-trip, see GH#33711
- df = DataFrame(
- np.random.default_rng(2).standard_normal((30, 4)),
- columns=Index(list("ABCD")),
- index=date_range("2000-01-01", periods=30, freq="B"),
- )
- df.index = df.index._with_freq(None)
- return df
- def test_frame_double_encoded_labels(self, orient):
- df = DataFrame(
- [["a", "b"], ["c", "d"]],
- index=['index " 1', "index / 2"],
- columns=["a \\ b", "y / z"],
- )
- data = StringIO(df.to_json(orient=orient))
- result = read_json(data, orient=orient)
- expected = df.copy()
- assert_json_roundtrip_equal(result, expected, orient)
- @pytest.mark.parametrize("orient", ["split", "records", "values"])
- def test_frame_non_unique_index(self, orient):
- df = DataFrame([["a", "b"], ["c", "d"]], index=[1, 1], columns=["x", "y"])
- data = StringIO(df.to_json(orient=orient))
- result = read_json(data, orient=orient)
- expected = df.copy()
- assert_json_roundtrip_equal(result, expected, orient)
- @pytest.mark.parametrize("orient", ["index", "columns"])
- def test_frame_non_unique_index_raises(self, orient):
- df = DataFrame([["a", "b"], ["c", "d"]], index=[1, 1], columns=["x", "y"])
- msg = f"DataFrame index must be unique for orient='{orient}'"
- with pytest.raises(ValueError, match=msg):
- df.to_json(orient=orient)
- @pytest.mark.parametrize("orient", ["split", "values"])
- @pytest.mark.parametrize(
- "data",
- [
- [["a", "b"], ["c", "d"]],
- [[1.5, 2.5], [3.5, 4.5]],
- [[1, 2.5], [3, 4.5]],
- [[Timestamp("20130101"), 3.5], [Timestamp("20130102"), 4.5]],
- ],
- )
- def test_frame_non_unique_columns(self, orient, data):
- df = DataFrame(data, index=[1, 2], columns=["x", "x"])
- result = read_json(
- StringIO(df.to_json(orient=orient)), orient=orient, convert_dates=["x"]
- )
- if orient == "values":
- expected = DataFrame(data)
- if expected.iloc[:, 0].dtype == "datetime64[ns]":
- # orient == "values" by default will write Timestamp objects out
- # in milliseconds; these are internally stored in nanosecond,
- # so divide to get where we need
- # TODO: a to_epoch method would also solve; see GH 14772
- expected.isetitem(0, expected.iloc[:, 0].astype(np.int64) // 1000000)
- elif orient == "split":
- expected = df
- expected.columns = ["x", "x.1"]
- tm.assert_frame_equal(result, expected)
- @pytest.mark.parametrize("orient", ["index", "columns", "records"])
- def test_frame_non_unique_columns_raises(self, orient):
- df = DataFrame([["a", "b"], ["c", "d"]], index=[1, 2], columns=["x", "x"])
- msg = f"DataFrame columns must be unique for orient='{orient}'"
- with pytest.raises(ValueError, match=msg):
- df.to_json(orient=orient)
- def test_frame_default_orient(self, float_frame):
- assert float_frame.to_json() == float_frame.to_json(orient="columns")
- @pytest.mark.parametrize("dtype", [False, float])
- @pytest.mark.parametrize("convert_axes", [True, False])
- def test_roundtrip_simple(self, orient, convert_axes, dtype, float_frame):
- data = StringIO(float_frame.to_json(orient=orient))
- result = read_json(data, orient=orient, convert_axes=convert_axes, dtype=dtype)
- expected = float_frame
- assert_json_roundtrip_equal(result, expected, orient)
- @pytest.mark.parametrize("dtype", [False, np.int64])
- @pytest.mark.parametrize("convert_axes", [True, False])
- def test_roundtrip_intframe(self, orient, convert_axes, dtype, int_frame):
- data = StringIO(int_frame.to_json(orient=orient))
- result = read_json(data, orient=orient, convert_axes=convert_axes, dtype=dtype)
- expected = int_frame
- assert_json_roundtrip_equal(result, expected, orient)
- @pytest.mark.parametrize("dtype", [None, np.float64, int, "U3"])
- @pytest.mark.parametrize("convert_axes", [True, False])
- def test_roundtrip_str_axes(self, orient, convert_axes, dtype):
- df = DataFrame(
- np.zeros((200, 4)),
- columns=[str(i) for i in range(4)],
- index=[str(i) for i in range(200)],
- dtype=dtype,
- )
- data = StringIO(df.to_json(orient=orient))
- result = read_json(data, orient=orient, convert_axes=convert_axes, dtype=dtype)
- expected = df.copy()
- if not dtype:
- expected = expected.astype(np.int64)
- # index columns, and records orients cannot fully preserve the string
- # dtype for axes as the index and column labels are used as keys in
- # JSON objects. JSON keys are by definition strings, so there's no way
- # to disambiguate whether those keys actually were strings or numeric
- # beforehand and numeric wins out.
- if convert_axes and (orient in ("index", "columns")):
- expected.columns = expected.columns.astype(np.int64)
- expected.index = expected.index.astype(np.int64)
- elif orient == "records" and convert_axes:
- expected.columns = expected.columns.astype(np.int64)
- elif convert_axes and orient == "split":
- expected.columns = expected.columns.astype(np.int64)
- assert_json_roundtrip_equal(result, expected, orient)
- @pytest.mark.parametrize("convert_axes", [True, False])
- def test_roundtrip_categorical(
- self, request, orient, categorical_frame, convert_axes, using_infer_string
- ):
- # TODO: create a better frame to test with and improve coverage
- if orient in ("index", "columns"):
- request.applymarker(
- pytest.mark.xfail(
- reason=f"Can't have duplicate index values for orient '{orient}')"
- )
- )
- data = StringIO(categorical_frame.to_json(orient=orient))
- result = read_json(data, orient=orient, convert_axes=convert_axes)
- expected = categorical_frame.copy()
- expected.index = expected.index.astype(
- str if not using_infer_string else "str"
- ) # Categorical not preserved
- expected.index.name = None # index names aren't preserved in JSON
- assert_json_roundtrip_equal(result, expected, orient)
- @pytest.mark.parametrize("convert_axes", [True, False])
- def test_roundtrip_empty(self, orient, convert_axes):
- empty_frame = DataFrame()
- data = StringIO(empty_frame.to_json(orient=orient))
- result = read_json(data, orient=orient, convert_axes=convert_axes)
- if orient == "split":
- idx = Index([], dtype=(float if convert_axes else object))
- expected = DataFrame(index=idx, columns=idx)
- elif orient in ["index", "columns"]:
- expected = DataFrame()
- else:
- expected = empty_frame.copy()
- tm.assert_frame_equal(result, expected)
- @pytest.mark.parametrize("convert_axes", [True, False])
- def test_roundtrip_timestamp(self, orient, convert_axes, datetime_frame):
- # TODO: improve coverage with date_format parameter
- data = StringIO(datetime_frame.to_json(orient=orient))
- result = read_json(data, orient=orient, convert_axes=convert_axes)
- expected = datetime_frame.copy()
- if not convert_axes: # one off for ts handling
- # DTI gets converted to epoch values
- idx = expected.index.view(np.int64) // 1000000
- if orient != "split": # TODO: handle consistently across orients
- idx = idx.astype(str)
- expected.index = idx
- assert_json_roundtrip_equal(result, expected, orient)
- @pytest.mark.parametrize("convert_axes", [True, False])
- def test_roundtrip_mixed(self, orient, convert_axes):
- index = Index(["a", "b", "c", "d", "e"])
- values = {
- "A": [0.0, 1.0, 2.0, 3.0, 4.0],
- "B": [0.0, 1.0, 0.0, 1.0, 0.0],
- "C": ["foo1", "foo2", "foo3", "foo4", "foo5"],
- "D": [True, False, True, False, True],
- }
- df = DataFrame(data=values, index=index)
- data = StringIO(df.to_json(orient=orient))
- result = read_json(data, orient=orient, convert_axes=convert_axes)
- expected = df.copy()
- expected = expected.assign(**expected.select_dtypes("number").astype(np.int64))
- assert_json_roundtrip_equal(result, expected, orient)
- @pytest.mark.xfail(
- reason="#50456 Column multiindex is stored and loaded differently",
- raises=AssertionError,
- )
- @pytest.mark.parametrize(
- "columns",
- [
- [["2022", "2022"], ["JAN", "FEB"]],
- [["2022", "2023"], ["JAN", "JAN"]],
- [["2022", "2022"], ["JAN", "JAN"]],
- ],
- )
- def test_roundtrip_multiindex(self, columns):
- df = DataFrame(
- [[1, 2], [3, 4]],
- columns=pd.MultiIndex.from_arrays(columns),
- )
- data = StringIO(df.to_json(orient="split"))
- result = read_json(data, orient="split")
- tm.assert_frame_equal(result, df)
- @pytest.mark.parametrize(
- "data,msg,orient",
- [
- ('{"key":b:a:d}', "Expected object or value", "columns"),
- # too few indices
- (
- '{"columns":["A","B"],'
- '"index":["2","3"],'
- '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}',
- "|".join(
- [
- r"Length of values \(3\) does not match length of index \(2\)",
- ]
- ),
- "split",
- ),
- # too many columns
- (
- '{"columns":["A","B","C"],'
- '"index":["1","2","3"],'
- '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}',
- "3 columns passed, passed data had 2 columns",
- "split",
- ),
- # bad key
- (
- '{"badkey":["A","B"],'
- '"index":["2","3"],'
- '"data":[[1.0,"1"],[2.0,"2"],[null,"3"]]}',
- r"unexpected key\(s\): badkey",
- "split",
- ),
- ],
- )
- def test_frame_from_json_bad_data_raises(self, data, msg, orient):
- with pytest.raises(ValueError, match=msg):
- read_json(StringIO(data), orient=orient)
- @pytest.mark.parametrize("dtype", [True, False])
- @pytest.mark.parametrize("convert_axes", [True, False])
- def test_frame_from_json_missing_data(self, orient, convert_axes, dtype):
- num_df = DataFrame([[1, 2], [4, 5, 6]])
- result = read_json(
- StringIO(num_df.to_json(orient=orient)),
- orient=orient,
- convert_axes=convert_axes,
- dtype=dtype,
- )
- assert np.isnan(result.iloc[0, 2])
- obj_df = DataFrame([["1", "2"], ["4", "5", "6"]])
- result = read_json(
- StringIO(obj_df.to_json(orient=orient)),
- orient=orient,
- convert_axes=convert_axes,
- dtype=dtype,
- )
- assert np.isnan(result.iloc[0, 2])
- @pytest.mark.parametrize("dtype", [True, False])
- def test_frame_read_json_dtype_missing_value(self, dtype):
- # GH28501 Parse missing values using read_json with dtype=False
- # to NaN instead of None
- result = read_json(StringIO("[null]"), dtype=dtype)
- expected = DataFrame([np.nan])
- tm.assert_frame_equal(result, expected)
- @pytest.mark.parametrize("inf", [np.inf, -np.inf])
- @pytest.mark.parametrize("dtype", [True, False])
- def test_frame_infinity(self, inf, dtype):
- # infinities get mapped to nulls which get mapped to NaNs during
- # deserialisation
- df = DataFrame([[1, 2], [4, 5, 6]])
- df.loc[0, 2] = inf
- data = StringIO(df.to_json())
- result = read_json(data, dtype=dtype)
- assert np.isnan(result.iloc[0, 2])
- @pytest.mark.skipif(not IS64, reason="not compliant on 32-bit, xref #15865")
- @pytest.mark.parametrize(
- "value,precision,expected_val",
- [
- (0.95, 1, 1.0),
- (1.95, 1, 2.0),
- (-1.95, 1, -2.0),
- (0.995, 2, 1.0),
- (0.9995, 3, 1.0),
- (0.99999999999999944, 15, 1.0),
- ],
- )
- def test_frame_to_json_float_precision(self, value, precision, expected_val):
- df = DataFrame([{"a_float": value}])
- encoded = df.to_json(double_precision=precision)
- assert encoded == f'{{"a_float":{{"0":{expected_val}}}}}'
- def test_frame_to_json_except(self):
- df = DataFrame([1, 2, 3])
- msg = "Invalid value 'garbage' for option 'orient'"
- with pytest.raises(ValueError, match=msg):
- df.to_json(orient="garbage")
- def test_frame_empty(self):
- df = DataFrame(columns=["jim", "joe"])
- assert not df._is_mixed_type
- data = StringIO(df.to_json())
- result = read_json(data, dtype=dict(df.dtypes))
- tm.assert_frame_equal(result, df, check_index_type=False)
- def test_frame_empty_to_json(self):
- # GH 7445
- df = DataFrame({"test": []}, index=[])
- result = df.to_json(orient="columns")
- expected = '{"test":{}}'
- assert result == expected
- def test_frame_empty_mixedtype(self):
- # mixed type
- df = DataFrame(columns=["jim", "joe"])
- df["joe"] = df["joe"].astype("i8")
- assert df._is_mixed_type
- data = df.to_json()
- tm.assert_frame_equal(
- read_json(StringIO(data), dtype=dict(df.dtypes)),
- df,
- check_index_type=False,
- )
- def test_frame_mixedtype_orient(self): # GH10289
- vals = [
- [10, 1, "foo", 0.1, 0.01],
- [20, 2, "bar", 0.2, 0.02],
- [30, 3, "baz", 0.3, 0.03],
- [40, 4, "qux", 0.4, 0.04],
- ]
- df = DataFrame(
- vals, index=list("abcd"), columns=["1st", "2nd", "3rd", "4th", "5th"]
- )
- assert df._is_mixed_type
- right = df.copy()
- for orient in ["split", "index", "columns"]:
- inp = StringIO(df.to_json(orient=orient))
- left = read_json(inp, orient=orient, convert_axes=False)
- tm.assert_frame_equal(left, right)
- right.index = RangeIndex(len(df))
- inp = StringIO(df.to_json(orient="records"))
- left = read_json(inp, orient="records", convert_axes=False)
- tm.assert_frame_equal(left, right)
- right.columns = RangeIndex(df.shape[1])
- inp = StringIO(df.to_json(orient="values"))
- left = read_json(inp, orient="values", convert_axes=False)
- tm.assert_frame_equal(left, right)
- def test_v12_compat(self, datapath):
- dti = date_range("2000-01-03", "2000-01-07")
- # freq doesn't roundtrip
- dti = DatetimeIndex(np.asarray(dti), freq=None)
- df = DataFrame(
- [
- [1.56808523, 0.65727391, 1.81021139, -0.17251653],
- [-0.2550111, -0.08072427, -0.03202878, -0.17581665],
- [1.51493992, 0.11805825, 1.629455, -1.31506612],
- [-0.02765498, 0.44679743, 0.33192641, -0.27885413],
- [0.05951614, -2.69652057, 1.28163262, 0.34703478],
- ],
- columns=["A", "B", "C", "D"],
- index=dti,
- )
- df["date"] = Timestamp("19920106 18:21:32.12").as_unit("ns")
- df.iloc[3, df.columns.get_loc("date")] = Timestamp("20130101")
- df["modified"] = df["date"]
- df.iloc[1, df.columns.get_loc("modified")] = pd.NaT
- dirpath = datapath("io", "json", "data")
- v12_json = os.path.join(dirpath, "tsframe_v012.json")
- df_unser = read_json(v12_json)
- tm.assert_frame_equal(df, df_unser)
- df_iso = df.drop(["modified"], axis=1)
- v12_iso_json = os.path.join(dirpath, "tsframe_iso_v012.json")
- df_unser_iso = read_json(v12_iso_json)
- tm.assert_frame_equal(df_iso, df_unser_iso, check_column_type=False)
- def test_blocks_compat_GH9037(self, using_infer_string):
- index = date_range("20000101", periods=10, freq="h")
- # freq doesn't round-trip
- index = DatetimeIndex(list(index), freq=None)
- df_mixed = DataFrame(
- {
- "float_1": [
- -0.92077639,
- 0.77434435,
- 1.25234727,
- 0.61485564,
- -0.60316077,
- 0.24653374,
- 0.28668979,
- -2.51969012,
- 0.95748401,
- -1.02970536,
- ],
- "int_1": [
- 19680418,
- 75337055,
- 99973684,
- 65103179,
- 79373900,
- 40314334,
- 21290235,
- 4991321,
- 41903419,
- 16008365,
- ],
- "str_1": [
- "78c608f1",
- "64a99743",
- "13d2ff52",
- "ca7f4af2",
- "97236474",
- "bde7e214",
- "1a6bde47",
- "b1190be5",
- "7a669144",
- "8d64d068",
- ],
- "float_2": [
- -0.0428278,
- -1.80872357,
- 3.36042349,
- -0.7573685,
- -0.48217572,
- 0.86229683,
- 1.08935819,
- 0.93898739,
- -0.03030452,
- 1.43366348,
- ],
- "str_2": [
- "14f04af9",
- "d085da90",
- "4bcfac83",
- "81504caf",
- "2ffef4a9",
- "08e2f5c4",
- "07e1af03",
- "addbd4a7",
- "1f6a09ba",
- "4bfc4d87",
- ],
- "int_2": [
- 86967717,
- 98098830,
- 51927505,
- 20372254,
- 12601730,
- 20884027,
- 34193846,
- 10561746,
- 24867120,
- 76131025,
- ],
- },
- index=index,
- )
- # JSON deserialisation always creates unicode strings
- df_mixed.columns = df_mixed.columns.astype(
- np.str_ if not using_infer_string else "str"
- )
- data = StringIO(df_mixed.to_json(orient="split"))
- df_roundtrip = read_json(data, orient="split")
- tm.assert_frame_equal(
- df_mixed,
- df_roundtrip,
- check_index_type=True,
- check_column_type=True,
- by_blocks=True,
- check_exact=True,
- )
- def test_frame_nonprintable_bytes(self):
- # GH14256: failing column caused segfaults, if it is not the last one
- class BinaryThing:
- def __init__(self, hexed) -> None:
- self.hexed = hexed
- self.binary = bytes.fromhex(hexed)
- def __str__(self) -> str:
- return self.hexed
- hexed = "574b4454ba8c5eb4f98a8f45"
- binthing = BinaryThing(hexed)
- # verify the proper conversion of printable content
- df_printable = DataFrame({"A": [binthing.hexed]})
- assert df_printable.to_json() == f'{{"A":{{"0":"{hexed}"}}}}'
- # check if non-printable content throws appropriate Exception
- df_nonprintable = DataFrame({"A": [binthing]})
- msg = "Unsupported UTF-8 sequence length when encoding string"
- with pytest.raises(OverflowError, match=msg):
- df_nonprintable.to_json()
- # the same with multiple columns threw segfaults
- df_mixed = DataFrame({"A": [binthing], "B": [1]}, columns=["A", "B"])
- with pytest.raises(OverflowError, match=msg):
- df_mixed.to_json()
- # default_handler should resolve exceptions for non-string types
- result = df_nonprintable.to_json(default_handler=str)
- expected = f'{{"A":{{"0":"{hexed}"}}}}'
- assert result == expected
- assert (
- df_mixed.to_json(default_handler=str)
- == f'{{"A":{{"0":"{hexed}"}},"B":{{"0":1}}}}'
- )
- def test_label_overflow(self):
- # GH14256: buffer length not checked when writing label
- result = DataFrame({"bar" * 100000: [1], "foo": [1337]}).to_json()
- expected = f'{{"{"bar" * 100000}":{{"0":1}},"foo":{{"0":1337}}}}'
- assert result == expected
- def test_series_non_unique_index(self):
- s = Series(["a", "b"], index=[1, 1])
- msg = "Series index must be unique for orient='index'"
- with pytest.raises(ValueError, match=msg):
- s.to_json(orient="index")
- tm.assert_series_equal(
- s,
- read_json(
- StringIO(s.to_json(orient="split")), orient="split", typ="series"
- ),
- )
- unserialized = read_json(
- StringIO(s.to_json(orient="records")), orient="records", typ="series"
- )
- tm.assert_equal(s.values, unserialized.values)
- def test_series_default_orient(self, string_series):
- assert string_series.to_json() == string_series.to_json(orient="index")
- def test_series_roundtrip_simple(self, orient, string_series, using_infer_string):
- data = StringIO(string_series.to_json(orient=orient))
- result = read_json(data, typ="series", orient=orient)
- expected = string_series
- if using_infer_string and orient in ("split", "index", "columns"):
- # These schemas don't contain dtypes, so we infer string
- expected.index = expected.index.astype("str")
- if orient in ("values", "records"):
- expected = expected.reset_index(drop=True)
- if orient != "split":
- expected.name = None
- tm.assert_series_equal(result, expected)
- @pytest.mark.parametrize("dtype", [False, None])
- def test_series_roundtrip_object(self, orient, dtype, object_series):
- data = StringIO(object_series.to_json(orient=orient))
- result = read_json(data, typ="series", orient=orient, dtype=dtype)
- expected = object_series
- if orient in ("values", "records"):
- expected = expected.reset_index(drop=True)
- if orient != "split":
- expected.name = None
- if using_string_dtype():
- expected = expected.astype("str")
- tm.assert_series_equal(result, expected)
- def test_series_roundtrip_empty(self, orient):
- empty_series = Series([], index=[], dtype=np.float64)
- data = StringIO(empty_series.to_json(orient=orient))
- result = read_json(data, typ="series", orient=orient)
- expected = empty_series.reset_index(drop=True)
- if orient in ("split"):
- expected.index = expected.index.astype(np.float64)
- tm.assert_series_equal(result, expected)
- def test_series_roundtrip_timeseries(self, orient, datetime_series):
- data = StringIO(datetime_series.to_json(orient=orient))
- result = read_json(data, typ="series", orient=orient)
- expected = datetime_series
- if orient in ("values", "records"):
- expected = expected.reset_index(drop=True)
- if orient != "split":
- expected.name = None
- tm.assert_series_equal(result, expected)
- @pytest.mark.parametrize("dtype", [np.float64, int])
- def test_series_roundtrip_numeric(self, orient, dtype):
- s = Series(range(6), index=["a", "b", "c", "d", "e", "f"])
- data = StringIO(s.to_json(orient=orient))
- result = read_json(data, typ="series", orient=orient)
- expected = s.copy()
- if orient in ("values", "records"):
- expected = expected.reset_index(drop=True)
- tm.assert_series_equal(result, expected)
- def test_series_to_json_except(self):
- s = Series([1, 2, 3])
- msg = "Invalid value 'garbage' for option 'orient'"
- with pytest.raises(ValueError, match=msg):
- s.to_json(orient="garbage")
- def test_series_from_json_precise_float(self):
- s = Series([4.56, 4.56, 4.56])
- result = read_json(StringIO(s.to_json()), typ="series", precise_float=True)
- tm.assert_series_equal(result, s, check_index_type=False)
- def test_series_with_dtype(self):
- # GH 21986
- s = Series([4.56, 4.56, 4.56])
- result = read_json(StringIO(s.to_json()), typ="series", dtype=np.int64)
- expected = Series([4] * 3)
- tm.assert_series_equal(result, expected)
- @pytest.mark.parametrize(
- "dtype,expected",
- [
- (True, Series(["2000-01-01"], dtype="datetime64[ns]")),
- (False, Series([946684800000])),
- ],
- )
- def test_series_with_dtype_datetime(self, dtype, expected):
- s = Series(["2000-01-01"], dtype="datetime64[ns]")
- data = StringIO(s.to_json())
- result = read_json(data, typ="series", dtype=dtype)
- tm.assert_series_equal(result, expected)
- def test_frame_from_json_precise_float(self):
- df = DataFrame([[4.56, 4.56, 4.56], [4.56, 4.56, 4.56]])
- result = read_json(StringIO(df.to_json()), precise_float=True)
- tm.assert_frame_equal(result, df)
- def test_typ(self):
- s = Series(range(6), index=["a", "b", "c", "d", "e", "f"], dtype="int64")
- result = read_json(StringIO(s.to_json()), typ=None)
- tm.assert_series_equal(result, s)
- def test_reconstruction_index(self):
- df = DataFrame([[1, 2, 3], [4, 5, 6]])
- result = read_json(StringIO(df.to_json()))
- tm.assert_frame_equal(result, df)
- df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=["A", "B", "C"])
- result = read_json(StringIO(df.to_json()))
- tm.assert_frame_equal(result, df)
- def test_path(self, float_frame, int_frame, datetime_frame):
- with tm.ensure_clean("test.json") as path:
- for df in [float_frame, int_frame, datetime_frame]:
- df.to_json(path)
- read_json(path)
- def test_axis_dates(self, datetime_series, datetime_frame):
- # frame
- json = StringIO(datetime_frame.to_json())
- result = read_json(json)
- tm.assert_frame_equal(result, datetime_frame)
- # series
- json = StringIO(datetime_series.to_json())
- result = read_json(json, typ="series")
- tm.assert_series_equal(result, datetime_series, check_names=False)
- assert result.name is None
- def test_convert_dates(self, datetime_series, datetime_frame):
- # frame
- df = datetime_frame
- df["date"] = Timestamp("20130101").as_unit("ns")
- json = StringIO(df.to_json())
- result = read_json(json)
- tm.assert_frame_equal(result, df)
- df["foo"] = 1.0
- json = StringIO(df.to_json(date_unit="ns"))
- result = read_json(json, convert_dates=False)
- expected = df.copy()
- expected["date"] = expected["date"].values.view("i8")
- expected["foo"] = expected["foo"].astype("int64")
- tm.assert_frame_equal(result, expected)
- # series
- ts = Series(Timestamp("20130101").as_unit("ns"), index=datetime_series.index)
- json = StringIO(ts.to_json())
- result = read_json(json, typ="series")
- tm.assert_series_equal(result, ts)
- @pytest.mark.parametrize("date_format", ["epoch", "iso"])
- @pytest.mark.parametrize("as_object", [True, False])
- @pytest.mark.parametrize("date_typ", [datetime.date, datetime.datetime, Timestamp])
- def test_date_index_and_values(self, date_format, as_object, date_typ):
- data = [date_typ(year=2020, month=1, day=1), pd.NaT]
- if as_object:
- data.append("a")
- ser = Series(data, index=data)
- result = ser.to_json(date_format=date_format)
- if date_format == "epoch":
- expected = '{"1577836800000":1577836800000,"null":null}'
- else:
- expected = (
- '{"2020-01-01T00:00:00.000":"2020-01-01T00:00:00.000","null":null}'
- )
- if as_object:
- expected = expected.replace("}", ',"a":"a"}')
- assert result == expected
- @pytest.mark.parametrize(
- "infer_word",
- [
- "trade_time",
- "date",
- "datetime",
- "sold_at",
- "modified",
- "timestamp",
- "timestamps",
- ],
- )
- def test_convert_dates_infer(self, infer_word):
- # GH10747
- data = [{"id": 1, infer_word: 1036713600000}, {"id": 2}]
- expected = DataFrame(
- [[1, Timestamp("2002-11-08")], [2, pd.NaT]], columns=["id", infer_word]
- )
- result = read_json(StringIO(ujson_dumps(data)))[["id", infer_word]]
- tm.assert_frame_equal(result, expected)
- @pytest.mark.parametrize(
- "date,date_unit",
- [
- ("20130101 20:43:42.123", None),
- ("20130101 20:43:42", "s"),
- ("20130101 20:43:42.123", "ms"),
- ("20130101 20:43:42.123456", "us"),
- ("20130101 20:43:42.123456789", "ns"),
- ],
- )
- def test_date_format_frame(self, date, date_unit, datetime_frame):
- df = datetime_frame
- df["date"] = Timestamp(date).as_unit("ns")
- df.iloc[1, df.columns.get_loc("date")] = pd.NaT
- df.iloc[5, df.columns.get_loc("date")] = pd.NaT
- if date_unit:
- json = df.to_json(date_format="iso", date_unit=date_unit)
- else:
- json = df.to_json(date_format="iso")
- result = read_json(StringIO(json))
- expected = df.copy()
- tm.assert_frame_equal(result, expected)
- def test_date_format_frame_raises(self, datetime_frame):
- df = datetime_frame
- msg = "Invalid value 'foo' for option 'date_unit'"
- with pytest.raises(ValueError, match=msg):
- df.to_json(date_format="iso", date_unit="foo")
- @pytest.mark.parametrize(
- "date,date_unit",
- [
- ("20130101 20:43:42.123", None),
- ("20130101 20:43:42", "s"),
- ("20130101 20:43:42.123", "ms"),
- ("20130101 20:43:42.123456", "us"),
- ("20130101 20:43:42.123456789", "ns"),
- ],
- )
- def test_date_format_series(self, date, date_unit, datetime_series):
- ts = Series(Timestamp(date).as_unit("ns"), index=datetime_series.index)
- ts.iloc[1] = pd.NaT
- ts.iloc[5] = pd.NaT
- if date_unit:
- json = ts.to_json(date_format="iso", date_unit=date_unit)
- else:
- json = ts.to_json(date_format="iso")
- result = read_json(StringIO(json), typ="series")
- expected = ts.copy()
- tm.assert_series_equal(result, expected)
- def test_date_format_series_raises(self, datetime_series):
- ts = Series(Timestamp("20130101 20:43:42.123"), index=datetime_series.index)
- msg = "Invalid value 'foo' for option 'date_unit'"
- with pytest.raises(ValueError, match=msg):
- ts.to_json(date_format="iso", date_unit="foo")
- @pytest.mark.parametrize("unit", ["s", "ms", "us", "ns"])
- def test_date_unit(self, unit, datetime_frame):
- df = datetime_frame
- df["date"] = Timestamp("20130101 20:43:42").as_unit("ns")
- dl = df.columns.get_loc("date")
- df.iloc[1, dl] = Timestamp("19710101 20:43:42")
- df.iloc[2, dl] = Timestamp("21460101 20:43:42")
- df.iloc[4, dl] = pd.NaT
- json = df.to_json(date_format="epoch", date_unit=unit)
- # force date unit
- result = read_json(StringIO(json), date_unit=unit)
- tm.assert_frame_equal(result, df)
- # detect date unit
- result = read_json(StringIO(json), date_unit=None)
- tm.assert_frame_equal(result, df)
- @pytest.mark.parametrize("unit", ["s", "ms", "us"])
- def test_iso_non_nano_datetimes(self, unit):
- # Test that numpy datetimes
- # in an Index or a column with non-nano resolution can be serialized
- # correctly
- # GH53686
- index = DatetimeIndex(
- [np.datetime64("2023-01-01T11:22:33.123456", unit)],
- dtype=f"datetime64[{unit}]",
- )
- df = DataFrame(
- {
- "date": Series(
- [np.datetime64("2022-01-01T11:22:33.123456", unit)],
- dtype=f"datetime64[{unit}]",
- index=index,
- ),
- "date_obj": Series(
- [np.datetime64("2023-01-01T11:22:33.123456", unit)],
- dtype=object,
- index=index,
- ),
- },
- )
- buf = StringIO()
- df.to_json(buf, date_format="iso", date_unit=unit)
- buf.seek(0)
- # read_json always reads datetimes in nanosecond resolution
- # TODO: check_dtype/check_index_type should be removable
- # once read_json gets non-nano support
- tm.assert_frame_equal(
- read_json(buf, convert_dates=["date", "date_obj"]),
- df,
- check_index_type=False,
- check_dtype=False,
- )
- def test_weird_nested_json(self):
- # this used to core dump the parser
- s = r"""{
- "status": "success",
- "data": {
- "posts": [
- {
- "id": 1,
- "title": "A blog post",
- "body": "Some useful content"
- },
- {
- "id": 2,
- "title": "Another blog post",
- "body": "More content"
- }
- ]
- }
- }"""
- read_json(StringIO(s))
- def test_doc_example(self):
- dfj2 = DataFrame(
- np.random.default_rng(2).standard_normal((5, 2)), columns=list("AB")
- )
- dfj2["date"] = Timestamp("20130101")
- dfj2["ints"] = range(5)
- dfj2["bools"] = True
- dfj2.index = date_range("20130101", periods=5)
- json = StringIO(dfj2.to_json())
- result = read_json(json, dtype={"ints": np.int64, "bools": np.bool_})
- tm.assert_frame_equal(result, result)
- def test_round_trip_exception(self, datapath):
- # GH 3867
- path = datapath("io", "json", "data", "teams.csv")
- df = pd.read_csv(path)
- s = df.to_json()
- result = read_json(StringIO(s))
- res = result.reindex(index=df.index, columns=df.columns)
- msg = "The 'downcast' keyword in fillna is deprecated"
- with tm.assert_produces_warning(FutureWarning, match=msg):
- res = res.fillna(np.nan, downcast=False)
- tm.assert_frame_equal(res, df)
- @pytest.mark.network
- @pytest.mark.single_cpu
- @pytest.mark.parametrize(
- "field,dtype",
- [
- ["created_at", pd.DatetimeTZDtype(tz="UTC")],
- ["closed_at", "datetime64[ns]"],
- ["updated_at", pd.DatetimeTZDtype(tz="UTC")],
- ],
- )
- def test_url(self, field, dtype, httpserver):
- data = '{"created_at": ["2023-06-23T18:21:36Z"], "closed_at": ["2023-06-23T18:21:36"], "updated_at": ["2023-06-23T18:21:36Z"]}\n' # noqa: E501
- httpserver.serve_content(content=data)
- result = read_json(httpserver.url, convert_dates=True)
- assert result[field].dtype == dtype
- def test_timedelta(self):
- converter = lambda x: pd.to_timedelta(x, unit="ms")
- ser = Series([timedelta(23), timedelta(seconds=5)])
- assert ser.dtype == "timedelta64[ns]"
- result = read_json(StringIO(ser.to_json()), typ="series").apply(converter)
- tm.assert_series_equal(result, ser)
- ser = Series([timedelta(23), timedelta(seconds=5)], index=Index([0, 1]))
- assert ser.dtype == "timedelta64[ns]"
- result = read_json(StringIO(ser.to_json()), typ="series").apply(converter)
- tm.assert_series_equal(result, ser)
- frame = DataFrame([timedelta(23), timedelta(seconds=5)])
- assert frame[0].dtype == "timedelta64[ns]"
- tm.assert_frame_equal(
- frame, read_json(StringIO(frame.to_json())).apply(converter)
- )
- def test_timedelta2(self):
- frame = DataFrame(
- {
- "a": [timedelta(days=23), timedelta(seconds=5)],
- "b": [1, 2],
- "c": date_range(start="20130101", periods=2),
- }
- )
- data = StringIO(frame.to_json(date_unit="ns"))
- result = read_json(data)
- result["a"] = pd.to_timedelta(result.a, unit="ns")
- result["c"] = pd.to_datetime(result.c)
- tm.assert_frame_equal(frame, result)
- def test_mixed_timedelta_datetime(self):
- td = timedelta(23)
- ts = Timestamp("20130101")
- frame = DataFrame({"a": [td, ts]}, dtype=object)
- expected = DataFrame(
- {"a": [pd.Timedelta(td).as_unit("ns")._value, ts.as_unit("ns")._value]}
- )
- data = StringIO(frame.to_json(date_unit="ns"))
- result = read_json(data, dtype={"a": "int64"})
- tm.assert_frame_equal(result, expected, check_index_type=False)
- @pytest.mark.parametrize("as_object", [True, False])
- @pytest.mark.parametrize("date_format", ["iso", "epoch"])
- @pytest.mark.parametrize("timedelta_typ", [pd.Timedelta, timedelta])
- def test_timedelta_to_json(self, as_object, date_format, timedelta_typ):
- # GH28156: to_json not correctly formatting Timedelta
- data = [timedelta_typ(days=1), timedelta_typ(days=2), pd.NaT]
- if as_object:
- data.append("a")
- ser = Series(data, index=data)
- if date_format == "iso":
- expected = (
- '{"P1DT0H0M0S":"P1DT0H0M0S","P2DT0H0M0S":"P2DT0H0M0S","null":null}'
- )
- else:
- expected = '{"86400000":86400000,"172800000":172800000,"null":null}'
- if as_object:
- expected = expected.replace("}", ',"a":"a"}')
- result = ser.to_json(date_format=date_format)
- assert result == expected
- @pytest.mark.parametrize("as_object", [True, False])
- @pytest.mark.parametrize("timedelta_typ", [pd.Timedelta, timedelta])
- def test_timedelta_to_json_fractional_precision(self, as_object, timedelta_typ):
- data = [timedelta_typ(milliseconds=42)]
- ser = Series(data, index=data)
- if as_object:
- ser = ser.astype(object)
- result = ser.to_json()
- expected = '{"42":42}'
- assert result == expected
- def test_default_handler(self):
- value = object()
- frame = DataFrame({"a": [7, value]})
- expected = DataFrame({"a": [7, str(value)]})
- result = read_json(StringIO(frame.to_json(default_handler=str)))
- tm.assert_frame_equal(expected, result, check_index_type=False)
- def test_default_handler_indirect(self):
- def default(obj):
- if isinstance(obj, complex):
- return [("mathjs", "Complex"), ("re", obj.real), ("im", obj.imag)]
- return str(obj)
- df_list = [
- 9,
- DataFrame(
- {"a": [1, "STR", complex(4, -5)], "b": [float("nan"), None, "N/A"]},
- columns=["a", "b"],
- ),
- ]
- expected = (
- '[9,[[1,null],["STR",null],[[["mathjs","Complex"],'
- '["re",4.0],["im",-5.0]],"N\\/A"]]]'
- )
- assert (
- ujson_dumps(df_list, default_handler=default, orient="values") == expected
- )
- def test_default_handler_numpy_unsupported_dtype(self):
- # GH12554 to_json raises 'Unhandled numpy dtype 15'
- df = DataFrame(
- {"a": [1, 2.3, complex(4, -5)], "b": [float("nan"), None, complex(1.2, 0)]},
- columns=["a", "b"],
- )
- expected = (
- '[["(1+0j)","(nan+0j)"],'
- '["(2.3+0j)","(nan+0j)"],'
- '["(4-5j)","(1.2+0j)"]]'
- )
- assert df.to_json(default_handler=str, orient="values") == expected
- def test_default_handler_raises(self):
- msg = "raisin"
- def my_handler_raises(obj):
- raise TypeError(msg)
- with pytest.raises(TypeError, match=msg):
- DataFrame({"a": [1, 2, object()]}).to_json(
- default_handler=my_handler_raises
- )
- with pytest.raises(TypeError, match=msg):
- DataFrame({"a": [1, 2, complex(4, -5)]}).to_json(
- default_handler=my_handler_raises
- )
- def test_categorical(self):
- # GH4377 df.to_json segfaults with non-ndarray blocks
- df = DataFrame({"A": ["a", "b", "c", "a", "b", "b", "a"]})
- df["B"] = df["A"]
- expected = df.to_json()
- df["B"] = df["A"].astype("category")
- assert expected == df.to_json()
- s = df["A"]
- sc = df["B"]
- assert s.to_json() == sc.to_json()
- def test_datetime_tz(self):
- # GH4377 df.to_json segfaults with non-ndarray blocks
- tz_range = date_range("20130101", periods=3, tz="US/Eastern")
- tz_naive = tz_range.tz_convert("utc").tz_localize(None)
- df = DataFrame({"A": tz_range, "B": date_range("20130101", periods=3)})
- df_naive = df.copy()
- df_naive["A"] = tz_naive
- expected = df_naive.to_json()
- assert expected == df.to_json()
- stz = Series(tz_range)
- s_naive = Series(tz_naive)
- assert stz.to_json() == s_naive.to_json()
- def test_sparse(self):
- # GH4377 df.to_json segfaults with non-ndarray blocks
- df = DataFrame(np.random.default_rng(2).standard_normal((10, 4)))
- df.loc[:8] = np.nan
- sdf = df.astype("Sparse")
- expected = df.to_json()
- assert expected == sdf.to_json()
- s = Series(np.random.default_rng(2).standard_normal(10))
- s.loc[:8] = np.nan
- ss = s.astype("Sparse")
- expected = s.to_json()
- assert expected == ss.to_json()
- @pytest.mark.parametrize(
- "ts",
- [
- Timestamp("2013-01-10 05:00:00Z"),
- Timestamp("2013-01-10 00:00:00", tz="US/Eastern"),
- Timestamp("2013-01-10 00:00:00-0500"),
- ],
- )
- def test_tz_is_utc(self, ts):
- exp = '"2013-01-10T05:00:00.000Z"'
- assert ujson_dumps(ts, iso_dates=True) == exp
- dt = ts.to_pydatetime()
- assert ujson_dumps(dt, iso_dates=True) == exp
- def test_tz_is_naive(self):
- ts = Timestamp("2013-01-10 05:00:00")
- exp = '"2013-01-10T05:00:00.000"'
- assert ujson_dumps(ts, iso_dates=True) == exp
- dt = ts.to_pydatetime()
- assert ujson_dumps(dt, iso_dates=True) == exp
- @pytest.mark.parametrize(
- "tz_range",
- [
- date_range("2013-01-01 05:00:00Z", periods=2),
- date_range("2013-01-01 00:00:00", periods=2, tz="US/Eastern"),
- date_range("2013-01-01 00:00:00-0500", periods=2),
- ],
- )
- def test_tz_range_is_utc(self, tz_range):
- exp = '["2013-01-01T05:00:00.000Z","2013-01-02T05:00:00.000Z"]'
- dfexp = (
- '{"DT":{'
- '"0":"2013-01-01T05:00:00.000Z",'
- '"1":"2013-01-02T05:00:00.000Z"}}'
- )
- assert ujson_dumps(tz_range, iso_dates=True) == exp
- dti = DatetimeIndex(tz_range)
- # Ensure datetimes in object array are serialized correctly
- # in addition to the normal DTI case
- assert ujson_dumps(dti, iso_dates=True) == exp
- assert ujson_dumps(dti.astype(object), iso_dates=True) == exp
- df = DataFrame({"DT": dti})
- result = ujson_dumps(df, iso_dates=True)
- assert result == dfexp
- assert ujson_dumps(df.astype({"DT": object}), iso_dates=True)
- def test_tz_range_is_naive(self):
- dti = date_range("2013-01-01 05:00:00", periods=2)
- exp = '["2013-01-01T05:00:00.000","2013-01-02T05:00:00.000"]'
- dfexp = '{"DT":{"0":"2013-01-01T05:00:00.000","1":"2013-01-02T05:00:00.000"}}'
- # Ensure datetimes in object array are serialized correctly
- # in addition to the normal DTI case
- assert ujson_dumps(dti, iso_dates=True) == exp
- assert ujson_dumps(dti.astype(object), iso_dates=True) == exp
- df = DataFrame({"DT": dti})
- result = ujson_dumps(df, iso_dates=True)
- assert result == dfexp
- assert ujson_dumps(df.astype({"DT": object}), iso_dates=True)
- def test_read_inline_jsonl(self):
- # GH9180
- result = read_json(StringIO('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n'), lines=True)
- expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
- tm.assert_frame_equal(result, expected)
- @pytest.mark.single_cpu
- @td.skip_if_not_us_locale
- def test_read_s3_jsonl(self, s3_public_bucket_with_data, s3so):
- # GH17200
- result = read_json(
- f"s3n://{s3_public_bucket_with_data.name}/items.jsonl",
- lines=True,
- storage_options=s3so,
- )
- expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
- tm.assert_frame_equal(result, expected)
- def test_read_local_jsonl(self):
- # GH17200
- with tm.ensure_clean("tmp_items.json") as path:
- with open(path, "w", encoding="utf-8") as infile:
- infile.write('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n')
- result = read_json(path, lines=True)
- expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
- tm.assert_frame_equal(result, expected)
- def test_read_jsonl_unicode_chars(self):
- # GH15132: non-ascii unicode characters
- # \u201d == RIGHT DOUBLE QUOTATION MARK
- # simulate file handle
- json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n'
- json = StringIO(json)
- result = read_json(json, lines=True)
- expected = DataFrame([["foo\u201d", "bar"], ["foo", "bar"]], columns=["a", "b"])
- tm.assert_frame_equal(result, expected)
- # simulate string
- json = StringIO('{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n')
- result = read_json(json, lines=True)
- expected = DataFrame([["foo\u201d", "bar"], ["foo", "bar"]], columns=["a", "b"])
- tm.assert_frame_equal(result, expected)
- @pytest.mark.parametrize("bigNum", [sys.maxsize + 1, -(sys.maxsize + 2)])
- def test_to_json_large_numbers(self, bigNum):
- # GH34473
- series = Series(bigNum, dtype=object, index=["articleId"])
- json = series.to_json()
- expected = '{"articleId":' + str(bigNum) + "}"
- assert json == expected
- df = DataFrame(bigNum, dtype=object, index=["articleId"], columns=[0])
- json = df.to_json()
- expected = '{"0":{"articleId":' + str(bigNum) + "}}"
- assert json == expected
- @pytest.mark.parametrize("bigNum", [-(2**63) - 1, 2**64])
- def test_read_json_large_numbers(self, bigNum):
- # GH20599, 26068
- json = StringIO('{"articleId":' + str(bigNum) + "}")
- msg = r"Value is too small|Value is too big"
- with pytest.raises(ValueError, match=msg):
- read_json(json)
- json = StringIO('{"0":{"articleId":' + str(bigNum) + "}}")
- with pytest.raises(ValueError, match=msg):
- read_json(json)
- def test_read_json_large_numbers2(self):
- # GH18842
- json = '{"articleId": "1404366058080022500245"}'
- json = StringIO(json)
- result = read_json(json, typ="series")
- expected = Series(1.404366e21, index=["articleId"])
- tm.assert_series_equal(result, expected)
- json = '{"0": {"articleId": "1404366058080022500245"}}'
- json = StringIO(json)
- result = read_json(json)
- expected = DataFrame(1.404366e21, index=["articleId"], columns=[0])
- tm.assert_frame_equal(result, expected)
- def test_to_jsonl(self):
- # GH9180
- df = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
- result = df.to_json(orient="records", lines=True)
- expected = '{"a":1,"b":2}\n{"a":1,"b":2}\n'
- assert result == expected
- df = DataFrame([["foo}", "bar"], ['foo"', "bar"]], columns=["a", "b"])
- result = df.to_json(orient="records", lines=True)
- expected = '{"a":"foo}","b":"bar"}\n{"a":"foo\\"","b":"bar"}\n'
- assert result == expected
- tm.assert_frame_equal(read_json(StringIO(result), lines=True), df)
- # GH15096: escaped characters in columns and data
- df = DataFrame([["foo\\", "bar"], ['foo"', "bar"]], columns=["a\\", "b"])
- result = df.to_json(orient="records", lines=True)
- expected = '{"a\\\\":"foo\\\\","b":"bar"}\n{"a\\\\":"foo\\"","b":"bar"}\n'
- assert result == expected
- tm.assert_frame_equal(read_json(StringIO(result), lines=True), df)
- # TODO: there is a near-identical test for pytables; can we share?
- @pytest.mark.xfail(reason="GH#13774 encoding kwarg not supported", raises=TypeError)
- @pytest.mark.parametrize(
- "val",
- [
- [b"E\xc9, 17", b"", b"a", b"b", b"c"],
- [b"E\xc9, 17", b"a", b"b", b"c"],
- [b"EE, 17", b"", b"a", b"b", b"c"],
- [b"E\xc9, 17", b"\xf8\xfc", b"a", b"b", b"c"],
- [b"", b"a", b"b", b"c"],
- [b"\xf8\xfc", b"a", b"b", b"c"],
- [b"A\xf8\xfc", b"", b"a", b"b", b"c"],
- [np.nan, b"", b"b", b"c"],
- [b"A\xf8\xfc", np.nan, b"", b"b", b"c"],
- ],
- )
- @pytest.mark.parametrize("dtype", ["category", object])
- def test_latin_encoding(self, dtype, val):
- # GH 13774
- ser = Series(
- [x.decode("latin-1") if isinstance(x, bytes) else x for x in val],
- dtype=dtype,
- )
- encoding = "latin-1"
- with tm.ensure_clean("test.json") as path:
- ser.to_json(path, encoding=encoding)
- retr = read_json(StringIO(path), encoding=encoding)
- tm.assert_series_equal(ser, retr, check_categorical=False)
- def test_data_frame_size_after_to_json(self):
- # GH15344
- df = DataFrame({"a": [str(1)]})
- size_before = df.memory_usage(index=True, deep=True).sum()
- df.to_json()
- size_after = df.memory_usage(index=True, deep=True).sum()
- assert size_before == size_after
- @pytest.mark.parametrize(
- "index", [None, [1, 2], [1.0, 2.0], ["a", "b"], ["1", "2"], ["1.", "2."]]
- )
- @pytest.mark.parametrize("columns", [["a", "b"], ["1", "2"], ["1.", "2."]])
- def test_from_json_to_json_table_index_and_columns(self, index, columns):
- # GH25433 GH25435
- expected = DataFrame([[1, 2], [3, 4]], index=index, columns=columns)
- dfjson = expected.to_json(orient="table")
- result = read_json(StringIO(dfjson), orient="table")
- tm.assert_frame_equal(result, expected)
- def test_from_json_to_json_table_dtypes(self):
- # GH21345
- expected = DataFrame({"a": [1, 2], "b": [3.0, 4.0], "c": ["5", "6"]})
- dfjson = expected.to_json(orient="table")
- result = read_json(StringIO(dfjson), orient="table")
- tm.assert_frame_equal(result, expected)
- # TODO: We are casting to string which coerces None to NaN before casting back
- # to object, ending up with incorrect na values
- @pytest.mark.xfail(using_string_dtype(), reason="incorrect na conversion")
- @pytest.mark.parametrize("orient", ["split", "records", "index", "columns"])
- def test_to_json_from_json_columns_dtypes(self, orient):
- # GH21892 GH33205
- expected = DataFrame.from_dict(
- {
- "Integer": Series([1, 2, 3], dtype="int64"),
- "Float": Series([None, 2.0, 3.0], dtype="float64"),
- "Object": Series([None, "", "c"], dtype="object"),
- "Bool": Series([True, False, True], dtype="bool"),
- "Category": Series(["a", "b", None], dtype="category"),
- "Datetime": Series(
- ["2020-01-01", None, "2020-01-03"], dtype="datetime64[ns]"
- ),
- }
- )
- dfjson = expected.to_json(orient=orient)
- result = read_json(
- StringIO(dfjson),
- orient=orient,
- dtype={
- "Integer": "int64",
- "Float": "float64",
- "Object": "object",
- "Bool": "bool",
- "Category": "category",
- "Datetime": "datetime64[ns]",
- },
- )
- tm.assert_frame_equal(result, expected)
- @pytest.mark.parametrize("dtype", [True, {"b": int, "c": int}])
- def test_read_json_table_dtype_raises(self, dtype):
- # GH21345
- df = DataFrame({"a": [1, 2], "b": [3.0, 4.0], "c": ["5", "6"]})
- dfjson = df.to_json(orient="table")
- msg = "cannot pass both dtype and orient='table'"
- with pytest.raises(ValueError, match=msg):
- read_json(dfjson, orient="table", dtype=dtype)
- @pytest.mark.parametrize("orient", ["index", "columns", "records", "values"])
- def test_read_json_table_empty_axes_dtype(self, orient):
- # GH28558
- expected = DataFrame()
- result = read_json(StringIO("{}"), orient=orient, convert_axes=True)
- tm.assert_index_equal(result.index, expected.index)
- tm.assert_index_equal(result.columns, expected.columns)
- def test_read_json_table_convert_axes_raises(self):
- # GH25433 GH25435
- df = DataFrame([[1, 2], [3, 4]], index=[1.0, 2.0], columns=["1.", "2."])
- dfjson = df.to_json(orient="table")
- msg = "cannot pass both convert_axes and orient='table'"
- with pytest.raises(ValueError, match=msg):
- read_json(dfjson, orient="table", convert_axes=True)
- @pytest.mark.parametrize(
- "data, expected",
- [
- (
- DataFrame([[1, 2], [4, 5]], columns=["a", "b"]),
- {"columns": ["a", "b"], "data": [[1, 2], [4, 5]]},
- ),
- (
- DataFrame([[1, 2], [4, 5]], columns=["a", "b"]).rename_axis("foo"),
- {"columns": ["a", "b"], "data": [[1, 2], [4, 5]]},
- ),
- (
- DataFrame(
- [[1, 2], [4, 5]], columns=["a", "b"], index=[["a", "b"], ["c", "d"]]
- ),
- {"columns": ["a", "b"], "data": [[1, 2], [4, 5]]},
- ),
- (Series([1, 2, 3], name="A"), {"name": "A", "data": [1, 2, 3]}),
- (
- Series([1, 2, 3], name="A").rename_axis("foo"),
- {"name": "A", "data": [1, 2, 3]},
- ),
- (
- Series([1, 2], name="A", index=[["a", "b"], ["c", "d"]]),
- {"name": "A", "data": [1, 2]},
- ),
- ],
- )
- def test_index_false_to_json_split(self, data, expected):
- # GH 17394
- # Testing index=False in to_json with orient='split'
- result = data.to_json(orient="split", index=False)
- result = json.loads(result)
- assert result == expected
- @pytest.mark.parametrize(
- "data",
- [
- (DataFrame([[1, 2], [4, 5]], columns=["a", "b"])),
- (DataFrame([[1, 2], [4, 5]], columns=["a", "b"]).rename_axis("foo")),
- (
- DataFrame(
- [[1, 2], [4, 5]], columns=["a", "b"], index=[["a", "b"], ["c", "d"]]
- )
- ),
- (Series([1, 2, 3], name="A")),
- (Series([1, 2, 3], name="A").rename_axis("foo")),
- (Series([1, 2], name="A", index=[["a", "b"], ["c", "d"]])),
- ],
- )
- def test_index_false_to_json_table(self, data):
- # GH 17394
- # Testing index=False in to_json with orient='table'
- result = data.to_json(orient="table", index=False)
- result = json.loads(result)
- expected = {
- "schema": pd.io.json.build_table_schema(data, index=False),
- "data": DataFrame(data).to_dict(orient="records"),
- }
- assert result == expected
- @pytest.mark.parametrize("orient", ["index", "columns"])
- def test_index_false_error_to_json(self, orient):
- # GH 17394, 25513
- # Testing error message from to_json with index=False
- df = DataFrame([[1, 2], [4, 5]], columns=["a", "b"])
- msg = (
- "'index=False' is only valid when 'orient' is 'split', "
- "'table', 'records', or 'values'"
- )
- with pytest.raises(ValueError, match=msg):
- df.to_json(orient=orient, index=False)
- @pytest.mark.parametrize("orient", ["records", "values"])
- def test_index_true_error_to_json(self, orient):
- # GH 25513
- # Testing error message from to_json with index=True
- df = DataFrame([[1, 2], [4, 5]], columns=["a", "b"])
- msg = (
- "'index=True' is only valid when 'orient' is 'split', "
- "'table', 'index', or 'columns'"
- )
- with pytest.raises(ValueError, match=msg):
- df.to_json(orient=orient, index=True)
- @pytest.mark.parametrize("orient", ["split", "table"])
- @pytest.mark.parametrize("index", [True, False])
- def test_index_false_from_json_to_json(self, orient, index):
- # GH25170
- # Test index=False in from_json to_json
- expected = DataFrame({"a": [1, 2], "b": [3, 4]})
- dfjson = expected.to_json(orient=orient, index=index)
- result = read_json(StringIO(dfjson), orient=orient)
- tm.assert_frame_equal(result, expected)
- def test_read_timezone_information(self):
- # GH 25546
- result = read_json(
- StringIO('{"2019-01-01T11:00:00.000Z":88}'), typ="series", orient="index"
- )
- exp_dti = DatetimeIndex(["2019-01-01 11:00:00"], dtype="M8[ns, UTC]")
- expected = Series([88], index=exp_dti)
- tm.assert_series_equal(result, expected)
- @pytest.mark.parametrize(
- "url",
- [
- "s3://example-fsspec/",
- "gcs://another-fsspec/file.json",
- "https://example-site.com/data",
- "some-protocol://data.txt",
- ],
- )
- def test_read_json_with_url_value(self, url):
- # GH 36271
- result = read_json(StringIO(f'{{"url":{{"0":"{url}"}}}}'))
- expected = DataFrame({"url": [url]})
- tm.assert_frame_equal(result, expected)
- @pytest.mark.parametrize(
- "compression",
- ["", ".gz", ".bz2", ".tar"],
- )
- def test_read_json_with_very_long_file_path(self, compression):
- # GH 46718
- long_json_path = f'{"a" * 1000}.json{compression}'
- with pytest.raises(
- FileNotFoundError, match=f"File {long_json_path} does not exist"
- ):
- # path too long for Windows is handled in file_exists() but raises in
- # _get_data_from_filepath()
- read_json(long_json_path)
- @pytest.mark.parametrize(
- "date_format,key", [("epoch", 86400000), ("iso", "P1DT0H0M0S")]
- )
- def test_timedelta_as_label(self, date_format, key):
- df = DataFrame([[1]], columns=[pd.Timedelta("1D")])
- expected = f'{{"{key}":{{"0":1}}}}'
- result = df.to_json(date_format=date_format)
- assert result == expected
- @pytest.mark.parametrize(
- "orient,expected",
- [
- ("index", "{\"('a', 'b')\":{\"('c', 'd')\":1}}"),
- ("columns", "{\"('c', 'd')\":{\"('a', 'b')\":1}}"),
- # TODO: the below have separate encoding procedures
- pytest.param(
- "split",
- "",
- marks=pytest.mark.xfail(
- reason="Produces JSON but not in a consistent manner"
- ),
- ),
- pytest.param(
- "table",
- "",
- marks=pytest.mark.xfail(
- reason="Produces JSON but not in a consistent manner"
- ),
- ),
- ],
- )
- def test_tuple_labels(self, orient, expected):
- # GH 20500
- df = DataFrame([[1]], index=[("a", "b")], columns=[("c", "d")])
- result = df.to_json(orient=orient)
- assert result == expected
- @pytest.mark.parametrize("indent", [1, 2, 4])
- def test_to_json_indent(self, indent):
- # GH 12004
- df = DataFrame([["foo", "bar"], ["baz", "qux"]], columns=["a", "b"])
- result = df.to_json(indent=indent)
- spaces = " " * indent
- expected = f"""{{
- {spaces}"a":{{
- {spaces}{spaces}"0":"foo",
- {spaces}{spaces}"1":"baz"
- {spaces}}},
- {spaces}"b":{{
- {spaces}{spaces}"0":"bar",
- {spaces}{spaces}"1":"qux"
- {spaces}}}
- }}"""
- assert result == expected
- @pytest.mark.skipif(
- using_string_dtype(),
- reason="Adjust expected when infer_string is default, no bug here, "
- "just a complicated parametrization",
- )
- @pytest.mark.parametrize(
- "orient,expected",
- [
- (
- "split",
- """{
- "columns":[
- "a",
- "b"
- ],
- "index":[
- 0,
- 1
- ],
- "data":[
- [
- "foo",
- "bar"
- ],
- [
- "baz",
- "qux"
- ]
- ]
- }""",
- ),
- (
- "records",
- """[
- {
- "a":"foo",
- "b":"bar"
- },
- {
- "a":"baz",
- "b":"qux"
- }
- ]""",
- ),
- (
- "index",
- """{
- "0":{
- "a":"foo",
- "b":"bar"
- },
- "1":{
- "a":"baz",
- "b":"qux"
- }
- }""",
- ),
- (
- "columns",
- """{
- "a":{
- "0":"foo",
- "1":"baz"
- },
- "b":{
- "0":"bar",
- "1":"qux"
- }
- }""",
- ),
- (
- "values",
- """[
- [
- "foo",
- "bar"
- ],
- [
- "baz",
- "qux"
- ]
- ]""",
- ),
- (
- "table",
- """{
- "schema":{
- "fields":[
- {
- "name":"index",
- "type":"integer"
- },
- {
- "name":"a",
- "type":"string"
- },
- {
- "name":"b",
- "type":"string"
- }
- ],
- "primaryKey":[
- "index"
- ],
- "pandas_version":"1.4.0"
- },
- "data":[
- {
- "index":0,
- "a":"foo",
- "b":"bar"
- },
- {
- "index":1,
- "a":"baz",
- "b":"qux"
- }
- ]
- }""",
- ),
- ],
- )
- def test_json_indent_all_orients(self, orient, expected):
- # GH 12004
- df = DataFrame([["foo", "bar"], ["baz", "qux"]], columns=["a", "b"])
- result = df.to_json(orient=orient, indent=4)
- assert result == expected
- def test_json_negative_indent_raises(self):
- with pytest.raises(ValueError, match="must be a nonnegative integer"):
- DataFrame().to_json(indent=-1)
- def test_emca_262_nan_inf_support(self):
- # GH 12213
- data = StringIO(
- '["a", NaN, "NaN", Infinity, "Infinity", -Infinity, "-Infinity"]'
- )
- result = read_json(data)
- expected = DataFrame(
- ["a", None, "NaN", np.inf, "Infinity", -np.inf, "-Infinity"]
- )
- tm.assert_frame_equal(result, expected)
- def test_frame_int_overflow(self):
- # GH 30320
- encoded_json = json.dumps([{"col": "31900441201190696999"}, {"col": "Text"}])
- expected = DataFrame({"col": ["31900441201190696999", "Text"]})
- result = read_json(StringIO(encoded_json))
- tm.assert_frame_equal(result, expected)
- @pytest.mark.parametrize(
- "dataframe,expected",
- [
- (
- DataFrame({"x": [1, 2, 3], "y": ["a", "b", "c"]}),
- '{"(0, \'x\')":1,"(0, \'y\')":"a","(1, \'x\')":2,'
- '"(1, \'y\')":"b","(2, \'x\')":3,"(2, \'y\')":"c"}',
- )
- ],
- )
- def test_json_multiindex(self, dataframe, expected):
- series = dataframe.stack(future_stack=True)
- result = series.to_json(orient="index")
- assert result == expected
- @pytest.mark.single_cpu
- def test_to_s3(self, s3_public_bucket, s3so):
- # GH 28375
- mock_bucket_name, target_file = s3_public_bucket.name, "test.json"
- df = DataFrame({"x": [1, 2, 3], "y": [2, 4, 6]})
- df.to_json(f"s3://{mock_bucket_name}/{target_file}", storage_options=s3so)
- timeout = 5
- while True:
- if target_file in (obj.key for obj in s3_public_bucket.objects.all()):
- break
- time.sleep(0.1)
- timeout -= 0.1
- assert timeout > 0, "Timed out waiting for file to appear on moto"
- def test_json_pandas_nulls(self, nulls_fixture, request):
- # GH 31615
- if isinstance(nulls_fixture, Decimal):
- mark = pytest.mark.xfail(reason="not implemented")
- request.applymarker(mark)
- result = DataFrame([[nulls_fixture]]).to_json()
- assert result == '{"0":{"0":null}}'
- def test_readjson_bool_series(self):
- # GH31464
- result = read_json(StringIO("[true, true, false]"), typ="series")
- expected = Series([True, True, False])
- tm.assert_series_equal(result, expected)
- def test_to_json_multiindex_escape(self):
- # GH 15273
- df = DataFrame(
- True,
- index=date_range("2017-01-20", "2017-01-23"),
- columns=["foo", "bar"],
- ).stack(future_stack=True)
- result = df.to_json()
- expected = (
- "{\"(Timestamp('2017-01-20 00:00:00'), 'foo')\":true,"
- "\"(Timestamp('2017-01-20 00:00:00'), 'bar')\":true,"
- "\"(Timestamp('2017-01-21 00:00:00'), 'foo')\":true,"
- "\"(Timestamp('2017-01-21 00:00:00'), 'bar')\":true,"
- "\"(Timestamp('2017-01-22 00:00:00'), 'foo')\":true,"
- "\"(Timestamp('2017-01-22 00:00:00'), 'bar')\":true,"
- "\"(Timestamp('2017-01-23 00:00:00'), 'foo')\":true,"
- "\"(Timestamp('2017-01-23 00:00:00'), 'bar')\":true}"
- )
- assert result == expected
- def test_to_json_series_of_objects(self):
- class _TestObject:
- def __init__(self, a, b, _c, d) -> None:
- self.a = a
- self.b = b
- self._c = _c
- self.d = d
- def e(self):
- return 5
- # JSON keys should be all non-callable non-underscore attributes, see GH-42768
- series = Series([_TestObject(a=1, b=2, _c=3, d=4)])
- assert json.loads(series.to_json()) == {"0": {"a": 1, "b": 2, "d": 4}}
- @pytest.mark.parametrize(
- "data,expected",
- [
- (
- Series({0: -6 + 8j, 1: 0 + 1j, 2: 9 - 5j}),
- '{"0":{"imag":8.0,"real":-6.0},'
- '"1":{"imag":1.0,"real":0.0},'
- '"2":{"imag":-5.0,"real":9.0}}',
- ),
- (
- Series({0: -9.39 + 0.66j, 1: 3.95 + 9.32j, 2: 4.03 - 0.17j}),
- '{"0":{"imag":0.66,"real":-9.39},'
- '"1":{"imag":9.32,"real":3.95},'
- '"2":{"imag":-0.17,"real":4.03}}',
- ),
- (
- DataFrame([[-2 + 3j, -1 - 0j], [4 - 3j, -0 - 10j]]),
- '{"0":{"0":{"imag":3.0,"real":-2.0},'
- '"1":{"imag":-3.0,"real":4.0}},'
- '"1":{"0":{"imag":0.0,"real":-1.0},'
- '"1":{"imag":-10.0,"real":0.0}}}',
- ),
- (
- DataFrame(
- [[-0.28 + 0.34j, -1.08 - 0.39j], [0.41 - 0.34j, -0.78 - 1.35j]]
- ),
- '{"0":{"0":{"imag":0.34,"real":-0.28},'
- '"1":{"imag":-0.34,"real":0.41}},'
- '"1":{"0":{"imag":-0.39,"real":-1.08},'
- '"1":{"imag":-1.35,"real":-0.78}}}',
- ),
- ],
- )
- def test_complex_data_tojson(self, data, expected):
- # GH41174
- result = data.to_json()
- assert result == expected
- def test_json_uint64(self):
- # GH21073
- expected = (
- '{"columns":["col1"],"index":[0,1],'
- '"data":[[13342205958987758245],[12388075603347835679]]}'
- )
- df = DataFrame(data={"col1": [13342205958987758245, 12388075603347835679]})
- result = df.to_json(orient="split")
- assert result == expected
- @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
- def test_read_json_dtype_backend(
- self, string_storage, dtype_backend, orient, using_infer_string
- ):
- # GH#50750
- df = DataFrame(
- {
- "a": Series([1, np.nan, 3], dtype="Int64"),
- "b": Series([1, 2, 3], dtype="Int64"),
- "c": Series([1.5, np.nan, 2.5], dtype="Float64"),
- "d": Series([1.5, 2.0, 2.5], dtype="Float64"),
- "e": [True, False, None],
- "f": [True, False, True],
- "g": ["a", "b", "c"],
- "h": ["a", "b", None],
- }
- )
- out = df.to_json(orient=orient)
- with pd.option_context("mode.string_storage", string_storage):
- result = read_json(
- StringIO(out), dtype_backend=dtype_backend, orient=orient
- )
- if dtype_backend == "pyarrow":
- pa = pytest.importorskip("pyarrow")
- string_dtype = pd.ArrowDtype(pa.string())
- else:
- string_dtype = pd.StringDtype(string_storage)
- expected = DataFrame(
- {
- "a": Series([1, np.nan, 3], dtype="Int64"),
- "b": Series([1, 2, 3], dtype="Int64"),
- "c": Series([1.5, np.nan, 2.5], dtype="Float64"),
- "d": Series([1.5, 2.0, 2.5], dtype="Float64"),
- "e": Series([True, False, NA], dtype="boolean"),
- "f": Series([True, False, True], dtype="boolean"),
- "g": Series(["a", "b", "c"], dtype=string_dtype),
- "h": Series(["a", "b", None], dtype=string_dtype),
- }
- )
- if dtype_backend == "pyarrow":
- pa = pytest.importorskip("pyarrow")
- from pandas.arrays import ArrowExtensionArray
- expected = DataFrame(
- {
- col: ArrowExtensionArray(pa.array(expected[col], from_pandas=True))
- for col in expected.columns
- }
- )
- if orient == "values":
- expected.columns = list(range(8))
- # the storage of the str columns' Index is also affected by the
- # string_storage setting -> ignore that for checking the result
- tm.assert_frame_equal(result, expected, check_column_type=False)
- @pytest.mark.parametrize("orient", ["split", "records", "index"])
- def test_read_json_nullable_series(self, string_storage, dtype_backend, orient):
- # GH#50750
- pa = pytest.importorskip("pyarrow")
- ser = Series([1, np.nan, 3], dtype="Int64")
- out = ser.to_json(orient=orient)
- with pd.option_context("mode.string_storage", string_storage):
- result = read_json(
- StringIO(out), dtype_backend=dtype_backend, orient=orient, typ="series"
- )
- expected = Series([1, np.nan, 3], dtype="Int64")
- if dtype_backend == "pyarrow":
- from pandas.arrays import ArrowExtensionArray
- expected = Series(ArrowExtensionArray(pa.array(expected, from_pandas=True)))
- tm.assert_series_equal(result, expected)
- def test_invalid_dtype_backend(self):
- msg = (
- "dtype_backend numpy is invalid, only 'numpy_nullable' and "
- "'pyarrow' are allowed."
- )
- with pytest.raises(ValueError, match=msg):
- read_json("test", dtype_backend="numpy")
- def test_invalid_engine():
- # GH 48893
- ser = Series(range(1))
- out = ser.to_json()
- with pytest.raises(ValueError, match="The engine type foo"):
- read_json(out, engine="foo")
- def test_pyarrow_engine_lines_false():
- # GH 48893
- ser = Series(range(1))
- out = ser.to_json()
- with pytest.raises(ValueError, match="currently pyarrow engine only supports"):
- read_json(out, engine="pyarrow", lines=False)
- def test_json_roundtrip_string_inference(orient):
- df = DataFrame(
- [["a", "b"], ["c", "d"]], index=["row 1", "row 2"], columns=["col 1", "col 2"]
- )
- out = df.to_json()
- with pd.option_context("future.infer_string", True):
- result = read_json(StringIO(out))
- dtype = pd.StringDtype(na_value=np.nan)
- expected = DataFrame(
- [["a", "b"], ["c", "d"]],
- dtype=dtype,
- index=Index(["row 1", "row 2"], dtype=dtype),
- columns=Index(["col 1", "col 2"], dtype=dtype),
- )
- tm.assert_frame_equal(result, expected)
- def test_json_pos_args_deprecation():
- # GH-54229
- df = DataFrame({"a": [1, 2, 3]})
- msg = (
- r"Starting with pandas version 3.0 all arguments of to_json except for the "
- r"argument 'path_or_buf' will be keyword-only."
- )
- with tm.assert_produces_warning(FutureWarning, match=msg):
- buf = BytesIO()
- df.to_json(buf, "split")
- @td.skip_if_no("pyarrow")
- def test_to_json_ea_null():
- # GH#57224
- df = DataFrame(
- {
- "a": Series([1, NA], dtype="int64[pyarrow]"),
- "b": Series([2, NA], dtype="Int64"),
- }
- )
- result = df.to_json(orient="records", lines=True)
- expected = """{"a":1,"b":2}
- {"a":null,"b":null}
- """
- assert result == expected
- def test_read_json_lines_rangeindex():
- # GH 57429
- data = """
- {"a": 1, "b": 2}
- {"a": 3, "b": 4}
- """
- result = read_json(StringIO(data), lines=True).index
- expected = RangeIndex(2)
- tm.assert_index_equal(result, expected, exact=True)
|