test_sql.py 141 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164216521662167216821692170217121722173217421752176217721782179218021812182218321842185218621872188218921902191219221932194219521962197219821992200220122022203220422052206220722082209221022112212221322142215221622172218221922202221222222232224222522262227222822292230223122322233223422352236223722382239224022412242224322442245224622472248224922502251225222532254225522562257225822592260226122622263226422652266226722682269227022712272227322742275227622772278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025302630273028302930303031303230333034303530363037303830393040304130423043304430453046304730483049305030513052305330543055305630573058305930603061306230633064306530663067306830693070307130723073307430753076307730783079308030813082308330843085308630873088308930903091309230933094309530963097309830993100310131023103310431053106310731083109311031113112311331143115311631173118311931203121312231233124312531263127312831293130313131323133313431353136313731383139314031413142314331443145314631473148314931503151315231533154315531563157315831593160316131623163316431653166316731683169317031713172317331743175317631773178317931803181318231833184318531863187318831893190319131923193319431953196319731983199320032013202320332043205320632073208320932103211321232133214321532163217321832193220322132223223322432253226322732283229323032313232323332343235323632373238323932403241324232433244324532463247324832493250325132523253325432553256325732583259326032613262326332643265326632673268326932703271327232733274327532763277327832793280328132823283328432853286328732883289329032913292329332943295329632973298329933003301330233033304330533063307330833093310331133123313331433153316331733183319332033213322332333243325332633273328332933303331333233333334333533363337333833393340334133423343334433453346334733483349335033513352335333543355335633573358335933603361336233633364336533663367336833693370337133723373337433753376337733783379338033813382338333843385338633873388338933903391339233933394339533963397339833993400340134023403340434053406340734083409341034113412341334143415341634173418341934203421342234233424342534263427342834293430343134323433343434353436343734383439344034413442344334443445344634473448344934503451345234533454345534563457345834593460346134623463346434653466346734683469347034713472347334743475347634773478347934803481348234833484348534863487348834893490349134923493349434953496349734983499350035013502350335043505350635073508350935103511351235133514351535163517351835193520352135223523352435253526352735283529353035313532353335343535353635373538353935403541354235433544354535463547354835493550355135523553355435553556355735583559356035613562356335643565356635673568356935703571357235733574357535763577357835793580358135823583358435853586358735883589359035913592359335943595359635973598359936003601360236033604360536063607360836093610361136123613361436153616361736183619362036213622362336243625362636273628362936303631363236333634363536363637363836393640364136423643364436453646364736483649365036513652365336543655365636573658365936603661366236633664366536663667366836693670367136723673367436753676367736783679368036813682368336843685368636873688368936903691369236933694369536963697369836993700370137023703370437053706370737083709371037113712371337143715371637173718371937203721372237233724372537263727372837293730373137323733373437353736373737383739374037413742374337443745374637473748374937503751375237533754375537563757375837593760376137623763376437653766376737683769377037713772377337743775377637773778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427742784279428042814282428342844285428642874288428942904291429242934294429542964297429842994300430143024303430443054306430743084309431043114312431343144315431643174318431943204321432243234324432543264327432843294330433143324333433443354336433743384339434043414342434343444345434643474348434943504351435243534354435543564357435843594360436143624363436443654366436743684369437043714372437343744375437643774378437943804381438243834384438543864387
  1. from __future__ import annotations
  2. import contextlib
  3. from contextlib import closing
  4. import csv
  5. from datetime import (
  6. date,
  7. datetime,
  8. time,
  9. timedelta,
  10. )
  11. from io import StringIO
  12. from pathlib import Path
  13. import sqlite3
  14. from typing import TYPE_CHECKING
  15. import uuid
  16. import numpy as np
  17. import pytest
  18. from pandas._config import using_string_dtype
  19. from pandas._libs import lib
  20. from pandas.compat import (
  21. pa_version_under13p0,
  22. pa_version_under14p1,
  23. )
  24. from pandas.compat._optional import import_optional_dependency
  25. import pandas.util._test_decorators as td
  26. import pandas as pd
  27. from pandas import (
  28. DataFrame,
  29. Index,
  30. MultiIndex,
  31. Series,
  32. Timestamp,
  33. concat,
  34. date_range,
  35. isna,
  36. to_datetime,
  37. to_timedelta,
  38. )
  39. import pandas._testing as tm
  40. from pandas.util.version import Version
  41. from pandas.io import sql
  42. from pandas.io.sql import (
  43. SQLAlchemyEngine,
  44. SQLDatabase,
  45. SQLiteDatabase,
  46. get_engine,
  47. pandasSQL_builder,
  48. read_sql_query,
  49. read_sql_table,
  50. )
  51. if TYPE_CHECKING:
  52. import sqlalchemy
  53. pytestmark = [
  54. pytest.mark.filterwarnings(
  55. "ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
  56. ),
  57. pytest.mark.single_cpu,
  58. ]
  59. @pytest.fixture
  60. def sql_strings():
  61. return {
  62. "read_parameters": {
  63. "sqlite": "SELECT * FROM iris WHERE Name=? AND SepalLength=?",
  64. "mysql": "SELECT * FROM iris WHERE `Name`=%s AND `SepalLength`=%s",
  65. "postgresql": 'SELECT * FROM iris WHERE "Name"=%s AND "SepalLength"=%s',
  66. },
  67. "read_named_parameters": {
  68. "sqlite": """
  69. SELECT * FROM iris WHERE Name=:name AND SepalLength=:length
  70. """,
  71. "mysql": """
  72. SELECT * FROM iris WHERE
  73. `Name`=%(name)s AND `SepalLength`=%(length)s
  74. """,
  75. "postgresql": """
  76. SELECT * FROM iris WHERE
  77. "Name"=%(name)s AND "SepalLength"=%(length)s
  78. """,
  79. },
  80. "read_no_parameters_with_percent": {
  81. "sqlite": "SELECT * FROM iris WHERE Name LIKE '%'",
  82. "mysql": "SELECT * FROM iris WHERE `Name` LIKE '%'",
  83. "postgresql": "SELECT * FROM iris WHERE \"Name\" LIKE '%'",
  84. },
  85. }
  86. def iris_table_metadata():
  87. import sqlalchemy
  88. from sqlalchemy import (
  89. Column,
  90. Double,
  91. Float,
  92. MetaData,
  93. String,
  94. Table,
  95. )
  96. dtype = Double if Version(sqlalchemy.__version__) >= Version("2.0.0") else Float
  97. metadata = MetaData()
  98. iris = Table(
  99. "iris",
  100. metadata,
  101. Column("SepalLength", dtype),
  102. Column("SepalWidth", dtype),
  103. Column("PetalLength", dtype),
  104. Column("PetalWidth", dtype),
  105. Column("Name", String(200)),
  106. )
  107. return iris
  108. def create_and_load_iris_sqlite3(conn, iris_file: Path):
  109. stmt = """CREATE TABLE iris (
  110. "SepalLength" REAL,
  111. "SepalWidth" REAL,
  112. "PetalLength" REAL,
  113. "PetalWidth" REAL,
  114. "Name" TEXT
  115. )"""
  116. cur = conn.cursor()
  117. cur.execute(stmt)
  118. with iris_file.open(newline=None, encoding="utf-8") as csvfile:
  119. reader = csv.reader(csvfile)
  120. next(reader)
  121. stmt = "INSERT INTO iris VALUES(?, ?, ?, ?, ?)"
  122. # ADBC requires explicit types - no implicit str -> float conversion
  123. records = []
  124. records = [
  125. (
  126. float(row[0]),
  127. float(row[1]),
  128. float(row[2]),
  129. float(row[3]),
  130. row[4],
  131. )
  132. for row in reader
  133. ]
  134. cur.executemany(stmt, records)
  135. cur.close()
  136. conn.commit()
  137. def create_and_load_iris_postgresql(conn, iris_file: Path):
  138. stmt = """CREATE TABLE iris (
  139. "SepalLength" DOUBLE PRECISION,
  140. "SepalWidth" DOUBLE PRECISION,
  141. "PetalLength" DOUBLE PRECISION,
  142. "PetalWidth" DOUBLE PRECISION,
  143. "Name" TEXT
  144. )"""
  145. with conn.cursor() as cur:
  146. cur.execute(stmt)
  147. with iris_file.open(newline=None, encoding="utf-8") as csvfile:
  148. reader = csv.reader(csvfile)
  149. next(reader)
  150. stmt = "INSERT INTO iris VALUES($1, $2, $3, $4, $5)"
  151. # ADBC requires explicit types - no implicit str -> float conversion
  152. records = [
  153. (
  154. float(row[0]),
  155. float(row[1]),
  156. float(row[2]),
  157. float(row[3]),
  158. row[4],
  159. )
  160. for row in reader
  161. ]
  162. cur.executemany(stmt, records)
  163. conn.commit()
  164. def create_and_load_iris(conn, iris_file: Path):
  165. from sqlalchemy import insert
  166. iris = iris_table_metadata()
  167. with iris_file.open(newline=None, encoding="utf-8") as csvfile:
  168. reader = csv.reader(csvfile)
  169. header = next(reader)
  170. params = [dict(zip(header, row)) for row in reader]
  171. stmt = insert(iris).values(params)
  172. with conn.begin() as con:
  173. iris.drop(con, checkfirst=True)
  174. iris.create(bind=con)
  175. con.execute(stmt)
  176. def create_and_load_iris_view(conn):
  177. stmt = "CREATE VIEW iris_view AS SELECT * FROM iris"
  178. if isinstance(conn, sqlite3.Connection):
  179. cur = conn.cursor()
  180. cur.execute(stmt)
  181. else:
  182. adbc = import_optional_dependency("adbc_driver_manager.dbapi", errors="ignore")
  183. if adbc and isinstance(conn, adbc.Connection):
  184. with conn.cursor() as cur:
  185. cur.execute(stmt)
  186. conn.commit()
  187. else:
  188. from sqlalchemy import text
  189. stmt = text(stmt)
  190. with conn.begin() as con:
  191. con.execute(stmt)
  192. def types_table_metadata(dialect: str):
  193. from sqlalchemy import (
  194. TEXT,
  195. Boolean,
  196. Column,
  197. DateTime,
  198. Float,
  199. Integer,
  200. MetaData,
  201. Table,
  202. )
  203. date_type = TEXT if dialect == "sqlite" else DateTime
  204. bool_type = Integer if dialect == "sqlite" else Boolean
  205. metadata = MetaData()
  206. types = Table(
  207. "types",
  208. metadata,
  209. Column("TextCol", TEXT),
  210. Column("DateCol", date_type),
  211. Column("IntDateCol", Integer),
  212. Column("IntDateOnlyCol", Integer),
  213. Column("FloatCol", Float),
  214. Column("IntCol", Integer),
  215. Column("BoolCol", bool_type),
  216. Column("IntColWithNull", Integer),
  217. Column("BoolColWithNull", bool_type),
  218. )
  219. return types
  220. def create_and_load_types_sqlite3(conn, types_data: list[dict]):
  221. stmt = """CREATE TABLE types (
  222. "TextCol" TEXT,
  223. "DateCol" TEXT,
  224. "IntDateCol" INTEGER,
  225. "IntDateOnlyCol" INTEGER,
  226. "FloatCol" REAL,
  227. "IntCol" INTEGER,
  228. "BoolCol" INTEGER,
  229. "IntColWithNull" INTEGER,
  230. "BoolColWithNull" INTEGER
  231. )"""
  232. ins_stmt = """
  233. INSERT INTO types
  234. VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?)
  235. """
  236. if isinstance(conn, sqlite3.Connection):
  237. cur = conn.cursor()
  238. cur.execute(stmt)
  239. cur.executemany(ins_stmt, types_data)
  240. else:
  241. with conn.cursor() as cur:
  242. cur.execute(stmt)
  243. cur.executemany(ins_stmt, types_data)
  244. conn.commit()
  245. def create_and_load_types_postgresql(conn, types_data: list[dict]):
  246. with conn.cursor() as cur:
  247. stmt = """CREATE TABLE types (
  248. "TextCol" TEXT,
  249. "DateCol" TIMESTAMP,
  250. "IntDateCol" INTEGER,
  251. "IntDateOnlyCol" INTEGER,
  252. "FloatCol" DOUBLE PRECISION,
  253. "IntCol" INTEGER,
  254. "BoolCol" BOOLEAN,
  255. "IntColWithNull" INTEGER,
  256. "BoolColWithNull" BOOLEAN
  257. )"""
  258. cur.execute(stmt)
  259. stmt = """
  260. INSERT INTO types
  261. VALUES($1, $2::timestamp, $3, $4, $5, $6, $7, $8, $9)
  262. """
  263. cur.executemany(stmt, types_data)
  264. conn.commit()
  265. def create_and_load_types(conn, types_data: list[dict], dialect: str):
  266. from sqlalchemy import insert
  267. from sqlalchemy.engine import Engine
  268. types = types_table_metadata(dialect)
  269. stmt = insert(types).values(types_data)
  270. if isinstance(conn, Engine):
  271. with conn.connect() as conn:
  272. with conn.begin():
  273. types.drop(conn, checkfirst=True)
  274. types.create(bind=conn)
  275. conn.execute(stmt)
  276. else:
  277. with conn.begin():
  278. types.drop(conn, checkfirst=True)
  279. types.create(bind=conn)
  280. conn.execute(stmt)
  281. def create_and_load_postgres_datetz(conn):
  282. from sqlalchemy import (
  283. Column,
  284. DateTime,
  285. MetaData,
  286. Table,
  287. insert,
  288. )
  289. from sqlalchemy.engine import Engine
  290. metadata = MetaData()
  291. datetz = Table("datetz", metadata, Column("DateColWithTz", DateTime(timezone=True)))
  292. datetz_data = [
  293. {
  294. "DateColWithTz": "2000-01-01 00:00:00-08:00",
  295. },
  296. {
  297. "DateColWithTz": "2000-06-01 00:00:00-07:00",
  298. },
  299. ]
  300. stmt = insert(datetz).values(datetz_data)
  301. if isinstance(conn, Engine):
  302. with conn.connect() as conn:
  303. with conn.begin():
  304. datetz.drop(conn, checkfirst=True)
  305. datetz.create(bind=conn)
  306. conn.execute(stmt)
  307. else:
  308. with conn.begin():
  309. datetz.drop(conn, checkfirst=True)
  310. datetz.create(bind=conn)
  311. conn.execute(stmt)
  312. # "2000-01-01 00:00:00-08:00" should convert to
  313. # "2000-01-01 08:00:00"
  314. # "2000-06-01 00:00:00-07:00" should convert to
  315. # "2000-06-01 07:00:00"
  316. # GH 6415
  317. expected_data = [
  318. Timestamp("2000-01-01 08:00:00", tz="UTC"),
  319. Timestamp("2000-06-01 07:00:00", tz="UTC"),
  320. ]
  321. return Series(expected_data, name="DateColWithTz")
  322. def check_iris_frame(frame: DataFrame):
  323. pytype = frame.dtypes.iloc[0].type
  324. row = frame.iloc[0]
  325. assert issubclass(pytype, np.floating)
  326. tm.assert_series_equal(
  327. row, Series([5.1, 3.5, 1.4, 0.2, "Iris-setosa"], index=frame.columns, name=0)
  328. )
  329. assert frame.shape in ((150, 5), (8, 5))
  330. def count_rows(conn, table_name: str):
  331. stmt = f"SELECT count(*) AS count_1 FROM {table_name}"
  332. adbc = import_optional_dependency("adbc_driver_manager.dbapi", errors="ignore")
  333. if isinstance(conn, sqlite3.Connection):
  334. cur = conn.cursor()
  335. return cur.execute(stmt).fetchone()[0]
  336. elif adbc and isinstance(conn, adbc.Connection):
  337. with conn.cursor() as cur:
  338. cur.execute(stmt)
  339. return cur.fetchone()[0]
  340. else:
  341. from sqlalchemy import create_engine
  342. from sqlalchemy.engine import Engine
  343. if isinstance(conn, str):
  344. try:
  345. engine = create_engine(conn)
  346. with engine.connect() as conn:
  347. return conn.exec_driver_sql(stmt).scalar_one()
  348. finally:
  349. engine.dispose()
  350. elif isinstance(conn, Engine):
  351. with conn.connect() as conn:
  352. return conn.exec_driver_sql(stmt).scalar_one()
  353. else:
  354. return conn.exec_driver_sql(stmt).scalar_one()
  355. @pytest.fixture
  356. def iris_path(datapath):
  357. iris_path = datapath("io", "data", "csv", "iris.csv")
  358. return Path(iris_path)
  359. @pytest.fixture
  360. def types_data():
  361. return [
  362. {
  363. "TextCol": "first",
  364. "DateCol": "2000-01-03 00:00:00",
  365. "IntDateCol": 535852800,
  366. "IntDateOnlyCol": 20101010,
  367. "FloatCol": 10.10,
  368. "IntCol": 1,
  369. "BoolCol": False,
  370. "IntColWithNull": 1,
  371. "BoolColWithNull": False,
  372. },
  373. {
  374. "TextCol": "first",
  375. "DateCol": "2000-01-04 00:00:00",
  376. "IntDateCol": 1356998400,
  377. "IntDateOnlyCol": 20101212,
  378. "FloatCol": 10.10,
  379. "IntCol": 1,
  380. "BoolCol": False,
  381. "IntColWithNull": None,
  382. "BoolColWithNull": None,
  383. },
  384. ]
  385. @pytest.fixture
  386. def types_data_frame(types_data):
  387. dtypes = {
  388. "TextCol": "str",
  389. "DateCol": "str",
  390. "IntDateCol": "int64",
  391. "IntDateOnlyCol": "int64",
  392. "FloatCol": "float",
  393. "IntCol": "int64",
  394. "BoolCol": "int64",
  395. "IntColWithNull": "float",
  396. "BoolColWithNull": "float",
  397. }
  398. df = DataFrame(types_data)
  399. return df[dtypes.keys()].astype(dtypes)
  400. @pytest.fixture
  401. def test_frame1():
  402. columns = ["index", "A", "B", "C", "D"]
  403. data = [
  404. (
  405. "2000-01-03 00:00:00",
  406. 0.980268513777,
  407. 3.68573087906,
  408. -0.364216805298,
  409. -1.15973806169,
  410. ),
  411. (
  412. "2000-01-04 00:00:00",
  413. 1.04791624281,
  414. -0.0412318367011,
  415. -0.16181208307,
  416. 0.212549316967,
  417. ),
  418. (
  419. "2000-01-05 00:00:00",
  420. 0.498580885705,
  421. 0.731167677815,
  422. -0.537677223318,
  423. 1.34627041952,
  424. ),
  425. (
  426. "2000-01-06 00:00:00",
  427. 1.12020151869,
  428. 1.56762092543,
  429. 0.00364077397681,
  430. 0.67525259227,
  431. ),
  432. ]
  433. return DataFrame(data, columns=columns)
  434. @pytest.fixture
  435. def test_frame3():
  436. columns = ["index", "A", "B"]
  437. data = [
  438. ("2000-01-03 00:00:00", 2**31 - 1, -1.987670),
  439. ("2000-01-04 00:00:00", -29, -0.0412318367011),
  440. ("2000-01-05 00:00:00", 20000, 0.731167677815),
  441. ("2000-01-06 00:00:00", -290867, 1.56762092543),
  442. ]
  443. return DataFrame(data, columns=columns)
  444. def get_all_views(conn):
  445. if isinstance(conn, sqlite3.Connection):
  446. c = conn.execute("SELECT name FROM sqlite_master WHERE type='view'")
  447. return [view[0] for view in c.fetchall()]
  448. else:
  449. adbc = import_optional_dependency("adbc_driver_manager.dbapi", errors="ignore")
  450. if adbc and isinstance(conn, adbc.Connection):
  451. results = []
  452. info = conn.adbc_get_objects().read_all().to_pylist()
  453. for catalog in info:
  454. catalog["catalog_name"]
  455. for schema in catalog["catalog_db_schemas"]:
  456. schema["db_schema_name"]
  457. for table in schema["db_schema_tables"]:
  458. if table["table_type"] == "view":
  459. view_name = table["table_name"]
  460. results.append(view_name)
  461. return results
  462. else:
  463. from sqlalchemy import inspect
  464. return inspect(conn).get_view_names()
  465. def get_all_tables(conn):
  466. if isinstance(conn, sqlite3.Connection):
  467. c = conn.execute("SELECT name FROM sqlite_master WHERE type='table'")
  468. return [table[0] for table in c.fetchall()]
  469. else:
  470. adbc = import_optional_dependency("adbc_driver_manager.dbapi", errors="ignore")
  471. if adbc and isinstance(conn, adbc.Connection):
  472. results = []
  473. info = conn.adbc_get_objects().read_all().to_pylist()
  474. for catalog in info:
  475. for schema in catalog["catalog_db_schemas"]:
  476. for table in schema["db_schema_tables"]:
  477. if table["table_type"] == "table":
  478. table_name = table["table_name"]
  479. results.append(table_name)
  480. return results
  481. else:
  482. from sqlalchemy import inspect
  483. return inspect(conn).get_table_names()
  484. def drop_table(
  485. table_name: str,
  486. conn: sqlite3.Connection | sqlalchemy.engine.Engine | sqlalchemy.engine.Connection,
  487. ):
  488. if isinstance(conn, sqlite3.Connection):
  489. conn.execute(f"DROP TABLE IF EXISTS {sql._get_valid_sqlite_name(table_name)}")
  490. conn.commit()
  491. else:
  492. adbc = import_optional_dependency("adbc_driver_manager.dbapi", errors="ignore")
  493. if adbc and isinstance(conn, adbc.Connection):
  494. with conn.cursor() as cur:
  495. cur.execute(f'DROP TABLE IF EXISTS "{table_name}"')
  496. else:
  497. with conn.begin() as con:
  498. with sql.SQLDatabase(con) as db:
  499. db.drop_table(table_name)
  500. def drop_view(
  501. view_name: str,
  502. conn: sqlite3.Connection | sqlalchemy.engine.Engine | sqlalchemy.engine.Connection,
  503. ):
  504. import sqlalchemy
  505. if isinstance(conn, sqlite3.Connection):
  506. conn.execute(f"DROP VIEW IF EXISTS {sql._get_valid_sqlite_name(view_name)}")
  507. conn.commit()
  508. else:
  509. adbc = import_optional_dependency("adbc_driver_manager.dbapi", errors="ignore")
  510. if adbc and isinstance(conn, adbc.Connection):
  511. with conn.cursor() as cur:
  512. cur.execute(f'DROP VIEW IF EXISTS "{view_name}"')
  513. else:
  514. quoted_view = conn.engine.dialect.identifier_preparer.quote_identifier(
  515. view_name
  516. )
  517. stmt = sqlalchemy.text(f"DROP VIEW IF EXISTS {quoted_view}")
  518. with conn.begin() as con:
  519. con.execute(stmt) # type: ignore[union-attr]
  520. @pytest.fixture
  521. def mysql_pymysql_engine():
  522. sqlalchemy = pytest.importorskip("sqlalchemy")
  523. pymysql = pytest.importorskip("pymysql")
  524. engine = sqlalchemy.create_engine(
  525. "mysql+pymysql://root@localhost:3306/pandas",
  526. connect_args={"client_flag": pymysql.constants.CLIENT.MULTI_STATEMENTS},
  527. poolclass=sqlalchemy.pool.NullPool,
  528. )
  529. yield engine
  530. for view in get_all_views(engine):
  531. drop_view(view, engine)
  532. for tbl in get_all_tables(engine):
  533. drop_table(tbl, engine)
  534. engine.dispose()
  535. @pytest.fixture
  536. def mysql_pymysql_engine_iris(mysql_pymysql_engine, iris_path):
  537. create_and_load_iris(mysql_pymysql_engine, iris_path)
  538. create_and_load_iris_view(mysql_pymysql_engine)
  539. yield mysql_pymysql_engine
  540. @pytest.fixture
  541. def mysql_pymysql_engine_types(mysql_pymysql_engine, types_data):
  542. create_and_load_types(mysql_pymysql_engine, types_data, "mysql")
  543. yield mysql_pymysql_engine
  544. @pytest.fixture
  545. def mysql_pymysql_conn(mysql_pymysql_engine):
  546. with mysql_pymysql_engine.connect() as conn:
  547. yield conn
  548. @pytest.fixture
  549. def mysql_pymysql_conn_iris(mysql_pymysql_engine_iris):
  550. with mysql_pymysql_engine_iris.connect() as conn:
  551. yield conn
  552. @pytest.fixture
  553. def mysql_pymysql_conn_types(mysql_pymysql_engine_types):
  554. with mysql_pymysql_engine_types.connect() as conn:
  555. yield conn
  556. @pytest.fixture
  557. def postgresql_psycopg2_engine():
  558. sqlalchemy = pytest.importorskip("sqlalchemy")
  559. pytest.importorskip("psycopg2")
  560. engine = sqlalchemy.create_engine(
  561. "postgresql+psycopg2://postgres:postgres@localhost:5432/pandas",
  562. poolclass=sqlalchemy.pool.NullPool,
  563. )
  564. yield engine
  565. for view in get_all_views(engine):
  566. drop_view(view, engine)
  567. for tbl in get_all_tables(engine):
  568. drop_table(tbl, engine)
  569. engine.dispose()
  570. @pytest.fixture
  571. def postgresql_psycopg2_engine_iris(postgresql_psycopg2_engine, iris_path):
  572. create_and_load_iris(postgresql_psycopg2_engine, iris_path)
  573. create_and_load_iris_view(postgresql_psycopg2_engine)
  574. yield postgresql_psycopg2_engine
  575. @pytest.fixture
  576. def postgresql_psycopg2_engine_types(postgresql_psycopg2_engine, types_data):
  577. create_and_load_types(postgresql_psycopg2_engine, types_data, "postgres")
  578. yield postgresql_psycopg2_engine
  579. @pytest.fixture
  580. def postgresql_psycopg2_conn(postgresql_psycopg2_engine):
  581. with postgresql_psycopg2_engine.connect() as conn:
  582. yield conn
  583. @pytest.fixture
  584. def postgresql_adbc_conn():
  585. pytest.importorskip("pyarrow")
  586. pytest.importorskip("adbc_driver_postgresql")
  587. from adbc_driver_postgresql import dbapi
  588. uri = "postgresql://postgres:postgres@localhost:5432/pandas"
  589. with dbapi.connect(uri) as conn:
  590. yield conn
  591. for view in get_all_views(conn):
  592. drop_view(view, conn)
  593. for tbl in get_all_tables(conn):
  594. drop_table(tbl, conn)
  595. conn.commit()
  596. @pytest.fixture
  597. def postgresql_adbc_iris(postgresql_adbc_conn, iris_path):
  598. import adbc_driver_manager as mgr
  599. conn = postgresql_adbc_conn
  600. try:
  601. conn.adbc_get_table_schema("iris")
  602. except mgr.ProgrammingError:
  603. conn.rollback()
  604. create_and_load_iris_postgresql(conn, iris_path)
  605. try:
  606. conn.adbc_get_table_schema("iris_view")
  607. except mgr.ProgrammingError: # note arrow-adbc issue 1022
  608. conn.rollback()
  609. create_and_load_iris_view(conn)
  610. yield conn
  611. @pytest.fixture
  612. def postgresql_adbc_types(postgresql_adbc_conn, types_data):
  613. import adbc_driver_manager as mgr
  614. conn = postgresql_adbc_conn
  615. try:
  616. conn.adbc_get_table_schema("types")
  617. except mgr.ProgrammingError:
  618. conn.rollback()
  619. new_data = [tuple(entry.values()) for entry in types_data]
  620. create_and_load_types_postgresql(conn, new_data)
  621. yield conn
  622. @pytest.fixture
  623. def postgresql_psycopg2_conn_iris(postgresql_psycopg2_engine_iris):
  624. with postgresql_psycopg2_engine_iris.connect() as conn:
  625. yield conn
  626. @pytest.fixture
  627. def postgresql_psycopg2_conn_types(postgresql_psycopg2_engine_types):
  628. with postgresql_psycopg2_engine_types.connect() as conn:
  629. yield conn
  630. @pytest.fixture
  631. def sqlite_str():
  632. pytest.importorskip("sqlalchemy")
  633. with tm.ensure_clean() as name:
  634. yield f"sqlite:///{name}"
  635. @pytest.fixture
  636. def sqlite_engine(sqlite_str):
  637. sqlalchemy = pytest.importorskip("sqlalchemy")
  638. engine = sqlalchemy.create_engine(sqlite_str, poolclass=sqlalchemy.pool.NullPool)
  639. yield engine
  640. for view in get_all_views(engine):
  641. drop_view(view, engine)
  642. for tbl in get_all_tables(engine):
  643. drop_table(tbl, engine)
  644. engine.dispose()
  645. @pytest.fixture
  646. def sqlite_conn(sqlite_engine):
  647. with sqlite_engine.connect() as conn:
  648. yield conn
  649. @pytest.fixture
  650. def sqlite_str_iris(sqlite_str, iris_path):
  651. sqlalchemy = pytest.importorskip("sqlalchemy")
  652. engine = sqlalchemy.create_engine(sqlite_str)
  653. create_and_load_iris(engine, iris_path)
  654. create_and_load_iris_view(engine)
  655. engine.dispose()
  656. return sqlite_str
  657. @pytest.fixture
  658. def sqlite_engine_iris(sqlite_engine, iris_path):
  659. create_and_load_iris(sqlite_engine, iris_path)
  660. create_and_load_iris_view(sqlite_engine)
  661. yield sqlite_engine
  662. @pytest.fixture
  663. def sqlite_conn_iris(sqlite_engine_iris):
  664. with sqlite_engine_iris.connect() as conn:
  665. yield conn
  666. @pytest.fixture
  667. def sqlite_str_types(sqlite_str, types_data):
  668. sqlalchemy = pytest.importorskip("sqlalchemy")
  669. engine = sqlalchemy.create_engine(sqlite_str)
  670. create_and_load_types(engine, types_data, "sqlite")
  671. engine.dispose()
  672. return sqlite_str
  673. @pytest.fixture
  674. def sqlite_engine_types(sqlite_engine, types_data):
  675. create_and_load_types(sqlite_engine, types_data, "sqlite")
  676. yield sqlite_engine
  677. @pytest.fixture
  678. def sqlite_conn_types(sqlite_engine_types):
  679. with sqlite_engine_types.connect() as conn:
  680. yield conn
  681. @pytest.fixture
  682. def sqlite_adbc_conn():
  683. pytest.importorskip("pyarrow")
  684. pytest.importorskip("adbc_driver_sqlite")
  685. from adbc_driver_sqlite import dbapi
  686. with tm.ensure_clean() as name:
  687. uri = f"file:{name}"
  688. with dbapi.connect(uri) as conn:
  689. yield conn
  690. for view in get_all_views(conn):
  691. drop_view(view, conn)
  692. for tbl in get_all_tables(conn):
  693. drop_table(tbl, conn)
  694. conn.commit()
  695. @pytest.fixture
  696. def sqlite_adbc_iris(sqlite_adbc_conn, iris_path):
  697. import adbc_driver_manager as mgr
  698. conn = sqlite_adbc_conn
  699. try:
  700. conn.adbc_get_table_schema("iris")
  701. except mgr.ProgrammingError:
  702. conn.rollback()
  703. create_and_load_iris_sqlite3(conn, iris_path)
  704. try:
  705. conn.adbc_get_table_schema("iris_view")
  706. except mgr.ProgrammingError:
  707. conn.rollback()
  708. create_and_load_iris_view(conn)
  709. yield conn
  710. @pytest.fixture
  711. def sqlite_adbc_types(sqlite_adbc_conn, types_data):
  712. import adbc_driver_manager as mgr
  713. conn = sqlite_adbc_conn
  714. try:
  715. conn.adbc_get_table_schema("types")
  716. except mgr.ProgrammingError:
  717. conn.rollback()
  718. new_data = []
  719. for entry in types_data:
  720. entry["BoolCol"] = int(entry["BoolCol"])
  721. if entry["BoolColWithNull"] is not None:
  722. entry["BoolColWithNull"] = int(entry["BoolColWithNull"])
  723. new_data.append(tuple(entry.values()))
  724. create_and_load_types_sqlite3(conn, new_data)
  725. conn.commit()
  726. yield conn
  727. @pytest.fixture
  728. def sqlite_buildin():
  729. with contextlib.closing(sqlite3.connect(":memory:")) as closing_conn:
  730. with closing_conn as conn:
  731. yield conn
  732. @pytest.fixture
  733. def sqlite_buildin_iris(sqlite_buildin, iris_path):
  734. create_and_load_iris_sqlite3(sqlite_buildin, iris_path)
  735. create_and_load_iris_view(sqlite_buildin)
  736. yield sqlite_buildin
  737. @pytest.fixture
  738. def sqlite_buildin_types(sqlite_buildin, types_data):
  739. types_data = [tuple(entry.values()) for entry in types_data]
  740. create_and_load_types_sqlite3(sqlite_buildin, types_data)
  741. yield sqlite_buildin
  742. mysql_connectable = [
  743. pytest.param("mysql_pymysql_engine", marks=pytest.mark.db),
  744. pytest.param("mysql_pymysql_conn", marks=pytest.mark.db),
  745. ]
  746. mysql_connectable_iris = [
  747. pytest.param("mysql_pymysql_engine_iris", marks=pytest.mark.db),
  748. pytest.param("mysql_pymysql_conn_iris", marks=pytest.mark.db),
  749. ]
  750. mysql_connectable_types = [
  751. pytest.param("mysql_pymysql_engine_types", marks=pytest.mark.db),
  752. pytest.param("mysql_pymysql_conn_types", marks=pytest.mark.db),
  753. ]
  754. postgresql_connectable = [
  755. pytest.param("postgresql_psycopg2_engine", marks=pytest.mark.db),
  756. pytest.param("postgresql_psycopg2_conn", marks=pytest.mark.db),
  757. ]
  758. postgresql_connectable_iris = [
  759. pytest.param("postgresql_psycopg2_engine_iris", marks=pytest.mark.db),
  760. pytest.param("postgresql_psycopg2_conn_iris", marks=pytest.mark.db),
  761. ]
  762. postgresql_connectable_types = [
  763. pytest.param("postgresql_psycopg2_engine_types", marks=pytest.mark.db),
  764. pytest.param("postgresql_psycopg2_conn_types", marks=pytest.mark.db),
  765. ]
  766. sqlite_connectable = [
  767. "sqlite_engine",
  768. "sqlite_conn",
  769. "sqlite_str",
  770. ]
  771. sqlite_connectable_iris = [
  772. "sqlite_engine_iris",
  773. "sqlite_conn_iris",
  774. "sqlite_str_iris",
  775. ]
  776. sqlite_connectable_types = [
  777. "sqlite_engine_types",
  778. "sqlite_conn_types",
  779. "sqlite_str_types",
  780. ]
  781. sqlalchemy_connectable = mysql_connectable + postgresql_connectable + sqlite_connectable
  782. sqlalchemy_connectable_iris = (
  783. mysql_connectable_iris + postgresql_connectable_iris + sqlite_connectable_iris
  784. )
  785. sqlalchemy_connectable_types = (
  786. mysql_connectable_types + postgresql_connectable_types + sqlite_connectable_types
  787. )
  788. adbc_connectable = [
  789. "sqlite_adbc_conn",
  790. pytest.param("postgresql_adbc_conn", marks=pytest.mark.db),
  791. ]
  792. adbc_connectable_iris = [
  793. pytest.param("postgresql_adbc_iris", marks=pytest.mark.db),
  794. "sqlite_adbc_iris",
  795. ]
  796. adbc_connectable_types = [
  797. pytest.param("postgresql_adbc_types", marks=pytest.mark.db),
  798. "sqlite_adbc_types",
  799. ]
  800. all_connectable = sqlalchemy_connectable + ["sqlite_buildin"] + adbc_connectable
  801. all_connectable_iris = (
  802. sqlalchemy_connectable_iris + ["sqlite_buildin_iris"] + adbc_connectable_iris
  803. )
  804. all_connectable_types = (
  805. sqlalchemy_connectable_types + ["sqlite_buildin_types"] + adbc_connectable_types
  806. )
  807. @pytest.mark.parametrize("conn", all_connectable)
  808. def test_dataframe_to_sql(conn, test_frame1, request):
  809. # GH 51086 if conn is sqlite_engine
  810. conn = request.getfixturevalue(conn)
  811. test_frame1.to_sql(name="test", con=conn, if_exists="append", index=False)
  812. @pytest.mark.parametrize("conn", all_connectable)
  813. def test_dataframe_to_sql_empty(conn, test_frame1, request):
  814. if conn == "postgresql_adbc_conn" and not using_string_dtype():
  815. request.node.add_marker(
  816. pytest.mark.xfail(
  817. reason="postgres ADBC driver < 1.2 cannot insert index with null type",
  818. )
  819. )
  820. # GH 51086 if conn is sqlite_engine
  821. conn = request.getfixturevalue(conn)
  822. empty_df = test_frame1.iloc[:0]
  823. empty_df.to_sql(name="test", con=conn, if_exists="append", index=False)
  824. @pytest.mark.parametrize("conn", all_connectable)
  825. def test_dataframe_to_sql_arrow_dtypes(conn, request):
  826. # GH 52046
  827. pytest.importorskip("pyarrow")
  828. df = DataFrame(
  829. {
  830. "int": pd.array([1], dtype="int8[pyarrow]"),
  831. "datetime": pd.array(
  832. [datetime(2023, 1, 1)], dtype="timestamp[ns][pyarrow]"
  833. ),
  834. "date": pd.array([date(2023, 1, 1)], dtype="date32[day][pyarrow]"),
  835. "timedelta": pd.array([timedelta(1)], dtype="duration[ns][pyarrow]"),
  836. "string": pd.array(["a"], dtype="string[pyarrow]"),
  837. }
  838. )
  839. if "adbc" in conn:
  840. if conn == "sqlite_adbc_conn":
  841. df = df.drop(columns=["timedelta"])
  842. if pa_version_under14p1:
  843. exp_warning = DeprecationWarning
  844. msg = "is_sparse is deprecated"
  845. else:
  846. exp_warning = None
  847. msg = ""
  848. else:
  849. exp_warning = UserWarning
  850. msg = "the 'timedelta'"
  851. conn = request.getfixturevalue(conn)
  852. with tm.assert_produces_warning(exp_warning, match=msg, check_stacklevel=False):
  853. df.to_sql(name="test_arrow", con=conn, if_exists="replace", index=False)
  854. @pytest.mark.parametrize("conn", all_connectable)
  855. def test_dataframe_to_sql_arrow_dtypes_missing(conn, request, nulls_fixture):
  856. # GH 52046
  857. pytest.importorskip("pyarrow")
  858. df = DataFrame(
  859. {
  860. "datetime": pd.array(
  861. [datetime(2023, 1, 1), nulls_fixture], dtype="timestamp[ns][pyarrow]"
  862. ),
  863. }
  864. )
  865. conn = request.getfixturevalue(conn)
  866. df.to_sql(name="test_arrow", con=conn, if_exists="replace", index=False)
  867. @pytest.mark.parametrize("conn", all_connectable)
  868. @pytest.mark.parametrize("method", [None, "multi"])
  869. def test_to_sql(conn, method, test_frame1, request):
  870. if method == "multi" and "adbc" in conn:
  871. request.node.add_marker(
  872. pytest.mark.xfail(
  873. reason="'method' not implemented for ADBC drivers", strict=True
  874. )
  875. )
  876. conn = request.getfixturevalue(conn)
  877. with pandasSQL_builder(conn, need_transaction=True) as pandasSQL:
  878. pandasSQL.to_sql(test_frame1, "test_frame", method=method)
  879. assert pandasSQL.has_table("test_frame")
  880. assert count_rows(conn, "test_frame") == len(test_frame1)
  881. @pytest.mark.parametrize("conn", all_connectable)
  882. @pytest.mark.parametrize("mode, num_row_coef", [("replace", 1), ("append", 2)])
  883. def test_to_sql_exist(conn, mode, num_row_coef, test_frame1, request):
  884. conn = request.getfixturevalue(conn)
  885. with pandasSQL_builder(conn, need_transaction=True) as pandasSQL:
  886. pandasSQL.to_sql(test_frame1, "test_frame", if_exists="fail")
  887. pandasSQL.to_sql(test_frame1, "test_frame", if_exists=mode)
  888. assert pandasSQL.has_table("test_frame")
  889. assert count_rows(conn, "test_frame") == num_row_coef * len(test_frame1)
  890. @pytest.mark.parametrize("conn", all_connectable)
  891. def test_to_sql_exist_fail(conn, test_frame1, request):
  892. conn = request.getfixturevalue(conn)
  893. with pandasSQL_builder(conn, need_transaction=True) as pandasSQL:
  894. pandasSQL.to_sql(test_frame1, "test_frame", if_exists="fail")
  895. assert pandasSQL.has_table("test_frame")
  896. msg = "Table 'test_frame' already exists"
  897. with pytest.raises(ValueError, match=msg):
  898. pandasSQL.to_sql(test_frame1, "test_frame", if_exists="fail")
  899. @pytest.mark.parametrize("conn", all_connectable_iris)
  900. def test_read_iris_query(conn, request):
  901. conn = request.getfixturevalue(conn)
  902. iris_frame = read_sql_query("SELECT * FROM iris", conn)
  903. check_iris_frame(iris_frame)
  904. iris_frame = pd.read_sql("SELECT * FROM iris", conn)
  905. check_iris_frame(iris_frame)
  906. iris_frame = pd.read_sql("SELECT * FROM iris where 0=1", conn)
  907. assert iris_frame.shape == (0, 5)
  908. assert "SepalWidth" in iris_frame.columns
  909. @pytest.mark.parametrize("conn", all_connectable_iris)
  910. def test_read_iris_query_chunksize(conn, request):
  911. if "adbc" in conn:
  912. request.node.add_marker(
  913. pytest.mark.xfail(
  914. reason="'chunksize' not implemented for ADBC drivers",
  915. strict=True,
  916. )
  917. )
  918. conn = request.getfixturevalue(conn)
  919. iris_frame = concat(read_sql_query("SELECT * FROM iris", conn, chunksize=7))
  920. check_iris_frame(iris_frame)
  921. iris_frame = concat(pd.read_sql("SELECT * FROM iris", conn, chunksize=7))
  922. check_iris_frame(iris_frame)
  923. iris_frame = concat(pd.read_sql("SELECT * FROM iris where 0=1", conn, chunksize=7))
  924. assert iris_frame.shape == (0, 5)
  925. assert "SepalWidth" in iris_frame.columns
  926. @pytest.mark.parametrize("conn", sqlalchemy_connectable_iris)
  927. def test_read_iris_query_expression_with_parameter(conn, request):
  928. if "adbc" in conn:
  929. request.node.add_marker(
  930. pytest.mark.xfail(
  931. reason="'chunksize' not implemented for ADBC drivers",
  932. strict=True,
  933. )
  934. )
  935. conn = request.getfixturevalue(conn)
  936. from sqlalchemy import (
  937. MetaData,
  938. Table,
  939. create_engine,
  940. select,
  941. )
  942. metadata = MetaData()
  943. autoload_con = create_engine(conn) if isinstance(conn, str) else conn
  944. iris = Table("iris", metadata, autoload_with=autoload_con)
  945. iris_frame = read_sql_query(
  946. select(iris), conn, params={"name": "Iris-setosa", "length": 5.1}
  947. )
  948. check_iris_frame(iris_frame)
  949. if isinstance(conn, str):
  950. autoload_con.dispose()
  951. @pytest.mark.parametrize("conn", all_connectable_iris)
  952. def test_read_iris_query_string_with_parameter(conn, request, sql_strings):
  953. if "adbc" in conn:
  954. request.node.add_marker(
  955. pytest.mark.xfail(
  956. reason="'chunksize' not implemented for ADBC drivers",
  957. strict=True,
  958. )
  959. )
  960. for db, query in sql_strings["read_parameters"].items():
  961. if db in conn:
  962. break
  963. else:
  964. raise KeyError(f"No part of {conn} found in sql_strings['read_parameters']")
  965. conn = request.getfixturevalue(conn)
  966. iris_frame = read_sql_query(query, conn, params=("Iris-setosa", 5.1))
  967. check_iris_frame(iris_frame)
  968. @pytest.mark.parametrize("conn", sqlalchemy_connectable_iris)
  969. def test_read_iris_table(conn, request):
  970. # GH 51015 if conn = sqlite_iris_str
  971. conn = request.getfixturevalue(conn)
  972. iris_frame = read_sql_table("iris", conn)
  973. check_iris_frame(iris_frame)
  974. iris_frame = pd.read_sql("iris", conn)
  975. check_iris_frame(iris_frame)
  976. @pytest.mark.parametrize("conn", sqlalchemy_connectable_iris)
  977. def test_read_iris_table_chunksize(conn, request):
  978. if "adbc" in conn:
  979. request.node.add_marker(
  980. pytest.mark.xfail(reason="chunksize argument NotImplemented with ADBC")
  981. )
  982. conn = request.getfixturevalue(conn)
  983. iris_frame = concat(read_sql_table("iris", conn, chunksize=7))
  984. check_iris_frame(iris_frame)
  985. iris_frame = concat(pd.read_sql("iris", conn, chunksize=7))
  986. check_iris_frame(iris_frame)
  987. @pytest.mark.parametrize("conn", sqlalchemy_connectable)
  988. def test_to_sql_callable(conn, test_frame1, request):
  989. conn = request.getfixturevalue(conn)
  990. check = [] # used to double check function below is really being used
  991. def sample(pd_table, conn, keys, data_iter):
  992. check.append(1)
  993. data = [dict(zip(keys, row)) for row in data_iter]
  994. conn.execute(pd_table.table.insert(), data)
  995. with pandasSQL_builder(conn, need_transaction=True) as pandasSQL:
  996. pandasSQL.to_sql(test_frame1, "test_frame", method=sample)
  997. assert pandasSQL.has_table("test_frame")
  998. assert check == [1]
  999. assert count_rows(conn, "test_frame") == len(test_frame1)
  1000. @pytest.mark.parametrize("conn", all_connectable_types)
  1001. def test_default_type_conversion(conn, request):
  1002. conn_name = conn
  1003. if conn_name == "sqlite_buildin_types":
  1004. request.applymarker(
  1005. pytest.mark.xfail(
  1006. reason="sqlite_buildin connection does not implement read_sql_table"
  1007. )
  1008. )
  1009. conn = request.getfixturevalue(conn)
  1010. df = sql.read_sql_table("types", conn)
  1011. assert issubclass(df.FloatCol.dtype.type, np.floating)
  1012. assert issubclass(df.IntCol.dtype.type, np.integer)
  1013. # MySQL/sqlite has no real BOOL type
  1014. if "postgresql" in conn_name:
  1015. assert issubclass(df.BoolCol.dtype.type, np.bool_)
  1016. else:
  1017. assert issubclass(df.BoolCol.dtype.type, np.integer)
  1018. # Int column with NA values stays as float
  1019. assert issubclass(df.IntColWithNull.dtype.type, np.floating)
  1020. # Bool column with NA = int column with NA values => becomes float
  1021. if "postgresql" in conn_name:
  1022. assert issubclass(df.BoolColWithNull.dtype.type, object)
  1023. else:
  1024. assert issubclass(df.BoolColWithNull.dtype.type, np.floating)
  1025. @pytest.mark.parametrize("conn", mysql_connectable)
  1026. def test_read_procedure(conn, request):
  1027. conn = request.getfixturevalue(conn)
  1028. # GH 7324
  1029. # Although it is more an api test, it is added to the
  1030. # mysql tests as sqlite does not have stored procedures
  1031. from sqlalchemy import text
  1032. from sqlalchemy.engine import Engine
  1033. df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]})
  1034. df.to_sql(name="test_frame", con=conn, index=False)
  1035. proc = """DROP PROCEDURE IF EXISTS get_testdb;
  1036. CREATE PROCEDURE get_testdb ()
  1037. BEGIN
  1038. SELECT * FROM test_frame;
  1039. END"""
  1040. proc = text(proc)
  1041. if isinstance(conn, Engine):
  1042. with conn.connect() as engine_conn:
  1043. with engine_conn.begin():
  1044. engine_conn.execute(proc)
  1045. else:
  1046. with conn.begin():
  1047. conn.execute(proc)
  1048. res1 = sql.read_sql_query("CALL get_testdb();", conn)
  1049. tm.assert_frame_equal(df, res1)
  1050. # test delegation to read_sql_query
  1051. res2 = sql.read_sql("CALL get_testdb();", conn)
  1052. tm.assert_frame_equal(df, res2)
  1053. @pytest.mark.parametrize("conn", postgresql_connectable)
  1054. @pytest.mark.parametrize("expected_count", [2, "Success!"])
  1055. def test_copy_from_callable_insertion_method(conn, expected_count, request):
  1056. # GH 8953
  1057. # Example in io.rst found under _io.sql.method
  1058. # not available in sqlite, mysql
  1059. def psql_insert_copy(table, conn, keys, data_iter):
  1060. # gets a DBAPI connection that can provide a cursor
  1061. dbapi_conn = conn.connection
  1062. with dbapi_conn.cursor() as cur:
  1063. s_buf = StringIO()
  1064. writer = csv.writer(s_buf)
  1065. writer.writerows(data_iter)
  1066. s_buf.seek(0)
  1067. columns = ", ".join([f'"{k}"' for k in keys])
  1068. if table.schema:
  1069. table_name = f"{table.schema}.{table.name}"
  1070. else:
  1071. table_name = table.name
  1072. sql_query = f"COPY {table_name} ({columns}) FROM STDIN WITH CSV"
  1073. cur.copy_expert(sql=sql_query, file=s_buf)
  1074. return expected_count
  1075. conn = request.getfixturevalue(conn)
  1076. expected = DataFrame({"col1": [1, 2], "col2": [0.1, 0.2], "col3": ["a", "n"]})
  1077. result_count = expected.to_sql(
  1078. name="test_frame", con=conn, index=False, method=psql_insert_copy
  1079. )
  1080. # GH 46891
  1081. if expected_count is None:
  1082. assert result_count is None
  1083. else:
  1084. assert result_count == expected_count
  1085. result = sql.read_sql_table("test_frame", conn)
  1086. tm.assert_frame_equal(result, expected)
  1087. @pytest.mark.parametrize("conn", postgresql_connectable)
  1088. def test_insertion_method_on_conflict_do_nothing(conn, request):
  1089. # GH 15988: Example in to_sql docstring
  1090. conn = request.getfixturevalue(conn)
  1091. from sqlalchemy.dialects.postgresql import insert
  1092. from sqlalchemy.engine import Engine
  1093. from sqlalchemy.sql import text
  1094. def insert_on_conflict(table, conn, keys, data_iter):
  1095. data = [dict(zip(keys, row)) for row in data_iter]
  1096. stmt = (
  1097. insert(table.table)
  1098. .values(data)
  1099. .on_conflict_do_nothing(index_elements=["a"])
  1100. )
  1101. result = conn.execute(stmt)
  1102. return result.rowcount
  1103. create_sql = text(
  1104. """
  1105. CREATE TABLE test_insert_conflict (
  1106. a integer PRIMARY KEY,
  1107. b numeric,
  1108. c text
  1109. );
  1110. """
  1111. )
  1112. if isinstance(conn, Engine):
  1113. with conn.connect() as con:
  1114. with con.begin():
  1115. con.execute(create_sql)
  1116. else:
  1117. with conn.begin():
  1118. conn.execute(create_sql)
  1119. expected = DataFrame([[1, 2.1, "a"]], columns=list("abc"))
  1120. expected.to_sql(
  1121. name="test_insert_conflict", con=conn, if_exists="append", index=False
  1122. )
  1123. df_insert = DataFrame([[1, 3.2, "b"]], columns=list("abc"))
  1124. inserted = df_insert.to_sql(
  1125. name="test_insert_conflict",
  1126. con=conn,
  1127. index=False,
  1128. if_exists="append",
  1129. method=insert_on_conflict,
  1130. )
  1131. result = sql.read_sql_table("test_insert_conflict", conn)
  1132. tm.assert_frame_equal(result, expected)
  1133. assert inserted == 0
  1134. # Cleanup
  1135. with sql.SQLDatabase(conn, need_transaction=True) as pandasSQL:
  1136. pandasSQL.drop_table("test_insert_conflict")
  1137. @pytest.mark.parametrize("conn", all_connectable)
  1138. def test_to_sql_on_public_schema(conn, request):
  1139. if "sqlite" in conn or "mysql" in conn:
  1140. request.applymarker(
  1141. pytest.mark.xfail(
  1142. reason="test for public schema only specific to postgresql"
  1143. )
  1144. )
  1145. conn = request.getfixturevalue(conn)
  1146. test_data = DataFrame([[1, 2.1, "a"], [2, 3.1, "b"]], columns=list("abc"))
  1147. test_data.to_sql(
  1148. name="test_public_schema",
  1149. con=conn,
  1150. if_exists="append",
  1151. index=False,
  1152. schema="public",
  1153. )
  1154. df_out = sql.read_sql_table("test_public_schema", conn, schema="public")
  1155. tm.assert_frame_equal(test_data, df_out)
  1156. @pytest.mark.parametrize("conn", mysql_connectable)
  1157. def test_insertion_method_on_conflict_update(conn, request):
  1158. # GH 14553: Example in to_sql docstring
  1159. conn = request.getfixturevalue(conn)
  1160. from sqlalchemy.dialects.mysql import insert
  1161. from sqlalchemy.engine import Engine
  1162. from sqlalchemy.sql import text
  1163. def insert_on_conflict(table, conn, keys, data_iter):
  1164. data = [dict(zip(keys, row)) for row in data_iter]
  1165. stmt = insert(table.table).values(data)
  1166. stmt = stmt.on_duplicate_key_update(b=stmt.inserted.b, c=stmt.inserted.c)
  1167. result = conn.execute(stmt)
  1168. return result.rowcount
  1169. create_sql = text(
  1170. """
  1171. CREATE TABLE test_insert_conflict (
  1172. a INT PRIMARY KEY,
  1173. b FLOAT,
  1174. c VARCHAR(10)
  1175. );
  1176. """
  1177. )
  1178. if isinstance(conn, Engine):
  1179. with conn.connect() as con:
  1180. with con.begin():
  1181. con.execute(create_sql)
  1182. else:
  1183. with conn.begin():
  1184. conn.execute(create_sql)
  1185. df = DataFrame([[1, 2.1, "a"]], columns=list("abc"))
  1186. df.to_sql(name="test_insert_conflict", con=conn, if_exists="append", index=False)
  1187. expected = DataFrame([[1, 3.2, "b"]], columns=list("abc"))
  1188. inserted = expected.to_sql(
  1189. name="test_insert_conflict",
  1190. con=conn,
  1191. index=False,
  1192. if_exists="append",
  1193. method=insert_on_conflict,
  1194. )
  1195. result = sql.read_sql_table("test_insert_conflict", conn)
  1196. tm.assert_frame_equal(result, expected)
  1197. assert inserted == 2
  1198. # Cleanup
  1199. with sql.SQLDatabase(conn, need_transaction=True) as pandasSQL:
  1200. pandasSQL.drop_table("test_insert_conflict")
  1201. @pytest.mark.parametrize("conn", postgresql_connectable)
  1202. def test_read_view_postgres(conn, request):
  1203. # GH 52969
  1204. conn = request.getfixturevalue(conn)
  1205. from sqlalchemy.engine import Engine
  1206. from sqlalchemy.sql import text
  1207. table_name = f"group_{uuid.uuid4().hex}"
  1208. view_name = f"group_view_{uuid.uuid4().hex}"
  1209. sql_stmt = text(
  1210. f"""
  1211. CREATE TABLE {table_name} (
  1212. group_id INTEGER,
  1213. name TEXT
  1214. );
  1215. INSERT INTO {table_name} VALUES
  1216. (1, 'name');
  1217. CREATE VIEW {view_name}
  1218. AS
  1219. SELECT * FROM {table_name};
  1220. """
  1221. )
  1222. if isinstance(conn, Engine):
  1223. with conn.connect() as con:
  1224. with con.begin():
  1225. con.execute(sql_stmt)
  1226. else:
  1227. with conn.begin():
  1228. conn.execute(sql_stmt)
  1229. result = read_sql_table(view_name, conn)
  1230. expected = DataFrame({"group_id": [1], "name": "name"})
  1231. tm.assert_frame_equal(result, expected)
  1232. def test_read_view_sqlite(sqlite_buildin):
  1233. # GH 52969
  1234. create_table = """
  1235. CREATE TABLE groups (
  1236. group_id INTEGER,
  1237. name TEXT
  1238. );
  1239. """
  1240. insert_into = """
  1241. INSERT INTO groups VALUES
  1242. (1, 'name');
  1243. """
  1244. create_view = """
  1245. CREATE VIEW group_view
  1246. AS
  1247. SELECT * FROM groups;
  1248. """
  1249. sqlite_buildin.execute(create_table)
  1250. sqlite_buildin.execute(insert_into)
  1251. sqlite_buildin.execute(create_view)
  1252. result = pd.read_sql("SELECT * FROM group_view", sqlite_buildin)
  1253. expected = DataFrame({"group_id": [1], "name": "name"})
  1254. tm.assert_frame_equal(result, expected)
  1255. def test_execute_typeerror(sqlite_engine_iris):
  1256. with pytest.raises(TypeError, match="pandas.io.sql.execute requires a connection"):
  1257. with tm.assert_produces_warning(
  1258. FutureWarning,
  1259. match="`pandas.io.sql.execute` is deprecated and "
  1260. "will be removed in the future version.",
  1261. ):
  1262. sql.execute("select * from iris", sqlite_engine_iris)
  1263. def test_execute_deprecated(sqlite_conn_iris):
  1264. # GH50185
  1265. with tm.assert_produces_warning(
  1266. FutureWarning,
  1267. match="`pandas.io.sql.execute` is deprecated and "
  1268. "will be removed in the future version.",
  1269. ):
  1270. sql.execute("select * from iris", sqlite_conn_iris)
  1271. def flavor(conn_name):
  1272. if "postgresql" in conn_name:
  1273. return "postgresql"
  1274. elif "sqlite" in conn_name:
  1275. return "sqlite"
  1276. elif "mysql" in conn_name:
  1277. return "mysql"
  1278. raise ValueError(f"unsupported connection: {conn_name}")
  1279. @pytest.mark.parametrize("conn", all_connectable_iris)
  1280. def test_read_sql_iris_parameter(conn, request, sql_strings):
  1281. if "adbc" in conn:
  1282. request.node.add_marker(
  1283. pytest.mark.xfail(
  1284. reason="'params' not implemented for ADBC drivers",
  1285. strict=True,
  1286. )
  1287. )
  1288. conn_name = conn
  1289. conn = request.getfixturevalue(conn)
  1290. query = sql_strings["read_parameters"][flavor(conn_name)]
  1291. params = ("Iris-setosa", 5.1)
  1292. with pandasSQL_builder(conn) as pandasSQL:
  1293. with pandasSQL.run_transaction():
  1294. iris_frame = pandasSQL.read_query(query, params=params)
  1295. check_iris_frame(iris_frame)
  1296. @pytest.mark.parametrize("conn", all_connectable_iris)
  1297. def test_read_sql_iris_named_parameter(conn, request, sql_strings):
  1298. if "adbc" in conn:
  1299. request.node.add_marker(
  1300. pytest.mark.xfail(
  1301. reason="'params' not implemented for ADBC drivers",
  1302. strict=True,
  1303. )
  1304. )
  1305. conn_name = conn
  1306. conn = request.getfixturevalue(conn)
  1307. query = sql_strings["read_named_parameters"][flavor(conn_name)]
  1308. params = {"name": "Iris-setosa", "length": 5.1}
  1309. with pandasSQL_builder(conn) as pandasSQL:
  1310. with pandasSQL.run_transaction():
  1311. iris_frame = pandasSQL.read_query(query, params=params)
  1312. check_iris_frame(iris_frame)
  1313. @pytest.mark.parametrize("conn", all_connectable_iris)
  1314. def test_read_sql_iris_no_parameter_with_percent(conn, request, sql_strings):
  1315. if "mysql" in conn or ("postgresql" in conn and "adbc" not in conn):
  1316. request.applymarker(pytest.mark.xfail(reason="broken test"))
  1317. conn_name = conn
  1318. conn = request.getfixturevalue(conn)
  1319. query = sql_strings["read_no_parameters_with_percent"][flavor(conn_name)]
  1320. with pandasSQL_builder(conn) as pandasSQL:
  1321. with pandasSQL.run_transaction():
  1322. iris_frame = pandasSQL.read_query(query, params=None)
  1323. check_iris_frame(iris_frame)
  1324. # -----------------------------------------------------------------------------
  1325. # -- Testing the public API
  1326. @pytest.mark.parametrize("conn", all_connectable_iris)
  1327. def test_api_read_sql_view(conn, request):
  1328. conn = request.getfixturevalue(conn)
  1329. iris_frame = sql.read_sql_query("SELECT * FROM iris_view", conn)
  1330. check_iris_frame(iris_frame)
  1331. @pytest.mark.parametrize("conn", all_connectable_iris)
  1332. def test_api_read_sql_with_chunksize_no_result(conn, request):
  1333. if "adbc" in conn:
  1334. request.node.add_marker(
  1335. pytest.mark.xfail(reason="chunksize argument NotImplemented with ADBC")
  1336. )
  1337. conn = request.getfixturevalue(conn)
  1338. query = 'SELECT * FROM iris_view WHERE "SepalLength" < 0.0'
  1339. with_batch = sql.read_sql_query(query, conn, chunksize=5)
  1340. without_batch = sql.read_sql_query(query, conn)
  1341. tm.assert_frame_equal(concat(with_batch), without_batch)
  1342. @pytest.mark.parametrize("conn", all_connectable)
  1343. def test_api_to_sql(conn, request, test_frame1):
  1344. conn = request.getfixturevalue(conn)
  1345. if sql.has_table("test_frame1", conn):
  1346. with sql.SQLDatabase(conn, need_transaction=True) as pandasSQL:
  1347. pandasSQL.drop_table("test_frame1")
  1348. sql.to_sql(test_frame1, "test_frame1", conn)
  1349. assert sql.has_table("test_frame1", conn)
  1350. @pytest.mark.parametrize("conn", all_connectable)
  1351. def test_api_to_sql_fail(conn, request, test_frame1):
  1352. conn = request.getfixturevalue(conn)
  1353. if sql.has_table("test_frame2", conn):
  1354. with sql.SQLDatabase(conn, need_transaction=True) as pandasSQL:
  1355. pandasSQL.drop_table("test_frame2")
  1356. sql.to_sql(test_frame1, "test_frame2", conn, if_exists="fail")
  1357. assert sql.has_table("test_frame2", conn)
  1358. msg = "Table 'test_frame2' already exists"
  1359. with pytest.raises(ValueError, match=msg):
  1360. sql.to_sql(test_frame1, "test_frame2", conn, if_exists="fail")
  1361. @pytest.mark.parametrize("conn", all_connectable)
  1362. def test_api_to_sql_replace(conn, request, test_frame1):
  1363. conn = request.getfixturevalue(conn)
  1364. if sql.has_table("test_frame3", conn):
  1365. with sql.SQLDatabase(conn, need_transaction=True) as pandasSQL:
  1366. pandasSQL.drop_table("test_frame3")
  1367. sql.to_sql(test_frame1, "test_frame3", conn, if_exists="fail")
  1368. # Add to table again
  1369. sql.to_sql(test_frame1, "test_frame3", conn, if_exists="replace")
  1370. assert sql.has_table("test_frame3", conn)
  1371. num_entries = len(test_frame1)
  1372. num_rows = count_rows(conn, "test_frame3")
  1373. assert num_rows == num_entries
  1374. @pytest.mark.parametrize("conn", all_connectable)
  1375. def test_api_to_sql_append(conn, request, test_frame1):
  1376. conn = request.getfixturevalue(conn)
  1377. if sql.has_table("test_frame4", conn):
  1378. with sql.SQLDatabase(conn, need_transaction=True) as pandasSQL:
  1379. pandasSQL.drop_table("test_frame4")
  1380. assert sql.to_sql(test_frame1, "test_frame4", conn, if_exists="fail") == 4
  1381. # Add to table again
  1382. assert sql.to_sql(test_frame1, "test_frame4", conn, if_exists="append") == 4
  1383. assert sql.has_table("test_frame4", conn)
  1384. num_entries = 2 * len(test_frame1)
  1385. num_rows = count_rows(conn, "test_frame4")
  1386. assert num_rows == num_entries
  1387. @pytest.mark.parametrize("conn", all_connectable)
  1388. def test_api_to_sql_type_mapping(conn, request, test_frame3):
  1389. conn = request.getfixturevalue(conn)
  1390. if sql.has_table("test_frame5", conn):
  1391. with sql.SQLDatabase(conn, need_transaction=True) as pandasSQL:
  1392. pandasSQL.drop_table("test_frame5")
  1393. sql.to_sql(test_frame3, "test_frame5", conn, index=False)
  1394. result = sql.read_sql("SELECT * FROM test_frame5", conn)
  1395. tm.assert_frame_equal(test_frame3, result)
  1396. @pytest.mark.parametrize("conn", all_connectable)
  1397. def test_api_to_sql_series(conn, request):
  1398. conn = request.getfixturevalue(conn)
  1399. if sql.has_table("test_series", conn):
  1400. with sql.SQLDatabase(conn, need_transaction=True) as pandasSQL:
  1401. pandasSQL.drop_table("test_series")
  1402. s = Series(np.arange(5, dtype="int64"), name="series")
  1403. sql.to_sql(s, "test_series", conn, index=False)
  1404. s2 = sql.read_sql_query("SELECT * FROM test_series", conn)
  1405. tm.assert_frame_equal(s.to_frame(), s2)
  1406. @pytest.mark.parametrize("conn", all_connectable)
  1407. def test_api_roundtrip(conn, request, test_frame1):
  1408. conn_name = conn
  1409. conn = request.getfixturevalue(conn)
  1410. if sql.has_table("test_frame_roundtrip", conn):
  1411. with sql.SQLDatabase(conn, need_transaction=True) as pandasSQL:
  1412. pandasSQL.drop_table("test_frame_roundtrip")
  1413. sql.to_sql(test_frame1, "test_frame_roundtrip", con=conn)
  1414. result = sql.read_sql_query("SELECT * FROM test_frame_roundtrip", con=conn)
  1415. # HACK!
  1416. if "adbc" in conn_name:
  1417. result = result.rename(columns={"__index_level_0__": "level_0"})
  1418. result.index = test_frame1.index
  1419. result.set_index("level_0", inplace=True)
  1420. result.index.astype(int)
  1421. result.index.name = None
  1422. tm.assert_frame_equal(result, test_frame1)
  1423. @pytest.mark.parametrize("conn", all_connectable)
  1424. def test_api_roundtrip_chunksize(conn, request, test_frame1):
  1425. if "adbc" in conn:
  1426. request.node.add_marker(
  1427. pytest.mark.xfail(reason="chunksize argument NotImplemented with ADBC")
  1428. )
  1429. conn = request.getfixturevalue(conn)
  1430. if sql.has_table("test_frame_roundtrip", conn):
  1431. with sql.SQLDatabase(conn, need_transaction=True) as pandasSQL:
  1432. pandasSQL.drop_table("test_frame_roundtrip")
  1433. sql.to_sql(
  1434. test_frame1,
  1435. "test_frame_roundtrip",
  1436. con=conn,
  1437. index=False,
  1438. chunksize=2,
  1439. )
  1440. result = sql.read_sql_query("SELECT * FROM test_frame_roundtrip", con=conn)
  1441. tm.assert_frame_equal(result, test_frame1)
  1442. @pytest.mark.parametrize("conn", all_connectable_iris)
  1443. def test_api_execute_sql(conn, request):
  1444. # drop_sql = "DROP TABLE IF EXISTS test" # should already be done
  1445. conn = request.getfixturevalue(conn)
  1446. with sql.pandasSQL_builder(conn) as pandas_sql:
  1447. iris_results = pandas_sql.execute("SELECT * FROM iris")
  1448. row = iris_results.fetchone()
  1449. iris_results.close()
  1450. assert list(row) == [5.1, 3.5, 1.4, 0.2, "Iris-setosa"]
  1451. @pytest.mark.parametrize("conn", all_connectable_types)
  1452. def test_api_date_parsing(conn, request):
  1453. conn_name = conn
  1454. conn = request.getfixturevalue(conn)
  1455. # Test date parsing in read_sql
  1456. # No Parsing
  1457. df = sql.read_sql_query("SELECT * FROM types", conn)
  1458. if not ("mysql" in conn_name or "postgres" in conn_name):
  1459. assert not issubclass(df.DateCol.dtype.type, np.datetime64)
  1460. df = sql.read_sql_query("SELECT * FROM types", conn, parse_dates=["DateCol"])
  1461. assert issubclass(df.DateCol.dtype.type, np.datetime64)
  1462. assert df.DateCol.tolist() == [
  1463. Timestamp(2000, 1, 3, 0, 0, 0),
  1464. Timestamp(2000, 1, 4, 0, 0, 0),
  1465. ]
  1466. df = sql.read_sql_query(
  1467. "SELECT * FROM types",
  1468. conn,
  1469. parse_dates={"DateCol": "%Y-%m-%d %H:%M:%S"},
  1470. )
  1471. assert issubclass(df.DateCol.dtype.type, np.datetime64)
  1472. assert df.DateCol.tolist() == [
  1473. Timestamp(2000, 1, 3, 0, 0, 0),
  1474. Timestamp(2000, 1, 4, 0, 0, 0),
  1475. ]
  1476. df = sql.read_sql_query("SELECT * FROM types", conn, parse_dates=["IntDateCol"])
  1477. assert issubclass(df.IntDateCol.dtype.type, np.datetime64)
  1478. assert df.IntDateCol.tolist() == [
  1479. Timestamp(1986, 12, 25, 0, 0, 0),
  1480. Timestamp(2013, 1, 1, 0, 0, 0),
  1481. ]
  1482. df = sql.read_sql_query(
  1483. "SELECT * FROM types", conn, parse_dates={"IntDateCol": "s"}
  1484. )
  1485. assert issubclass(df.IntDateCol.dtype.type, np.datetime64)
  1486. assert df.IntDateCol.tolist() == [
  1487. Timestamp(1986, 12, 25, 0, 0, 0),
  1488. Timestamp(2013, 1, 1, 0, 0, 0),
  1489. ]
  1490. df = sql.read_sql_query(
  1491. "SELECT * FROM types",
  1492. conn,
  1493. parse_dates={"IntDateOnlyCol": "%Y%m%d"},
  1494. )
  1495. assert issubclass(df.IntDateOnlyCol.dtype.type, np.datetime64)
  1496. assert df.IntDateOnlyCol.tolist() == [
  1497. Timestamp("2010-10-10"),
  1498. Timestamp("2010-12-12"),
  1499. ]
  1500. @pytest.mark.parametrize("conn", all_connectable_types)
  1501. @pytest.mark.parametrize("error", ["ignore", "raise", "coerce"])
  1502. @pytest.mark.parametrize(
  1503. "read_sql, text, mode",
  1504. [
  1505. (sql.read_sql, "SELECT * FROM types", ("sqlalchemy", "fallback")),
  1506. (sql.read_sql, "types", ("sqlalchemy")),
  1507. (
  1508. sql.read_sql_query,
  1509. "SELECT * FROM types",
  1510. ("sqlalchemy", "fallback"),
  1511. ),
  1512. (sql.read_sql_table, "types", ("sqlalchemy")),
  1513. ],
  1514. )
  1515. def test_api_custom_dateparsing_error(
  1516. conn, request, read_sql, text, mode, error, types_data_frame
  1517. ):
  1518. conn_name = conn
  1519. conn = request.getfixturevalue(conn)
  1520. if text == "types" and conn_name == "sqlite_buildin_types":
  1521. request.applymarker(
  1522. pytest.mark.xfail(reason="failing combination of arguments")
  1523. )
  1524. expected = types_data_frame.astype({"DateCol": "datetime64[ns]"})
  1525. result = read_sql(
  1526. text,
  1527. con=conn,
  1528. parse_dates={
  1529. "DateCol": {"errors": error},
  1530. },
  1531. )
  1532. if "postgres" in conn_name:
  1533. # TODO: clean up types_data_frame fixture
  1534. result["BoolCol"] = result["BoolCol"].astype(int)
  1535. result["BoolColWithNull"] = result["BoolColWithNull"].astype(float)
  1536. if conn_name == "postgresql_adbc_types":
  1537. expected = expected.astype(
  1538. {
  1539. "IntDateCol": "int32",
  1540. "IntDateOnlyCol": "int32",
  1541. "IntCol": "int32",
  1542. }
  1543. )
  1544. if not pa_version_under13p0:
  1545. # TODO: is this astype safe?
  1546. expected["DateCol"] = expected["DateCol"].astype("datetime64[us]")
  1547. tm.assert_frame_equal(result, expected)
  1548. @pytest.mark.parametrize("conn", all_connectable_types)
  1549. def test_api_date_and_index(conn, request):
  1550. # Test case where same column appears in parse_date and index_col
  1551. conn = request.getfixturevalue(conn)
  1552. df = sql.read_sql_query(
  1553. "SELECT * FROM types",
  1554. conn,
  1555. index_col="DateCol",
  1556. parse_dates=["DateCol", "IntDateCol"],
  1557. )
  1558. assert issubclass(df.index.dtype.type, np.datetime64)
  1559. assert issubclass(df.IntDateCol.dtype.type, np.datetime64)
  1560. @pytest.mark.parametrize("conn", all_connectable)
  1561. def test_api_timedelta(conn, request):
  1562. # see #6921
  1563. conn_name = conn
  1564. conn = request.getfixturevalue(conn)
  1565. if sql.has_table("test_timedelta", conn):
  1566. with sql.SQLDatabase(conn, need_transaction=True) as pandasSQL:
  1567. pandasSQL.drop_table("test_timedelta")
  1568. df = to_timedelta(Series(["00:00:01", "00:00:03"], name="foo")).to_frame()
  1569. if conn_name == "sqlite_adbc_conn":
  1570. request.node.add_marker(
  1571. pytest.mark.xfail(
  1572. reason="sqlite ADBC driver doesn't implement timedelta",
  1573. )
  1574. )
  1575. if "adbc" in conn_name:
  1576. if pa_version_under14p1:
  1577. exp_warning = DeprecationWarning
  1578. else:
  1579. exp_warning = None
  1580. else:
  1581. exp_warning = UserWarning
  1582. with tm.assert_produces_warning(exp_warning, check_stacklevel=False):
  1583. result_count = df.to_sql(name="test_timedelta", con=conn)
  1584. assert result_count == 2
  1585. result = sql.read_sql_query("SELECT * FROM test_timedelta", conn)
  1586. if conn_name == "postgresql_adbc_conn":
  1587. # TODO: Postgres stores an INTERVAL, which ADBC reads as a Month-Day-Nano
  1588. # Interval; the default pandas type mapper maps this to a DateOffset
  1589. # but maybe we should try and restore the timedelta here?
  1590. expected = Series(
  1591. [
  1592. pd.DateOffset(months=0, days=0, microseconds=1000000, nanoseconds=0),
  1593. pd.DateOffset(months=0, days=0, microseconds=3000000, nanoseconds=0),
  1594. ],
  1595. name="foo",
  1596. )
  1597. else:
  1598. expected = df["foo"].astype("int64")
  1599. tm.assert_series_equal(result["foo"], expected)
  1600. @pytest.mark.parametrize("conn", all_connectable)
  1601. def test_api_complex_raises(conn, request):
  1602. conn_name = conn
  1603. conn = request.getfixturevalue(conn)
  1604. df = DataFrame({"a": [1 + 1j, 2j]})
  1605. if "adbc" in conn_name:
  1606. msg = "datatypes not supported"
  1607. else:
  1608. msg = "Complex datatypes not supported"
  1609. with pytest.raises(ValueError, match=msg):
  1610. assert df.to_sql("test_complex", con=conn) is None
  1611. @pytest.mark.parametrize("conn", all_connectable)
  1612. @pytest.mark.parametrize(
  1613. "index_name,index_label,expected",
  1614. [
  1615. # no index name, defaults to 'index'
  1616. (None, None, "index"),
  1617. # specifying index_label
  1618. (None, "other_label", "other_label"),
  1619. # using the index name
  1620. ("index_name", None, "index_name"),
  1621. # has index name, but specifying index_label
  1622. ("index_name", "other_label", "other_label"),
  1623. # index name is integer
  1624. (0, None, "0"),
  1625. # index name is None but index label is integer
  1626. (None, 0, "0"),
  1627. ],
  1628. )
  1629. def test_api_to_sql_index_label(conn, request, index_name, index_label, expected):
  1630. if "adbc" in conn:
  1631. request.node.add_marker(
  1632. pytest.mark.xfail(reason="index_label argument NotImplemented with ADBC")
  1633. )
  1634. conn = request.getfixturevalue(conn)
  1635. if sql.has_table("test_index_label", conn):
  1636. with sql.SQLDatabase(conn, need_transaction=True) as pandasSQL:
  1637. pandasSQL.drop_table("test_index_label")
  1638. temp_frame = DataFrame({"col1": range(4)})
  1639. temp_frame.index.name = index_name
  1640. query = "SELECT * FROM test_index_label"
  1641. sql.to_sql(temp_frame, "test_index_label", conn, index_label=index_label)
  1642. frame = sql.read_sql_query(query, conn)
  1643. assert frame.columns[0] == expected
  1644. @pytest.mark.parametrize("conn", all_connectable)
  1645. def test_api_to_sql_index_label_multiindex(conn, request):
  1646. conn_name = conn
  1647. if "mysql" in conn_name:
  1648. request.applymarker(
  1649. pytest.mark.xfail(
  1650. reason="MySQL can fail using TEXT without length as key", strict=False
  1651. )
  1652. )
  1653. elif "adbc" in conn_name:
  1654. request.node.add_marker(
  1655. pytest.mark.xfail(reason="index_label argument NotImplemented with ADBC")
  1656. )
  1657. conn = request.getfixturevalue(conn)
  1658. if sql.has_table("test_index_label", conn):
  1659. with sql.SQLDatabase(conn, need_transaction=True) as pandasSQL:
  1660. pandasSQL.drop_table("test_index_label")
  1661. expected_row_count = 4
  1662. temp_frame = DataFrame(
  1663. {"col1": range(4)},
  1664. index=MultiIndex.from_product([("A0", "A1"), ("B0", "B1")]),
  1665. )
  1666. # no index name, defaults to 'level_0' and 'level_1'
  1667. result = sql.to_sql(temp_frame, "test_index_label", conn)
  1668. assert result == expected_row_count
  1669. frame = sql.read_sql_query("SELECT * FROM test_index_label", conn)
  1670. assert frame.columns[0] == "level_0"
  1671. assert frame.columns[1] == "level_1"
  1672. # specifying index_label
  1673. result = sql.to_sql(
  1674. temp_frame,
  1675. "test_index_label",
  1676. conn,
  1677. if_exists="replace",
  1678. index_label=["A", "B"],
  1679. )
  1680. assert result == expected_row_count
  1681. frame = sql.read_sql_query("SELECT * FROM test_index_label", conn)
  1682. assert frame.columns[:2].tolist() == ["A", "B"]
  1683. # using the index name
  1684. temp_frame.index.names = ["A", "B"]
  1685. result = sql.to_sql(temp_frame, "test_index_label", conn, if_exists="replace")
  1686. assert result == expected_row_count
  1687. frame = sql.read_sql_query("SELECT * FROM test_index_label", conn)
  1688. assert frame.columns[:2].tolist() == ["A", "B"]
  1689. # has index name, but specifying index_label
  1690. result = sql.to_sql(
  1691. temp_frame,
  1692. "test_index_label",
  1693. conn,
  1694. if_exists="replace",
  1695. index_label=["C", "D"],
  1696. )
  1697. assert result == expected_row_count
  1698. frame = sql.read_sql_query("SELECT * FROM test_index_label", conn)
  1699. assert frame.columns[:2].tolist() == ["C", "D"]
  1700. msg = "Length of 'index_label' should match number of levels, which is 2"
  1701. with pytest.raises(ValueError, match=msg):
  1702. sql.to_sql(
  1703. temp_frame,
  1704. "test_index_label",
  1705. conn,
  1706. if_exists="replace",
  1707. index_label="C",
  1708. )
  1709. @pytest.mark.parametrize("conn", all_connectable)
  1710. def test_api_multiindex_roundtrip(conn, request):
  1711. conn = request.getfixturevalue(conn)
  1712. if sql.has_table("test_multiindex_roundtrip", conn):
  1713. with sql.SQLDatabase(conn, need_transaction=True) as pandasSQL:
  1714. pandasSQL.drop_table("test_multiindex_roundtrip")
  1715. df = DataFrame.from_records(
  1716. [(1, 2.1, "line1"), (2, 1.5, "line2")],
  1717. columns=["A", "B", "C"],
  1718. index=["A", "B"],
  1719. )
  1720. df.to_sql(name="test_multiindex_roundtrip", con=conn)
  1721. result = sql.read_sql_query(
  1722. "SELECT * FROM test_multiindex_roundtrip", conn, index_col=["A", "B"]
  1723. )
  1724. tm.assert_frame_equal(df, result, check_index_type=True)
  1725. @pytest.mark.parametrize("conn", all_connectable)
  1726. @pytest.mark.parametrize(
  1727. "dtype",
  1728. [
  1729. None,
  1730. int,
  1731. float,
  1732. {"A": int, "B": float},
  1733. ],
  1734. )
  1735. def test_api_dtype_argument(conn, request, dtype):
  1736. # GH10285 Add dtype argument to read_sql_query
  1737. conn_name = conn
  1738. conn = request.getfixturevalue(conn)
  1739. if sql.has_table("test_dtype_argument", conn):
  1740. with sql.SQLDatabase(conn, need_transaction=True) as pandasSQL:
  1741. pandasSQL.drop_table("test_dtype_argument")
  1742. df = DataFrame([[1.2, 3.4], [5.6, 7.8]], columns=["A", "B"])
  1743. assert df.to_sql(name="test_dtype_argument", con=conn) == 2
  1744. expected = df.astype(dtype)
  1745. if "postgres" in conn_name:
  1746. query = 'SELECT "A", "B" FROM test_dtype_argument'
  1747. else:
  1748. query = "SELECT A, B FROM test_dtype_argument"
  1749. result = sql.read_sql_query(query, con=conn, dtype=dtype)
  1750. tm.assert_frame_equal(result, expected)
  1751. @pytest.mark.parametrize("conn", all_connectable)
  1752. def test_api_integer_col_names(conn, request):
  1753. conn = request.getfixturevalue(conn)
  1754. df = DataFrame([[1, 2], [3, 4]], columns=[0, 1])
  1755. sql.to_sql(df, "test_frame_integer_col_names", conn, if_exists="replace")
  1756. @pytest.mark.parametrize("conn", all_connectable)
  1757. def test_api_get_schema(conn, request, test_frame1):
  1758. if "adbc" in conn:
  1759. request.node.add_marker(
  1760. pytest.mark.xfail(
  1761. reason="'get_schema' not implemented for ADBC drivers",
  1762. strict=True,
  1763. )
  1764. )
  1765. conn = request.getfixturevalue(conn)
  1766. create_sql = sql.get_schema(test_frame1, "test", con=conn)
  1767. assert "CREATE" in create_sql
  1768. @pytest.mark.parametrize("conn", all_connectable)
  1769. def test_api_get_schema_with_schema(conn, request, test_frame1):
  1770. # GH28486
  1771. if "adbc" in conn:
  1772. request.node.add_marker(
  1773. pytest.mark.xfail(
  1774. reason="'get_schema' not implemented for ADBC drivers",
  1775. strict=True,
  1776. )
  1777. )
  1778. conn = request.getfixturevalue(conn)
  1779. create_sql = sql.get_schema(test_frame1, "test", con=conn, schema="pypi")
  1780. assert "CREATE TABLE pypi." in create_sql
  1781. @pytest.mark.parametrize("conn", all_connectable)
  1782. def test_api_get_schema_dtypes(conn, request):
  1783. if "adbc" in conn:
  1784. request.node.add_marker(
  1785. pytest.mark.xfail(
  1786. reason="'get_schema' not implemented for ADBC drivers",
  1787. strict=True,
  1788. )
  1789. )
  1790. conn_name = conn
  1791. conn = request.getfixturevalue(conn)
  1792. float_frame = DataFrame({"a": [1.1, 1.2], "b": [2.1, 2.2]})
  1793. if conn_name == "sqlite_buildin":
  1794. dtype = "INTEGER"
  1795. else:
  1796. from sqlalchemy import Integer
  1797. dtype = Integer
  1798. create_sql = sql.get_schema(float_frame, "test", con=conn, dtype={"b": dtype})
  1799. assert "CREATE" in create_sql
  1800. assert "INTEGER" in create_sql
  1801. @pytest.mark.parametrize("conn", all_connectable)
  1802. def test_api_get_schema_keys(conn, request, test_frame1):
  1803. if "adbc" in conn:
  1804. request.node.add_marker(
  1805. pytest.mark.xfail(
  1806. reason="'get_schema' not implemented for ADBC drivers",
  1807. strict=True,
  1808. )
  1809. )
  1810. conn_name = conn
  1811. conn = request.getfixturevalue(conn)
  1812. frame = DataFrame({"Col1": [1.1, 1.2], "Col2": [2.1, 2.2]})
  1813. create_sql = sql.get_schema(frame, "test", con=conn, keys="Col1")
  1814. if "mysql" in conn_name:
  1815. constraint_sentence = "CONSTRAINT test_pk PRIMARY KEY (`Col1`)"
  1816. else:
  1817. constraint_sentence = 'CONSTRAINT test_pk PRIMARY KEY ("Col1")'
  1818. assert constraint_sentence in create_sql
  1819. # multiple columns as key (GH10385)
  1820. create_sql = sql.get_schema(test_frame1, "test", con=conn, keys=["A", "B"])
  1821. if "mysql" in conn_name:
  1822. constraint_sentence = "CONSTRAINT test_pk PRIMARY KEY (`A`, `B`)"
  1823. else:
  1824. constraint_sentence = 'CONSTRAINT test_pk PRIMARY KEY ("A", "B")'
  1825. assert constraint_sentence in create_sql
  1826. @pytest.mark.parametrize("conn", all_connectable)
  1827. def test_api_chunksize_read(conn, request):
  1828. if "adbc" in conn:
  1829. request.node.add_marker(
  1830. pytest.mark.xfail(reason="chunksize argument NotImplemented with ADBC")
  1831. )
  1832. conn_name = conn
  1833. conn = request.getfixturevalue(conn)
  1834. if sql.has_table("test_chunksize", conn):
  1835. with sql.SQLDatabase(conn, need_transaction=True) as pandasSQL:
  1836. pandasSQL.drop_table("test_chunksize")
  1837. df = DataFrame(
  1838. np.random.default_rng(2).standard_normal((22, 5)), columns=list("abcde")
  1839. )
  1840. df.to_sql(name="test_chunksize", con=conn, index=False)
  1841. # reading the query in one time
  1842. res1 = sql.read_sql_query("select * from test_chunksize", conn)
  1843. # reading the query in chunks with read_sql_query
  1844. res2 = DataFrame()
  1845. i = 0
  1846. sizes = [5, 5, 5, 5, 2]
  1847. for chunk in sql.read_sql_query("select * from test_chunksize", conn, chunksize=5):
  1848. res2 = concat([res2, chunk], ignore_index=True)
  1849. assert len(chunk) == sizes[i]
  1850. i += 1
  1851. tm.assert_frame_equal(res1, res2)
  1852. # reading the query in chunks with read_sql_query
  1853. if conn_name == "sqlite_buildin":
  1854. with pytest.raises(NotImplementedError, match=""):
  1855. sql.read_sql_table("test_chunksize", conn, chunksize=5)
  1856. else:
  1857. res3 = DataFrame()
  1858. i = 0
  1859. sizes = [5, 5, 5, 5, 2]
  1860. for chunk in sql.read_sql_table("test_chunksize", conn, chunksize=5):
  1861. res3 = concat([res3, chunk], ignore_index=True)
  1862. assert len(chunk) == sizes[i]
  1863. i += 1
  1864. tm.assert_frame_equal(res1, res3)
  1865. @pytest.mark.parametrize("conn", all_connectable)
  1866. def test_api_categorical(conn, request):
  1867. if conn == "postgresql_adbc_conn":
  1868. adbc = import_optional_dependency("adbc_driver_postgresql", errors="ignore")
  1869. if adbc is not None and Version(adbc.__version__) < Version("0.9.0"):
  1870. request.node.add_marker(
  1871. pytest.mark.xfail(
  1872. reason="categorical dtype not implemented for ADBC postgres driver",
  1873. strict=True,
  1874. )
  1875. )
  1876. # GH8624
  1877. # test that categorical gets written correctly as dense column
  1878. conn = request.getfixturevalue(conn)
  1879. if sql.has_table("test_categorical", conn):
  1880. with sql.SQLDatabase(conn, need_transaction=True) as pandasSQL:
  1881. pandasSQL.drop_table("test_categorical")
  1882. df = DataFrame(
  1883. {
  1884. "person_id": [1, 2, 3],
  1885. "person_name": ["John P. Doe", "Jane Dove", "John P. Doe"],
  1886. }
  1887. )
  1888. df2 = df.copy()
  1889. df2["person_name"] = df2["person_name"].astype("category")
  1890. df2.to_sql(name="test_categorical", con=conn, index=False)
  1891. res = sql.read_sql_query("SELECT * FROM test_categorical", conn)
  1892. tm.assert_frame_equal(res, df)
  1893. @pytest.mark.parametrize("conn", all_connectable)
  1894. def test_api_unicode_column_name(conn, request):
  1895. # GH 11431
  1896. conn = request.getfixturevalue(conn)
  1897. if sql.has_table("test_unicode", conn):
  1898. with sql.SQLDatabase(conn, need_transaction=True) as pandasSQL:
  1899. pandasSQL.drop_table("test_unicode")
  1900. df = DataFrame([[1, 2], [3, 4]], columns=["\xe9", "b"])
  1901. df.to_sql(name="test_unicode", con=conn, index=False)
  1902. @pytest.mark.parametrize("conn", all_connectable)
  1903. def test_api_escaped_table_name(conn, request):
  1904. # GH 13206
  1905. conn_name = conn
  1906. conn = request.getfixturevalue(conn)
  1907. if sql.has_table("d1187b08-4943-4c8d-a7f6", conn):
  1908. with sql.SQLDatabase(conn, need_transaction=True) as pandasSQL:
  1909. pandasSQL.drop_table("d1187b08-4943-4c8d-a7f6")
  1910. df = DataFrame({"A": [0, 1, 2], "B": [0.2, np.nan, 5.6]})
  1911. df.to_sql(name="d1187b08-4943-4c8d-a7f6", con=conn, index=False)
  1912. if "postgres" in conn_name:
  1913. query = 'SELECT * FROM "d1187b08-4943-4c8d-a7f6"'
  1914. else:
  1915. query = "SELECT * FROM `d1187b08-4943-4c8d-a7f6`"
  1916. res = sql.read_sql_query(query, conn)
  1917. tm.assert_frame_equal(res, df)
  1918. @pytest.mark.parametrize("conn", all_connectable)
  1919. def test_api_read_sql_duplicate_columns(conn, request):
  1920. # GH#53117
  1921. if "adbc" in conn:
  1922. pa = pytest.importorskip("pyarrow")
  1923. if not (
  1924. Version(pa.__version__) >= Version("16.0")
  1925. and conn in ["sqlite_adbc_conn", "postgresql_adbc_conn"]
  1926. ):
  1927. request.node.add_marker(
  1928. pytest.mark.xfail(
  1929. reason="pyarrow->pandas throws ValueError", strict=True
  1930. )
  1931. )
  1932. conn = request.getfixturevalue(conn)
  1933. if sql.has_table("test_table", conn):
  1934. with sql.SQLDatabase(conn, need_transaction=True) as pandasSQL:
  1935. pandasSQL.drop_table("test_table")
  1936. df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3], "c": 1})
  1937. df.to_sql(name="test_table", con=conn, index=False)
  1938. result = pd.read_sql("SELECT a, b, a +1 as a, c FROM test_table", conn)
  1939. expected = DataFrame(
  1940. [[1, 0.1, 2, 1], [2, 0.2, 3, 1], [3, 0.3, 4, 1]],
  1941. columns=["a", "b", "a", "c"],
  1942. )
  1943. tm.assert_frame_equal(result, expected)
  1944. @pytest.mark.parametrize("conn", all_connectable)
  1945. def test_read_table_columns(conn, request, test_frame1):
  1946. # test columns argument in read_table
  1947. conn_name = conn
  1948. if conn_name == "sqlite_buildin":
  1949. request.applymarker(pytest.mark.xfail(reason="Not Implemented"))
  1950. conn = request.getfixturevalue(conn)
  1951. sql.to_sql(test_frame1, "test_frame", conn)
  1952. cols = ["A", "B"]
  1953. result = sql.read_sql_table("test_frame", conn, columns=cols)
  1954. assert result.columns.tolist() == cols
  1955. @pytest.mark.parametrize("conn", all_connectable)
  1956. def test_read_table_index_col(conn, request, test_frame1):
  1957. # test columns argument in read_table
  1958. conn_name = conn
  1959. if conn_name == "sqlite_buildin":
  1960. request.applymarker(pytest.mark.xfail(reason="Not Implemented"))
  1961. conn = request.getfixturevalue(conn)
  1962. sql.to_sql(test_frame1, "test_frame", conn)
  1963. result = sql.read_sql_table("test_frame", conn, index_col="index")
  1964. assert result.index.names == ["index"]
  1965. result = sql.read_sql_table("test_frame", conn, index_col=["A", "B"])
  1966. assert result.index.names == ["A", "B"]
  1967. result = sql.read_sql_table(
  1968. "test_frame", conn, index_col=["A", "B"], columns=["C", "D"]
  1969. )
  1970. assert result.index.names == ["A", "B"]
  1971. assert result.columns.tolist() == ["C", "D"]
  1972. @pytest.mark.parametrize("conn", all_connectable_iris)
  1973. def test_read_sql_delegate(conn, request):
  1974. if conn == "sqlite_buildin_iris":
  1975. request.applymarker(
  1976. pytest.mark.xfail(
  1977. reason="sqlite_buildin connection does not implement read_sql_table"
  1978. )
  1979. )
  1980. conn = request.getfixturevalue(conn)
  1981. iris_frame1 = sql.read_sql_query("SELECT * FROM iris", conn)
  1982. iris_frame2 = sql.read_sql("SELECT * FROM iris", conn)
  1983. tm.assert_frame_equal(iris_frame1, iris_frame2)
  1984. iris_frame1 = sql.read_sql_table("iris", conn)
  1985. iris_frame2 = sql.read_sql("iris", conn)
  1986. tm.assert_frame_equal(iris_frame1, iris_frame2)
  1987. def test_not_reflect_all_tables(sqlite_conn):
  1988. conn = sqlite_conn
  1989. from sqlalchemy import text
  1990. from sqlalchemy.engine import Engine
  1991. # create invalid table
  1992. query_list = [
  1993. text("CREATE TABLE invalid (x INTEGER, y UNKNOWN);"),
  1994. text("CREATE TABLE other_table (x INTEGER, y INTEGER);"),
  1995. ]
  1996. for query in query_list:
  1997. if isinstance(conn, Engine):
  1998. with conn.connect() as conn:
  1999. with conn.begin():
  2000. conn.execute(query)
  2001. else:
  2002. with conn.begin():
  2003. conn.execute(query)
  2004. with tm.assert_produces_warning(None):
  2005. sql.read_sql_table("other_table", conn)
  2006. sql.read_sql_query("SELECT * FROM other_table", conn)
  2007. @pytest.mark.parametrize("conn", all_connectable)
  2008. def test_warning_case_insensitive_table_name(conn, request, test_frame1):
  2009. conn_name = conn
  2010. if conn_name == "sqlite_buildin" or "adbc" in conn_name:
  2011. request.applymarker(pytest.mark.xfail(reason="Does not raise warning"))
  2012. conn = request.getfixturevalue(conn)
  2013. # see gh-7815
  2014. with tm.assert_produces_warning(
  2015. UserWarning,
  2016. match=(
  2017. r"The provided table name 'TABLE1' is not found exactly as such in "
  2018. r"the database after writing the table, possibly due to case "
  2019. r"sensitivity issues. Consider using lower case table names."
  2020. ),
  2021. ):
  2022. with sql.SQLDatabase(conn) as db:
  2023. db.check_case_sensitive("TABLE1", "")
  2024. # Test that the warning is certainly NOT triggered in a normal case.
  2025. with tm.assert_produces_warning(None):
  2026. test_frame1.to_sql(name="CaseSensitive", con=conn)
  2027. @pytest.mark.parametrize("conn", sqlalchemy_connectable)
  2028. def test_sqlalchemy_type_mapping(conn, request):
  2029. conn = request.getfixturevalue(conn)
  2030. from sqlalchemy import TIMESTAMP
  2031. # Test Timestamp objects (no datetime64 because of timezone) (GH9085)
  2032. df = DataFrame(
  2033. {"time": to_datetime(["2014-12-12 01:54", "2014-12-11 02:54"], utc=True)}
  2034. )
  2035. with sql.SQLDatabase(conn) as db:
  2036. table = sql.SQLTable("test_type", db, frame=df)
  2037. # GH 9086: TIMESTAMP is the suggested type for datetimes with timezones
  2038. assert isinstance(table.table.c["time"].type, TIMESTAMP)
  2039. @pytest.mark.parametrize("conn", sqlalchemy_connectable)
  2040. @pytest.mark.parametrize(
  2041. "integer, expected",
  2042. [
  2043. ("int8", "SMALLINT"),
  2044. ("Int8", "SMALLINT"),
  2045. ("uint8", "SMALLINT"),
  2046. ("UInt8", "SMALLINT"),
  2047. ("int16", "SMALLINT"),
  2048. ("Int16", "SMALLINT"),
  2049. ("uint16", "INTEGER"),
  2050. ("UInt16", "INTEGER"),
  2051. ("int32", "INTEGER"),
  2052. ("Int32", "INTEGER"),
  2053. ("uint32", "BIGINT"),
  2054. ("UInt32", "BIGINT"),
  2055. ("int64", "BIGINT"),
  2056. ("Int64", "BIGINT"),
  2057. (int, "BIGINT" if np.dtype(int).name == "int64" else "INTEGER"),
  2058. ],
  2059. )
  2060. def test_sqlalchemy_integer_mapping(conn, request, integer, expected):
  2061. # GH35076 Map pandas integer to optimal SQLAlchemy integer type
  2062. conn = request.getfixturevalue(conn)
  2063. df = DataFrame([0, 1], columns=["a"], dtype=integer)
  2064. with sql.SQLDatabase(conn) as db:
  2065. table = sql.SQLTable("test_type", db, frame=df)
  2066. result = str(table.table.c.a.type)
  2067. assert result == expected
  2068. @pytest.mark.parametrize("conn", sqlalchemy_connectable)
  2069. @pytest.mark.parametrize("integer", ["uint64", "UInt64"])
  2070. def test_sqlalchemy_integer_overload_mapping(conn, request, integer):
  2071. conn = request.getfixturevalue(conn)
  2072. # GH35076 Map pandas integer to optimal SQLAlchemy integer type
  2073. df = DataFrame([0, 1], columns=["a"], dtype=integer)
  2074. with sql.SQLDatabase(conn) as db:
  2075. with pytest.raises(
  2076. ValueError, match="Unsigned 64 bit integer datatype is not supported"
  2077. ):
  2078. sql.SQLTable("test_type", db, frame=df)
  2079. @pytest.mark.parametrize("conn", all_connectable)
  2080. def test_database_uri_string(conn, request, test_frame1):
  2081. pytest.importorskip("sqlalchemy")
  2082. conn = request.getfixturevalue(conn)
  2083. # Test read_sql and .to_sql method with a database URI (GH10654)
  2084. # db_uri = 'sqlite:///:memory:' # raises
  2085. # sqlalchemy.exc.OperationalError: (sqlite3.OperationalError) near
  2086. # "iris": syntax error [SQL: 'iris']
  2087. with tm.ensure_clean() as name:
  2088. db_uri = "sqlite:///" + name
  2089. table = "iris"
  2090. test_frame1.to_sql(name=table, con=db_uri, if_exists="replace", index=False)
  2091. test_frame2 = sql.read_sql(table, db_uri)
  2092. test_frame3 = sql.read_sql_table(table, db_uri)
  2093. query = "SELECT * FROM iris"
  2094. test_frame4 = sql.read_sql_query(query, db_uri)
  2095. tm.assert_frame_equal(test_frame1, test_frame2)
  2096. tm.assert_frame_equal(test_frame1, test_frame3)
  2097. tm.assert_frame_equal(test_frame1, test_frame4)
  2098. @td.skip_if_installed("pg8000")
  2099. @pytest.mark.parametrize("conn", all_connectable)
  2100. def test_pg8000_sqlalchemy_passthrough_error(conn, request):
  2101. pytest.importorskip("sqlalchemy")
  2102. conn = request.getfixturevalue(conn)
  2103. # using driver that will not be installed on CI to trigger error
  2104. # in sqlalchemy.create_engine -> test passing of this error to user
  2105. db_uri = "postgresql+pg8000://user:pass@host/dbname"
  2106. with pytest.raises(ImportError, match="pg8000"):
  2107. sql.read_sql("select * from table", db_uri)
  2108. @pytest.mark.parametrize("conn", sqlalchemy_connectable_iris)
  2109. def test_query_by_text_obj(conn, request):
  2110. # WIP : GH10846
  2111. conn_name = conn
  2112. conn = request.getfixturevalue(conn)
  2113. from sqlalchemy import text
  2114. if "postgres" in conn_name:
  2115. name_text = text('select * from iris where "Name"=:name')
  2116. else:
  2117. name_text = text("select * from iris where name=:name")
  2118. iris_df = sql.read_sql(name_text, conn, params={"name": "Iris-versicolor"})
  2119. all_names = set(iris_df["Name"])
  2120. assert all_names == {"Iris-versicolor"}
  2121. @pytest.mark.parametrize("conn", sqlalchemy_connectable_iris)
  2122. def test_query_by_select_obj(conn, request):
  2123. conn = request.getfixturevalue(conn)
  2124. # WIP : GH10846
  2125. from sqlalchemy import (
  2126. bindparam,
  2127. select,
  2128. )
  2129. iris = iris_table_metadata()
  2130. name_select = select(iris).where(iris.c.Name == bindparam("name"))
  2131. iris_df = sql.read_sql(name_select, conn, params={"name": "Iris-setosa"})
  2132. all_names = set(iris_df["Name"])
  2133. assert all_names == {"Iris-setosa"}
  2134. @pytest.mark.parametrize("conn", all_connectable)
  2135. def test_column_with_percentage(conn, request):
  2136. # GH 37157
  2137. conn_name = conn
  2138. if conn_name == "sqlite_buildin":
  2139. request.applymarker(pytest.mark.xfail(reason="Not Implemented"))
  2140. conn = request.getfixturevalue(conn)
  2141. df = DataFrame({"A": [0, 1, 2], "%_variation": [3, 4, 5]})
  2142. df.to_sql(name="test_column_percentage", con=conn, index=False)
  2143. res = sql.read_sql_table("test_column_percentage", conn)
  2144. tm.assert_frame_equal(res, df)
  2145. def test_sql_open_close(test_frame3):
  2146. # Test if the IO in the database still work if the connection closed
  2147. # between the writing and reading (as in many real situations).
  2148. with tm.ensure_clean() as name:
  2149. with closing(sqlite3.connect(name)) as conn:
  2150. assert sql.to_sql(test_frame3, "test_frame3_legacy", conn, index=False) == 4
  2151. with closing(sqlite3.connect(name)) as conn:
  2152. result = sql.read_sql_query("SELECT * FROM test_frame3_legacy;", conn)
  2153. tm.assert_frame_equal(test_frame3, result)
  2154. @td.skip_if_installed("sqlalchemy")
  2155. def test_con_string_import_error():
  2156. conn = "mysql://root@localhost/pandas"
  2157. msg = "Using URI string without sqlalchemy installed"
  2158. with pytest.raises(ImportError, match=msg):
  2159. sql.read_sql("SELECT * FROM iris", conn)
  2160. @td.skip_if_installed("sqlalchemy")
  2161. def test_con_unknown_dbapi2_class_does_not_error_without_sql_alchemy_installed():
  2162. class MockSqliteConnection:
  2163. def __init__(self, *args, **kwargs) -> None:
  2164. self.conn = sqlite3.Connection(*args, **kwargs)
  2165. def __getattr__(self, name):
  2166. return getattr(self.conn, name)
  2167. def close(self):
  2168. self.conn.close()
  2169. with contextlib.closing(MockSqliteConnection(":memory:")) as conn:
  2170. with tm.assert_produces_warning(UserWarning):
  2171. sql.read_sql("SELECT 1", conn)
  2172. def test_sqlite_read_sql_delegate(sqlite_buildin_iris):
  2173. conn = sqlite_buildin_iris
  2174. iris_frame1 = sql.read_sql_query("SELECT * FROM iris", conn)
  2175. iris_frame2 = sql.read_sql("SELECT * FROM iris", conn)
  2176. tm.assert_frame_equal(iris_frame1, iris_frame2)
  2177. msg = "Execution failed on sql 'iris': near \"iris\": syntax error"
  2178. with pytest.raises(sql.DatabaseError, match=msg):
  2179. sql.read_sql("iris", conn)
  2180. def test_get_schema2(test_frame1):
  2181. # without providing a connection object (available for backwards comp)
  2182. create_sql = sql.get_schema(test_frame1, "test")
  2183. assert "CREATE" in create_sql
  2184. def test_sqlite_type_mapping(sqlite_buildin):
  2185. # Test Timestamp objects (no datetime64 because of timezone) (GH9085)
  2186. conn = sqlite_buildin
  2187. df = DataFrame(
  2188. {"time": to_datetime(["2014-12-12 01:54", "2014-12-11 02:54"], utc=True)}
  2189. )
  2190. db = sql.SQLiteDatabase(conn)
  2191. table = sql.SQLiteTable("test_type", db, frame=df)
  2192. schema = table.sql_schema()
  2193. for col in schema.split("\n"):
  2194. if col.split()[0].strip('"') == "time":
  2195. assert col.split()[1] == "TIMESTAMP"
  2196. # -----------------------------------------------------------------------------
  2197. # -- Database flavor specific tests
  2198. @pytest.mark.parametrize("conn", sqlalchemy_connectable)
  2199. def test_create_table(conn, request):
  2200. if conn == "sqlite_str":
  2201. pytest.skip("sqlite_str has no inspection system")
  2202. conn = request.getfixturevalue(conn)
  2203. from sqlalchemy import inspect
  2204. temp_frame = DataFrame({"one": [1.0, 2.0, 3.0, 4.0], "two": [4.0, 3.0, 2.0, 1.0]})
  2205. with sql.SQLDatabase(conn, need_transaction=True) as pandasSQL:
  2206. assert pandasSQL.to_sql(temp_frame, "temp_frame") == 4
  2207. insp = inspect(conn)
  2208. assert insp.has_table("temp_frame")
  2209. # Cleanup
  2210. with sql.SQLDatabase(conn, need_transaction=True) as pandasSQL:
  2211. pandasSQL.drop_table("temp_frame")
  2212. @pytest.mark.parametrize("conn", sqlalchemy_connectable)
  2213. def test_drop_table(conn, request):
  2214. if conn == "sqlite_str":
  2215. pytest.skip("sqlite_str has no inspection system")
  2216. conn = request.getfixturevalue(conn)
  2217. from sqlalchemy import inspect
  2218. temp_frame = DataFrame({"one": [1.0, 2.0, 3.0, 4.0], "two": [4.0, 3.0, 2.0, 1.0]})
  2219. with sql.SQLDatabase(conn) as pandasSQL:
  2220. with pandasSQL.run_transaction():
  2221. assert pandasSQL.to_sql(temp_frame, "temp_frame") == 4
  2222. insp = inspect(conn)
  2223. assert insp.has_table("temp_frame")
  2224. with pandasSQL.run_transaction():
  2225. pandasSQL.drop_table("temp_frame")
  2226. try:
  2227. insp.clear_cache() # needed with SQLAlchemy 2.0, unavailable prior
  2228. except AttributeError:
  2229. pass
  2230. assert not insp.has_table("temp_frame")
  2231. @pytest.mark.parametrize("conn", all_connectable)
  2232. def test_roundtrip(conn, request, test_frame1):
  2233. if conn == "sqlite_str":
  2234. pytest.skip("sqlite_str has no inspection system")
  2235. conn_name = conn
  2236. conn = request.getfixturevalue(conn)
  2237. pandasSQL = pandasSQL_builder(conn)
  2238. with pandasSQL.run_transaction():
  2239. assert pandasSQL.to_sql(test_frame1, "test_frame_roundtrip") == 4
  2240. result = pandasSQL.read_query("SELECT * FROM test_frame_roundtrip")
  2241. if "adbc" in conn_name:
  2242. result = result.rename(columns={"__index_level_0__": "level_0"})
  2243. result.set_index("level_0", inplace=True)
  2244. # result.index.astype(int)
  2245. result.index.name = None
  2246. tm.assert_frame_equal(result, test_frame1)
  2247. @pytest.mark.parametrize("conn", all_connectable_iris)
  2248. def test_execute_sql(conn, request):
  2249. conn = request.getfixturevalue(conn)
  2250. with pandasSQL_builder(conn) as pandasSQL:
  2251. with pandasSQL.run_transaction():
  2252. iris_results = pandasSQL.execute("SELECT * FROM iris")
  2253. row = iris_results.fetchone()
  2254. iris_results.close()
  2255. assert list(row) == [5.1, 3.5, 1.4, 0.2, "Iris-setosa"]
  2256. @pytest.mark.parametrize("conn", sqlalchemy_connectable_iris)
  2257. def test_sqlalchemy_read_table(conn, request):
  2258. conn = request.getfixturevalue(conn)
  2259. iris_frame = sql.read_sql_table("iris", con=conn)
  2260. check_iris_frame(iris_frame)
  2261. @pytest.mark.parametrize("conn", sqlalchemy_connectable_iris)
  2262. def test_sqlalchemy_read_table_columns(conn, request):
  2263. conn = request.getfixturevalue(conn)
  2264. iris_frame = sql.read_sql_table(
  2265. "iris", con=conn, columns=["SepalLength", "SepalLength"]
  2266. )
  2267. tm.assert_index_equal(iris_frame.columns, Index(["SepalLength", "SepalLength__1"]))
  2268. @pytest.mark.parametrize("conn", sqlalchemy_connectable_iris)
  2269. def test_read_table_absent_raises(conn, request):
  2270. conn = request.getfixturevalue(conn)
  2271. msg = "Table this_doesnt_exist not found"
  2272. with pytest.raises(ValueError, match=msg):
  2273. sql.read_sql_table("this_doesnt_exist", con=conn)
  2274. @pytest.mark.parametrize("conn", sqlalchemy_connectable_types)
  2275. def test_sqlalchemy_default_type_conversion(conn, request):
  2276. conn_name = conn
  2277. if conn_name == "sqlite_str":
  2278. pytest.skip("types tables not created in sqlite_str fixture")
  2279. elif "mysql" in conn_name or "sqlite" in conn_name:
  2280. request.applymarker(
  2281. pytest.mark.xfail(reason="boolean dtype not inferred properly")
  2282. )
  2283. conn = request.getfixturevalue(conn)
  2284. df = sql.read_sql_table("types", conn)
  2285. assert issubclass(df.FloatCol.dtype.type, np.floating)
  2286. assert issubclass(df.IntCol.dtype.type, np.integer)
  2287. assert issubclass(df.BoolCol.dtype.type, np.bool_)
  2288. # Int column with NA values stays as float
  2289. assert issubclass(df.IntColWithNull.dtype.type, np.floating)
  2290. # Bool column with NA values becomes object
  2291. assert issubclass(df.BoolColWithNull.dtype.type, object)
  2292. @pytest.mark.parametrize("conn", sqlalchemy_connectable)
  2293. def test_bigint(conn, request):
  2294. # int64 should be converted to BigInteger, GH7433
  2295. conn = request.getfixturevalue(conn)
  2296. df = DataFrame(data={"i64": [2**62]})
  2297. assert df.to_sql(name="test_bigint", con=conn, index=False) == 1
  2298. result = sql.read_sql_table("test_bigint", conn)
  2299. tm.assert_frame_equal(df, result)
  2300. @pytest.mark.parametrize("conn", sqlalchemy_connectable_types)
  2301. def test_default_date_load(conn, request):
  2302. conn_name = conn
  2303. if conn_name == "sqlite_str":
  2304. pytest.skip("types tables not created in sqlite_str fixture")
  2305. elif "sqlite" in conn_name:
  2306. request.applymarker(
  2307. pytest.mark.xfail(reason="sqlite does not read date properly")
  2308. )
  2309. conn = request.getfixturevalue(conn)
  2310. df = sql.read_sql_table("types", conn)
  2311. assert issubclass(df.DateCol.dtype.type, np.datetime64)
  2312. @pytest.mark.parametrize("conn", postgresql_connectable)
  2313. @pytest.mark.parametrize("parse_dates", [None, ["DateColWithTz"]])
  2314. def test_datetime_with_timezone_query(conn, request, parse_dates):
  2315. # edge case that converts postgresql datetime with time zone types
  2316. # to datetime64[ns,psycopg2.tz.FixedOffsetTimezone..], which is ok
  2317. # but should be more natural, so coerce to datetime64[ns] for now
  2318. conn = request.getfixturevalue(conn)
  2319. expected = create_and_load_postgres_datetz(conn)
  2320. # GH11216
  2321. df = read_sql_query("select * from datetz", conn, parse_dates=parse_dates)
  2322. col = df.DateColWithTz
  2323. tm.assert_series_equal(col, expected)
  2324. @pytest.mark.parametrize("conn", postgresql_connectable)
  2325. def test_datetime_with_timezone_query_chunksize(conn, request):
  2326. conn = request.getfixturevalue(conn)
  2327. expected = create_and_load_postgres_datetz(conn)
  2328. df = concat(
  2329. list(read_sql_query("select * from datetz", conn, chunksize=1)),
  2330. ignore_index=True,
  2331. )
  2332. col = df.DateColWithTz
  2333. tm.assert_series_equal(col, expected)
  2334. @pytest.mark.parametrize("conn", postgresql_connectable)
  2335. def test_datetime_with_timezone_table(conn, request):
  2336. conn = request.getfixturevalue(conn)
  2337. expected = create_and_load_postgres_datetz(conn)
  2338. result = sql.read_sql_table("datetz", conn)
  2339. tm.assert_frame_equal(result, expected.to_frame())
  2340. @pytest.mark.parametrize("conn", sqlalchemy_connectable)
  2341. def test_datetime_with_timezone_roundtrip(conn, request):
  2342. conn_name = conn
  2343. conn = request.getfixturevalue(conn)
  2344. # GH 9086
  2345. # Write datetimetz data to a db and read it back
  2346. # For dbs that support timestamps with timezones, should get back UTC
  2347. # otherwise naive data should be returned
  2348. expected = DataFrame(
  2349. {"A": date_range("2013-01-01 09:00:00", periods=3, tz="US/Pacific")}
  2350. )
  2351. assert expected.to_sql(name="test_datetime_tz", con=conn, index=False) == 3
  2352. if "postgresql" in conn_name:
  2353. # SQLAlchemy "timezones" (i.e. offsets) are coerced to UTC
  2354. expected["A"] = expected["A"].dt.tz_convert("UTC")
  2355. else:
  2356. # Otherwise, timestamps are returned as local, naive
  2357. expected["A"] = expected["A"].dt.tz_localize(None)
  2358. result = sql.read_sql_table("test_datetime_tz", conn)
  2359. tm.assert_frame_equal(result, expected)
  2360. result = sql.read_sql_query("SELECT * FROM test_datetime_tz", conn)
  2361. if "sqlite" in conn_name:
  2362. # read_sql_query does not return datetime type like read_sql_table
  2363. assert isinstance(result.loc[0, "A"], str)
  2364. result["A"] = to_datetime(result["A"])
  2365. tm.assert_frame_equal(result, expected)
  2366. @pytest.mark.parametrize("conn", sqlalchemy_connectable)
  2367. def test_out_of_bounds_datetime(conn, request):
  2368. # GH 26761
  2369. conn = request.getfixturevalue(conn)
  2370. data = DataFrame({"date": datetime(9999, 1, 1)}, index=[0])
  2371. assert data.to_sql(name="test_datetime_obb", con=conn, index=False) == 1
  2372. result = sql.read_sql_table("test_datetime_obb", conn)
  2373. expected = DataFrame([pd.NaT], columns=["date"])
  2374. tm.assert_frame_equal(result, expected)
  2375. @pytest.mark.parametrize("conn", sqlalchemy_connectable)
  2376. def test_naive_datetimeindex_roundtrip(conn, request):
  2377. # GH 23510
  2378. # Ensure that a naive DatetimeIndex isn't converted to UTC
  2379. conn = request.getfixturevalue(conn)
  2380. dates = date_range("2018-01-01", periods=5, freq="6h")._with_freq(None)
  2381. expected = DataFrame({"nums": range(5)}, index=dates)
  2382. assert expected.to_sql(name="foo_table", con=conn, index_label="info_date") == 5
  2383. result = sql.read_sql_table("foo_table", conn, index_col="info_date")
  2384. # result index with gain a name from a set_index operation; expected
  2385. tm.assert_frame_equal(result, expected, check_names=False)
  2386. @pytest.mark.parametrize("conn", sqlalchemy_connectable_types)
  2387. def test_date_parsing(conn, request):
  2388. # No Parsing
  2389. conn_name = conn
  2390. conn = request.getfixturevalue(conn)
  2391. df = sql.read_sql_table("types", conn)
  2392. expected_type = object if "sqlite" in conn_name else np.datetime64
  2393. assert issubclass(df.DateCol.dtype.type, expected_type)
  2394. df = sql.read_sql_table("types", conn, parse_dates=["DateCol"])
  2395. assert issubclass(df.DateCol.dtype.type, np.datetime64)
  2396. df = sql.read_sql_table("types", conn, parse_dates={"DateCol": "%Y-%m-%d %H:%M:%S"})
  2397. assert issubclass(df.DateCol.dtype.type, np.datetime64)
  2398. df = sql.read_sql_table(
  2399. "types",
  2400. conn,
  2401. parse_dates={"DateCol": {"format": "%Y-%m-%d %H:%M:%S"}},
  2402. )
  2403. assert issubclass(df.DateCol.dtype.type, np.datetime64)
  2404. df = sql.read_sql_table("types", conn, parse_dates=["IntDateCol"])
  2405. assert issubclass(df.IntDateCol.dtype.type, np.datetime64)
  2406. df = sql.read_sql_table("types", conn, parse_dates={"IntDateCol": "s"})
  2407. assert issubclass(df.IntDateCol.dtype.type, np.datetime64)
  2408. df = sql.read_sql_table("types", conn, parse_dates={"IntDateCol": {"unit": "s"}})
  2409. assert issubclass(df.IntDateCol.dtype.type, np.datetime64)
  2410. @pytest.mark.parametrize("conn", sqlalchemy_connectable)
  2411. def test_datetime(conn, request):
  2412. conn_name = conn
  2413. conn = request.getfixturevalue(conn)
  2414. df = DataFrame(
  2415. {"A": date_range("2013-01-01 09:00:00", periods=3), "B": np.arange(3.0)}
  2416. )
  2417. assert df.to_sql(name="test_datetime", con=conn) == 3
  2418. # with read_table -> type information from schema used
  2419. result = sql.read_sql_table("test_datetime", conn)
  2420. result = result.drop("index", axis=1)
  2421. tm.assert_frame_equal(result, df)
  2422. # with read_sql -> no type information -> sqlite has no native
  2423. result = sql.read_sql_query("SELECT * FROM test_datetime", conn)
  2424. result = result.drop("index", axis=1)
  2425. if "sqlite" in conn_name:
  2426. assert isinstance(result.loc[0, "A"], str)
  2427. result["A"] = to_datetime(result["A"])
  2428. tm.assert_frame_equal(result, df)
  2429. else:
  2430. tm.assert_frame_equal(result, df)
  2431. @pytest.mark.parametrize("conn", sqlalchemy_connectable)
  2432. def test_datetime_NaT(conn, request):
  2433. conn_name = conn
  2434. conn = request.getfixturevalue(conn)
  2435. df = DataFrame(
  2436. {"A": date_range("2013-01-01 09:00:00", periods=3), "B": np.arange(3.0)}
  2437. )
  2438. df.loc[1, "A"] = np.nan
  2439. assert df.to_sql(name="test_datetime", con=conn, index=False) == 3
  2440. # with read_table -> type information from schema used
  2441. result = sql.read_sql_table("test_datetime", conn)
  2442. tm.assert_frame_equal(result, df)
  2443. # with read_sql -> no type information -> sqlite has no native
  2444. result = sql.read_sql_query("SELECT * FROM test_datetime", conn)
  2445. if "sqlite" in conn_name:
  2446. assert isinstance(result.loc[0, "A"], str)
  2447. result["A"] = to_datetime(result["A"], errors="coerce")
  2448. tm.assert_frame_equal(result, df)
  2449. else:
  2450. tm.assert_frame_equal(result, df)
  2451. @pytest.mark.parametrize("conn", sqlalchemy_connectable)
  2452. def test_datetime_date(conn, request):
  2453. # test support for datetime.date
  2454. conn = request.getfixturevalue(conn)
  2455. df = DataFrame([date(2014, 1, 1), date(2014, 1, 2)], columns=["a"])
  2456. assert df.to_sql(name="test_date", con=conn, index=False) == 2
  2457. res = read_sql_table("test_date", conn)
  2458. result = res["a"]
  2459. expected = to_datetime(df["a"])
  2460. # comes back as datetime64
  2461. tm.assert_series_equal(result, expected)
  2462. @pytest.mark.parametrize("conn", sqlalchemy_connectable)
  2463. def test_datetime_time(conn, request, sqlite_buildin):
  2464. # test support for datetime.time
  2465. conn_name = conn
  2466. conn = request.getfixturevalue(conn)
  2467. df = DataFrame([time(9, 0, 0), time(9, 1, 30)], columns=["a"])
  2468. assert df.to_sql(name="test_time", con=conn, index=False) == 2
  2469. res = read_sql_table("test_time", conn)
  2470. tm.assert_frame_equal(res, df)
  2471. # GH8341
  2472. # first, use the fallback to have the sqlite adapter put in place
  2473. sqlite_conn = sqlite_buildin
  2474. assert sql.to_sql(df, "test_time2", sqlite_conn, index=False) == 2
  2475. res = sql.read_sql_query("SELECT * FROM test_time2", sqlite_conn)
  2476. ref = df.map(lambda _: _.strftime("%H:%M:%S.%f"))
  2477. tm.assert_frame_equal(ref, res) # check if adapter is in place
  2478. # then test if sqlalchemy is unaffected by the sqlite adapter
  2479. assert sql.to_sql(df, "test_time3", conn, index=False) == 2
  2480. if "sqlite" in conn_name:
  2481. res = sql.read_sql_query("SELECT * FROM test_time3", conn)
  2482. ref = df.map(lambda _: _.strftime("%H:%M:%S.%f"))
  2483. tm.assert_frame_equal(ref, res)
  2484. res = sql.read_sql_table("test_time3", conn)
  2485. tm.assert_frame_equal(df, res)
  2486. @pytest.mark.parametrize("conn", sqlalchemy_connectable)
  2487. def test_mixed_dtype_insert(conn, request):
  2488. # see GH6509
  2489. conn = request.getfixturevalue(conn)
  2490. s1 = Series(2**25 + 1, dtype=np.int32)
  2491. s2 = Series(0.0, dtype=np.float32)
  2492. df = DataFrame({"s1": s1, "s2": s2})
  2493. # write and read again
  2494. assert df.to_sql(name="test_read_write", con=conn, index=False) == 1
  2495. df2 = sql.read_sql_table("test_read_write", conn)
  2496. tm.assert_frame_equal(df, df2, check_dtype=False, check_exact=True)
  2497. @pytest.mark.parametrize("conn", sqlalchemy_connectable)
  2498. def test_nan_numeric(conn, request):
  2499. # NaNs in numeric float column
  2500. conn = request.getfixturevalue(conn)
  2501. df = DataFrame({"A": [0, 1, 2], "B": [0.2, np.nan, 5.6]})
  2502. assert df.to_sql(name="test_nan", con=conn, index=False) == 3
  2503. # with read_table
  2504. result = sql.read_sql_table("test_nan", conn)
  2505. tm.assert_frame_equal(result, df)
  2506. # with read_sql
  2507. result = sql.read_sql_query("SELECT * FROM test_nan", conn)
  2508. tm.assert_frame_equal(result, df)
  2509. @pytest.mark.parametrize("conn", sqlalchemy_connectable)
  2510. def test_nan_fullcolumn(conn, request):
  2511. # full NaN column (numeric float column)
  2512. conn = request.getfixturevalue(conn)
  2513. df = DataFrame({"A": [0, 1, 2], "B": [np.nan, np.nan, np.nan]})
  2514. assert df.to_sql(name="test_nan", con=conn, index=False) == 3
  2515. # with read_table
  2516. result = sql.read_sql_table("test_nan", conn)
  2517. tm.assert_frame_equal(result, df)
  2518. # with read_sql -> not type info from table -> stays None
  2519. df["B"] = df["B"].astype("object")
  2520. df["B"] = None
  2521. result = sql.read_sql_query("SELECT * FROM test_nan", conn)
  2522. tm.assert_frame_equal(result, df)
  2523. @pytest.mark.parametrize("conn", sqlalchemy_connectable)
  2524. def test_nan_string(conn, request):
  2525. # NaNs in string column
  2526. conn = request.getfixturevalue(conn)
  2527. df = DataFrame({"A": [0, 1, 2], "B": ["a", "b", np.nan]})
  2528. assert df.to_sql(name="test_nan", con=conn, index=False) == 3
  2529. # NaNs are coming back as None
  2530. df.loc[2, "B"] = None
  2531. # with read_table
  2532. result = sql.read_sql_table("test_nan", conn)
  2533. tm.assert_frame_equal(result, df)
  2534. # with read_sql
  2535. result = sql.read_sql_query("SELECT * FROM test_nan", conn)
  2536. tm.assert_frame_equal(result, df)
  2537. @pytest.mark.parametrize("conn", all_connectable)
  2538. def test_to_sql_save_index(conn, request):
  2539. if "adbc" in conn:
  2540. request.node.add_marker(
  2541. pytest.mark.xfail(
  2542. reason="ADBC implementation does not create index", strict=True
  2543. )
  2544. )
  2545. conn_name = conn
  2546. conn = request.getfixturevalue(conn)
  2547. df = DataFrame.from_records(
  2548. [(1, 2.1, "line1"), (2, 1.5, "line2")], columns=["A", "B", "C"], index=["A"]
  2549. )
  2550. tbl_name = "test_to_sql_saves_index"
  2551. with pandasSQL_builder(conn) as pandasSQL:
  2552. with pandasSQL.run_transaction():
  2553. assert pandasSQL.to_sql(df, tbl_name) == 2
  2554. if conn_name in {"sqlite_buildin", "sqlite_str"}:
  2555. ixs = sql.read_sql_query(
  2556. "SELECT * FROM sqlite_master WHERE type = 'index' "
  2557. f"AND tbl_name = '{tbl_name}'",
  2558. conn,
  2559. )
  2560. ix_cols = []
  2561. for ix_name in ixs.name:
  2562. ix_info = sql.read_sql_query(f"PRAGMA index_info({ix_name})", conn)
  2563. ix_cols.append(ix_info.name.tolist())
  2564. else:
  2565. from sqlalchemy import inspect
  2566. insp = inspect(conn)
  2567. ixs = insp.get_indexes(tbl_name)
  2568. ix_cols = [i["column_names"] for i in ixs]
  2569. assert ix_cols == [["A"]]
  2570. @pytest.mark.parametrize("conn", all_connectable)
  2571. def test_transactions(conn, request):
  2572. conn_name = conn
  2573. conn = request.getfixturevalue(conn)
  2574. stmt = "CREATE TABLE test_trans (A INT, B TEXT)"
  2575. if conn_name != "sqlite_buildin" and "adbc" not in conn_name:
  2576. from sqlalchemy import text
  2577. stmt = text(stmt)
  2578. with pandasSQL_builder(conn) as pandasSQL:
  2579. with pandasSQL.run_transaction() as trans:
  2580. trans.execute(stmt)
  2581. @pytest.mark.parametrize("conn", all_connectable)
  2582. def test_transaction_rollback(conn, request):
  2583. conn_name = conn
  2584. conn = request.getfixturevalue(conn)
  2585. with pandasSQL_builder(conn) as pandasSQL:
  2586. with pandasSQL.run_transaction() as trans:
  2587. stmt = "CREATE TABLE test_trans (A INT, B TEXT)"
  2588. if "adbc" in conn_name or isinstance(pandasSQL, SQLiteDatabase):
  2589. trans.execute(stmt)
  2590. else:
  2591. from sqlalchemy import text
  2592. stmt = text(stmt)
  2593. trans.execute(stmt)
  2594. class DummyException(Exception):
  2595. pass
  2596. # Make sure when transaction is rolled back, no rows get inserted
  2597. ins_sql = "INSERT INTO test_trans (A,B) VALUES (1, 'blah')"
  2598. if isinstance(pandasSQL, SQLDatabase):
  2599. from sqlalchemy import text
  2600. ins_sql = text(ins_sql)
  2601. try:
  2602. with pandasSQL.run_transaction() as trans:
  2603. trans.execute(ins_sql)
  2604. raise DummyException("error")
  2605. except DummyException:
  2606. # ignore raised exception
  2607. pass
  2608. with pandasSQL.run_transaction():
  2609. res = pandasSQL.read_query("SELECT * FROM test_trans")
  2610. assert len(res) == 0
  2611. # Make sure when transaction is committed, rows do get inserted
  2612. with pandasSQL.run_transaction() as trans:
  2613. trans.execute(ins_sql)
  2614. res2 = pandasSQL.read_query("SELECT * FROM test_trans")
  2615. assert len(res2) == 1
  2616. @pytest.mark.parametrize("conn", sqlalchemy_connectable)
  2617. def test_get_schema_create_table(conn, request, test_frame3):
  2618. # Use a dataframe without a bool column, since MySQL converts bool to
  2619. # TINYINT (which read_sql_table returns as an int and causes a dtype
  2620. # mismatch)
  2621. if conn == "sqlite_str":
  2622. request.applymarker(
  2623. pytest.mark.xfail(reason="test does not support sqlite_str fixture")
  2624. )
  2625. conn = request.getfixturevalue(conn)
  2626. from sqlalchemy import text
  2627. from sqlalchemy.engine import Engine
  2628. tbl = "test_get_schema_create_table"
  2629. create_sql = sql.get_schema(test_frame3, tbl, con=conn)
  2630. blank_test_df = test_frame3.iloc[:0]
  2631. create_sql = text(create_sql)
  2632. if isinstance(conn, Engine):
  2633. with conn.connect() as newcon:
  2634. with newcon.begin():
  2635. newcon.execute(create_sql)
  2636. else:
  2637. conn.execute(create_sql)
  2638. returned_df = sql.read_sql_table(tbl, conn)
  2639. tm.assert_frame_equal(returned_df, blank_test_df, check_index_type=False)
  2640. @pytest.mark.parametrize("conn", sqlalchemy_connectable)
  2641. def test_dtype(conn, request):
  2642. if conn == "sqlite_str":
  2643. pytest.skip("sqlite_str has no inspection system")
  2644. conn = request.getfixturevalue(conn)
  2645. from sqlalchemy import (
  2646. TEXT,
  2647. String,
  2648. )
  2649. from sqlalchemy.schema import MetaData
  2650. cols = ["A", "B"]
  2651. data = [(0.8, True), (0.9, None)]
  2652. df = DataFrame(data, columns=cols)
  2653. assert df.to_sql(name="dtype_test", con=conn) == 2
  2654. assert df.to_sql(name="dtype_test2", con=conn, dtype={"B": TEXT}) == 2
  2655. meta = MetaData()
  2656. meta.reflect(bind=conn)
  2657. sqltype = meta.tables["dtype_test2"].columns["B"].type
  2658. assert isinstance(sqltype, TEXT)
  2659. msg = "The type of B is not a SQLAlchemy type"
  2660. with pytest.raises(ValueError, match=msg):
  2661. df.to_sql(name="error", con=conn, dtype={"B": str})
  2662. # GH9083
  2663. assert df.to_sql(name="dtype_test3", con=conn, dtype={"B": String(10)}) == 2
  2664. meta.reflect(bind=conn)
  2665. sqltype = meta.tables["dtype_test3"].columns["B"].type
  2666. assert isinstance(sqltype, String)
  2667. assert sqltype.length == 10
  2668. # single dtype
  2669. assert df.to_sql(name="single_dtype_test", con=conn, dtype=TEXT) == 2
  2670. meta.reflect(bind=conn)
  2671. sqltypea = meta.tables["single_dtype_test"].columns["A"].type
  2672. sqltypeb = meta.tables["single_dtype_test"].columns["B"].type
  2673. assert isinstance(sqltypea, TEXT)
  2674. assert isinstance(sqltypeb, TEXT)
  2675. @pytest.mark.parametrize("conn", sqlalchemy_connectable)
  2676. def test_notna_dtype(conn, request):
  2677. if conn == "sqlite_str":
  2678. pytest.skip("sqlite_str has no inspection system")
  2679. conn_name = conn
  2680. conn = request.getfixturevalue(conn)
  2681. from sqlalchemy import (
  2682. Boolean,
  2683. DateTime,
  2684. Float,
  2685. Integer,
  2686. )
  2687. from sqlalchemy.schema import MetaData
  2688. cols = {
  2689. "Bool": Series([True, None]),
  2690. "Date": Series([datetime(2012, 5, 1), None]),
  2691. "Int": Series([1, None], dtype="object"),
  2692. "Float": Series([1.1, None]),
  2693. }
  2694. df = DataFrame(cols)
  2695. tbl = "notna_dtype_test"
  2696. assert df.to_sql(name=tbl, con=conn) == 2
  2697. _ = sql.read_sql_table(tbl, conn)
  2698. meta = MetaData()
  2699. meta.reflect(bind=conn)
  2700. my_type = Integer if "mysql" in conn_name else Boolean
  2701. col_dict = meta.tables[tbl].columns
  2702. assert isinstance(col_dict["Bool"].type, my_type)
  2703. assert isinstance(col_dict["Date"].type, DateTime)
  2704. assert isinstance(col_dict["Int"].type, Integer)
  2705. assert isinstance(col_dict["Float"].type, Float)
  2706. @pytest.mark.parametrize("conn", sqlalchemy_connectable)
  2707. def test_double_precision(conn, request):
  2708. if conn == "sqlite_str":
  2709. pytest.skip("sqlite_str has no inspection system")
  2710. conn = request.getfixturevalue(conn)
  2711. from sqlalchemy import (
  2712. BigInteger,
  2713. Float,
  2714. Integer,
  2715. )
  2716. from sqlalchemy.schema import MetaData
  2717. V = 1.23456789101112131415
  2718. df = DataFrame(
  2719. {
  2720. "f32": Series([V], dtype="float32"),
  2721. "f64": Series([V], dtype="float64"),
  2722. "f64_as_f32": Series([V], dtype="float64"),
  2723. "i32": Series([5], dtype="int32"),
  2724. "i64": Series([5], dtype="int64"),
  2725. }
  2726. )
  2727. assert (
  2728. df.to_sql(
  2729. name="test_dtypes",
  2730. con=conn,
  2731. index=False,
  2732. if_exists="replace",
  2733. dtype={"f64_as_f32": Float(precision=23)},
  2734. )
  2735. == 1
  2736. )
  2737. res = sql.read_sql_table("test_dtypes", conn)
  2738. # check precision of float64
  2739. assert np.round(df["f64"].iloc[0], 14) == np.round(res["f64"].iloc[0], 14)
  2740. # check sql types
  2741. meta = MetaData()
  2742. meta.reflect(bind=conn)
  2743. col_dict = meta.tables["test_dtypes"].columns
  2744. assert str(col_dict["f32"].type) == str(col_dict["f64_as_f32"].type)
  2745. assert isinstance(col_dict["f32"].type, Float)
  2746. assert isinstance(col_dict["f64"].type, Float)
  2747. assert isinstance(col_dict["i32"].type, Integer)
  2748. assert isinstance(col_dict["i64"].type, BigInteger)
  2749. @pytest.mark.parametrize("conn", sqlalchemy_connectable)
  2750. def test_connectable_issue_example(conn, request):
  2751. conn = request.getfixturevalue(conn)
  2752. # This tests the example raised in issue
  2753. # https://github.com/pandas-dev/pandas/issues/10104
  2754. from sqlalchemy.engine import Engine
  2755. def test_select(connection):
  2756. query = "SELECT test_foo_data FROM test_foo_data"
  2757. return sql.read_sql_query(query, con=connection)
  2758. def test_append(connection, data):
  2759. data.to_sql(name="test_foo_data", con=connection, if_exists="append")
  2760. def test_connectable(conn):
  2761. # https://github.com/sqlalchemy/sqlalchemy/commit/
  2762. # 00b5c10846e800304caa86549ab9da373b42fa5d#r48323973
  2763. foo_data = test_select(conn)
  2764. test_append(conn, foo_data)
  2765. def main(connectable):
  2766. if isinstance(connectable, Engine):
  2767. with connectable.connect() as conn:
  2768. with conn.begin():
  2769. test_connectable(conn)
  2770. else:
  2771. test_connectable(connectable)
  2772. assert (
  2773. DataFrame({"test_foo_data": [0, 1, 2]}).to_sql(name="test_foo_data", con=conn)
  2774. == 3
  2775. )
  2776. main(conn)
  2777. @pytest.mark.parametrize("conn", sqlalchemy_connectable)
  2778. @pytest.mark.parametrize(
  2779. "input",
  2780. [{"foo": [np.inf]}, {"foo": [-np.inf]}, {"foo": [-np.inf], "infe0": ["bar"]}],
  2781. )
  2782. def test_to_sql_with_negative_npinf(conn, request, input):
  2783. # GH 34431
  2784. df = DataFrame(input)
  2785. conn_name = conn
  2786. conn = request.getfixturevalue(conn)
  2787. if "mysql" in conn_name:
  2788. # GH 36465
  2789. # The input {"foo": [-np.inf], "infe0": ["bar"]} does not raise any error
  2790. # for pymysql version >= 0.10
  2791. # TODO(GH#36465): remove this version check after GH 36465 is fixed
  2792. pymysql = pytest.importorskip("pymysql")
  2793. if Version(pymysql.__version__) < Version("1.0.3") and "infe0" in df.columns:
  2794. mark = pytest.mark.xfail(reason="GH 36465")
  2795. request.applymarker(mark)
  2796. msg = "inf cannot be used with MySQL"
  2797. with pytest.raises(ValueError, match=msg):
  2798. df.to_sql(name="foobar", con=conn, index=False)
  2799. else:
  2800. assert df.to_sql(name="foobar", con=conn, index=False) == 1
  2801. res = sql.read_sql_table("foobar", conn)
  2802. tm.assert_equal(df, res)
  2803. @pytest.mark.parametrize("conn", sqlalchemy_connectable)
  2804. def test_temporary_table(conn, request):
  2805. if conn == "sqlite_str":
  2806. pytest.skip("test does not work with str connection")
  2807. conn = request.getfixturevalue(conn)
  2808. from sqlalchemy import (
  2809. Column,
  2810. Integer,
  2811. Unicode,
  2812. select,
  2813. )
  2814. from sqlalchemy.orm import (
  2815. Session,
  2816. declarative_base,
  2817. )
  2818. test_data = "Hello, World!"
  2819. expected = DataFrame({"spam": [test_data]})
  2820. Base = declarative_base()
  2821. class Temporary(Base):
  2822. __tablename__ = "temp_test"
  2823. __table_args__ = {"prefixes": ["TEMPORARY"]}
  2824. id = Column(Integer, primary_key=True)
  2825. spam = Column(Unicode(30), nullable=False)
  2826. with Session(conn) as session:
  2827. with session.begin():
  2828. conn = session.connection()
  2829. Temporary.__table__.create(conn)
  2830. session.add(Temporary(spam=test_data))
  2831. session.flush()
  2832. df = sql.read_sql_query(sql=select(Temporary.spam), con=conn)
  2833. tm.assert_frame_equal(df, expected)
  2834. @pytest.mark.parametrize("conn", all_connectable)
  2835. def test_invalid_engine(conn, request, test_frame1):
  2836. if conn == "sqlite_buildin" or "adbc" in conn:
  2837. request.applymarker(
  2838. pytest.mark.xfail(
  2839. reason="SQLiteDatabase/ADBCDatabase does not raise for bad engine"
  2840. )
  2841. )
  2842. conn = request.getfixturevalue(conn)
  2843. msg = "engine must be one of 'auto', 'sqlalchemy'"
  2844. with pandasSQL_builder(conn) as pandasSQL:
  2845. with pytest.raises(ValueError, match=msg):
  2846. pandasSQL.to_sql(test_frame1, "test_frame1", engine="bad_engine")
  2847. @pytest.mark.parametrize("conn", all_connectable)
  2848. def test_to_sql_with_sql_engine(conn, request, test_frame1):
  2849. """`to_sql` with the `engine` param"""
  2850. # mostly copied from this class's `_to_sql()` method
  2851. conn = request.getfixturevalue(conn)
  2852. with pandasSQL_builder(conn) as pandasSQL:
  2853. with pandasSQL.run_transaction():
  2854. assert pandasSQL.to_sql(test_frame1, "test_frame1", engine="auto") == 4
  2855. assert pandasSQL.has_table("test_frame1")
  2856. num_entries = len(test_frame1)
  2857. num_rows = count_rows(conn, "test_frame1")
  2858. assert num_rows == num_entries
  2859. @pytest.mark.parametrize("conn", sqlalchemy_connectable)
  2860. def test_options_sqlalchemy(conn, request, test_frame1):
  2861. # use the set option
  2862. conn = request.getfixturevalue(conn)
  2863. with pd.option_context("io.sql.engine", "sqlalchemy"):
  2864. with pandasSQL_builder(conn) as pandasSQL:
  2865. with pandasSQL.run_transaction():
  2866. assert pandasSQL.to_sql(test_frame1, "test_frame1") == 4
  2867. assert pandasSQL.has_table("test_frame1")
  2868. num_entries = len(test_frame1)
  2869. num_rows = count_rows(conn, "test_frame1")
  2870. assert num_rows == num_entries
  2871. @pytest.mark.parametrize("conn", all_connectable)
  2872. def test_options_auto(conn, request, test_frame1):
  2873. # use the set option
  2874. conn = request.getfixturevalue(conn)
  2875. with pd.option_context("io.sql.engine", "auto"):
  2876. with pandasSQL_builder(conn) as pandasSQL:
  2877. with pandasSQL.run_transaction():
  2878. assert pandasSQL.to_sql(test_frame1, "test_frame1") == 4
  2879. assert pandasSQL.has_table("test_frame1")
  2880. num_entries = len(test_frame1)
  2881. num_rows = count_rows(conn, "test_frame1")
  2882. assert num_rows == num_entries
  2883. def test_options_get_engine():
  2884. pytest.importorskip("sqlalchemy")
  2885. assert isinstance(get_engine("sqlalchemy"), SQLAlchemyEngine)
  2886. with pd.option_context("io.sql.engine", "sqlalchemy"):
  2887. assert isinstance(get_engine("auto"), SQLAlchemyEngine)
  2888. assert isinstance(get_engine("sqlalchemy"), SQLAlchemyEngine)
  2889. with pd.option_context("io.sql.engine", "auto"):
  2890. assert isinstance(get_engine("auto"), SQLAlchemyEngine)
  2891. assert isinstance(get_engine("sqlalchemy"), SQLAlchemyEngine)
  2892. def test_get_engine_auto_error_message():
  2893. # Expect different error messages from get_engine(engine="auto")
  2894. # if engines aren't installed vs. are installed but bad version
  2895. pass
  2896. # TODO(GH#36893) fill this in when we add more engines
  2897. @pytest.mark.parametrize("conn", all_connectable)
  2898. @pytest.mark.parametrize("func", ["read_sql", "read_sql_query"])
  2899. def test_read_sql_dtype_backend(
  2900. conn,
  2901. request,
  2902. string_storage,
  2903. func,
  2904. dtype_backend,
  2905. dtype_backend_data,
  2906. dtype_backend_expected,
  2907. ):
  2908. # GH#50048
  2909. conn_name = conn
  2910. conn = request.getfixturevalue(conn)
  2911. table = "test"
  2912. df = dtype_backend_data
  2913. df.to_sql(name=table, con=conn, index=False, if_exists="replace")
  2914. with pd.option_context("mode.string_storage", string_storage):
  2915. result = getattr(pd, func)(
  2916. f"Select * from {table}", conn, dtype_backend=dtype_backend
  2917. )
  2918. expected = dtype_backend_expected(string_storage, dtype_backend, conn_name)
  2919. tm.assert_frame_equal(result, expected)
  2920. if "adbc" in conn_name:
  2921. # adbc does not support chunksize argument
  2922. request.applymarker(
  2923. pytest.mark.xfail(reason="adbc does not support chunksize argument")
  2924. )
  2925. with pd.option_context("mode.string_storage", string_storage):
  2926. iterator = getattr(pd, func)(
  2927. f"Select * from {table}",
  2928. con=conn,
  2929. dtype_backend=dtype_backend,
  2930. chunksize=3,
  2931. )
  2932. expected = dtype_backend_expected(string_storage, dtype_backend, conn_name)
  2933. for result in iterator:
  2934. tm.assert_frame_equal(result, expected)
  2935. @pytest.mark.parametrize("conn", all_connectable)
  2936. @pytest.mark.parametrize("func", ["read_sql", "read_sql_table"])
  2937. def test_read_sql_dtype_backend_table(
  2938. conn,
  2939. request,
  2940. string_storage,
  2941. func,
  2942. dtype_backend,
  2943. dtype_backend_data,
  2944. dtype_backend_expected,
  2945. ):
  2946. if "sqlite" in conn and "adbc" not in conn:
  2947. request.applymarker(
  2948. pytest.mark.xfail(
  2949. reason=(
  2950. "SQLite actually returns proper boolean values via "
  2951. "read_sql_table, but before pytest refactor was skipped"
  2952. )
  2953. )
  2954. )
  2955. # GH#50048
  2956. conn_name = conn
  2957. conn = request.getfixturevalue(conn)
  2958. table = "test"
  2959. df = dtype_backend_data
  2960. df.to_sql(name=table, con=conn, index=False, if_exists="replace")
  2961. with pd.option_context("mode.string_storage", string_storage):
  2962. result = getattr(pd, func)(table, conn, dtype_backend=dtype_backend)
  2963. expected = dtype_backend_expected(string_storage, dtype_backend, conn_name)
  2964. tm.assert_frame_equal(result, expected)
  2965. if "adbc" in conn_name:
  2966. # adbc does not support chunksize argument
  2967. return
  2968. with pd.option_context("mode.string_storage", string_storage):
  2969. iterator = getattr(pd, func)(
  2970. table,
  2971. conn,
  2972. dtype_backend=dtype_backend,
  2973. chunksize=3,
  2974. )
  2975. expected = dtype_backend_expected(string_storage, dtype_backend, conn_name)
  2976. for result in iterator:
  2977. tm.assert_frame_equal(result, expected)
  2978. @pytest.mark.parametrize("conn", all_connectable)
  2979. @pytest.mark.parametrize("func", ["read_sql", "read_sql_table", "read_sql_query"])
  2980. def test_read_sql_invalid_dtype_backend_table(conn, request, func, dtype_backend_data):
  2981. conn = request.getfixturevalue(conn)
  2982. table = "test"
  2983. df = dtype_backend_data
  2984. df.to_sql(name=table, con=conn, index=False, if_exists="replace")
  2985. msg = (
  2986. "dtype_backend numpy is invalid, only 'numpy_nullable' and "
  2987. "'pyarrow' are allowed."
  2988. )
  2989. with pytest.raises(ValueError, match=msg):
  2990. getattr(pd, func)(table, conn, dtype_backend="numpy")
  2991. @pytest.fixture
  2992. def dtype_backend_data() -> DataFrame:
  2993. return DataFrame(
  2994. {
  2995. "a": Series([1, np.nan, 3], dtype="Int64"),
  2996. "b": Series([1, 2, 3], dtype="Int64"),
  2997. "c": Series([1.5, np.nan, 2.5], dtype="Float64"),
  2998. "d": Series([1.5, 2.0, 2.5], dtype="Float64"),
  2999. "e": [True, False, None],
  3000. "f": [True, False, True],
  3001. "g": ["a", "b", "c"],
  3002. "h": ["a", "b", None],
  3003. }
  3004. )
  3005. @pytest.fixture
  3006. def dtype_backend_expected():
  3007. def func(string_storage, dtype_backend, conn_name) -> DataFrame:
  3008. string_dtype: pd.StringDtype | pd.ArrowDtype
  3009. if dtype_backend == "pyarrow":
  3010. pa = pytest.importorskip("pyarrow")
  3011. string_dtype = pd.ArrowDtype(pa.string())
  3012. else:
  3013. string_dtype = pd.StringDtype(string_storage)
  3014. df = DataFrame(
  3015. {
  3016. "a": Series([1, np.nan, 3], dtype="Int64"),
  3017. "b": Series([1, 2, 3], dtype="Int64"),
  3018. "c": Series([1.5, np.nan, 2.5], dtype="Float64"),
  3019. "d": Series([1.5, 2.0, 2.5], dtype="Float64"),
  3020. "e": Series([True, False, pd.NA], dtype="boolean"),
  3021. "f": Series([True, False, True], dtype="boolean"),
  3022. "g": Series(["a", "b", "c"], dtype=string_dtype),
  3023. "h": Series(["a", "b", None], dtype=string_dtype),
  3024. }
  3025. )
  3026. if dtype_backend == "pyarrow":
  3027. pa = pytest.importorskip("pyarrow")
  3028. from pandas.arrays import ArrowExtensionArray
  3029. df = DataFrame(
  3030. {
  3031. col: ArrowExtensionArray(pa.array(df[col], from_pandas=True))
  3032. for col in df.columns
  3033. }
  3034. )
  3035. if "mysql" in conn_name or "sqlite" in conn_name:
  3036. if dtype_backend == "numpy_nullable":
  3037. df = df.astype({"e": "Int64", "f": "Int64"})
  3038. else:
  3039. df = df.astype({"e": "int64[pyarrow]", "f": "int64[pyarrow]"})
  3040. return df
  3041. return func
  3042. @pytest.mark.parametrize("conn", all_connectable)
  3043. def test_chunksize_empty_dtypes(conn, request):
  3044. # GH#50245
  3045. if "adbc" in conn:
  3046. request.node.add_marker(
  3047. pytest.mark.xfail(reason="chunksize argument NotImplemented with ADBC")
  3048. )
  3049. conn = request.getfixturevalue(conn)
  3050. dtypes = {"a": "int64", "b": "object"}
  3051. df = DataFrame(columns=["a", "b"]).astype(dtypes)
  3052. expected = df.copy()
  3053. df.to_sql(name="test", con=conn, index=False, if_exists="replace")
  3054. for result in read_sql_query(
  3055. "SELECT * FROM test",
  3056. conn,
  3057. dtype=dtypes,
  3058. chunksize=1,
  3059. ):
  3060. tm.assert_frame_equal(result, expected)
  3061. @pytest.mark.parametrize("conn", all_connectable)
  3062. @pytest.mark.parametrize("dtype_backend", [lib.no_default, "numpy_nullable"])
  3063. @pytest.mark.parametrize("func", ["read_sql", "read_sql_query"])
  3064. def test_read_sql_dtype(conn, request, func, dtype_backend):
  3065. # GH#50797
  3066. conn = request.getfixturevalue(conn)
  3067. table = "test"
  3068. df = DataFrame({"a": [1, 2, 3], "b": 5})
  3069. df.to_sql(name=table, con=conn, index=False, if_exists="replace")
  3070. result = getattr(pd, func)(
  3071. f"Select * from {table}",
  3072. conn,
  3073. dtype={"a": np.float64},
  3074. dtype_backend=dtype_backend,
  3075. )
  3076. expected = DataFrame(
  3077. {
  3078. "a": Series([1, 2, 3], dtype=np.float64),
  3079. "b": Series(
  3080. [5, 5, 5],
  3081. dtype="int64" if not dtype_backend == "numpy_nullable" else "Int64",
  3082. ),
  3083. }
  3084. )
  3085. tm.assert_frame_equal(result, expected)
  3086. def test_keyword_deprecation(sqlite_engine):
  3087. conn = sqlite_engine
  3088. # GH 54397
  3089. msg = (
  3090. "Starting with pandas version 3.0 all arguments of to_sql except for the "
  3091. "arguments 'name' and 'con' will be keyword-only."
  3092. )
  3093. df = DataFrame([{"A": 1, "B": 2, "C": 3}, {"A": 1, "B": 2, "C": 3}])
  3094. df.to_sql("example", conn)
  3095. with tm.assert_produces_warning(FutureWarning, match=msg):
  3096. df.to_sql("example", conn, None, if_exists="replace")
  3097. def test_bigint_warning(sqlite_engine):
  3098. conn = sqlite_engine
  3099. # test no warning for BIGINT (to support int64) is raised (GH7433)
  3100. df = DataFrame({"a": [1, 2]}, dtype="int64")
  3101. assert df.to_sql(name="test_bigintwarning", con=conn, index=False) == 2
  3102. with tm.assert_produces_warning(None):
  3103. sql.read_sql_table("test_bigintwarning", conn)
  3104. def test_valueerror_exception(sqlite_engine):
  3105. conn = sqlite_engine
  3106. df = DataFrame({"col1": [1, 2], "col2": [3, 4]})
  3107. with pytest.raises(ValueError, match="Empty table name specified"):
  3108. df.to_sql(name="", con=conn, if_exists="replace", index=False)
  3109. def test_row_object_is_named_tuple(sqlite_engine):
  3110. conn = sqlite_engine
  3111. # GH 40682
  3112. # Test for the is_named_tuple() function
  3113. # Placed here due to its usage of sqlalchemy
  3114. from sqlalchemy import (
  3115. Column,
  3116. Integer,
  3117. String,
  3118. )
  3119. from sqlalchemy.orm import (
  3120. declarative_base,
  3121. sessionmaker,
  3122. )
  3123. BaseModel = declarative_base()
  3124. class Test(BaseModel):
  3125. __tablename__ = "test_frame"
  3126. id = Column(Integer, primary_key=True)
  3127. string_column = Column(String(50))
  3128. with conn.begin():
  3129. BaseModel.metadata.create_all(conn)
  3130. Session = sessionmaker(bind=conn)
  3131. with Session() as session:
  3132. df = DataFrame({"id": [0, 1], "string_column": ["hello", "world"]})
  3133. assert (
  3134. df.to_sql(name="test_frame", con=conn, index=False, if_exists="replace")
  3135. == 2
  3136. )
  3137. session.commit()
  3138. test_query = session.query(Test.id, Test.string_column)
  3139. df = DataFrame(test_query)
  3140. assert list(df.columns) == ["id", "string_column"]
  3141. def test_read_sql_string_inference(sqlite_engine):
  3142. conn = sqlite_engine
  3143. # GH#54430
  3144. table = "test"
  3145. df = DataFrame({"a": ["x", "y"]})
  3146. df.to_sql(table, con=conn, index=False, if_exists="replace")
  3147. with pd.option_context("future.infer_string", True):
  3148. result = read_sql_table(table, conn)
  3149. dtype = pd.StringDtype(na_value=np.nan)
  3150. expected = DataFrame(
  3151. {"a": ["x", "y"]}, dtype=dtype, columns=Index(["a"], dtype=dtype)
  3152. )
  3153. tm.assert_frame_equal(result, expected)
  3154. def test_roundtripping_datetimes(sqlite_engine):
  3155. conn = sqlite_engine
  3156. # GH#54877
  3157. df = DataFrame({"t": [datetime(2020, 12, 31, 12)]}, dtype="datetime64[ns]")
  3158. df.to_sql("test", conn, if_exists="replace", index=False)
  3159. result = pd.read_sql("select * from test", conn).iloc[0, 0]
  3160. assert result == "2020-12-31 12:00:00.000000"
  3161. @pytest.fixture
  3162. def sqlite_builtin_detect_types():
  3163. with contextlib.closing(
  3164. sqlite3.connect(":memory:", detect_types=sqlite3.PARSE_DECLTYPES)
  3165. ) as closing_conn:
  3166. with closing_conn as conn:
  3167. yield conn
  3168. def test_roundtripping_datetimes_detect_types(sqlite_builtin_detect_types):
  3169. # https://github.com/pandas-dev/pandas/issues/55554
  3170. conn = sqlite_builtin_detect_types
  3171. df = DataFrame({"t": [datetime(2020, 12, 31, 12)]}, dtype="datetime64[ns]")
  3172. df.to_sql("test", conn, if_exists="replace", index=False)
  3173. result = pd.read_sql("select * from test", conn).iloc[0, 0]
  3174. assert result == Timestamp("2020-12-31 12:00:00.000000")
  3175. @pytest.mark.db
  3176. def test_psycopg2_schema_support(postgresql_psycopg2_engine):
  3177. conn = postgresql_psycopg2_engine
  3178. # only test this for postgresql (schema's not supported in
  3179. # mysql/sqlite)
  3180. df = DataFrame({"col1": [1, 2], "col2": [0.1, 0.2], "col3": ["a", "n"]})
  3181. # create a schema
  3182. with conn.connect() as con:
  3183. with con.begin():
  3184. con.exec_driver_sql("DROP SCHEMA IF EXISTS other CASCADE;")
  3185. con.exec_driver_sql("CREATE SCHEMA other;")
  3186. # write dataframe to different schema's
  3187. assert df.to_sql(name="test_schema_public", con=conn, index=False) == 2
  3188. assert (
  3189. df.to_sql(
  3190. name="test_schema_public_explicit",
  3191. con=conn,
  3192. index=False,
  3193. schema="public",
  3194. )
  3195. == 2
  3196. )
  3197. assert (
  3198. df.to_sql(name="test_schema_other", con=conn, index=False, schema="other") == 2
  3199. )
  3200. # read dataframes back in
  3201. res1 = sql.read_sql_table("test_schema_public", conn)
  3202. tm.assert_frame_equal(df, res1)
  3203. res2 = sql.read_sql_table("test_schema_public_explicit", conn)
  3204. tm.assert_frame_equal(df, res2)
  3205. res3 = sql.read_sql_table("test_schema_public_explicit", conn, schema="public")
  3206. tm.assert_frame_equal(df, res3)
  3207. res4 = sql.read_sql_table("test_schema_other", conn, schema="other")
  3208. tm.assert_frame_equal(df, res4)
  3209. msg = "Table test_schema_other not found"
  3210. with pytest.raises(ValueError, match=msg):
  3211. sql.read_sql_table("test_schema_other", conn, schema="public")
  3212. # different if_exists options
  3213. # create a schema
  3214. with conn.connect() as con:
  3215. with con.begin():
  3216. con.exec_driver_sql("DROP SCHEMA IF EXISTS other CASCADE;")
  3217. con.exec_driver_sql("CREATE SCHEMA other;")
  3218. # write dataframe with different if_exists options
  3219. assert (
  3220. df.to_sql(name="test_schema_other", con=conn, schema="other", index=False) == 2
  3221. )
  3222. df.to_sql(
  3223. name="test_schema_other",
  3224. con=conn,
  3225. schema="other",
  3226. index=False,
  3227. if_exists="replace",
  3228. )
  3229. assert (
  3230. df.to_sql(
  3231. name="test_schema_other",
  3232. con=conn,
  3233. schema="other",
  3234. index=False,
  3235. if_exists="append",
  3236. )
  3237. == 2
  3238. )
  3239. res = sql.read_sql_table("test_schema_other", conn, schema="other")
  3240. tm.assert_frame_equal(concat([df, df], ignore_index=True), res)
  3241. @pytest.mark.db
  3242. def test_self_join_date_columns(postgresql_psycopg2_engine):
  3243. # GH 44421
  3244. conn = postgresql_psycopg2_engine
  3245. from sqlalchemy.sql import text
  3246. create_table = text(
  3247. """
  3248. CREATE TABLE person
  3249. (
  3250. id serial constraint person_pkey primary key,
  3251. created_dt timestamp with time zone
  3252. );
  3253. INSERT INTO person
  3254. VALUES (1, '2021-01-01T00:00:00Z');
  3255. """
  3256. )
  3257. with conn.connect() as con:
  3258. with con.begin():
  3259. con.execute(create_table)
  3260. sql_query = (
  3261. 'SELECT * FROM "person" AS p1 INNER JOIN "person" AS p2 ON p1.id = p2.id;'
  3262. )
  3263. result = pd.read_sql(sql_query, conn)
  3264. expected = DataFrame(
  3265. [[1, Timestamp("2021", tz="UTC")] * 2], columns=["id", "created_dt"] * 2
  3266. )
  3267. tm.assert_frame_equal(result, expected)
  3268. # Cleanup
  3269. with sql.SQLDatabase(conn, need_transaction=True) as pandasSQL:
  3270. pandasSQL.drop_table("person")
  3271. def test_create_and_drop_table(sqlite_engine):
  3272. conn = sqlite_engine
  3273. temp_frame = DataFrame({"one": [1.0, 2.0, 3.0, 4.0], "two": [4.0, 3.0, 2.0, 1.0]})
  3274. with sql.SQLDatabase(conn) as pandasSQL:
  3275. with pandasSQL.run_transaction():
  3276. assert pandasSQL.to_sql(temp_frame, "drop_test_frame") == 4
  3277. assert pandasSQL.has_table("drop_test_frame")
  3278. with pandasSQL.run_transaction():
  3279. pandasSQL.drop_table("drop_test_frame")
  3280. assert not pandasSQL.has_table("drop_test_frame")
  3281. def test_sqlite_datetime_date(sqlite_buildin):
  3282. conn = sqlite_buildin
  3283. df = DataFrame([date(2014, 1, 1), date(2014, 1, 2)], columns=["a"])
  3284. assert df.to_sql(name="test_date", con=conn, index=False) == 2
  3285. res = read_sql_query("SELECT * FROM test_date", conn)
  3286. # comes back as strings
  3287. tm.assert_frame_equal(res, df.astype(str))
  3288. @pytest.mark.parametrize("tz_aware", [False, True])
  3289. def test_sqlite_datetime_time(tz_aware, sqlite_buildin):
  3290. conn = sqlite_buildin
  3291. # test support for datetime.time, GH #8341
  3292. if not tz_aware:
  3293. tz_times = [time(9, 0, 0), time(9, 1, 30)]
  3294. else:
  3295. tz_dt = date_range("2013-01-01 09:00:00", periods=2, tz="US/Pacific")
  3296. tz_times = Series(tz_dt.to_pydatetime()).map(lambda dt: dt.timetz())
  3297. df = DataFrame(tz_times, columns=["a"])
  3298. assert df.to_sql(name="test_time", con=conn, index=False) == 2
  3299. res = read_sql_query("SELECT * FROM test_time", conn)
  3300. # comes back as strings
  3301. expected = df.map(lambda _: _.strftime("%H:%M:%S.%f"))
  3302. tm.assert_frame_equal(res, expected)
  3303. def get_sqlite_column_type(conn, table, column):
  3304. recs = conn.execute(f"PRAGMA table_info({table})")
  3305. for cid, name, ctype, not_null, default, pk in recs:
  3306. if name == column:
  3307. return ctype
  3308. raise ValueError(f"Table {table}, column {column} not found")
  3309. def test_sqlite_test_dtype(sqlite_buildin):
  3310. conn = sqlite_buildin
  3311. cols = ["A", "B"]
  3312. data = [(0.8, True), (0.9, None)]
  3313. df = DataFrame(data, columns=cols)
  3314. assert df.to_sql(name="dtype_test", con=conn) == 2
  3315. assert df.to_sql(name="dtype_test2", con=conn, dtype={"B": "STRING"}) == 2
  3316. # sqlite stores Boolean values as INTEGER
  3317. assert get_sqlite_column_type(conn, "dtype_test", "B") == "INTEGER"
  3318. assert get_sqlite_column_type(conn, "dtype_test2", "B") == "STRING"
  3319. msg = r"B \(<class 'bool'>\) not a string"
  3320. with pytest.raises(ValueError, match=msg):
  3321. df.to_sql(name="error", con=conn, dtype={"B": bool})
  3322. # single dtype
  3323. assert df.to_sql(name="single_dtype_test", con=conn, dtype="STRING") == 2
  3324. assert get_sqlite_column_type(conn, "single_dtype_test", "A") == "STRING"
  3325. assert get_sqlite_column_type(conn, "single_dtype_test", "B") == "STRING"
  3326. def test_sqlite_notna_dtype(sqlite_buildin):
  3327. conn = sqlite_buildin
  3328. cols = {
  3329. "Bool": Series([True, None]),
  3330. "Date": Series([datetime(2012, 5, 1), None]),
  3331. "Int": Series([1, None], dtype="object"),
  3332. "Float": Series([1.1, None]),
  3333. }
  3334. df = DataFrame(cols)
  3335. tbl = "notna_dtype_test"
  3336. assert df.to_sql(name=tbl, con=conn) == 2
  3337. assert get_sqlite_column_type(conn, tbl, "Bool") == "INTEGER"
  3338. assert get_sqlite_column_type(conn, tbl, "Date") == "TIMESTAMP"
  3339. assert get_sqlite_column_type(conn, tbl, "Int") == "INTEGER"
  3340. assert get_sqlite_column_type(conn, tbl, "Float") == "REAL"
  3341. def test_sqlite_illegal_names(sqlite_buildin):
  3342. # For sqlite, these should work fine
  3343. conn = sqlite_buildin
  3344. df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"])
  3345. msg = "Empty table or column name specified"
  3346. with pytest.raises(ValueError, match=msg):
  3347. df.to_sql(name="", con=conn)
  3348. for ndx, weird_name in enumerate(
  3349. [
  3350. "test_weird_name]",
  3351. "test_weird_name[",
  3352. "test_weird_name`",
  3353. 'test_weird_name"',
  3354. "test_weird_name'",
  3355. "_b.test_weird_name_01-30",
  3356. '"_b.test_weird_name_01-30"',
  3357. "99beginswithnumber",
  3358. "12345",
  3359. "\xe9",
  3360. ]
  3361. ):
  3362. assert df.to_sql(name=weird_name, con=conn) == 2
  3363. sql.table_exists(weird_name, conn)
  3364. df2 = DataFrame([[1, 2], [3, 4]], columns=["a", weird_name])
  3365. c_tbl = f"test_weird_col_name{ndx:d}"
  3366. assert df2.to_sql(name=c_tbl, con=conn) == 2
  3367. sql.table_exists(c_tbl, conn)
  3368. def format_query(sql, *args):
  3369. _formatters = {
  3370. datetime: "'{}'".format,
  3371. str: "'{}'".format,
  3372. np.str_: "'{}'".format,
  3373. bytes: "'{}'".format,
  3374. float: "{:.8f}".format,
  3375. int: "{:d}".format,
  3376. type(None): lambda x: "NULL",
  3377. np.float64: "{:.10f}".format,
  3378. bool: "'{!s}'".format,
  3379. }
  3380. processed_args = []
  3381. for arg in args:
  3382. if isinstance(arg, float) and isna(arg):
  3383. arg = None
  3384. formatter = _formatters[type(arg)]
  3385. processed_args.append(formatter(arg))
  3386. return sql % tuple(processed_args)
  3387. def tquery(query, con=None):
  3388. """Replace removed sql.tquery function"""
  3389. with sql.pandasSQL_builder(con) as pandas_sql:
  3390. res = pandas_sql.execute(query).fetchall()
  3391. return None if res is None else list(res)
  3392. def test_xsqlite_basic(sqlite_buildin):
  3393. frame = DataFrame(
  3394. np.random.default_rng(2).standard_normal((10, 4)),
  3395. columns=Index(list("ABCD")),
  3396. index=date_range("2000-01-01", periods=10, freq="B"),
  3397. )
  3398. assert sql.to_sql(frame, name="test_table", con=sqlite_buildin, index=False) == 10
  3399. result = sql.read_sql("select * from test_table", sqlite_buildin)
  3400. # HACK! Change this once indexes are handled properly.
  3401. result.index = frame.index
  3402. expected = frame
  3403. tm.assert_frame_equal(result, frame)
  3404. frame["txt"] = ["a"] * len(frame)
  3405. frame2 = frame.copy()
  3406. new_idx = Index(np.arange(len(frame2)), dtype=np.int64) + 10
  3407. frame2["Idx"] = new_idx.copy()
  3408. assert sql.to_sql(frame2, name="test_table2", con=sqlite_buildin, index=False) == 10
  3409. result = sql.read_sql("select * from test_table2", sqlite_buildin, index_col="Idx")
  3410. expected = frame.copy()
  3411. expected.index = new_idx
  3412. expected.index.name = "Idx"
  3413. tm.assert_frame_equal(expected, result)
  3414. def test_xsqlite_write_row_by_row(sqlite_buildin):
  3415. frame = DataFrame(
  3416. np.random.default_rng(2).standard_normal((10, 4)),
  3417. columns=Index(list("ABCD")),
  3418. index=date_range("2000-01-01", periods=10, freq="B"),
  3419. )
  3420. frame.iloc[0, 0] = np.nan
  3421. create_sql = sql.get_schema(frame, "test")
  3422. cur = sqlite_buildin.cursor()
  3423. cur.execute(create_sql)
  3424. ins = "INSERT INTO test VALUES (%s, %s, %s, %s)"
  3425. for _, row in frame.iterrows():
  3426. fmt_sql = format_query(ins, *row)
  3427. tquery(fmt_sql, con=sqlite_buildin)
  3428. sqlite_buildin.commit()
  3429. result = sql.read_sql("select * from test", con=sqlite_buildin)
  3430. result.index = frame.index
  3431. tm.assert_frame_equal(result, frame, rtol=1e-3)
  3432. def test_xsqlite_execute(sqlite_buildin):
  3433. frame = DataFrame(
  3434. np.random.default_rng(2).standard_normal((10, 4)),
  3435. columns=Index(list("ABCD")),
  3436. index=date_range("2000-01-01", periods=10, freq="B"),
  3437. )
  3438. create_sql = sql.get_schema(frame, "test")
  3439. cur = sqlite_buildin.cursor()
  3440. cur.execute(create_sql)
  3441. ins = "INSERT INTO test VALUES (?, ?, ?, ?)"
  3442. row = frame.iloc[0]
  3443. with sql.pandasSQL_builder(sqlite_buildin) as pandas_sql:
  3444. pandas_sql.execute(ins, tuple(row))
  3445. sqlite_buildin.commit()
  3446. result = sql.read_sql("select * from test", sqlite_buildin)
  3447. result.index = frame.index[:1]
  3448. tm.assert_frame_equal(result, frame[:1])
  3449. def test_xsqlite_schema(sqlite_buildin):
  3450. frame = DataFrame(
  3451. np.random.default_rng(2).standard_normal((10, 4)),
  3452. columns=Index(list("ABCD")),
  3453. index=date_range("2000-01-01", periods=10, freq="B"),
  3454. )
  3455. create_sql = sql.get_schema(frame, "test")
  3456. lines = create_sql.splitlines()
  3457. for line in lines:
  3458. tokens = line.split(" ")
  3459. if len(tokens) == 2 and tokens[0] == "A":
  3460. assert tokens[1] == "DATETIME"
  3461. create_sql = sql.get_schema(frame, "test", keys=["A", "B"])
  3462. lines = create_sql.splitlines()
  3463. assert 'PRIMARY KEY ("A", "B")' in create_sql
  3464. cur = sqlite_buildin.cursor()
  3465. cur.execute(create_sql)
  3466. def test_xsqlite_execute_fail(sqlite_buildin):
  3467. create_sql = """
  3468. CREATE TABLE test
  3469. (
  3470. a TEXT,
  3471. b TEXT,
  3472. c REAL,
  3473. PRIMARY KEY (a, b)
  3474. );
  3475. """
  3476. cur = sqlite_buildin.cursor()
  3477. cur.execute(create_sql)
  3478. with sql.pandasSQL_builder(sqlite_buildin) as pandas_sql:
  3479. pandas_sql.execute("INSERT INTO test VALUES('foo', 'bar', 1.234)")
  3480. pandas_sql.execute("INSERT INTO test VALUES('foo', 'baz', 2.567)")
  3481. with pytest.raises(sql.DatabaseError, match="Execution failed on sql"):
  3482. pandas_sql.execute("INSERT INTO test VALUES('foo', 'bar', 7)")
  3483. def test_xsqlite_execute_closed_connection():
  3484. create_sql = """
  3485. CREATE TABLE test
  3486. (
  3487. a TEXT,
  3488. b TEXT,
  3489. c REAL,
  3490. PRIMARY KEY (a, b)
  3491. );
  3492. """
  3493. with contextlib.closing(sqlite3.connect(":memory:")) as conn:
  3494. cur = conn.cursor()
  3495. cur.execute(create_sql)
  3496. with sql.pandasSQL_builder(conn) as pandas_sql:
  3497. pandas_sql.execute("INSERT INTO test VALUES('foo', 'bar', 1.234)")
  3498. msg = "Cannot operate on a closed database."
  3499. with pytest.raises(sqlite3.ProgrammingError, match=msg):
  3500. tquery("select * from test", con=conn)
  3501. def test_xsqlite_keyword_as_column_names(sqlite_buildin):
  3502. df = DataFrame({"From": np.ones(5)})
  3503. assert sql.to_sql(df, con=sqlite_buildin, name="testkeywords", index=False) == 5
  3504. def test_xsqlite_onecolumn_of_integer(sqlite_buildin):
  3505. # GH 3628
  3506. # a column_of_integers dataframe should transfer well to sql
  3507. mono_df = DataFrame([1, 2], columns=["c0"])
  3508. assert sql.to_sql(mono_df, con=sqlite_buildin, name="mono_df", index=False) == 2
  3509. # computing the sum via sql
  3510. con_x = sqlite_buildin
  3511. the_sum = sum(my_c0[0] for my_c0 in con_x.execute("select * from mono_df"))
  3512. # it should not fail, and gives 3 ( Issue #3628 )
  3513. assert the_sum == 3
  3514. result = sql.read_sql("select * from mono_df", con_x)
  3515. tm.assert_frame_equal(result, mono_df)
  3516. def test_xsqlite_if_exists(sqlite_buildin):
  3517. df_if_exists_1 = DataFrame({"col1": [1, 2], "col2": ["A", "B"]})
  3518. df_if_exists_2 = DataFrame({"col1": [3, 4, 5], "col2": ["C", "D", "E"]})
  3519. table_name = "table_if_exists"
  3520. sql_select = f"SELECT * FROM {table_name}"
  3521. msg = "'notvalidvalue' is not valid for if_exists"
  3522. with pytest.raises(ValueError, match=msg):
  3523. sql.to_sql(
  3524. frame=df_if_exists_1,
  3525. con=sqlite_buildin,
  3526. name=table_name,
  3527. if_exists="notvalidvalue",
  3528. )
  3529. drop_table(table_name, sqlite_buildin)
  3530. # test if_exists='fail'
  3531. sql.to_sql(
  3532. frame=df_if_exists_1, con=sqlite_buildin, name=table_name, if_exists="fail"
  3533. )
  3534. msg = "Table 'table_if_exists' already exists"
  3535. with pytest.raises(ValueError, match=msg):
  3536. sql.to_sql(
  3537. frame=df_if_exists_1,
  3538. con=sqlite_buildin,
  3539. name=table_name,
  3540. if_exists="fail",
  3541. )
  3542. # test if_exists='replace'
  3543. sql.to_sql(
  3544. frame=df_if_exists_1,
  3545. con=sqlite_buildin,
  3546. name=table_name,
  3547. if_exists="replace",
  3548. index=False,
  3549. )
  3550. assert tquery(sql_select, con=sqlite_buildin) == [(1, "A"), (2, "B")]
  3551. assert (
  3552. sql.to_sql(
  3553. frame=df_if_exists_2,
  3554. con=sqlite_buildin,
  3555. name=table_name,
  3556. if_exists="replace",
  3557. index=False,
  3558. )
  3559. == 3
  3560. )
  3561. assert tquery(sql_select, con=sqlite_buildin) == [(3, "C"), (4, "D"), (5, "E")]
  3562. drop_table(table_name, sqlite_buildin)
  3563. # test if_exists='append'
  3564. assert (
  3565. sql.to_sql(
  3566. frame=df_if_exists_1,
  3567. con=sqlite_buildin,
  3568. name=table_name,
  3569. if_exists="fail",
  3570. index=False,
  3571. )
  3572. == 2
  3573. )
  3574. assert tquery(sql_select, con=sqlite_buildin) == [(1, "A"), (2, "B")]
  3575. assert (
  3576. sql.to_sql(
  3577. frame=df_if_exists_2,
  3578. con=sqlite_buildin,
  3579. name=table_name,
  3580. if_exists="append",
  3581. index=False,
  3582. )
  3583. == 3
  3584. )
  3585. assert tquery(sql_select, con=sqlite_buildin) == [
  3586. (1, "A"),
  3587. (2, "B"),
  3588. (3, "C"),
  3589. (4, "D"),
  3590. (5, "E"),
  3591. ]
  3592. drop_table(table_name, sqlite_buildin)