_continuous_distns.py 400 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
77778777977807781778277837784778577867787778877897790779177927793779477957796779777987799780078017802780378047805780678077808780978107811781278137814781578167817781878197820782178227823782478257826782778287829783078317832783378347835783678377838783978407841784278437844784578467847784878497850785178527853785478557856785778587859786078617862786378647865786678677868786978707871787278737874787578767877787878797880788178827883788478857886788778887889789078917892789378947895789678977898789979007901790279037904790579067907790879097910791179127913791479157916791779187919792079217922792379247925792679277928792979307931793279337934793579367937793879397940794179427943794479457946794779487949795079517952795379547955795679577958795979607961796279637964796579667967796879697970797179727973797479757976797779787979798079817982798379847985798679877988798979907991799279937994799579967997799879998000800180028003800480058006800780088009801080118012801380148015801680178018801980208021802280238024802580268027802880298030803180328033803480358036803780388039804080418042804380448045804680478048804980508051805280538054805580568057805880598060806180628063806480658066806780688069807080718072807380748075807680778078807980808081808280838084808580868087808880898090809180928093809480958096809780988099810081018102810381048105810681078108810981108111811281138114811581168117811881198120812181228123812481258126812781288129813081318132813381348135813681378138813981408141814281438144814581468147814881498150815181528153815481558156815781588159816081618162816381648165816681678168816981708171817281738174817581768177817881798180818181828183818481858186818781888189819081918192819381948195819681978198819982008201820282038204820582068207820882098210821182128213821482158216821782188219822082218222822382248225822682278228822982308231823282338234823582368237823882398240824182428243824482458246824782488249825082518252825382548255825682578258825982608261826282638264826582668267826882698270827182728273827482758276827
78278827982808281828282838284828582868287828882898290829182928293829482958296829782988299830083018302830383048305830683078308830983108311831283138314831583168317831883198320832183228323832483258326832783288329833083318332833383348335833683378338833983408341834283438344834583468347834883498350835183528353835483558356835783588359836083618362836383648365836683678368836983708371837283738374837583768377837883798380838183828383838483858386838783888389839083918392839383948395839683978398839984008401840284038404840584068407840884098410841184128413841484158416841784188419842084218422842384248425842684278428842984308431843284338434843584368437843884398440844184428443844484458446844784488449845084518452845384548455845684578458845984608461846284638464846584668467846884698470847184728473847484758476847784788479848084818482848384848485848684878488848984908491849284938494849584968497849884998500850185028503850485058506850785088509851085118512851385148515851685178518851985208521852285238524852585268527852885298530853185328533853485358536853785388539854085418542854385448545854685478548854985508551855285538554855585568557855885598560856185628563856485658566856785688569857085718572857385748575857685778578857985808581858285838584858585868587858885898590859185928593859485958596859785988599860086018602860386048605860686078608860986108611861286138614861586168617861886198620862186228623862486258626862786288629863086318632863386348635863686378638863986408641864286438644864586468647864886498650865186528653865486558656865786588659866086618662866386648665866686678668866986708671867286738674867586768677867886798680868186828683868486858686868786888689869086918692869386948695869686978698869987008701870287038704870587068707870887098710871187128713871487158716871787188719872087218722872387248725872687278728872987308731873287338734873587368737873887398740874187428743874487458746874787488749875087518752875387548755875687578758875987608761876287638764876587668767876887698770877187728773877487758776877
78778877987808781878287838784878587868787878887898790879187928793879487958796879787988799880088018802880388048805880688078808880988108811881288138814881588168817881888198820882188228823882488258826882788288829883088318832883388348835883688378838883988408841884288438844884588468847884888498850885188528853885488558856885788588859886088618862886388648865886688678868886988708871887288738874887588768877887888798880888188828883888488858886888788888889889088918892889388948895889688978898889989008901890289038904890589068907890889098910891189128913891489158916891789188919892089218922892389248925892689278928892989308931893289338934893589368937893889398940894189428943894489458946894789488949895089518952895389548955895689578958895989608961896289638964896589668967896889698970897189728973897489758976897789788979898089818982898389848985898689878988898989908991899289938994899589968997899889999000900190029003900490059006900790089009901090119012901390149015901690179018901990209021902290239024902590269027902890299030903190329033903490359036903790389039904090419042904390449045904690479048904990509051905290539054905590569057905890599060906190629063906490659066906790689069907090719072907390749075907690779078907990809081908290839084908590869087908890899090909190929093909490959096909790989099910091019102910391049105910691079108910991109111911291139114911591169117911891199120912191229123912491259126912791289129913091319132913391349135913691379138913991409141914291439144914591469147914891499150915191529153915491559156915791589159916091619162916391649165916691679168916991709171917291739174917591769177917891799180918191829183918491859186918791889189919091919192919391949195919691979198919992009201920292039204920592069207920892099210921192129213921492159216921792189219922092219222922392249225922692279228922992309231923292339234923592369237923892399240924192429243924492459246924792489249925092519252925392549255925692579258925992609261926292639264926592669267926892699270927192729273927492759276927
79278927992809281928292839284928592869287928892899290929192929293929492959296929792989299930093019302930393049305930693079308930993109311931293139314931593169317931893199320932193229323932493259326932793289329933093319332933393349335933693379338933993409341934293439344934593469347934893499350935193529353935493559356935793589359936093619362936393649365936693679368936993709371937293739374937593769377937893799380938193829383938493859386938793889389939093919392939393949395939693979398939994009401940294039404940594069407940894099410941194129413941494159416941794189419942094219422942394249425942694279428942994309431943294339434943594369437943894399440944194429443944494459446944794489449945094519452945394549455945694579458945994609461946294639464946594669467946894699470947194729473947494759476947794789479948094819482948394849485948694879488948994909491949294939494949594969497949894999500950195029503950495059506950795089509951095119512951395149515951695179518951995209521952295239524952595269527952895299530953195329533953495359536953795389539954095419542954395449545954695479548954995509551955295539554955595569557955895599560956195629563956495659566956795689569957095719572957395749575957695779578957995809581958295839584958595869587958895899590959195929593959495959596959795989599960096019602960396049605960696079608960996109611961296139614961596169617961896199620962196229623962496259626962796289629963096319632963396349635963696379638963996409641964296439644964596469647964896499650965196529653965496559656965796589659966096619662966396649665966696679668966996709671967296739674967596769677967896799680968196829683968496859686968796889689969096919692969396949695969696979698969997009701970297039704970597069707970897099710971197129713971497159716971797189719972097219722972397249725972697279728972997309731973297339734973597369737973897399740974197429743974497459746974797489749975097519752975397549755975697579758975997609761976297639764976597669767976897699770977197729773977497759776977
79778977997809781978297839784978597869787978897899790979197929793979497959796979797989799980098019802980398049805980698079808980998109811981298139814981598169817981898199820982198229823982498259826982798289829983098319832983398349835983698379838983998409841984298439844984598469847984898499850985198529853985498559856985798589859986098619862986398649865986698679868986998709871987298739874987598769877987898799880988198829883988498859886988798889889989098919892989398949895989698979898989999009901990299039904990599069907990899099910991199129913991499159916991799189919992099219922992399249925992699279928992999309931993299339934993599369937993899399940994199429943994499459946994799489949995099519952995399549955995699579958995999609961996299639964996599669967996899699970997199729973997499759976997799789979998099819982998399849985998699879988998999909991999299939994999599969997999899991000010001100021000310004100051000610007100081000910010100111001210013100141001510016100171001810019100201002110022100231002410025100261002710028100291003010031100321003310034100351003610037100381003910040100411004210043100441004510046100471004810049100501005110052100531005410055100561005710058100591006010061100621006310064100651006610067100681006910070100711007210073100741007510076100771007810079100801008110082100831008410085100861008710088100891009010091100921009310094100951009610097100981009910100101011010210103101041010510106101071010810109101101011110112101131011410115101161011710118101191012010121101221012310124101251012610127101281012910130101311013210133101341013510136101371013810139101401014110142101431014410145101461014710148101491015010151101521015310154101551015610157101581015910160101611016210163101641016510166101671016810169101701017110172101731017410175101761017710178101791018010181101821018310184101851018610187101881018910190101911019210193101941019510196101971019810199102001020110202102031020410205102061020710208102091021010211102121021310214102151021610217102181021910220102211
02221022310224102251022610227102281022910230102311023210233102341023510236102371023810239102401024110242102431024410245102461024710248102491025010251102521025310254102551025610257102581025910260102611026210263102641026510266102671026810269102701027110272102731027410275102761027710278102791028010281102821028310284102851028610287102881028910290102911029210293102941029510296102971029810299103001030110302103031030410305103061030710308103091031010311103121031310314103151031610317103181031910320103211032210323103241032510326103271032810329103301033110332103331033410335103361033710338103391034010341103421034310344103451034610347103481034910350103511035210353103541035510356103571035810359103601036110362103631036410365103661036710368103691037010371103721037310374103751037610377103781037910380103811038210383103841038510386103871038810389103901039110392103931039410395103961039710398103991040010401104021040310404104051040610407104081040910410104111041210413104141041510416104171041810419104201042110422104231042410425104261042710428104291043010431104321043310434104351043610437104381043910440104411044210443104441044510446104471044810449104501045110452104531045410455104561045710458104591046010461104621046310464104651046610467104681046910470104711047210473104741047510476104771047810479104801048110482104831048410485104861048710488104891049010491104921049310494104951049610497104981049910500105011050210503105041050510506105071050810509105101051110512105131051410515105161051710518105191052010521105221052310524105251052610527105281052910530105311053210533105341053510536105371053810539105401054110542105431054410545105461054710548105491055010551105521055310554105551055610557105581055910560105611056210563105641056510566105671056810569105701057110572105731057410575105761057710578105791058010581105821058310584105851058610587105881058910590105911059210593105941059510596105971059810599106001060110602106031060410605106061060710608106091061010611106121061310614106151061610617106181061910620106211
06221062310624106251062610627106281062910630106311063210633106341063510636106371063810639106401064110642106431064410645106461064710648106491065010651106521065310654106551065610657106581065910660106611066210663106641066510666106671066810669106701067110672106731067410675106761067710678106791068010681106821068310684106851068610687106881068910690106911069210693106941069510696106971069810699107001070110702107031070410705107061070710708107091071010711107121071310714107151071610717107181071910720107211072210723107241072510726107271072810729107301073110732107331073410735107361073710738107391074010741107421074310744107451074610747107481074910750107511075210753107541075510756107571075810759107601076110762107631076410765107661076710768107691077010771107721077310774107751077610777107781077910780107811078210783107841078510786107871078810789107901079110792107931079410795107961079710798107991080010801108021080310804108051080610807108081080910810108111081210813108141081510816108171081810819108201082110822108231082410825108261082710828108291083010831108321083310834108351083610837108381083910840108411084210843108441084510846108471084810849108501085110852108531085410855108561085710858108591086010861108621086310864108651086610867108681086910870108711087210873108741087510876108771087810879108801088110882108831088410885108861088710888108891089010891108921089310894108951089610897108981089910900109011090210903109041090510906109071090810909109101091110912109131091410915109161091710918109191092010921109221092310924109251092610927109281092910930109311093210933109341093510936109371093810939109401094110942109431094410945109461094710948109491095010951109521095310954109551095610957109581095910960109611096210963109641096510966109671096810969109701097110972109731097410975109761097710978109791098010981109821098310984109851098610987109881098910990109911099210993109941099510996109971099810999110001100111002110031100411005110061100711008110091101011011110121101311014110151101611017110181101911020110211
10221102311024110251102611027110281102911030110311103211033110341103511036110371103811039110401104111042110431104411045110461104711048110491105011051110521105311054110551105611057110581105911060110611106211063110641106511066110671106811069110701107111072110731107411075110761107711078110791108011081110821108311084110851108611087110881108911090110911109211093110941109511096110971109811099111001110111102111031110411105111061110711108111091111011111111121111311114111151111611117111181111911120111211112211123111241112511126111271112811129111301113111132111331113411135111361113711138111391114011141111421114311144111451114611147111481114911150111511115211153111541115511156111571115811159111601116111162111631116411165111661116711168111691117011171111721117311174111751117611177111781117911180111811118211183111841118511186111871118811189111901119111192111931119411195111961119711198111991120011201112021120311204112051120611207112081120911210112111121211213112141121511216112171121811219112201122111222112231122411225112261122711228112291123011231112321123311234112351123611237112381123911240112411124211243112441124511246112471124811249112501125111252112531125411255112561125711258112591126011261112621126311264112651126611267112681126911270112711127211273112741127511276112771127811279112801128111282112831128411285112861128711288112891129011291112921129311294112951129611297112981129911300113011130211303113041130511306113071130811309113101131111312113131131411315113161131711318113191132011321113221132311324113251132611327113281132911330113311133211333113341133511336113371133811339113401134111342113431134411345113461134711348113491135011351113521135311354113551135611357113581135911360113611136211363113641136511366113671136811369113701137111372113731137411375113761137711378113791138011381113821138311384113851138611387113881138911390113911139211393113941139511396113971139811399114001140111402114031140411405114061140711408114091141011411114121141311414114151141611417114181141911420114211
14221142311424114251142611427114281142911430114311143211433114341143511436114371143811439114401144111442114431144411445114461144711448114491145011451114521145311454114551145611457114581145911460114611146211463114641146511466114671146811469114701147111472114731147411475114761147711478114791148011481114821148311484114851148611487114881148911490114911149211493114941149511496114971149811499115001150111502115031150411505115061150711508115091151011511115121151311514115151151611517115181151911520115211152211523115241152511526115271152811529115301153111532115331153411535115361153711538115391154011541115421154311544115451154611547115481154911550115511155211553115541155511556115571155811559115601156111562115631156411565115661156711568115691157011571115721157311574115751157611577115781157911580115811158211583115841158511586115871158811589115901159111592115931159411595115961159711598115991160011601116021160311604116051160611607116081160911610116111161211613116141161511616116171161811619116201162111622116231162411625116261162711628116291163011631116321163311634116351163611637116381163911640116411164211643116441164511646116471164811649116501165111652116531165411655116561165711658116591166011661116621166311664116651166611667116681166911670116711167211673116741167511676116771167811679116801168111682116831168411685116861168711688116891169011691116921169311694116951169611697116981169911700117011170211703117041170511706117071170811709117101171111712117131171411715117161171711718117191172011721117221172311724117251172611727117281172911730117311173211733117341173511736117371173811739117401174111742117431174411745117461174711748117491175011751117521175311754117551175611757117581175911760117611176211763117641176511766117671176811769117701177111772117731177411775117761177711778117791178011781117821178311784117851178611787117881178911790117911179211793117941179511796117971179811799118001180111802118031180411805118061180711808118091181011811118121181311814118151181611817118181181911820118211
18221182311824118251182611827118281182911830118311183211833118341183511836118371183811839118401184111842118431184411845118461184711848118491185011851118521185311854118551185611857118581185911860118611186211863118641186511866118671186811869118701187111872118731187411875118761187711878118791188011881118821188311884118851188611887118881188911890118911189211893118941189511896118971189811899119001190111902119031190411905119061190711908119091191011911119121191311914119151191611917119181191911920119211192211923119241192511926119271192811929119301193111932119331193411935119361193711938119391194011941119421194311944119451194611947119481194911950119511195211953119541195511956119571195811959119601196111962119631196411965119661196711968119691197011971119721197311974119751197611977119781197911980119811198211983119841198511986119871198811989119901199111992119931199411995119961199711998119991200012001120021200312004120051200612007120081200912010120111201212013120141201512016120171201812019120201202112022120231202412025120261202712028120291203012031120321203312034120351203612037120381203912040120411204212043120441204512046120471204812049120501205112052120531205412055120561205712058120591206012061120621206312064120651206612067120681206912070120711207212073120741207512076120771207812079120801208112082120831208412085120861208712088120891209012091120921209312094120951209612097120981209912100121011210212103121041210512106121071210812109121101211112112121131211412115121161211712118121191212012121121221212312124121251212612127121281212912130121311213212133121341213512136121371213812139121401214112142121431214412145121461214712148121491215012151121521215312154121551215612157121581215912160121611216212163121641216512166121671216812169121701217112172121731217412175121761217712178121791218012181121821218312184121851218612187121881218912190121911219212193121941219512196121971219812199122001220112202122031220412205122061220712208122091221012211122121221312214122151221612217122181221912220122211
22221222312224122251222612227122281222912230122311223212233122341223512236122371223812239122401224112242122431224412245122461224712248122491225012251122521225312254122551225612257122581225912260122611226212263122641226512266122671226812269122701227112272122731227412275122761227712278122791228012281122821228312284122851228612287122881228912290122911229212293122941229512296122971229812299123001230112302123031230412305123061230712308123091231012311123121231312314123151231612317123181231912320123211232212323123241232512326123271232812329123301233112332123331233412335123361233712338123391234012341123421234312344123451234612347123481234912350123511235212353123541235512356123571235812359123601236112362123631236412365123661236712368123691237012371123721237312374123751237612377123781237912380123811238212383123841238512386123871238812389123901239112392123931239412395123961239712398123991240012401124021240312404124051240612407124081240912410124111241212413124141241512416124171241812419124201242112422124231242412425124261242712428124291243012431124321243312434124351243612437124381243912440124411244212443124441244512446124471244812449124501245112452124531245412455124561245712458124591246012461124621246312464124651246612467124681246912470124711247212473124741247512476124771247812479124801248112482124831248412485124861248712488124891249012491124921249312494124951249612497124981249912500125011250212503125041250512506125071250812509125101251112512125131251412515125161251712518125191252012521125221252312524125251252612527125281252912530125311253212533125341253512536125371253812539125401254112542
  1. #
  2. # Author: Travis Oliphant 2002-2011 with contributions from
  3. # SciPy Developers 2004-2011
  4. #
  5. import warnings
  6. from collections.abc import Iterable
  7. from functools import wraps, cached_property
  8. import ctypes
  9. import operator
  10. import numpy as np
  11. from numpy.polynomial import Polynomial
  12. from scipy.interpolate import BSpline
  13. from scipy._lib.doccer import (extend_notes_in_docstring,
  14. replace_notes_in_docstring,
  15. inherit_docstring_from)
  16. from scipy._lib._ccallback import LowLevelCallable
  17. from scipy import optimize
  18. from scipy import integrate
  19. import scipy.special as sc
  20. import scipy.special._ufuncs as scu
  21. from scipy._lib._util import _lazyselect
  22. import scipy._lib.array_api_extra as xpx
  23. from scipy._lib._array_api import xp_promote
  24. from . import _stats
  25. from ._tukeylambda_stats import (tukeylambda_variance as _tlvar,
  26. tukeylambda_kurtosis as _tlkurt)
  27. from ._distn_infrastructure import (_vectorize_rvs_over_shapes,
  28. get_distribution_names, _kurtosis, _isintegral,
  29. rv_continuous, _skew, _get_fixed_fit_value, _check_shape, _ShapeInfo)
  30. from ._ksstats import kolmogn, kolmognp, kolmogni
  31. from ._constants import (_XMIN, _LOGXMIN, _EULER, _ZETA3, _SQRT_PI,
  32. _SQRT_2_OVER_PI, _LOG_PI, _LOG_SQRT_2_OVER_PI)
  33. from ._censored_data import CensoredData
  34. from scipy.optimize import root_scalar
  35. from scipy.stats._warnings_errors import FitError
  36. import scipy.stats as stats
  37. def _remove_optimizer_parameters(kwds):
  38. """
  39. Remove the optimizer-related keyword arguments 'loc', 'scale' and
  40. 'optimizer' from `kwds`. Then check that `kwds` is empty, and
  41. raise `TypeError("Unknown arguments: %s." % kwds)` if it is not.
  42. This function is used in the fit method of distributions that override
  43. the default method and do not use the default optimization code.
  44. `kwds` is modified in-place.
  45. """
  46. kwds.pop('loc', None)
  47. kwds.pop('scale', None)
  48. kwds.pop('optimizer', None)
  49. kwds.pop('method', None)
  50. if kwds:
  51. raise TypeError(f"Unknown arguments: {kwds}.")
  52. def _call_super_mom(fun):
  53. # If fit method is overridden only for MLE and doesn't specify what to do
  54. # if method == 'mm' or with censored data, this decorator calls the generic
  55. # implementation.
  56. @wraps(fun)
  57. def wrapper(self, data, *args, **kwds):
  58. method = kwds.get('method', 'mle').lower()
  59. censored = isinstance(data, CensoredData)
  60. if method == 'mm' or (censored and data.num_censored() > 0):
  61. return super(type(self), self).fit(data, *args, **kwds)
  62. else:
  63. if censored:
  64. # data is an instance of CensoredData, but actually holds
  65. # no censored values, so replace it with the array of
  66. # uncensored values.
  67. data = data._uncensored
  68. return fun(self, data, *args, **kwds)
  69. return wrapper
  70. def _get_left_bracket(fun, rbrack, lbrack=None):
  71. # find left bracket for `root_scalar`. A guess for lbrack may be provided.
  72. lbrack = lbrack or rbrack - 1
  73. diff = rbrack - lbrack
  74. # if there is no sign change in `fun` between the brackets, expand
  75. # rbrack - lbrack until a sign change occurs
  76. def interval_contains_root(lbrack, rbrack):
  77. # return true if the signs disagree.
  78. return np.sign(fun(lbrack)) != np.sign(fun(rbrack))
  79. while not interval_contains_root(lbrack, rbrack):
  80. diff *= 2
  81. lbrack = rbrack - diff
  82. msg = ("The solver could not find a bracket containing a "
  83. "root to an MLE first order condition.")
  84. if np.isinf(lbrack):
  85. raise FitSolverError(msg)
  86. return lbrack
  87. class ksone_gen(rv_continuous):
  88. r"""Kolmogorov-Smirnov one-sided test statistic distribution.
  89. This is the distribution of the one-sided Kolmogorov-Smirnov (KS)
  90. statistics :math:`D_n^+` and :math:`D_n^-`
  91. for a finite sample size ``n >= 1`` (the shape parameter).
  92. %(before_notes)s
  93. See Also
  94. --------
  95. kstwobign, kstwo, kstest
  96. Notes
  97. -----
  98. :math:`D_n^+` and :math:`D_n^-` are given by
  99. .. math::
  100. D_n^+ &= \text{sup}_x (F_n(x) - F(x)),\\
  101. D_n^- &= \text{sup}_x (F(x) - F_n(x)),\\
  102. where :math:`F` is a continuous CDF and :math:`F_n` is an empirical CDF.
  103. `ksone` describes the distribution under the null hypothesis of the KS test
  104. that the empirical CDF corresponds to :math:`n` i.i.d. random variates
  105. with CDF :math:`F`.
  106. %(after_notes)s
  107. References
  108. ----------
  109. .. [1] Birnbaum, Z. W. and Tingey, F.H. "One-sided confidence contours
  110. for probability distribution functions", The Annals of Mathematical
  111. Statistics, 22(4), pp 592-596 (1951).
  112. Examples
  113. --------
  114. >>> import numpy as np
  115. >>> from scipy.stats import ksone
  116. >>> import matplotlib.pyplot as plt
  117. >>> fig, ax = plt.subplots(1, 1)
  118. Display the probability density function (``pdf``):
  119. >>> n = 1e+03
  120. >>> x = np.linspace(ksone.ppf(0.01, n),
  121. ... ksone.ppf(0.99, n), 100)
  122. >>> ax.plot(x, ksone.pdf(x, n),
  123. ... 'r-', lw=5, alpha=0.6, label='ksone pdf')
  124. Alternatively, the distribution object can be called (as a function)
  125. to fix the shape, location and scale parameters. This returns a "frozen"
  126. RV object holding the given parameters fixed.
  127. Freeze the distribution and display the frozen ``pdf``:
  128. >>> rv = ksone(n)
  129. >>> ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')
  130. >>> ax.legend(loc='best', frameon=False)
  131. >>> plt.show()
  132. Check accuracy of ``cdf`` and ``ppf``:
  133. >>> vals = ksone.ppf([0.001, 0.5, 0.999], n)
  134. >>> np.allclose([0.001, 0.5, 0.999], ksone.cdf(vals, n))
  135. True
  136. """
  137. def _argcheck(self, n):
  138. return (n >= 1) & (n == np.round(n))
  139. def _shape_info(self):
  140. return [_ShapeInfo("n", True, (1, np.inf), (True, False))]
  141. def _pdf(self, x, n):
  142. return -scu._smirnovp(n, x)
  143. def _cdf(self, x, n):
  144. return scu._smirnovc(n, x)
  145. def _sf(self, x, n):
  146. return sc.smirnov(n, x)
  147. def _ppf(self, q, n):
  148. return scu._smirnovci(n, q)
  149. def _isf(self, q, n):
  150. return sc.smirnovi(n, q)
  151. ksone = ksone_gen(a=0.0, b=1.0, name='ksone')
  152. class kstwo_gen(rv_continuous):
  153. r"""Kolmogorov-Smirnov two-sided test statistic distribution.
  154. This is the distribution of the two-sided Kolmogorov-Smirnov (KS)
  155. statistic :math:`D_n` for a finite sample size ``n >= 1``
  156. (the shape parameter).
  157. %(before_notes)s
  158. See Also
  159. --------
  160. kstwobign, ksone, kstest
  161. Notes
  162. -----
  163. :math:`D_n` is given by
  164. .. math::
  165. D_n = \text{sup}_x |F_n(x) - F(x)|
  166. where :math:`F` is a (continuous) CDF and :math:`F_n` is an empirical CDF.
  167. `kstwo` describes the distribution under the null hypothesis of the KS test
  168. that the empirical CDF corresponds to :math:`n` i.i.d. random variates
  169. with CDF :math:`F`.
  170. %(after_notes)s
  171. References
  172. ----------
  173. .. [1] Simard, R., L'Ecuyer, P. "Computing the Two-Sided
  174. Kolmogorov-Smirnov Distribution", Journal of Statistical Software,
  175. Vol 39, 11, 1-18 (2011).
  176. Examples
  177. --------
  178. >>> import numpy as np
  179. >>> from scipy.stats import kstwo
  180. >>> import matplotlib.pyplot as plt
  181. >>> fig, ax = plt.subplots(1, 1)
  182. Display the probability density function (``pdf``):
  183. >>> n = 10
  184. >>> x = np.linspace(kstwo.ppf(0.01, n),
  185. ... kstwo.ppf(0.99, n), 100)
  186. >>> ax.plot(x, kstwo.pdf(x, n),
  187. ... 'r-', lw=5, alpha=0.6, label='kstwo pdf')
  188. Alternatively, the distribution object can be called (as a function)
  189. to fix the shape, location and scale parameters. This returns a "frozen"
  190. RV object holding the given parameters fixed.
  191. Freeze the distribution and display the frozen ``pdf``:
  192. >>> rv = kstwo(n)
  193. >>> ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')
  194. >>> ax.legend(loc='best', frameon=False)
  195. >>> plt.show()
  196. Check accuracy of ``cdf`` and ``ppf``:
  197. >>> vals = kstwo.ppf([0.001, 0.5, 0.999], n)
  198. >>> np.allclose([0.001, 0.5, 0.999], kstwo.cdf(vals, n))
  199. True
  200. """
  201. def _argcheck(self, n):
  202. return (n >= 1) & (n == np.round(n))
  203. def _shape_info(self):
  204. return [_ShapeInfo("n", True, (1, np.inf), (True, False))]
  205. def _get_support(self, n):
  206. return (0.5/(n if not isinstance(n, Iterable) else np.asanyarray(n)),
  207. 1.0)
  208. def _pdf(self, x, n):
  209. return kolmognp(n, x)
  210. def _cdf(self, x, n):
  211. return kolmogn(n, x)
  212. def _sf(self, x, n):
  213. return kolmogn(n, x, cdf=False)
  214. def _ppf(self, q, n):
  215. return kolmogni(n, q, cdf=True)
  216. def _isf(self, q, n):
  217. return kolmogni(n, q, cdf=False)
  218. # Use the pdf, (not the ppf) to compute moments
  219. kstwo = kstwo_gen(momtype=0, a=0.0, b=1.0, name='kstwo')
  220. class kstwobign_gen(rv_continuous):
  221. r"""Limiting distribution of scaled Kolmogorov-Smirnov two-sided test statistic.
  222. This is the asymptotic distribution of the two-sided Kolmogorov-Smirnov
  223. statistic :math:`\sqrt{n} D_n` that measures the maximum absolute
  224. distance of the theoretical (continuous) CDF from the empirical CDF.
  225. (see `kstest`).
  226. %(before_notes)s
  227. See Also
  228. --------
  229. ksone, kstwo, kstest
  230. Notes
  231. -----
  232. :math:`\sqrt{n} D_n` is given by
  233. .. math::
  234. D_n = \text{sup}_x |F_n(x) - F(x)|
  235. where :math:`F` is a continuous CDF and :math:`F_n` is an empirical CDF.
  236. `kstwobign` describes the asymptotic distribution (i.e. the limit of
  237. :math:`\sqrt{n} D_n`) under the null hypothesis of the KS test that the
  238. empirical CDF corresponds to i.i.d. random variates with CDF :math:`F`.
  239. %(after_notes)s
  240. References
  241. ----------
  242. .. [1] Feller, W. "On the Kolmogorov-Smirnov Limit Theorems for Empirical
  243. Distributions", Ann. Math. Statist. Vol 19, 177-189 (1948).
  244. %(example)s
  245. """
  246. def _shape_info(self):
  247. return []
  248. def _pdf(self, x):
  249. return -scu._kolmogp(x)
  250. def _cdf(self, x):
  251. return scu._kolmogc(x)
  252. def _sf(self, x):
  253. return sc.kolmogorov(x)
  254. def _ppf(self, q):
  255. return scu._kolmogci(q)
  256. def _isf(self, q):
  257. return sc.kolmogi(q)
  258. kstwobign = kstwobign_gen(a=0.0, name='kstwobign')
  259. ## Normal distribution
  260. # loc = mu, scale = std
  261. # Keep these implementations out of the class definition so they can be reused
  262. # by other distributions.
  263. _norm_pdf_C = np.sqrt(2*np.pi)
  264. _norm_pdf_logC = np.log(_norm_pdf_C)
  265. def _norm_pdf(x):
  266. return np.exp(-x**2/2.0) / _norm_pdf_C
  267. def _norm_logpdf(x):
  268. return -x**2 / 2.0 - _norm_pdf_logC
  269. def _norm_cdf(x):
  270. return sc.ndtr(x)
  271. def _norm_logcdf(x):
  272. return sc.log_ndtr(x)
  273. def _norm_ppf(q):
  274. return sc.ndtri(q)
  275. def _norm_sf(x):
  276. return _norm_cdf(-x)
  277. def _norm_logsf(x):
  278. return _norm_logcdf(-x)
  279. def _norm_isf(q):
  280. return -_norm_ppf(q)
  281. class norm_gen(rv_continuous):
  282. r"""A normal continuous random variable.
  283. The location (``loc``) keyword specifies the mean.
  284. The scale (``scale``) keyword specifies the standard deviation.
  285. %(before_notes)s
  286. Notes
  287. -----
  288. The probability density function for `norm` is:
  289. .. math::
  290. f(x) = \frac{\exp(-x^2/2)}{\sqrt{2\pi}}
  291. for a real number :math:`x`.
  292. %(after_notes)s
  293. %(example)s
  294. """
  295. def _shape_info(self):
  296. return []
  297. def _rvs(self, size=None, random_state=None):
  298. return random_state.standard_normal(size)
  299. def _pdf(self, x):
  300. # norm.pdf(x) = exp(-x**2/2)/sqrt(2*pi)
  301. return _norm_pdf(x)
  302. def _logpdf(self, x):
  303. return _norm_logpdf(x)
  304. def _cdf(self, x):
  305. return _norm_cdf(x)
  306. def _logcdf(self, x):
  307. return _norm_logcdf(x)
  308. def _sf(self, x):
  309. return _norm_sf(x)
  310. def _logsf(self, x):
  311. return _norm_logsf(x)
  312. def _ppf(self, q):
  313. return _norm_ppf(q)
  314. def _isf(self, q):
  315. return _norm_isf(q)
  316. def _stats(self):
  317. return 0.0, 1.0, 0.0, 0.0
  318. def _entropy(self):
  319. return 0.5*(np.log(2*np.pi)+1)
  320. @_call_super_mom
  321. @replace_notes_in_docstring(rv_continuous, notes="""\
  322. For the normal distribution, method of moments and maximum likelihood
  323. estimation give identical fits, and explicit formulas for the estimates
  324. are available.
  325. This function uses these explicit formulas for the maximum likelihood
  326. estimation of the normal distribution parameters, so the
  327. `optimizer` and `method` arguments are ignored.\n\n""")
  328. def fit(self, data, **kwds):
  329. floc = kwds.pop('floc', None)
  330. fscale = kwds.pop('fscale', None)
  331. _remove_optimizer_parameters(kwds)
  332. if floc is not None and fscale is not None:
  333. # This check is for consistency with `rv_continuous.fit`.
  334. # Without this check, this function would just return the
  335. # parameters that were given.
  336. raise ValueError("All parameters fixed. There is nothing to "
  337. "optimize.")
  338. data = np.asarray(data)
  339. if not np.isfinite(data).all():
  340. raise ValueError("The data contains non-finite values.")
  341. if floc is None:
  342. loc = data.mean()
  343. else:
  344. loc = floc
  345. if fscale is None:
  346. scale = np.sqrt(((data - loc)**2).mean())
  347. else:
  348. scale = fscale
  349. return loc, scale
  350. def _munp(self, n):
  351. """
  352. @returns Moments of standard normal distribution for integer n >= 0
  353. See eq. 16 of https://arxiv.org/abs/1209.4340v2
  354. """
  355. if n == 0:
  356. return 1.
  357. if n % 2 == 0:
  358. return sc.factorial2(int(n) - 1)
  359. else:
  360. return 0.
  361. norm = norm_gen(name='norm')
  362. class alpha_gen(rv_continuous):
  363. r"""An alpha continuous random variable.
  364. %(before_notes)s
  365. Notes
  366. -----
  367. The probability density function for `alpha` ([1]_, [2]_) is:
  368. .. math::
  369. f(x, a) = \frac{1}{x^2 \Phi(a) \sqrt{2\pi}} *
  370. \exp(-\frac{1}{2} (a-1/x)^2)
  371. where :math:`\Phi` is the normal CDF, :math:`x > 0`, and :math:`a > 0`.
  372. `alpha` takes ``a`` as a shape parameter.
  373. %(after_notes)s
  374. References
  375. ----------
  376. .. [1] Johnson, Kotz, and Balakrishnan, "Continuous Univariate
  377. Distributions, Volume 1", Second Edition, John Wiley and Sons,
  378. p. 173 (1994).
  379. .. [2] Anthony A. Salvia, "Reliability applications of the Alpha
  380. Distribution", IEEE Transactions on Reliability, Vol. R-34,
  381. No. 3, pp. 251-252 (1985).
  382. %(example)s
  383. """
  384. _support_mask = rv_continuous._open_support_mask
  385. def _shape_info(self):
  386. return [_ShapeInfo("a", False, (0, np.inf), (False, False))]
  387. def _pdf(self, x, a):
  388. # alpha.pdf(x, a) = 1/(x**2*Phi(a)*sqrt(2*pi)) * exp(-1/2 * (a-1/x)**2)
  389. return 1.0/(x**2)/_norm_cdf(a)*_norm_pdf(a-1.0/x)
  390. def _logpdf(self, x, a):
  391. return -2*np.log(x) + _norm_logpdf(a-1.0/x) - np.log(_norm_cdf(a))
  392. def _cdf(self, x, a):
  393. return _norm_cdf(a-1.0/x) / _norm_cdf(a)
  394. def _ppf(self, q, a):
  395. return 1.0/np.asarray(a - _norm_ppf(q*_norm_cdf(a)))
  396. def _stats(self, a):
  397. return [np.inf]*2 + [np.nan]*2
  398. alpha = alpha_gen(a=0.0, name='alpha')
  399. class anglit_gen(rv_continuous):
  400. r"""An anglit continuous random variable.
  401. %(before_notes)s
  402. Notes
  403. -----
  404. The probability density function for `anglit` is:
  405. .. math::
  406. f(x) = \sin(2x + \pi/2) = \cos(2x)
  407. for :math:`-\pi/4 \le x \le \pi/4`.
  408. %(after_notes)s
  409. %(example)s
  410. """
  411. def _shape_info(self):
  412. return []
  413. def _pdf(self, x):
  414. # anglit.pdf(x) = sin(2*x + \pi/2) = cos(2*x)
  415. return np.cos(2*x)
  416. def _cdf(self, x):
  417. return np.sin(x+np.pi/4)**2.0
  418. def _sf(self, x):
  419. return np.cos(x + np.pi / 4) ** 2.0
  420. def _ppf(self, q):
  421. return np.arcsin(np.sqrt(q))-np.pi/4
  422. def _stats(self):
  423. return 0.0, np.pi*np.pi/16-0.5, 0.0, -2*(np.pi**4 - 96)/(np.pi*np.pi-8)**2
  424. def _entropy(self):
  425. return 1-np.log(2)
  426. anglit = anglit_gen(a=-np.pi/4, b=np.pi/4, name='anglit')
  427. class arcsine_gen(rv_continuous):
  428. r"""An arcsine continuous random variable.
  429. %(before_notes)s
  430. Notes
  431. -----
  432. The probability density function for `arcsine` is:
  433. .. math::
  434. f(x) = \frac{1}{\pi \sqrt{x (1-x)}}
  435. for :math:`0 < x < 1`.
  436. %(after_notes)s
  437. %(example)s
  438. """
  439. def _shape_info(self):
  440. return []
  441. def _pdf(self, x):
  442. # arcsine.pdf(x) = 1/(pi*sqrt(x*(1-x)))
  443. with np.errstate(divide='ignore'):
  444. return 1.0/np.pi/np.sqrt(x*(1-x))
  445. def _cdf(self, x):
  446. return 2.0/np.pi*np.arcsin(np.sqrt(x))
  447. def _ppf(self, q):
  448. return np.sin(np.pi/2.0*q)**2.0
  449. def _stats(self):
  450. mu = 0.5
  451. mu2 = 1.0/8
  452. g1 = 0
  453. g2 = -3.0/2.0
  454. return mu, mu2, g1, g2
  455. def _entropy(self):
  456. return -0.24156447527049044468
  457. arcsine = arcsine_gen(a=0.0, b=1.0, name='arcsine')
  458. class FitDataError(ValueError):
  459. """Raised when input data is inconsistent with fixed parameters."""
  460. # This exception is raised by, for example, beta_gen.fit when both floc
  461. # and fscale are fixed and there are values in the data not in the open
  462. # interval (floc, floc+fscale).
  463. def __init__(self, distr, lower, upper):
  464. self.args = (
  465. "Invalid values in `data`. Maximum likelihood "
  466. f"estimation with {distr!r} requires that {lower!r} < "
  467. f"(x - loc)/scale < {upper!r} for each x in `data`.",
  468. )
  469. class FitSolverError(FitError):
  470. """
  471. Raised when a solver fails to converge while fitting a distribution.
  472. """
  473. # This exception is raised by, for example, beta_gen.fit when
  474. # optimize.fsolve returns with ier != 1.
  475. def __init__(self, mesg):
  476. emsg = "Solver for the MLE equations failed to converge: "
  477. emsg += mesg.replace('\n', '')
  478. self.args = (emsg,)
  479. def _beta_mle_a(a, b, n, s1):
  480. # The zeros of this function give the MLE for `a`, with
  481. # `b`, `n` and `s1` given. `s1` is the sum of the logs of
  482. # the data. `n` is the number of data points.
  483. psiab = sc.psi(a + b)
  484. func = s1 - n * (-psiab + sc.psi(a))
  485. return func
  486. def _beta_mle_ab(theta, n, s1, s2):
  487. # Zeros of this function are critical points of
  488. # the maximum likelihood function. Solving this system
  489. # for theta (which contains a and b) gives the MLE for a and b
  490. # given `n`, `s1` and `s2`. `s1` is the sum of the logs of the data,
  491. # and `s2` is the sum of the logs of 1 - data. `n` is the number
  492. # of data points.
  493. a, b = theta
  494. psiab = sc.psi(a + b)
  495. func = [s1 - n * (-psiab + sc.psi(a)),
  496. s2 - n * (-psiab + sc.psi(b))]
  497. return func
  498. class beta_gen(rv_continuous):
  499. r"""A beta continuous random variable.
  500. %(before_notes)s
  501. Notes
  502. -----
  503. The probability density function for `beta` is:
  504. .. math::
  505. f(x, a, b) = \frac{\Gamma(a+b) x^{a-1} (1-x)^{b-1}}
  506. {\Gamma(a) \Gamma(b)}
  507. for :math:`0 <= x <= 1`, :math:`a > 0`, :math:`b > 0`, where
  508. :math:`\Gamma` is the gamma function (`scipy.special.gamma`).
  509. `beta` takes :math:`a` and :math:`b` as shape parameters.
  510. This distribution uses routines from the Boost Math C++ library for
  511. the computation of the ``pdf``, ``cdf``, ``ppf``, ``sf`` and ``isf``
  512. methods. [1]_
  513. Maximum likelihood estimates of parameters are only available when the location and
  514. scale are fixed. When either of these parameters is free, ``beta.fit`` resorts to
  515. numerical optimization, but this problem is unbounded: the location and scale may be
  516. chosen to make the minimum and maximum elements of the data coincide with the
  517. endpoints of the support, and the shape parameters may be chosen to make the PDF at
  518. these points infinite. For best results, pass ``floc`` and ``fscale`` keyword
  519. arguments to fix the location and scale, or use `scipy.stats.fit` with
  520. ``method='mse'``.
  521. %(after_notes)s
  522. References
  523. ----------
  524. .. [1] The Boost Developers. "Boost C++ Libraries". https://www.boost.org/.
  525. %(example)s
  526. """
  527. def _shape_info(self):
  528. ia = _ShapeInfo("a", False, (0, np.inf), (False, False))
  529. ib = _ShapeInfo("b", False, (0, np.inf), (False, False))
  530. return [ia, ib]
  531. def _rvs(self, a, b, size=None, random_state=None):
  532. return random_state.beta(a, b, size)
  533. def _pdf(self, x, a, b):
  534. # gamma(a+b) * x**(a-1) * (1-x)**(b-1)
  535. # beta.pdf(x, a, b) = ------------------------------------
  536. # gamma(a)*gamma(b)
  537. with np.errstate(over='ignore'):
  538. return scu._beta_pdf(x, a, b)
  539. def _logpdf(self, x, a, b):
  540. lPx = sc.xlog1py(b - 1.0, -x) + sc.xlogy(a - 1.0, x)
  541. lPx -= sc.betaln(a, b)
  542. return lPx
  543. def _cdf(self, x, a, b):
  544. return sc.betainc(a, b, x)
  545. def _sf(self, x, a, b):
  546. return sc.betaincc(a, b, x)
  547. def _isf(self, x, a, b):
  548. return sc.betainccinv(a, b, x)
  549. def _ppf(self, q, a, b):
  550. return scu._beta_ppf(q, a, b)
  551. def _stats(self, a, b):
  552. a_plus_b = a + b
  553. _beta_mean = a/a_plus_b
  554. _beta_variance = a*b / (a_plus_b**2 * (a_plus_b + 1))
  555. _beta_skewness = ((2 * (b - a) * np.sqrt(a_plus_b + 1)) /
  556. ((a_plus_b + 2) * np.sqrt(a * b)))
  557. _beta_kurtosis_excess_n = 6 * ((a - b)**2 * (a_plus_b + 1) -
  558. a * b * (a_plus_b + 2))
  559. _beta_kurtosis_excess_d = a * b * (a_plus_b + 2) * (a_plus_b + 3)
  560. _beta_kurtosis_excess = _beta_kurtosis_excess_n / _beta_kurtosis_excess_d
  561. return (
  562. _beta_mean,
  563. _beta_variance,
  564. _beta_skewness,
  565. _beta_kurtosis_excess)
  566. def _fitstart(self, data):
  567. if isinstance(data, CensoredData):
  568. data = data._uncensor()
  569. g1 = _skew(data)
  570. g2 = _kurtosis(data)
  571. def func(x):
  572. a, b = x
  573. sk = 2*(b-a)*np.sqrt(a + b + 1) / (a + b + 2) / np.sqrt(a*b)
  574. ku = a**3 - a**2*(2*b-1) + b**2*(b+1) - 2*a*b*(b+2)
  575. ku /= a*b*(a+b+2)*(a+b+3)
  576. ku *= 6
  577. return [sk-g1, ku-g2]
  578. a, b = optimize.fsolve(func, (1.0, 1.0))
  579. return super()._fitstart(data, args=(a, b))
  580. @_call_super_mom
  581. @extend_notes_in_docstring(rv_continuous, notes="""\
  582. In the special case where `method="MLE"` and
  583. both `floc` and `fscale` are given, a
  584. `ValueError` is raised if any value `x` in `data` does not satisfy
  585. `floc < x < floc + fscale`.\n\n""")
  586. def fit(self, data, *args, **kwds):
  587. # Override rv_continuous.fit, so we can more efficiently handle the
  588. # case where floc and fscale are given.
  589. floc = kwds.get('floc', None)
  590. fscale = kwds.get('fscale', None)
  591. if floc is None or fscale is None:
  592. # do general fit
  593. return super().fit(data, *args, **kwds)
  594. # We already got these from kwds, so just pop them.
  595. kwds.pop('floc', None)
  596. kwds.pop('fscale', None)
  597. f0 = _get_fixed_fit_value(kwds, ['f0', 'fa', 'fix_a'])
  598. f1 = _get_fixed_fit_value(kwds, ['f1', 'fb', 'fix_b'])
  599. _remove_optimizer_parameters(kwds)
  600. if f0 is not None and f1 is not None:
  601. # This check is for consistency with `rv_continuous.fit`.
  602. raise ValueError("All parameters fixed. There is nothing to "
  603. "optimize.")
  604. # Special case: loc and scale are constrained, so we are fitting
  605. # just the shape parameters. This can be done much more efficiently
  606. # than the method used in `rv_continuous.fit`. (See the subsection
  607. # "Two unknown parameters" in the section "Maximum likelihood" of
  608. # the Wikipedia article on the Beta distribution for the formulas.)
  609. if not np.isfinite(data).all():
  610. raise ValueError("The data contains non-finite values.")
  611. # Normalize the data to the interval [0, 1].
  612. data = (np.ravel(data) - floc) / fscale
  613. if np.any(data <= 0) or np.any(data >= 1):
  614. raise FitDataError("beta", lower=floc, upper=floc + fscale)
  615. xbar = data.mean()
  616. if f0 is not None or f1 is not None:
  617. # One of the shape parameters is fixed.
  618. if f0 is not None:
  619. # The shape parameter a is fixed, so swap the parameters
  620. # and flip the data. We always solve for `a`. The result
  621. # will be swapped back before returning.
  622. b = f0
  623. data = 1 - data
  624. xbar = 1 - xbar
  625. else:
  626. b = f1
  627. # Initial guess for a. Use the formula for the mean of the beta
  628. # distribution, E[x] = a / (a + b), to generate a reasonable
  629. # starting point based on the mean of the data and the given
  630. # value of b.
  631. a = b * xbar / (1 - xbar)
  632. # Compute the MLE for `a` by solving _beta_mle_a.
  633. theta, info, ier, mesg = optimize.fsolve(
  634. _beta_mle_a, a,
  635. args=(b, len(data), np.log(data).sum()),
  636. full_output=True
  637. )
  638. if ier != 1:
  639. raise FitSolverError(mesg=mesg)
  640. a = theta[0]
  641. if f0 is not None:
  642. # The shape parameter a was fixed, so swap back the
  643. # parameters.
  644. a, b = b, a
  645. else:
  646. # Neither of the shape parameters is fixed.
  647. # s1 and s2 are used in the extra arguments passed to _beta_mle_ab
  648. # by optimize.fsolve.
  649. s1 = np.log(data).sum()
  650. s2 = sc.log1p(-data).sum()
  651. # Use the "method of moments" to estimate the initial
  652. # guess for a and b.
  653. fac = xbar * (1 - xbar) / data.var(ddof=0) - 1
  654. a = xbar * fac
  655. b = (1 - xbar) * fac
  656. # Compute the MLE for a and b by solving _beta_mle_ab.
  657. theta, info, ier, mesg = optimize.fsolve(
  658. _beta_mle_ab, [a, b],
  659. args=(len(data), s1, s2),
  660. full_output=True
  661. )
  662. if ier != 1:
  663. raise FitSolverError(mesg=mesg)
  664. a, b = theta
  665. return a, b, floc, fscale
  666. def _entropy(self, a, b):
  667. def regular(a, b):
  668. return (sc.betaln(a, b) - (a - 1) * sc.psi(a) -
  669. (b - 1) * sc.psi(b) + (a + b - 2) * sc.psi(a + b))
  670. def asymptotic_ab_large(a, b):
  671. sum_ab = a + b
  672. log_term = 0.5 * (
  673. np.log(2*np.pi) + np.log(a) + np.log(b) - 3*np.log(sum_ab) + 1
  674. )
  675. t1 = 110/sum_ab + 20*sum_ab**-2.0 + sum_ab**-3.0 - 2*sum_ab**-4.0
  676. t2 = -50/a - 10*a**-2.0 - a**-3.0 + a**-4.0
  677. t3 = -50/b - 10*b**-2.0 - b**-3.0 + b**-4.0
  678. return log_term + (t1 + t2 + t3) / 120
  679. def asymptotic_b_large(a, b):
  680. sum_ab = a + b
  681. t1 = sc.gammaln(a) - (a - 1) * sc.psi(a)
  682. t2 = (
  683. - 1/(2*b) + 1/(12*b) - b**-2.0/12 - b**-3.0/120 + b**-4.0/120
  684. + b**-5.0/252 - b**-6.0/252 + 1/sum_ab - 1/(12*sum_ab)
  685. + sum_ab**-2.0/6 + sum_ab**-3.0/120 - sum_ab**-4.0/60
  686. - sum_ab**-5.0/252 + sum_ab**-6.0/126
  687. )
  688. log_term = sum_ab*np.log1p(a/b) + np.log(b) - 2*np.log(sum_ab)
  689. return t1 + t2 + log_term
  690. def asymptotic_a_large(a, b):
  691. return asymptotic_b_large(b, a)
  692. def threshold_large(v):
  693. j = np.floor(np.log10(v))
  694. d = np.floor(v / 10 ** j) + 2
  695. return xpx.apply_where(v != 1.0, (d, j), lambda d_, j_: d_ * 10**(7 + j_),
  696. fill_value=1000)
  697. threshold_a = threshold_large(a)
  698. threshold_b = threshold_large(b)
  699. return _lazyselect([(a >= 4.96e6) & (b >= 4.96e6),
  700. (a <= 4.9e6) & (b - a >= 1e6) & (b >= threshold_a),
  701. (b <= 4.9e6) & (a - b >= 1e6) & (a >= threshold_b),
  702. (a < 4.9e6) & (b < 4.9e6)
  703. ],
  704. [asymptotic_ab_large, asymptotic_b_large,
  705. asymptotic_a_large, regular],
  706. [a, b]
  707. )
  708. beta = beta_gen(a=0.0, b=1.0, name='beta')
  709. class betaprime_gen(rv_continuous):
  710. r"""A beta prime continuous random variable.
  711. %(before_notes)s
  712. Notes
  713. -----
  714. The probability density function for `betaprime` is:
  715. .. math::
  716. f(x, a, b) = \frac{x^{a-1} (1+x)^{-a-b}}{\beta(a, b)}
  717. for :math:`x >= 0`, :math:`a > 0`, :math:`b > 0`, where
  718. :math:`\beta(a, b)` is the beta function (see `scipy.special.beta`).
  719. `betaprime` takes ``a`` and ``b`` as shape parameters.
  720. The distribution is related to the `beta` distribution as follows:
  721. If :math:`X` follows a beta distribution with parameters :math:`a, b`,
  722. then :math:`Y = X/(1-X)` has a beta prime distribution with
  723. parameters :math:`a, b` ([1]_).
  724. The beta prime distribution is a reparametrized version of the
  725. F distribution. The beta prime distribution with shape parameters
  726. ``a`` and ``b`` and ``scale = s`` is equivalent to the F distribution
  727. with parameters ``d1 = 2*a``, ``d2 = 2*b`` and ``scale = (a/b)*s``.
  728. For example,
  729. >>> from scipy.stats import betaprime, f
  730. >>> x = [1, 2, 5, 10]
  731. >>> a = 12
  732. >>> b = 5
  733. >>> betaprime.pdf(x, a, b, scale=2)
  734. array([0.00541179, 0.08331299, 0.14669185, 0.03150079])
  735. >>> f.pdf(x, 2*a, 2*b, scale=(a/b)*2)
  736. array([0.00541179, 0.08331299, 0.14669185, 0.03150079])
  737. %(after_notes)s
  738. References
  739. ----------
  740. .. [1] Beta prime distribution, Wikipedia,
  741. https://en.wikipedia.org/wiki/Beta_prime_distribution
  742. %(example)s
  743. """
  744. _support_mask = rv_continuous._open_support_mask
  745. def _shape_info(self):
  746. ia = _ShapeInfo("a", False, (0, np.inf), (False, False))
  747. ib = _ShapeInfo("b", False, (0, np.inf), (False, False))
  748. return [ia, ib]
  749. def _rvs(self, a, b, size=None, random_state=None):
  750. u1 = gamma.rvs(a, size=size, random_state=random_state)
  751. u2 = gamma.rvs(b, size=size, random_state=random_state)
  752. return u1 / u2
  753. def _pdf(self, x, a, b):
  754. # betaprime.pdf(x, a, b) = x**(a-1) * (1+x)**(-a-b) / beta(a, b)
  755. return np.exp(self._logpdf(x, a, b))
  756. def _logpdf(self, x, a, b):
  757. return sc.xlogy(a - 1.0, x) - sc.xlog1py(a + b, x) - sc.betaln(a, b)
  758. def _cdf(self, x, a, b):
  759. # note: f2 is the direct way to compute the cdf if the relationship
  760. # to the beta distribution is used.
  761. # however, for very large x, x/(1+x) == 1. since the distribution
  762. # has very fat tails if b is small, this can cause inaccurate results
  763. # use the following relationship of the incomplete beta function:
  764. # betainc(x, a, b) = 1 - betainc(1-x, b, a)
  765. # see gh-17631
  766. return xpx.apply_where(
  767. x > 1, (x, a, b),
  768. lambda x_, a_, b_: beta._sf(1 / (1 + x_), b_, a_),
  769. lambda x_, a_, b_: beta._cdf(x_ / (1 + x_), a_, b_))
  770. def _sf(self, x, a, b):
  771. return xpx.apply_where(
  772. x > 1, (x, a, b),
  773. lambda x_, a_, b_: beta._cdf(1 / (1 + x_), b_, a_),
  774. lambda x_, a_, b_: beta._sf(x_ / (1 + x_), a_, b_))
  775. def _ppf(self, p, a, b):
  776. p, a, b = np.broadcast_arrays(p, a, b)
  777. # By default, compute the ppf by solving the following:
  778. # p = beta._cdf(x/(1+x), a, b). This implies x = r/(1-r) with
  779. # r = beta._ppf(p, a, b). This can cause numerical issues if r is
  780. # very close to 1. In that case, invert the alternative expression of
  781. # the cdf: p = beta._sf(1/(1+x), b, a).
  782. r = stats.beta._ppf(p, a, b)
  783. with np.errstate(divide='ignore'):
  784. out = r / (1 - r)
  785. rnear1 = r > 0.9999
  786. if np.isscalar(r):
  787. if rnear1:
  788. out = 1/stats.beta._isf(p, b, a) - 1
  789. else:
  790. out[rnear1] = 1/stats.beta._isf(p[rnear1], b[rnear1], a[rnear1]) - 1
  791. return out
  792. def _munp(self, n, a, b):
  793. return xpx.apply_where(
  794. b > n, (a, b),
  795. lambda a, b: np.prod([(a+i-1)/(b-i) for i in range(1, int(n)+1)], axis=0),
  796. fill_value=np.inf)
  797. betaprime = betaprime_gen(a=0.0, name='betaprime')
  798. class bradford_gen(rv_continuous):
  799. r"""A Bradford continuous random variable.
  800. %(before_notes)s
  801. Notes
  802. -----
  803. The probability density function for `bradford` is:
  804. .. math::
  805. f(x, c) = \frac{c}{\log(1+c) (1+cx)}
  806. for :math:`0 <= x <= 1` and :math:`c > 0`.
  807. `bradford` takes ``c`` as a shape parameter for :math:`c`.
  808. %(after_notes)s
  809. %(example)s
  810. """
  811. def _shape_info(self):
  812. return [_ShapeInfo("c", False, (0, np.inf), (False, False))]
  813. def _pdf(self, x, c):
  814. # bradford.pdf(x, c) = c / (k * (1+c*x))
  815. return c / (c*x + 1.0) / sc.log1p(c)
  816. def _cdf(self, x, c):
  817. return sc.log1p(c*x) / sc.log1p(c)
  818. def _ppf(self, q, c):
  819. return sc.expm1(q * sc.log1p(c)) / c
  820. def _stats(self, c, moments='mv'):
  821. k = np.log(1.0+c)
  822. mu = (c-k)/(c*k)
  823. mu2 = ((c+2.0)*k-2.0*c)/(2*c*k*k)
  824. g1 = None
  825. g2 = None
  826. if 's' in moments:
  827. g1 = np.sqrt(2)*(12*c*c-9*c*k*(c+2)+2*k*k*(c*(c+3)+3))
  828. g1 /= np.sqrt(c*(c*(k-2)+2*k))*(3*c*(k-2)+6*k)
  829. if 'k' in moments:
  830. g2 = (c**3*(k-3)*(k*(3*k-16)+24)+12*k*c*c*(k-4)*(k-3) +
  831. 6*c*k*k*(3*k-14) + 12*k**3)
  832. g2 /= 3*c*(c*(k-2)+2*k)**2
  833. return mu, mu2, g1, g2
  834. def _entropy(self, c):
  835. k = np.log(1+c)
  836. return k/2.0 - np.log(c/k)
  837. bradford = bradford_gen(a=0.0, b=1.0, name='bradford')
  838. class burr_gen(rv_continuous):
  839. r"""A Burr (Type III) continuous random variable.
  840. %(before_notes)s
  841. See Also
  842. --------
  843. fisk : a special case of either `burr` or `burr12` with ``d=1``
  844. burr12 : Burr Type XII distribution
  845. mielke : Mielke Beta-Kappa / Dagum distribution
  846. Notes
  847. -----
  848. The probability density function for `burr` is:
  849. .. math::
  850. f(x; c, d) = c d \frac{x^{-c - 1}}
  851. {{(1 + x^{-c})}^{d + 1}}
  852. for :math:`x >= 0` and :math:`c, d > 0`.
  853. `burr` takes ``c`` and ``d`` as shape parameters for :math:`c` and
  854. :math:`d`.
  855. This is the PDF corresponding to the third CDF given in Burr's list;
  856. specifically, it is equation (11) in Burr's paper [1]_. The distribution
  857. is also commonly referred to as the Dagum distribution [2]_. If the
  858. parameter :math:`c < 1` then the mean of the distribution does not
  859. exist and if :math:`c < 2` the variance does not exist [2]_.
  860. The PDF is finite at the left endpoint :math:`x = 0` if :math:`c * d >= 1`.
  861. %(after_notes)s
  862. References
  863. ----------
  864. .. [1] Burr, I. W. "Cumulative frequency functions", Annals of
  865. Mathematical Statistics, 13(2), pp 215-232 (1942).
  866. .. [2] https://en.wikipedia.org/wiki/Dagum_distribution
  867. .. [3] Kleiber, Christian. "A guide to the Dagum distributions."
  868. Modeling Income Distributions and Lorenz Curves pp 97-117 (2008).
  869. %(example)s
  870. """
  871. # Do not set _support_mask to rv_continuous._open_support_mask
  872. # Whether the left-hand endpoint is suitable for pdf evaluation is dependent
  873. # on the values of c and d: if c*d >= 1, the pdf is finite, otherwise infinite.
  874. def _shape_info(self):
  875. ic = _ShapeInfo("c", False, (0, np.inf), (False, False))
  876. id = _ShapeInfo("d", False, (0, np.inf), (False, False))
  877. return [ic, id]
  878. def _pdf(self, x, c, d):
  879. # burr.pdf(x, c, d) = c * d * x**(-c-1) * (1+x**(-c))**(-d-1)
  880. output = xpx.apply_where(
  881. x == 0, (x, c, d),
  882. lambda x_, c_, d_: c_ * d_ * (x_**(c_*d_-1)) / (1 + x_**c_),
  883. lambda x_, c_, d_: (c_ * d_ * (x_ ** (-c_ - 1.0)) /
  884. ((1 + x_ ** (-c_)) ** (d_ + 1.0))))
  885. return output[()] if output.ndim == 0 else output
  886. def _logpdf(self, x, c, d):
  887. output = xpx.apply_where(
  888. x == 0, (x, c, d),
  889. lambda x_, c_, d_: (np.log(c_) + np.log(d_) + sc.xlogy(c_*d_ - 1, x_)
  890. - (d_+1) * sc.log1p(x_**(c_))),
  891. lambda x_, c_, d_: (np.log(c_) + np.log(d_)
  892. + sc.xlogy(-c_ - 1, x_)
  893. - sc.xlog1py(d_+1, x_**(-c_))))
  894. return output[()] if output.ndim == 0 else output
  895. def _cdf(self, x, c, d):
  896. return (1 + x**(-c))**(-d)
  897. def _logcdf(self, x, c, d):
  898. return sc.log1p(x**(-c)) * (-d)
  899. def _sf(self, x, c, d):
  900. return np.exp(self._logsf(x, c, d))
  901. def _logsf(self, x, c, d):
  902. return np.log1p(- (1 + x**(-c))**(-d))
  903. def _ppf(self, q, c, d):
  904. return (q**(-1.0/d) - 1)**(-1.0/c)
  905. def _isf(self, q, c, d):
  906. _q = sc.xlog1py(-1.0 / d, -q)
  907. return sc.expm1(_q) ** (-1.0 / c)
  908. def _stats(self, c, d):
  909. nc = np.arange(1, 5).reshape(4,1) / c
  910. # ek is the kth raw moment, e1 is the mean e2-e1**2 variance etc.
  911. e1, e2, e3, e4 = sc.beta(d + nc, 1. - nc) * d
  912. mu = np.where(c > 1.0, e1, np.nan)
  913. mu2_if_c = e2 - mu**2
  914. mu2 = np.where(c > 2.0, mu2_if_c, np.nan)
  915. g1 = xpx.apply_where(
  916. c > 3.0, (e1, e2, e3, mu2_if_c),
  917. lambda e1, e2, e3, mu2_if_c: ((e3 - 3*e2*e1 + 2*e1**3)
  918. / np.sqrt((mu2_if_c)**3)),
  919. fill_value=np.nan)
  920. g2 = xpx.apply_where(
  921. c > 4.0, (e1, e2, e3, e4, mu2_if_c),
  922. lambda e1, e2, e3, e4, mu2_if_c: (
  923. ((e4 - 4*e3*e1 + 6*e2*e1**2 - 3*e1**4) / mu2_if_c**2) - 3),
  924. fill_value=np.nan)
  925. if np.ndim(c) == 0:
  926. return mu.item(), mu2.item(), g1.item(), g2.item()
  927. return mu, mu2, g1, g2
  928. def _munp(self, n, c, d):
  929. def __munp(n, c, d):
  930. nc = 1. * n / c
  931. return d * sc.beta(1.0 - nc, d + nc)
  932. n, c, d = np.asarray(n), np.asarray(c), np.asarray(d)
  933. return xpx.apply_where((c > n) & (n == n) & (d == d),
  934. (n, c, d), __munp, fill_value=np.nan)
  935. burr = burr_gen(a=0.0, name='burr')
  936. class burr12_gen(rv_continuous):
  937. r"""A Burr (Type XII) continuous random variable.
  938. %(before_notes)s
  939. See Also
  940. --------
  941. fisk : a special case of either `burr` or `burr12` with ``d=1``
  942. burr : Burr Type III distribution
  943. Notes
  944. -----
  945. The probability density function for `burr12` is:
  946. .. math::
  947. f(x; c, d) = c d \frac{x^{c-1}}
  948. {(1 + x^c)^{d + 1}}
  949. for :math:`x >= 0` and :math:`c, d > 0`.
  950. `burr12` takes ``c`` and ``d`` as shape parameters for :math:`c`
  951. and :math:`d`.
  952. This is the PDF corresponding to the twelfth CDF given in Burr's list;
  953. specifically, it is equation (20) in Burr's paper [1]_.
  954. %(after_notes)s
  955. The Burr type 12 distribution is also sometimes referred to as
  956. the Singh-Maddala distribution from NIST [2]_.
  957. References
  958. ----------
  959. .. [1] Burr, I. W. "Cumulative frequency functions", Annals of
  960. Mathematical Statistics, 13(2), pp 215-232 (1942).
  961. .. [2] https://www.itl.nist.gov/div898/software/dataplot/refman2/auxillar/b12pdf.htm
  962. .. [3] "Burr distribution",
  963. https://en.wikipedia.org/wiki/Burr_distribution
  964. %(example)s
  965. """
  966. def _shape_info(self):
  967. ic = _ShapeInfo("c", False, (0, np.inf), (False, False))
  968. id = _ShapeInfo("d", False, (0, np.inf), (False, False))
  969. return [ic, id]
  970. def _pdf(self, x, c, d):
  971. # burr12.pdf(x, c, d) = c * d * x**(c-1) * (1+x**(c))**(-d-1)
  972. return np.exp(self._logpdf(x, c, d))
  973. def _logpdf(self, x, c, d):
  974. return np.log(c) + np.log(d) + sc.xlogy(c - 1, x) + sc.xlog1py(-d-1, x**c)
  975. def _cdf(self, x, c, d):
  976. return -sc.expm1(self._logsf(x, c, d))
  977. def _logcdf(self, x, c, d):
  978. return sc.log1p(-(1 + x**c)**(-d))
  979. def _sf(self, x, c, d):
  980. return np.exp(self._logsf(x, c, d))
  981. def _logsf(self, x, c, d):
  982. return sc.xlog1py(-d, x**c)
  983. def _ppf(self, q, c, d):
  984. # The following is an implementation of
  985. # ((1 - q)**(-1.0/d) - 1)**(1.0/c)
  986. # that does a better job handling small values of q.
  987. return sc.expm1(-1/d * sc.log1p(-q))**(1/c)
  988. def _isf(self, p, c, d):
  989. return sc.expm1(-1/d * np.log(p))**(1/c)
  990. def _munp(self, n, c, d):
  991. def moment_if_exists(n, c, d):
  992. nc = 1. * n / c
  993. return d * sc.beta(1.0 + nc, d - nc)
  994. return xpx.apply_where(c * d > n, (n, c, d), moment_if_exists,
  995. fill_value=np.nan)
  996. burr12 = burr12_gen(a=0.0, name='burr12')
  997. class fisk_gen(burr_gen):
  998. r"""A Fisk continuous random variable.
  999. The Fisk distribution is also known as the log-logistic distribution.
  1000. %(before_notes)s
  1001. See Also
  1002. --------
  1003. burr
  1004. Notes
  1005. -----
  1006. The probability density function for `fisk` is:
  1007. .. math::
  1008. f(x, c) = \frac{c x^{c-1}}
  1009. {(1 + x^c)^2}
  1010. for :math:`x >= 0` and :math:`c > 0`.
  1011. Please note that the above expression can be transformed into the following
  1012. one, which is also commonly used:
  1013. .. math::
  1014. f(x, c) = \frac{c x^{-c-1}}
  1015. {(1 + x^{-c})^2}
  1016. `fisk` takes ``c`` as a shape parameter for :math:`c`.
  1017. `fisk` is a special case of `burr` or `burr12` with ``d=1``.
  1018. Suppose ``X`` is a logistic random variable with location ``l``
  1019. and scale ``s``. Then ``Y = exp(X)`` is a Fisk (log-logistic)
  1020. random variable with ``scale = exp(l)`` and shape ``c = 1/s``.
  1021. %(after_notes)s
  1022. %(example)s
  1023. """
  1024. def _shape_info(self):
  1025. return [_ShapeInfo("c", False, (0, np.inf), (False, False))]
  1026. def _pdf(self, x, c):
  1027. # fisk.pdf(x, c) = c * x**(-c-1) * (1 + x**(-c))**(-2)
  1028. return burr._pdf(x, c, 1.0)
  1029. def _cdf(self, x, c):
  1030. return burr._cdf(x, c, 1.0)
  1031. def _sf(self, x, c):
  1032. return burr._sf(x, c, 1.0)
  1033. def _logpdf(self, x, c):
  1034. # fisk.pdf(x, c) = c * x**(-c-1) * (1 + x**(-c))**(-2)
  1035. return burr._logpdf(x, c, 1.0)
  1036. def _logcdf(self, x, c):
  1037. return burr._logcdf(x, c, 1.0)
  1038. def _logsf(self, x, c):
  1039. return burr._logsf(x, c, 1.0)
  1040. def _ppf(self, x, c):
  1041. return burr._ppf(x, c, 1.0)
  1042. def _isf(self, q, c):
  1043. return burr._isf(q, c, 1.0)
  1044. def _munp(self, n, c):
  1045. return burr._munp(n, c, 1.0)
  1046. def _stats(self, c):
  1047. return burr._stats(c, 1.0)
  1048. def _entropy(self, c):
  1049. return 2 - np.log(c)
  1050. fisk = fisk_gen(a=0.0, name='fisk')
  1051. class cauchy_gen(rv_continuous):
  1052. r"""A Cauchy continuous random variable.
  1053. %(before_notes)s
  1054. Notes
  1055. -----
  1056. The probability density function for `cauchy` is
  1057. .. math::
  1058. f(x) = \frac{1}{\pi (1 + x^2)}
  1059. for a real number :math:`x`.
  1060. This distribution uses routines from the Boost Math C++ library for
  1061. the computation of the ``ppf`` and ``isf`` methods. [1]_
  1062. %(after_notes)s
  1063. References
  1064. ----------
  1065. .. [1] The Boost Developers. "Boost C++ Libraries". https://www.boost.org/.
  1066. %(example)s
  1067. """
  1068. def _shape_info(self):
  1069. return []
  1070. def _pdf(self, x):
  1071. # cauchy.pdf(x) = 1 / (pi * (1 + x**2))
  1072. with np.errstate(over='ignore'):
  1073. return 1.0/np.pi/(1.0+x*x)
  1074. def _logpdf(self, x):
  1075. # The formulas
  1076. # log(1/(pi*(1 + x**2))) = -log(pi) - log(1 + x**2)
  1077. # = -log(pi) - log(x**2*(1 + 1/x**2))
  1078. # = -log(pi) - (2log(|x|) + log1p(1/x**2))
  1079. # are used here.
  1080. absx = np.abs(x)
  1081. # In the following apply_where, `f1` provides better precision than `f2`
  1082. # for small and moderate x, while `f2` avoids the overflow that can
  1083. # occur with absx**2.
  1084. return xpx.apply_where(
  1085. absx < 1, absx,
  1086. lambda absx: -_LOG_PI - np.log1p(absx**2),
  1087. lambda absx: (-_LOG_PI - (2*np.log(absx) + np.log1p((1/absx)**2))))
  1088. def _cdf(self, x):
  1089. return np.arctan2(1, -x)/np.pi
  1090. def _ppf(self, q):
  1091. return scu._cauchy_ppf(q, 0, 1)
  1092. def _sf(self, x):
  1093. return np.arctan2(1, x)/np.pi
  1094. def _isf(self, q):
  1095. return scu._cauchy_isf(q, 0, 1)
  1096. def _stats(self):
  1097. return np.nan, np.nan, np.nan, np.nan
  1098. def _entropy(self):
  1099. return np.log(4*np.pi)
  1100. def _fitstart(self, data, args=None):
  1101. # Initialize ML guesses using quartiles instead of moments.
  1102. if isinstance(data, CensoredData):
  1103. data = data._uncensor()
  1104. p25, p50, p75 = np.percentile(data, [25, 50, 75])
  1105. return p50, (p75 - p25)/2
  1106. cauchy = cauchy_gen(name='cauchy')
  1107. class chi_gen(rv_continuous):
  1108. r"""A chi continuous random variable.
  1109. %(before_notes)s
  1110. Notes
  1111. -----
  1112. The probability density function for `chi` is:
  1113. .. math::
  1114. f(x, k) = \frac{1}{2^{k/2-1} \Gamma \left( k/2 \right)}
  1115. x^{k-1} \exp \left( -x^2/2 \right)
  1116. for :math:`x >= 0` and :math:`k > 0` (degrees of freedom, denoted ``df``
  1117. in the implementation). :math:`\Gamma` is the gamma function
  1118. (`scipy.special.gamma`).
  1119. Special cases of `chi` are:
  1120. - ``chi(1, loc, scale)`` is equivalent to `halfnorm`
  1121. - ``chi(2, 0, scale)`` is equivalent to `rayleigh`
  1122. - ``chi(3, 0, scale)`` is equivalent to `maxwell`
  1123. `chi` takes ``df`` as a shape parameter.
  1124. %(after_notes)s
  1125. %(example)s
  1126. """
  1127. def _shape_info(self):
  1128. return [_ShapeInfo("df", False, (0, np.inf), (False, False))]
  1129. def _rvs(self, df, size=None, random_state=None):
  1130. return np.sqrt(chi2.rvs(df, size=size, random_state=random_state))
  1131. def _pdf(self, x, df):
  1132. # x**(df-1) * exp(-x**2/2)
  1133. # chi.pdf(x, df) = -------------------------
  1134. # 2**(df/2-1) * gamma(df/2)
  1135. return np.exp(self._logpdf(x, df))
  1136. def _logpdf(self, x, df):
  1137. l = np.log(2) - .5*np.log(2)*df - sc.gammaln(.5*df)
  1138. return l + sc.xlogy(df - 1., x) - .5*x**2
  1139. def _cdf(self, x, df):
  1140. return sc.gammainc(.5*df, .5*x**2)
  1141. def _sf(self, x, df):
  1142. return sc.gammaincc(.5*df, .5*x**2)
  1143. def _ppf(self, q, df):
  1144. return np.sqrt(2*sc.gammaincinv(.5*df, q))
  1145. def _isf(self, q, df):
  1146. return np.sqrt(2*sc.gammainccinv(.5*df, q))
  1147. def _stats(self, df):
  1148. # poch(df/2, 1/2) = gamma(df/2 + 1/2) / gamma(df/2)
  1149. mu = np.sqrt(2) * sc.poch(0.5 * df, 0.5)
  1150. mu2 = df - mu*mu
  1151. g1 = (2*mu**3.0 + mu*(1-2*df))/np.asarray(np.power(mu2, 1.5))
  1152. g2 = 2*df*(1.0-df)-6*mu**4 + 4*mu**2 * (2*df-1)
  1153. g2 /= np.asarray(mu2**2.0)
  1154. return mu, mu2, g1, g2
  1155. def _entropy(self, df):
  1156. def regular_formula(df):
  1157. return (sc.gammaln(.5 * df)
  1158. + 0.5 * (df - np.log(2) - (df - 1) * sc.digamma(0.5 * df)))
  1159. def asymptotic_formula(df):
  1160. return (0.5 + np.log(np.pi)/2 - (df**-1)/6 - (df**-2)/6
  1161. - 4/45*(df**-3) + (df**-4)/15)
  1162. return xpx.apply_where(df < 300, df, regular_formula, asymptotic_formula)
  1163. chi = chi_gen(a=0.0, name='chi')
  1164. class chi2_gen(rv_continuous):
  1165. r"""A chi-squared continuous random variable.
  1166. For the noncentral chi-square distribution, see `ncx2`.
  1167. %(before_notes)s
  1168. See Also
  1169. --------
  1170. ncx2
  1171. Notes
  1172. -----
  1173. The probability density function for `chi2` is:
  1174. .. math::
  1175. f(x, k) = \frac{1}{2^{k/2} \Gamma \left( k/2 \right)}
  1176. x^{k/2-1} \exp \left( -x/2 \right)
  1177. for :math:`x > 0` and :math:`k > 0` (degrees of freedom, denoted ``df``
  1178. in the implementation).
  1179. `chi2` takes ``df`` as a shape parameter.
  1180. The chi-squared distribution is a special case of the gamma
  1181. distribution, with gamma parameters ``a = df/2``, ``loc = 0`` and
  1182. ``scale = 2``.
  1183. %(after_notes)s
  1184. %(example)s
  1185. """
  1186. def _shape_info(self):
  1187. return [_ShapeInfo("df", False, (0, np.inf), (False, False))]
  1188. def _rvs(self, df, size=None, random_state=None):
  1189. return random_state.chisquare(df, size)
  1190. def _pdf(self, x, df):
  1191. # chi2.pdf(x, df) = 1 / (2*gamma(df/2)) * (x/2)**(df/2-1) * exp(-x/2)
  1192. return np.exp(self._logpdf(x, df))
  1193. def _logpdf(self, x, df):
  1194. return sc.xlogy(df/2.-1, x) - x/2. - sc.gammaln(df/2.) - (np.log(2)*df)/2.
  1195. def _cdf(self, x, df):
  1196. return sc.chdtr(df, x)
  1197. def _sf(self, x, df):
  1198. return sc.chdtrc(df, x)
  1199. def _isf(self, p, df):
  1200. return sc.chdtri(df, p)
  1201. def _ppf(self, p, df):
  1202. return 2*sc.gammaincinv(df/2, p)
  1203. def _stats(self, df):
  1204. mu = df
  1205. mu2 = 2*df
  1206. g1 = 2*np.sqrt(2.0/df)
  1207. g2 = 12.0/df
  1208. return mu, mu2, g1, g2
  1209. def _entropy(self, df):
  1210. half_df = 0.5 * df
  1211. def regular_formula(half_df):
  1212. return (half_df + np.log(2) + sc.gammaln(half_df) +
  1213. (1 - half_df) * sc.psi(half_df))
  1214. def asymptotic_formula(half_df):
  1215. # plug in the above formula the following asymptotic
  1216. # expansions:
  1217. # ln(gamma(a)) ~ (a - 0.5) * ln(a) - a + 0.5 * ln(2 * pi) +
  1218. # 1/(12 * a) - 1/(360 * a**3)
  1219. # psi(a) ~ ln(a) - 1/(2 * a) - 1/(3 * a**2) + 1/120 * a**4)
  1220. c = np.log(2) + 0.5*(1 + np.log(2*np.pi))
  1221. h = 0.5/half_df
  1222. return (h*(-2/3 + h*(-1/3 + h*(-4/45 + h/7.5))) +
  1223. 0.5*np.log(half_df) + c)
  1224. return xpx.apply_where(half_df < 125, half_df,
  1225. regular_formula, asymptotic_formula)
  1226. chi2 = chi2_gen(a=0.0, name='chi2')
  1227. class cosine_gen(rv_continuous):
  1228. r"""A cosine continuous random variable.
  1229. %(before_notes)s
  1230. Notes
  1231. -----
  1232. The cosine distribution is an approximation to the normal distribution.
  1233. The probability density function for `cosine` is:
  1234. .. math::
  1235. f(x) = \frac{1}{2\pi} (1+\cos(x))
  1236. for :math:`-\pi \le x \le \pi`.
  1237. %(after_notes)s
  1238. %(example)s
  1239. """
  1240. def _shape_info(self):
  1241. return []
  1242. def _pdf(self, x):
  1243. # cosine.pdf(x) = 1/(2*pi) * (1+cos(x))
  1244. return 1.0/2/np.pi*(1+np.cos(x))
  1245. def _logpdf(self, x):
  1246. c = np.cos(x)
  1247. return xpx.apply_where(c != -1, c,
  1248. lambda c: np.log1p(c) - np.log(2*np.pi),
  1249. fill_value=-np.inf)
  1250. def _cdf(self, x):
  1251. return scu._cosine_cdf(x)
  1252. def _sf(self, x):
  1253. return scu._cosine_cdf(-x)
  1254. def _ppf(self, p):
  1255. return scu._cosine_invcdf(p)
  1256. def _isf(self, p):
  1257. return -scu._cosine_invcdf(p)
  1258. def _stats(self):
  1259. v = (np.pi * np.pi / 3.0) - 2.0
  1260. k = -6.0 * (np.pi**4 - 90) / (5.0 * (np.pi * np.pi - 6)**2)
  1261. return 0.0, v, 0.0, k
  1262. def _entropy(self):
  1263. return np.log(4*np.pi)-1.0
  1264. cosine = cosine_gen(a=-np.pi, b=np.pi, name='cosine')
  1265. class dgamma_gen(rv_continuous):
  1266. r"""A double gamma continuous random variable.
  1267. The double gamma distribution is also known as the reflected gamma
  1268. distribution [1]_.
  1269. %(before_notes)s
  1270. Notes
  1271. -----
  1272. The probability density function for `dgamma` is:
  1273. .. math::
  1274. f(x, a) = \frac{1}{2\Gamma(a)} |x|^{a-1} \exp(-|x|)
  1275. for a real number :math:`x` and :math:`a > 0`. :math:`\Gamma` is the
  1276. gamma function (`scipy.special.gamma`).
  1277. `dgamma` takes ``a`` as a shape parameter for :math:`a`.
  1278. %(after_notes)s
  1279. References
  1280. ----------
  1281. .. [1] Johnson, Kotz, and Balakrishnan, "Continuous Univariate
  1282. Distributions, Volume 1", Second Edition, John Wiley and Sons
  1283. (1994).
  1284. %(example)s
  1285. """
  1286. def _shape_info(self):
  1287. return [_ShapeInfo("a", False, (0, np.inf), (False, False))]
  1288. def _rvs(self, a, size=None, random_state=None):
  1289. u = random_state.uniform(size=size)
  1290. gm = gamma.rvs(a, size=size, random_state=random_state)
  1291. return gm * np.where(u >= 0.5, 1, -1)
  1292. def _pdf(self, x, a):
  1293. # dgamma.pdf(x, a) = 1 / (2*gamma(a)) * abs(x)**(a-1) * exp(-abs(x))
  1294. ax = abs(x)
  1295. return 1.0/(2*sc.gamma(a))*ax**(a-1.0) * np.exp(-ax)
  1296. def _logpdf(self, x, a):
  1297. ax = abs(x)
  1298. return sc.xlogy(a - 1.0, ax) - ax - np.log(2) - sc.gammaln(a)
  1299. def _cdf(self, x, a):
  1300. return np.where(x > 0,
  1301. 0.5 + 0.5*sc.gammainc(a, x),
  1302. 0.5*sc.gammaincc(a, -x))
  1303. def _sf(self, x, a):
  1304. return np.where(x > 0,
  1305. 0.5*sc.gammaincc(a, x),
  1306. 0.5 + 0.5*sc.gammainc(a, -x))
  1307. def _entropy(self, a):
  1308. return stats.gamma._entropy(a) - np.log(0.5)
  1309. def _ppf(self, q, a):
  1310. return np.where(q > 0.5,
  1311. sc.gammaincinv(a, 2*q - 1),
  1312. -sc.gammainccinv(a, 2*q))
  1313. def _isf(self, q, a):
  1314. return np.where(q > 0.5,
  1315. -sc.gammaincinv(a, 2*q - 1),
  1316. sc.gammainccinv(a, 2*q))
  1317. def _stats(self, a):
  1318. mu2 = a*(a+1.0)
  1319. return 0.0, mu2, 0.0, (a+2.0)*(a+3.0)/mu2-3.0
  1320. dgamma = dgamma_gen(name='dgamma')
  1321. class dpareto_lognorm_gen(rv_continuous):
  1322. r"""A double Pareto lognormal continuous random variable.
  1323. %(before_notes)s
  1324. Notes
  1325. -----
  1326. The probability density function for `dpareto_lognorm` is:
  1327. .. math::
  1328. f(x, \mu, \sigma, \alpha, \beta) =
  1329. \frac{\alpha \beta}{(\alpha + \beta) x}
  1330. \phi\left( \frac{\log x - \mu}{\sigma} \right)
  1331. \left( R(y_1) + R(y_2) \right)
  1332. where :math:`R(t) = \frac{1 - \Phi(t)}{\phi(t)}`,
  1333. :math:`\phi` and :math:`\Phi` are the normal PDF and CDF, respectively,
  1334. :math:`y_1 = \alpha \sigma - \frac{\log x - \mu}{\sigma}`,
  1335. and :math:`y_2 = \beta \sigma + \frac{\log x - \mu}{\sigma}`
  1336. for real numbers :math:`x` and :math:`\mu`, :math:`\sigma > 0`,
  1337. :math:`\alpha > 0`, and :math:`\beta > 0` [1]_.
  1338. `dpareto_lognorm` takes
  1339. ``u`` as a shape parameter for :math:`\mu`,
  1340. ``s`` as a shape parameter for :math:`\sigma`,
  1341. ``a`` as a shape parameter for :math:`\alpha`, and
  1342. ``b`` as a shape parameter for :math:`\beta`.
  1343. A random variable :math:`X` distributed according to the PDF above
  1344. can be represented as :math:`X = U \frac{V_1}{V_2}` where :math:`U`,
  1345. :math:`V_1`, and :math:`V_2` are independent, :math:`U` is lognormally
  1346. distributed such that :math:`\log U \sim N(\mu, \sigma^2)`, and
  1347. :math:`V_1` and :math:`V_2` follow Pareto distributions with parameters
  1348. :math:`\alpha` and :math:`\beta`, respectively [2]_.
  1349. %(after_notes)s
  1350. References
  1351. ----------
  1352. .. [1] Hajargasht, Gholamreza, and William E. Griffiths. "Pareto-lognormal
  1353. distributions: Inequality, poverty, and estimation from grouped income
  1354. data." Economic Modelling 33 (2013): 593-604.
  1355. .. [2] Reed, William J., and Murray Jorgensen. "The double Pareto-lognormal
  1356. distribution - a new parametric model for size distributions."
  1357. Communications in Statistics - Theory and Methods 33.8 (2004): 1733-1753.
  1358. %(example)s
  1359. """
  1360. _logphi = norm._logpdf
  1361. _logPhi = norm._logcdf
  1362. _logPhic = norm._logsf
  1363. _phi = norm._pdf
  1364. _Phi = norm._cdf
  1365. _Phic = norm._sf
  1366. def _R(self, z):
  1367. return self._Phic(z) / self._phi(z)
  1368. def _logR(self, z):
  1369. return self._logPhic(z) - self._logphi(z)
  1370. def _shape_info(self):
  1371. return [_ShapeInfo("u", False, (-np.inf, np.inf), (False, False)),
  1372. _ShapeInfo("s", False, (0, np.inf), (False, False)),
  1373. _ShapeInfo("a", False, (0, np.inf), (False, False)),
  1374. _ShapeInfo("b", False, (0, np.inf), (False, False))]
  1375. def _argcheck(self, u, s, a, b):
  1376. return (s > 0) & (a > 0) & (b > 0)
  1377. def _rvs(self, u, s, a, b, size=None, random_state=None):
  1378. # From [1] after Equation (12): "To generate pseudo-random
  1379. # deviates from the dPlN distribution, one can exponentiate
  1380. # pseudo-random deviates from NL generated using (6)."
  1381. Z = random_state.normal(u, s, size=size)
  1382. E1 = random_state.standard_exponential(size=size)
  1383. E2 = random_state.standard_exponential(size=size)
  1384. return np.exp(Z + E1 / a - E2 / b)
  1385. def _logpdf(self, x, u, s, a, b):
  1386. with np.errstate(invalid='ignore', divide='ignore'):
  1387. log_y, m = np.log(x), u # compare against [1] Eq. 1
  1388. z = (log_y - m) / s
  1389. x1 = a * s - z
  1390. x2 = b * s + z
  1391. out = np.asarray(np.log(a) + np.log(b) - np.log(a + b) - log_y)
  1392. out += self._logphi(z)
  1393. out += np.logaddexp(self._logR(x1), self._logR(x2))
  1394. out[(x == 0) | np.isinf(x)] = -np.inf
  1395. return out[()]
  1396. def _logcdf(self, x, u, s, a, b):
  1397. with np.errstate(invalid='ignore', divide='ignore'):
  1398. log_y, m = np.log(x), u # compare against [1] Eq. 2
  1399. z = (log_y - m) / s
  1400. x1 = a * s - z
  1401. x2 = b * s + z
  1402. t1 = self._logPhi(z)
  1403. t2 = self._logphi(z)
  1404. t3 = (np.log(b) + self._logR(x1))
  1405. t4 = (np.log(a) + self._logR(x2))
  1406. t1, t2, t3, t4, one = np.broadcast_arrays(t1, t2, t3, t4, 1)
  1407. # t3 can be smaller than t4, so we have to consider log of negative number
  1408. # This would be much simpler, but `return_sign` is available, so use it?
  1409. # t5 = sc.logsumexp([t3, t4 + np.pi*1j])
  1410. t5, sign = sc.logsumexp([t3, t4], b=[one, -one], axis=0, return_sign=True)
  1411. temp = [t1, t2 + t5 - np.log(a + b)]
  1412. out = np.asarray(sc.logsumexp(temp, b=[one, -one*sign], axis=0))
  1413. out[x == 0] = -np.inf
  1414. return out[()]
  1415. def _logsf(self, x, u, s, a, b):
  1416. return scu._log1mexp(self._logcdf(x, u, s, a, b))
  1417. # Infrastructure doesn't seem to do this, so...
  1418. def _pdf(self, x, u, s, a, b):
  1419. return np.exp(self._logpdf(x, u, s, a, b))
  1420. def _cdf(self, x, u, s, a, b):
  1421. return np.exp(self._logcdf(x, u, s, a, b))
  1422. def _sf(self, x, u, s, a, b):
  1423. return np.exp(self._logsf(x, u, s, a, b))
  1424. def _munp(self, n, u, s, a, b):
  1425. m, k = u, float(n) # compare against [1] Eq. 6
  1426. out = (a * b) / ((a - k) * (b + k)) * np.exp(k * m + k ** 2 * s ** 2 / 2)
  1427. out = np.asarray(out)
  1428. out[a <= k] = np.nan
  1429. return out
  1430. dpareto_lognorm = dpareto_lognorm_gen(a=0, name='dpareto_lognorm')
  1431. class dweibull_gen(rv_continuous):
  1432. r"""A double Weibull continuous random variable.
  1433. %(before_notes)s
  1434. Notes
  1435. -----
  1436. The probability density function for `dweibull` is given by
  1437. .. math::
  1438. f(x, c) = c / 2 |x|^{c-1} \exp(-|x|^c)
  1439. for a real number :math:`x` and :math:`c > 0`.
  1440. `dweibull` takes ``c`` as a shape parameter for :math:`c`.
  1441. %(after_notes)s
  1442. %(example)s
  1443. """
  1444. def _shape_info(self):
  1445. return [_ShapeInfo("c", False, (0, np.inf), (False, False))]
  1446. def _rvs(self, c, size=None, random_state=None):
  1447. u = random_state.uniform(size=size)
  1448. w = weibull_min.rvs(c, size=size, random_state=random_state)
  1449. return w * (np.where(u >= 0.5, 1, -1))
  1450. def _pdf(self, x, c):
  1451. # dweibull.pdf(x, c) = c / 2 * abs(x)**(c-1) * exp(-abs(x)**c)
  1452. ax = abs(x)
  1453. Px = c / 2.0 * ax**(c-1.0) * np.exp(-ax**c)
  1454. return Px
  1455. def _logpdf(self, x, c):
  1456. ax = abs(x)
  1457. return np.log(c) - np.log(2.0) + sc.xlogy(c - 1.0, ax) - ax**c
  1458. def _cdf(self, x, c):
  1459. Cx1 = 0.5 * np.exp(-abs(x)**c)
  1460. return np.where(x > 0, 1 - Cx1, Cx1)
  1461. def _ppf(self, q, c):
  1462. fac = 2. * np.where(q <= 0.5, q, 1. - q)
  1463. fac = np.power(-np.log(fac), 1.0 / c)
  1464. return np.where(q > 0.5, fac, -fac)
  1465. def _sf(self, x, c):
  1466. half_weibull_min_sf = 0.5 * stats.weibull_min._sf(np.abs(x), c)
  1467. return np.where(x > 0, half_weibull_min_sf, 1 - half_weibull_min_sf)
  1468. def _isf(self, q, c):
  1469. double_q = 2. * np.where(q <= 0.5, q, 1. - q)
  1470. weibull_min_isf = stats.weibull_min._isf(double_q, c)
  1471. return np.where(q > 0.5, -weibull_min_isf, weibull_min_isf)
  1472. def _munp(self, n, c):
  1473. return (1 - (n % 2)) * sc.gamma(1.0 + 1.0 * n / c)
  1474. # since we know that all odd moments are zeros, return them at once.
  1475. # returning Nones from _stats makes the public stats call _munp
  1476. # so overall we're saving one or two gamma function evaluations here.
  1477. def _stats(self, c):
  1478. return 0, None, 0, None
  1479. def _entropy(self, c):
  1480. h = stats.weibull_min._entropy(c) - np.log(0.5)
  1481. return h
  1482. dweibull = dweibull_gen(name='dweibull')
  1483. class expon_gen(rv_continuous):
  1484. r"""An exponential continuous random variable.
  1485. %(before_notes)s
  1486. Notes
  1487. -----
  1488. The probability density function for `expon` is:
  1489. .. math::
  1490. f(x) = \exp(-x)
  1491. for :math:`x \ge 0`.
  1492. %(after_notes)s
  1493. A common parameterization for `expon` is in terms of the rate parameter
  1494. ``lambda``, such that ``pdf = lambda * exp(-lambda * x)``. This
  1495. parameterization corresponds to using ``scale = 1 / lambda``.
  1496. The exponential distribution is a special case of the gamma
  1497. distributions, with gamma shape parameter ``a = 1``.
  1498. %(example)s
  1499. """
  1500. def _shape_info(self):
  1501. return []
  1502. def _rvs(self, size=None, random_state=None):
  1503. return random_state.standard_exponential(size)
  1504. def _pdf(self, x):
  1505. # expon.pdf(x) = exp(-x)
  1506. return np.exp(-x)
  1507. def _logpdf(self, x):
  1508. return -x
  1509. def _cdf(self, x):
  1510. return -sc.expm1(-x)
  1511. def _ppf(self, q):
  1512. return -sc.log1p(-q)
  1513. def _sf(self, x):
  1514. return np.exp(-x)
  1515. def _logsf(self, x):
  1516. return -x
  1517. def _isf(self, q):
  1518. return -np.log(q)
  1519. def _stats(self):
  1520. return 1.0, 1.0, 2.0, 6.0
  1521. def _entropy(self):
  1522. return 1.0
  1523. @_call_super_mom
  1524. @replace_notes_in_docstring(rv_continuous, notes="""\
  1525. When `method='MLE'`,
  1526. this function uses explicit formulas for the maximum likelihood
  1527. estimation of the exponential distribution parameters, so the
  1528. `optimizer`, `loc` and `scale` keyword arguments are
  1529. ignored.\n\n""")
  1530. def fit(self, data, *args, **kwds):
  1531. if len(args) > 0:
  1532. raise TypeError("Too many arguments.")
  1533. floc = kwds.pop('floc', None)
  1534. fscale = kwds.pop('fscale', None)
  1535. _remove_optimizer_parameters(kwds)
  1536. if floc is not None and fscale is not None:
  1537. # This check is for consistency with `rv_continuous.fit`.
  1538. raise ValueError("All parameters fixed. There is nothing to "
  1539. "optimize.")
  1540. data = np.asarray(data)
  1541. if not np.isfinite(data).all():
  1542. raise ValueError("The data contains non-finite values.")
  1543. data_min = data.min()
  1544. if floc is None:
  1545. # ML estimate of the location is the minimum of the data.
  1546. loc = data_min
  1547. else:
  1548. loc = floc
  1549. if data_min < loc:
  1550. # There are values that are less than the specified loc.
  1551. raise FitDataError("expon", lower=floc, upper=np.inf)
  1552. if fscale is None:
  1553. # ML estimate of the scale is the shifted mean.
  1554. scale = data.mean() - loc
  1555. else:
  1556. scale = fscale
  1557. # We expect the return values to be floating point, so ensure it
  1558. # by explicitly converting to float.
  1559. return float(loc), float(scale)
  1560. expon = expon_gen(a=0.0, name='expon')
  1561. class exponnorm_gen(rv_continuous):
  1562. r"""An exponentially modified Normal continuous random variable.
  1563. Also known as the exponentially modified Gaussian distribution [1]_.
  1564. %(before_notes)s
  1565. Notes
  1566. -----
  1567. The probability density function for `exponnorm` is:
  1568. .. math::
  1569. f(x, K) = \frac{1}{2K} \exp\left(\frac{1}{2 K^2} - x / K \right)
  1570. \text{erfc}\left(-\frac{x - 1/K}{\sqrt{2}}\right)
  1571. where :math:`x` is a real number and :math:`K > 0`.
  1572. It can be thought of as the sum of a standard normal random variable
  1573. and an independent exponentially distributed random variable with rate
  1574. ``1/K``.
  1575. %(after_notes)s
  1576. An alternative parameterization of this distribution (for example, in
  1577. the Wikipedia article [1]_) involves three parameters, :math:`\mu`,
  1578. :math:`\lambda` and :math:`\sigma`.
  1579. In the present parameterization this corresponds to having ``loc`` and
  1580. ``scale`` equal to :math:`\mu` and :math:`\sigma`, respectively, and
  1581. shape parameter :math:`K = 1/(\sigma\lambda)`.
  1582. .. versionadded:: 0.16.0
  1583. References
  1584. ----------
  1585. .. [1] Exponentially modified Gaussian distribution, Wikipedia,
  1586. https://en.wikipedia.org/wiki/Exponentially_modified_Gaussian_distribution
  1587. %(example)s
  1588. """
  1589. def _shape_info(self):
  1590. return [_ShapeInfo("K", False, (0, np.inf), (False, False))]
  1591. def _rvs(self, K, size=None, random_state=None):
  1592. expval = random_state.standard_exponential(size) * K
  1593. gval = random_state.standard_normal(size)
  1594. return expval + gval
  1595. def _pdf(self, x, K):
  1596. return np.exp(self._logpdf(x, K))
  1597. def _logpdf(self, x, K):
  1598. invK = 1.0 / K
  1599. exparg = invK * (0.5 * invK - x)
  1600. return exparg + _norm_logcdf(x - invK) - np.log(K)
  1601. def _cdf(self, x, K):
  1602. invK = 1.0 / K
  1603. expval = invK * (0.5 * invK - x)
  1604. logprod = expval + _norm_logcdf(x - invK)
  1605. return _norm_cdf(x) - np.exp(logprod)
  1606. def _sf(self, x, K):
  1607. invK = 1.0 / K
  1608. expval = invK * (0.5 * invK - x)
  1609. logprod = expval + _norm_logcdf(x - invK)
  1610. return _norm_cdf(-x) + np.exp(logprod)
  1611. def _stats(self, K):
  1612. K2 = K * K
  1613. opK2 = 1.0 + K2
  1614. skw = 2 * K**3 * opK2**(-1.5)
  1615. krt = 6.0 * K2 * K2 * opK2**(-2)
  1616. return K, opK2, skw, krt
  1617. exponnorm = exponnorm_gen(name='exponnorm')
  1618. def _pow1pm1(x, y):
  1619. """
  1620. Compute (1 + x)**y - 1.
  1621. Uses expm1 and xlog1py to avoid loss of precision when
  1622. (1 + x)**y is close to 1.
  1623. Note that the inverse of this function with respect to x is
  1624. ``_pow1pm1(x, 1/y)``. That is, if
  1625. t = _pow1pm1(x, y)
  1626. then
  1627. x = _pow1pm1(t, 1/y)
  1628. """
  1629. return np.expm1(sc.xlog1py(y, x))
  1630. class exponweib_gen(rv_continuous):
  1631. r"""An exponentiated Weibull continuous random variable.
  1632. %(before_notes)s
  1633. See Also
  1634. --------
  1635. weibull_min, numpy.random.Generator.weibull
  1636. Notes
  1637. -----
  1638. The probability density function for `exponweib` is:
  1639. .. math::
  1640. f(x, a, c) = a c [1-\exp(-x^c)]^{a-1} \exp(-x^c) x^{c-1}
  1641. and its cumulative distribution function is:
  1642. .. math::
  1643. F(x, a, c) = [1-\exp(-x^c)]^a
  1644. for :math:`x > 0`, :math:`a > 0`, :math:`c > 0`.
  1645. `exponweib` takes :math:`a` and :math:`c` as shape parameters:
  1646. * :math:`a` is the exponentiation parameter,
  1647. with the special case :math:`a=1` corresponding to the
  1648. (non-exponentiated) Weibull distribution `weibull_min`.
  1649. * :math:`c` is the shape parameter of the non-exponentiated Weibull law.
  1650. %(after_notes)s
  1651. References
  1652. ----------
  1653. https://en.wikipedia.org/wiki/Exponentiated_Weibull_distribution
  1654. %(example)s
  1655. """
  1656. def _shape_info(self):
  1657. ia = _ShapeInfo("a", False, (0, np.inf), (False, False))
  1658. ic = _ShapeInfo("c", False, (0, np.inf), (False, False))
  1659. return [ia, ic]
  1660. def _pdf(self, x, a, c):
  1661. # exponweib.pdf(x, a, c) =
  1662. # a * c * (1-exp(-x**c))**(a-1) * exp(-x**c)*x**(c-1)
  1663. return np.exp(self._logpdf(x, a, c))
  1664. def _logpdf(self, x, a, c):
  1665. negxc = -x**c
  1666. exm1c = -sc.expm1(negxc)
  1667. logp = (np.log(a) + np.log(c) + sc.xlogy(a - 1.0, exm1c) +
  1668. negxc + sc.xlogy(c - 1.0, x))
  1669. return logp
  1670. def _cdf(self, x, a, c):
  1671. exm1c = -sc.expm1(-x**c)
  1672. return exm1c**a
  1673. def _ppf(self, q, a, c):
  1674. return (-sc.log1p(-q**(1.0/a)))**np.asarray(1.0/c)
  1675. def _sf(self, x, a, c):
  1676. return -_pow1pm1(-np.exp(-x**c), a)
  1677. def _isf(self, p, a, c):
  1678. return (-np.log(-_pow1pm1(-p, 1/a)))**(1/c)
  1679. exponweib = exponweib_gen(a=0.0, name='exponweib')
  1680. class exponpow_gen(rv_continuous):
  1681. r"""An exponential power continuous random variable.
  1682. %(before_notes)s
  1683. Notes
  1684. -----
  1685. The probability density function for `exponpow` is:
  1686. .. math::
  1687. f(x, b) = b x^{b-1} \exp(1 + x^b - \exp(x^b))
  1688. for :math:`x \ge 0`, :math:`b > 0`. Note that this is a different
  1689. distribution from the exponential power distribution that is also known
  1690. under the names "generalized normal" or "generalized Gaussian".
  1691. `exponpow` takes ``b`` as a shape parameter for :math:`b`.
  1692. %(after_notes)s
  1693. References
  1694. ----------
  1695. http://www.math.wm.edu/~leemis/chart/UDR/PDFs/Exponentialpower.pdf
  1696. %(example)s
  1697. """
  1698. def _shape_info(self):
  1699. return [_ShapeInfo("b", False, (0, np.inf), (False, False))]
  1700. def _pdf(self, x, b):
  1701. # exponpow.pdf(x, b) = b * x**(b-1) * exp(1 + x**b - exp(x**b))
  1702. return np.exp(self._logpdf(x, b))
  1703. def _logpdf(self, x, b):
  1704. xb = x**b
  1705. f = 1 + np.log(b) + sc.xlogy(b - 1.0, x) + xb - np.exp(xb)
  1706. return f
  1707. def _cdf(self, x, b):
  1708. return -sc.expm1(-sc.expm1(x**b))
  1709. def _sf(self, x, b):
  1710. return np.exp(-sc.expm1(x**b))
  1711. def _isf(self, x, b):
  1712. return (sc.log1p(-np.log(x)))**(1./b)
  1713. def _ppf(self, q, b):
  1714. return pow(sc.log1p(-sc.log1p(-q)), 1.0/b)
  1715. exponpow = exponpow_gen(a=0.0, name='exponpow')
  1716. class fatiguelife_gen(rv_continuous):
  1717. r"""A fatigue-life (Birnbaum-Saunders) continuous random variable.
  1718. %(before_notes)s
  1719. Notes
  1720. -----
  1721. The probability density function for `fatiguelife` is:
  1722. .. math::
  1723. f(x, c) = \frac{x+1}{2c\sqrt{2\pi x^3}} \exp(-\frac{(x-1)^2}{2x c^2})
  1724. for :math:`x >= 0` and :math:`c > 0`.
  1725. `fatiguelife` takes ``c`` as a shape parameter for :math:`c`.
  1726. %(after_notes)s
  1727. References
  1728. ----------
  1729. .. [1] "Birnbaum-Saunders distribution",
  1730. https://en.wikipedia.org/wiki/Birnbaum-Saunders_distribution
  1731. %(example)s
  1732. """
  1733. _support_mask = rv_continuous._open_support_mask
  1734. def _shape_info(self):
  1735. return [_ShapeInfo("c", False, (0, np.inf), (False, False))]
  1736. def _rvs(self, c, size=None, random_state=None):
  1737. z = random_state.standard_normal(size)
  1738. x = 0.5*c*z
  1739. x2 = x*x
  1740. t = 1.0 + 2*x2 + 2*x*np.sqrt(1 + x2)
  1741. return t
  1742. def _pdf(self, x, c):
  1743. # fatiguelife.pdf(x, c) =
  1744. # (x+1) / (2*c*sqrt(2*pi*x**3)) * exp(-(x-1)**2/(2*x*c**2))
  1745. return np.exp(self._logpdf(x, c))
  1746. def _logpdf(self, x, c):
  1747. return (np.log(x+1) - (x-1)**2 / (2.0*x*c**2) - np.log(2*c) -
  1748. 0.5*(np.log(2*np.pi) + 3*np.log(x)))
  1749. def _cdf(self, x, c):
  1750. return _norm_cdf(1.0 / c * (np.sqrt(x) - 1.0/np.sqrt(x)))
  1751. def _ppf(self, q, c):
  1752. tmp = c * _norm_ppf(q)
  1753. return 0.25 * (tmp + np.sqrt(tmp**2 + 4))**2
  1754. def _sf(self, x, c):
  1755. return _norm_sf(1.0 / c * (np.sqrt(x) - 1.0/np.sqrt(x)))
  1756. def _isf(self, q, c):
  1757. tmp = -c * _norm_ppf(q)
  1758. return 0.25 * (tmp + np.sqrt(tmp**2 + 4))**2
  1759. def _stats(self, c):
  1760. # NB: the formula for kurtosis in wikipedia seems to have an error:
  1761. # it's 40, not 41. At least it disagrees with the one from Wolfram
  1762. # Alpha. And the latter one, below, passes the tests, while the wiki
  1763. # one doesn't So far I didn't have the guts to actually check the
  1764. # coefficients from the expressions for the raw moments.
  1765. c2 = c*c
  1766. mu = c2 / 2.0 + 1.0
  1767. den = 5.0 * c2 + 4.0
  1768. mu2 = c2*den / 4.0
  1769. g1 = 4 * c * (11*c2 + 6.0) / np.power(den, 1.5)
  1770. g2 = 6 * c2 * (93*c2 + 40.0) / den**2.0
  1771. return mu, mu2, g1, g2
  1772. fatiguelife = fatiguelife_gen(a=0.0, name='fatiguelife')
  1773. class foldcauchy_gen(rv_continuous):
  1774. r"""A folded Cauchy continuous random variable.
  1775. %(before_notes)s
  1776. Notes
  1777. -----
  1778. The probability density function for `foldcauchy` is:
  1779. .. math::
  1780. f(x, c) = \frac{1}{\pi (1+(x-c)^2)} + \frac{1}{\pi (1+(x+c)^2)}
  1781. for :math:`x \ge 0` and :math:`c \ge 0`.
  1782. `foldcauchy` takes ``c`` as a shape parameter for :math:`c`.
  1783. %(example)s
  1784. """
  1785. def _argcheck(self, c):
  1786. return c >= 0
  1787. def _shape_info(self):
  1788. return [_ShapeInfo("c", False, (0, np.inf), (True, False))]
  1789. def _rvs(self, c, size=None, random_state=None):
  1790. return abs(cauchy.rvs(loc=c, size=size,
  1791. random_state=random_state))
  1792. def _pdf(self, x, c):
  1793. # foldcauchy.pdf(x, c) = 1/(pi*(1+(x-c)**2)) + 1/(pi*(1+(x+c)**2))
  1794. return 1.0/np.pi*(1.0/(1+(x-c)**2) + 1.0/(1+(x+c)**2))
  1795. def _cdf(self, x, c):
  1796. return 1.0/np.pi*(np.arctan(x-c) + np.arctan(x+c))
  1797. def _sf(self, x, c):
  1798. # 1 - CDF(x, c) = 1 - (atan(x - c) + atan(x + c))/pi
  1799. # = ((pi/2 - atan(x - c)) + (pi/2 - atan(x + c)))/pi
  1800. # = (acot(x - c) + acot(x + c))/pi
  1801. # = (atan2(1, x - c) + atan2(1, x + c))/pi
  1802. return (np.arctan2(1, x - c) + np.arctan2(1, x + c))/np.pi
  1803. def _stats(self, c):
  1804. return np.inf, np.inf, np.nan, np.nan
  1805. foldcauchy = foldcauchy_gen(a=0.0, name='foldcauchy')
  1806. class f_gen(rv_continuous):
  1807. r"""An F continuous random variable.
  1808. For the noncentral F distribution, see `ncf`.
  1809. %(before_notes)s
  1810. See Also
  1811. --------
  1812. ncf
  1813. Notes
  1814. -----
  1815. The F distribution with :math:`df_1 > 0` and :math:`df_2 > 0` degrees of freedom is
  1816. the distribution of the ratio of two independent chi-squared distributions with
  1817. :math:`df_1` and :math:`df_2` degrees of freedom, after rescaling by
  1818. :math:`df_2 / df_1`.
  1819. The probability density function for `f` is:
  1820. .. math::
  1821. f(x, df_1, df_2) = \frac{df_2^{df_2/2} df_1^{df_1/2} x^{df_1 / 2-1}}
  1822. {(df_2+df_1 x)^{(df_1+df_2)/2}
  1823. B(df_1/2, df_2/2)}
  1824. for :math:`x > 0`.
  1825. `f` accepts shape parameters ``dfn`` and ``dfd`` for :math:`df_1`, the degrees of
  1826. freedom of the chi-squared distribution in the numerator, and :math:`df_2`, the
  1827. degrees of freedom of the chi-squared distribution in the denominator, respectively.
  1828. %(after_notes)s
  1829. %(example)s
  1830. """
  1831. def _shape_info(self):
  1832. idfn = _ShapeInfo("dfn", False, (0, np.inf), (False, False))
  1833. idfd = _ShapeInfo("dfd", False, (0, np.inf), (False, False))
  1834. return [idfn, idfd]
  1835. def _rvs(self, dfn, dfd, size=None, random_state=None):
  1836. return random_state.f(dfn, dfd, size)
  1837. def _pdf(self, x, dfn, dfd):
  1838. # df2**(df2/2) * df1**(df1/2) * x**(df1/2-1)
  1839. # F.pdf(x, df1, df2) = --------------------------------------------
  1840. # (df2+df1*x)**((df1+df2)/2) * B(df1/2, df2/2)
  1841. return np.exp(self._logpdf(x, dfn, dfd))
  1842. def _logpdf(self, x, dfn, dfd):
  1843. n = 1.0 * dfn
  1844. m = 1.0 * dfd
  1845. lPx = (m/2 * np.log(m) + n/2 * np.log(n) + sc.xlogy(n/2 - 1, x)
  1846. - (((n+m)/2) * np.log(m + n*x) + sc.betaln(n/2, m/2)))
  1847. return lPx
  1848. def _cdf(self, x, dfn, dfd):
  1849. return sc.fdtr(dfn, dfd, x)
  1850. def _sf(self, x, dfn, dfd):
  1851. return sc.fdtrc(dfn, dfd, x)
  1852. def _ppf(self, q, dfn, dfd):
  1853. return sc.fdtri(dfn, dfd, q)
  1854. def _stats(self, dfn, dfd):
  1855. v1, v2 = 1. * dfn, 1. * dfd
  1856. v2_2, v2_4, v2_6, v2_8 = v2 - 2., v2 - 4., v2 - 6., v2 - 8.
  1857. mu = xpx.apply_where(
  1858. v2 > 2, (v2, v2_2),
  1859. lambda v2, v2_2: v2 / v2_2,
  1860. fill_value=np.inf)
  1861. mu2 = xpx.apply_where(
  1862. v2 > 4, (v1, v2, v2_2, v2_4),
  1863. lambda v1, v2, v2_2, v2_4:
  1864. 2 * v2 * v2 * (v1 + v2_2) / (v1 * v2_2**2 * v2_4),
  1865. fill_value=np.inf)
  1866. g1 = xpx.apply_where(
  1867. v2 > 6, (v1, v2_2, v2_4, v2_6),
  1868. lambda v1, v2_2, v2_4, v2_6:
  1869. (2 * v1 + v2_2) / v2_6 * np.sqrt(v2_4 / (v1 * (v1 + v2_2))),
  1870. fill_value=np.nan)
  1871. g1 *= np.sqrt(8.)
  1872. g2 = xpx.apply_where(
  1873. v2 > 8, (g1, v2_6, v2_8),
  1874. lambda g1, v2_6, v2_8: (8 + g1 * g1 * v2_6) / v2_8,
  1875. fill_value=np.nan)
  1876. g2 *= 3. / 2.
  1877. return mu, mu2, g1, g2
  1878. def _entropy(self, dfn, dfd):
  1879. # the formula found in literature is incorrect. This one yields the
  1880. # same result as numerical integration using the generic entropy
  1881. # definition. This is also tested in tests/test_conntinous_basic
  1882. half_dfn = 0.5 * dfn
  1883. half_dfd = 0.5 * dfd
  1884. half_sum = 0.5 * (dfn + dfd)
  1885. return (np.log(dfd) - np.log(dfn) + sc.betaln(half_dfn, half_dfd) +
  1886. (1 - half_dfn) * sc.psi(half_dfn) - (1 + half_dfd) *
  1887. sc.psi(half_dfd) + half_sum * sc.psi(half_sum))
# Module-level F distribution object.  ``a=0.0`` is the lower bound of the
# support, matching the ``x > 0`` domain stated in the `f_gen` docstring.
f = f_gen(a=0.0, name='f')
  1889. ## Folded Normal
  1890. ## abs(Z) where (Z is normal with mu=L and std=S so that c=abs(L)/S)
  1891. ##
  1892. ## note: regress docs have scale parameter correct, but first parameter
  1893. ## he gives is a shape parameter A = c * scale
  1894. ## Half-normal is folded normal with shape-parameter c=0.
  1895. class foldnorm_gen(rv_continuous):
  1896. r"""A folded normal continuous random variable.
  1897. %(before_notes)s
  1898. Notes
  1899. -----
  1900. The probability density function for `foldnorm` is:
  1901. .. math::
  1902. f(x, c) = \sqrt{2/\pi} cosh(c x) \exp(-\frac{x^2+c^2}{2})
  1903. for :math:`x \ge 0` and :math:`c \ge 0`.
  1904. `foldnorm` takes ``c`` as a shape parameter for :math:`c`.
  1905. %(after_notes)s
  1906. %(example)s
  1907. """
  1908. def _argcheck(self, c):
  1909. return c >= 0
  1910. def _shape_info(self):
  1911. return [_ShapeInfo("c", False, (0, np.inf), (True, False))]
  1912. def _rvs(self, c, size=None, random_state=None):
  1913. return abs(random_state.standard_normal(size) + c)
  1914. def _pdf(self, x, c):
  1915. # foldnormal.pdf(x, c) = sqrt(2/pi) * cosh(c*x) * exp(-(x**2+c**2)/2)
  1916. return _norm_pdf(x + c) + _norm_pdf(x-c)
  1917. def _cdf(self, x, c):
  1918. sqrt_two = np.sqrt(2)
  1919. return 0.5 * (sc.erf((x - c)/sqrt_two) + sc.erf((x + c)/sqrt_two))
  1920. def _sf(self, x, c):
  1921. return _norm_sf(x - c) + _norm_sf(x + c)
  1922. def _stats(self, c):
  1923. # Regina C. Elandt, Technometrics 3, 551 (1961)
  1924. # https://www.jstor.org/stable/1266561
  1925. #
  1926. c2 = c*c
  1927. expfac = np.exp(-0.5*c2) / np.sqrt(2.*np.pi)
  1928. mu = 2.*expfac + c * sc.erf(c/np.sqrt(2))
  1929. mu2 = c2 + 1 - mu*mu
  1930. g1 = 2. * (mu*mu*mu - c2*mu - expfac)
  1931. g1 /= np.power(mu2, 1.5)
  1932. g2 = c2 * (c2 + 6.) + 3 + 8.*expfac*mu
  1933. g2 += (2. * (c2 - 3.) - 3. * mu**2) * mu**2
  1934. g2 = g2 / mu2**2.0 - 3.
  1935. return mu, mu2, g1, g2
# Module-level folded normal distribution object.  ``a=0.0`` is the lower
# bound of the support, matching ``x >= 0`` in the `foldnorm_gen` docstring.
foldnorm = foldnorm_gen(a=0.0, name='foldnorm')
  1937. class weibull_min_gen(rv_continuous):
  1938. r"""Weibull minimum continuous random variable.
  1939. The Weibull Minimum Extreme Value distribution, from extreme value theory
  1940. (Fisher-Gnedenko theorem), is also often simply called the Weibull
  1941. distribution. It arises as the limiting distribution of the rescaled
  1942. minimum of iid random variables.
  1943. %(before_notes)s
  1944. See Also
  1945. --------
  1946. weibull_max, numpy.random.Generator.weibull, exponweib
  1947. Notes
  1948. -----
  1949. The probability density function for `weibull_min` is:
  1950. .. math::
  1951. f(x, c) = c x^{c-1} \exp(-x^c)
  1952. for :math:`x > 0`, :math:`c > 0`.
  1953. `weibull_min` takes ``c`` as a shape parameter for :math:`c`.
  1954. (named :math:`k` in Wikipedia article and :math:`a` in
  1955. ``numpy.random.weibull``). Special shape values are :math:`c=1` and
  1956. :math:`c=2` where Weibull distribution reduces to the `expon` and
  1957. `rayleigh` distributions respectively.
  1958. Suppose ``X`` is an exponentially distributed random variable with
  1959. scale ``s``. Then ``Y = X**k`` is `weibull_min` distributed with shape
  1960. ``c = 1/k`` and scale ``s**k``.
  1961. %(after_notes)s
  1962. References
  1963. ----------
  1964. https://en.wikipedia.org/wiki/Weibull_distribution
  1965. https://en.wikipedia.org/wiki/Fisher-Tippett-Gnedenko_theorem
  1966. %(example)s
  1967. """
  1968. def _shape_info(self):
  1969. return [_ShapeInfo("c", False, (0, np.inf), (False, False))]
  1970. def _pdf(self, x, c):
  1971. # weibull_min.pdf(x, c) = c * x**(c-1) * exp(-x**c)
  1972. return c*pow(x, c-1)*np.exp(-pow(x, c))
  1973. def _logpdf(self, x, c):
  1974. return np.log(c) + sc.xlogy(c - 1, x) - pow(x, c)
  1975. def _cdf(self, x, c):
  1976. return -sc.expm1(-pow(x, c))
  1977. def _ppf(self, q, c):
  1978. return pow(-sc.log1p(-q), 1.0/c)
  1979. def _sf(self, x, c):
  1980. return np.exp(self._logsf(x, c))
  1981. def _logsf(self, x, c):
  1982. return -pow(x, c)
  1983. def _isf(self, q, c):
  1984. return (-np.log(q))**(1/c)
  1985. def _munp(self, n, c):
  1986. return sc.gamma(1.0+n*1.0/c)
  1987. def _entropy(self, c):
  1988. return -_EULER / c - np.log(c) + _EULER + 1
  1989. @extend_notes_in_docstring(rv_continuous, notes="""\
  1990. If ``method='mm'``, parameters fixed by the user are respected, and the
  1991. remaining parameters are used to match distribution and sample moments
  1992. where possible. For example, if the user fixes the location with
  1993. ``floc``, the parameters will only match the distribution skewness and
  1994. variance to the sample skewness and variance; no attempt will be made
  1995. to match the means or minimize a norm of the errors.
  1996. \n\n""")
  1997. def fit(self, data, *args, **kwds):
  1998. if isinstance(data, CensoredData):
  1999. if data.num_censored() == 0:
  2000. data = data._uncensor()
  2001. else:
  2002. return super().fit(data, *args, **kwds)
  2003. if kwds.pop('superfit', False):
  2004. return super().fit(data, *args, **kwds)
  2005. # this extracts fixed shape, location, and scale however they
  2006. # are specified, and also leaves them in `kwds`
  2007. data, fc, floc, fscale = _check_fit_input_parameters(self, data,
  2008. args, kwds)
  2009. method = kwds.get("method", "mle").lower()
  2010. # See https://en.wikipedia.org/wiki/Weibull_distribution#Moments for
  2011. # moment formulas.
  2012. def skew(c):
  2013. gamma1 = sc.gamma(1+1/c)
  2014. gamma2 = sc.gamma(1+2/c)
  2015. gamma3 = sc.gamma(1+3/c)
  2016. num = 2 * gamma1**3 - 3*gamma1*gamma2 + gamma3
  2017. den = (gamma2 - gamma1**2)**(3/2)
  2018. return num/den
  2019. # For c in [1e2, 3e4], population skewness appears to approach
  2020. # asymptote near -1.139, but past c > 3e4, skewness begins to vary
  2021. # wildly, and MoM won't provide a good guess. Get out early.
  2022. s = stats.skew(data)
  2023. max_c = 1e4
  2024. s_min = skew(max_c)
  2025. if s < s_min and method != "mm" and fc is None and not args:
  2026. return super().fit(data, *args, **kwds)
  2027. # If method is method of moments, we don't need the user's guesses.
  2028. # Otherwise, extract the guesses from args and kwds.
  2029. if method == "mm":
  2030. c, loc, scale = None, None, None
  2031. else:
  2032. c = args[0] if len(args) else None
  2033. loc = kwds.pop('loc', None)
  2034. scale = kwds.pop('scale', None)
  2035. if fc is None and c is None: # not fixed and no guess: use MoM
  2036. # Solve for c that matches sample distribution skewness to sample
  2037. # skewness.
  2038. # we start having numerical issues with `weibull_min` with
  2039. # parameters outside this range - and not just in this method.
  2040. # We could probably improve the situation by doing everything
  2041. # in the log space, but that is for another time.
  2042. c = root_scalar(lambda c: skew(c) - s, bracket=[0.02, max_c],
  2043. method='bisect').root
  2044. elif fc is not None: # fixed: use it
  2045. c = fc
  2046. if fscale is None and scale is None:
  2047. v = np.var(data)
  2048. scale = np.sqrt(v / (sc.gamma(1+2/c) - sc.gamma(1+1/c)**2))
  2049. elif fscale is not None:
  2050. scale = fscale
  2051. if floc is None and loc is None:
  2052. m = np.mean(data)
  2053. loc = m - scale*sc.gamma(1 + 1/c)
  2054. elif floc is not None:
  2055. loc = floc
  2056. if method == 'mm':
  2057. return c, loc, scale
  2058. else:
  2059. # At this point, parameter "guesses" may equal the fixed parameters
  2060. # in kwds. No harm in passing them as guesses, too.
  2061. return super().fit(data, c, loc=loc, scale=scale, **kwds)
# Module-level Weibull minimum distribution object.  ``a=0.0`` is the lower
# bound of the support, matching ``x > 0`` in the `weibull_min_gen` docstring.
weibull_min = weibull_min_gen(a=0.0, name='weibull_min')
  2063. class truncweibull_min_gen(rv_continuous):
  2064. r"""A doubly truncated Weibull minimum continuous random variable.
  2065. %(before_notes)s
  2066. See Also
  2067. --------
  2068. weibull_min, truncexpon
  2069. Notes
  2070. -----
  2071. The probability density function for `truncweibull_min` is:
  2072. .. math::
  2073. f(x, a, b, c) = \frac{c x^{c-1} \exp(-x^c)}{\exp(-a^c) - \exp(-b^c)}
  2074. for :math:`a < x <= b`, :math:`0 \le a < b` and :math:`c > 0`.
  2075. `truncweibull_min` takes :math:`a`, :math:`b`, and :math:`c` as shape
  2076. parameters.
  2077. Notice that the truncation values, :math:`a` and :math:`b`, are defined in
  2078. standardized form:
  2079. .. math::
  2080. a = (u_l - loc)/scale
  2081. b = (u_r - loc)/scale
  2082. where :math:`u_l` and :math:`u_r` are the specific left and right
  2083. truncation values, respectively. In other words, the support of the
  2084. distribution becomes :math:`(a*scale + loc) < x <= (b*scale + loc)` when
  2085. :math:`loc` and/or :math:`scale` are provided.
  2086. %(after_notes)s
  2087. References
  2088. ----------
  2089. .. [1] Rinne, H. "The Weibull Distribution: A Handbook". CRC Press (2009).
  2090. %(example)s
  2091. """
  2092. def _argcheck(self, c, a, b):
  2093. return (a >= 0.) & (b > a) & (c > 0.)
  2094. def _shape_info(self):
  2095. ic = _ShapeInfo("c", False, (0, np.inf), (False, False))
  2096. ia = _ShapeInfo("a", False, (0, np.inf), (True, False))
  2097. ib = _ShapeInfo("b", False, (0, np.inf), (False, False))
  2098. return [ic, ia, ib]
  2099. def _fitstart(self, data):
  2100. # Arbitrary, but default a=b=c=1 is not valid
  2101. return super()._fitstart(data, args=(1, 0, 1))
  2102. def _get_support(self, c, a, b):
  2103. return a, b
  2104. def _pdf(self, x, c, a, b):
  2105. denum = (np.exp(-pow(a, c)) - np.exp(-pow(b, c)))
  2106. return (c * pow(x, c-1) * np.exp(-pow(x, c))) / denum
  2107. def _logpdf(self, x, c, a, b):
  2108. logdenum = np.log(np.exp(-pow(a, c)) - np.exp(-pow(b, c)))
  2109. return np.log(c) + sc.xlogy(c - 1, x) - pow(x, c) - logdenum
  2110. def _cdf(self, x, c, a, b):
  2111. num = (np.exp(-pow(a, c)) - np.exp(-pow(x, c)))
  2112. denum = (np.exp(-pow(a, c)) - np.exp(-pow(b, c)))
  2113. return num / denum
  2114. def _logcdf(self, x, c, a, b):
  2115. lognum = np.log(np.exp(-pow(a, c)) - np.exp(-pow(x, c)))
  2116. logdenum = np.log(np.exp(-pow(a, c)) - np.exp(-pow(b, c)))
  2117. return lognum - logdenum
  2118. def _sf(self, x, c, a, b):
  2119. num = (np.exp(-pow(x, c)) - np.exp(-pow(b, c)))
  2120. denum = (np.exp(-pow(a, c)) - np.exp(-pow(b, c)))
  2121. return num / denum
  2122. def _logsf(self, x, c, a, b):
  2123. lognum = np.log(np.exp(-pow(x, c)) - np.exp(-pow(b, c)))
  2124. logdenum = np.log(np.exp(-pow(a, c)) - np.exp(-pow(b, c)))
  2125. return lognum - logdenum
  2126. def _isf(self, q, c, a, b):
  2127. return pow(
  2128. -np.log((1 - q) * np.exp(-pow(b, c)) + q * np.exp(-pow(a, c))), 1/c
  2129. )
  2130. def _ppf(self, q, c, a, b):
  2131. return pow(
  2132. -np.log((1 - q) * np.exp(-pow(a, c)) + q * np.exp(-pow(b, c))), 1/c
  2133. )
  2134. def _munp(self, n, c, a, b):
  2135. gamma_fun = sc.gamma(n/c + 1.) * (
  2136. sc.gammainc(n/c + 1., pow(b, c)) - sc.gammainc(n/c + 1., pow(a, c))
  2137. )
  2138. denum = (np.exp(-pow(a, c)) - np.exp(-pow(b, c)))
  2139. return gamma_fun / denum
# Module-level doubly truncated Weibull distribution object.  No fixed
# support bounds are passed; `truncweibull_min_gen._get_support` returns the
# shape parameters ``a`` and ``b`` as the end points.
truncweibull_min = truncweibull_min_gen(name='truncweibull_min')
# NOTE(review): `_support` presumably tells the distribution infrastructure
# which shape parameters define the support -- confirm against its consumers.
truncweibull_min._support = ('a', 'b')
  2142. class weibull_max_gen(rv_continuous):
  2143. r"""Weibull maximum continuous random variable.
  2144. The Weibull Maximum Extreme Value distribution, from extreme value theory
  2145. (Fisher-Gnedenko theorem), is the limiting distribution of rescaled
  2146. maximum of iid random variables. This is the distribution of -X
  2147. if X is from the `weibull_min` function.
  2148. %(before_notes)s
  2149. See Also
  2150. --------
  2151. weibull_min
  2152. Notes
  2153. -----
  2154. The probability density function for `weibull_max` is:
  2155. .. math::
  2156. f(x, c) = c (-x)^{c-1} \exp(-(-x)^c)
  2157. for :math:`x < 0`, :math:`c > 0`.
  2158. `weibull_max` takes ``c`` as a shape parameter for :math:`c`.
  2159. %(after_notes)s
  2160. References
  2161. ----------
  2162. https://en.wikipedia.org/wiki/Weibull_distribution
  2163. https://en.wikipedia.org/wiki/Fisher-Tippett-Gnedenko_theorem
  2164. %(example)s
  2165. """
  2166. def _shape_info(self):
  2167. return [_ShapeInfo("c", False, (0, np.inf), (False, False))]
  2168. def _pdf(self, x, c):
  2169. # weibull_max.pdf(x, c) = c * (-x)**(c-1) * exp(-(-x)**c)
  2170. return c*pow(-x, c-1)*np.exp(-pow(-x, c))
  2171. def _logpdf(self, x, c):
  2172. return np.log(c) + sc.xlogy(c-1, -x) - pow(-x, c)
  2173. def _cdf(self, x, c):
  2174. return np.exp(-pow(-x, c))
  2175. def _logcdf(self, x, c):
  2176. return -pow(-x, c)
  2177. def _sf(self, x, c):
  2178. return -sc.expm1(-pow(-x, c))
  2179. def _ppf(self, q, c):
  2180. return -pow(-np.log(q), 1.0/c)
  2181. def _munp(self, n, c):
  2182. val = sc.gamma(1.0+n*1.0/c)
  2183. if int(n) % 2:
  2184. sgn = -1
  2185. else:
  2186. sgn = 1
  2187. return sgn * val
  2188. def _entropy(self, c):
  2189. return -_EULER / c - np.log(c) + _EULER + 1
# Module-level Weibull maximum distribution object.  ``b=0.0`` is the upper
# bound of the support, matching ``x < 0`` in the `weibull_max_gen` docstring.
weibull_max = weibull_max_gen(b=0.0, name='weibull_max')
  2191. class genlogistic_gen(rv_continuous):
  2192. r"""A generalized logistic continuous random variable.
  2193. %(before_notes)s
  2194. Notes
  2195. -----
  2196. The probability density function for `genlogistic` is:
  2197. .. math::
  2198. f(x, c) = c \frac{\exp(-x)}
  2199. {(1 + \exp(-x))^{c+1}}
  2200. for real :math:`x` and :math:`c > 0`. In literature, different
  2201. generalizations of the logistic distribution can be found. This is the type 1
  2202. generalized logistic distribution according to [1]_. It is also referred to
  2203. as the skew-logistic distribution [2]_.
  2204. `genlogistic` takes ``c`` as a shape parameter for :math:`c`.
  2205. %(after_notes)s
  2206. References
  2207. ----------
  2208. .. [1] Johnson et al. "Continuous Univariate Distributions", Volume 2,
  2209. Wiley. 1995.
  2210. .. [2] "Generalized Logistic Distribution", Wikipedia,
  2211. https://en.wikipedia.org/wiki/Generalized_logistic_distribution
  2212. %(example)s
  2213. """
  2214. def _shape_info(self):
  2215. return [_ShapeInfo("c", False, (0, np.inf), (False, False))]
  2216. def _pdf(self, x, c):
  2217. # genlogistic.pdf(x, c) = c * exp(-x) / (1 + exp(-x))**(c+1)
  2218. return np.exp(self._logpdf(x, c))
  2219. def _logpdf(self, x, c):
  2220. # Two mathematically equivalent expressions for log(pdf(x, c)):
  2221. # log(pdf(x, c)) = log(c) - x - (c + 1)*log(1 + exp(-x))
  2222. # = log(c) + c*x - (c + 1)*log(1 + exp(x))
  2223. mult = -(c - 1) * (x < 0) - 1
  2224. absx = np.abs(x)
  2225. return np.log(c) + mult*absx - (c+1) * sc.log1p(np.exp(-absx))
  2226. def _cdf(self, x, c):
  2227. Cx = (1+np.exp(-x))**(-c)
  2228. return Cx
  2229. def _logcdf(self, x, c):
  2230. return -c * np.log1p(np.exp(-x))
  2231. def _ppf(self, q, c):
  2232. return -np.log(sc.powm1(q, -1.0/c))
  2233. def _sf(self, x, c):
  2234. return -sc.expm1(self._logcdf(x, c))
  2235. def _isf(self, q, c):
  2236. return self._ppf(1 - q, c)
  2237. def _stats(self, c):
  2238. mu = _EULER + sc.psi(c)
  2239. mu2 = np.pi*np.pi/6.0 + sc.zeta(2, c)
  2240. g1 = -2*sc.zeta(3, c) + 2*_ZETA3
  2241. g1 /= np.power(mu2, 1.5)
  2242. g2 = np.pi**4/15.0 + 6*sc.zeta(4, c)
  2243. g2 /= mu2**2.0
  2244. return mu, mu2, g1, g2
  2245. def _entropy(self, c):
  2246. return xpx.apply_where(
  2247. c < 8e6, c,
  2248. lambda c: -np.log(c) + sc.psi(c + 1) + _EULER + 1,
  2249. # asymptotic expansion: psi(c) ~ log(c) - 1 / (2 * c)
  2250. # a = -log(c) + psi(c + 1)
  2251. # = -log(c) + psi(c) + 1 / c
  2252. # ~ -log(c) + log(c) - 1 / (2 * c) + 1 / c
  2253. # = 1 / (2 * c)
  2254. lambda c: 1 / (2 * c) + _EULER + 1)
# Module-level generalized logistic distribution object.  No support bounds
# are passed; the `genlogistic_gen` docstring defines the density for all
# real x.
genlogistic = genlogistic_gen(name='genlogistic')
  2256. class genpareto_gen(rv_continuous):
  2257. r"""A generalized Pareto continuous random variable.
  2258. %(before_notes)s
  2259. Notes
  2260. -----
  2261. The probability density function for `genpareto` is:
  2262. .. math::
  2263. f(x, c) = (1 + c x)^{-1 - 1/c}
  2264. defined for :math:`x \ge 0` if :math:`c \ge 0`, and for
  2265. :math:`0 \le x \le -1/c` if :math:`c < 0`.
  2266. `genpareto` takes ``c`` as a shape parameter for :math:`c`.
  2267. For :math:`c=0`, `genpareto` reduces to the exponential
  2268. distribution, `expon`:
  2269. .. math::
  2270. f(x, 0) = \exp(-x)
  2271. For :math:`c=-1`, `genpareto` is uniform on ``[0, 1]``:
  2272. .. math::
  2273. f(x, -1) = 1
  2274. %(after_notes)s
  2275. %(example)s
  2276. """
  2277. def _argcheck(self, c):
  2278. return np.isfinite(c)
  2279. def _shape_info(self):
  2280. return [_ShapeInfo("c", False, (-np.inf, np.inf), (False, False))]
  2281. def _get_support(self, c):
  2282. c = np.asarray(c)
  2283. a = np.broadcast_arrays(self.a, c)[0].copy()
  2284. b = xpx.apply_where(c < 0, c, lambda c: -1. / c,
  2285. fill_value=np.inf)
  2286. return a, b
  2287. def _pdf(self, x, c):
  2288. # genpareto.pdf(x, c) = (1 + c * x)**(-1 - 1/c)
  2289. return np.exp(self._logpdf(x, c))
  2290. def _logpdf(self, x, c):
  2291. return xpx.apply_where((x == x) & (c != 0), (x, c),
  2292. lambda x, c: -sc.xlog1py(c + 1., c*x) / c,
  2293. fill_value=-x)
  2294. def _cdf(self, x, c):
  2295. return -sc.inv_boxcox1p(-x, -c)
  2296. def _sf(self, x, c):
  2297. return sc.inv_boxcox(-x, -c)
  2298. def _logsf(self, x, c):
  2299. return xpx.apply_where((x == x) & (c != 0), (x, c),
  2300. lambda x, c: -sc.log1p(c*x) / c,
  2301. fill_value=-x)
  2302. def _ppf(self, q, c):
  2303. return -sc.boxcox1p(-q, -c)
  2304. def _isf(self, q, c):
  2305. return -sc.boxcox(q, -c)
  2306. def _stats(self, c, moments='mv'):
  2307. m, v, s, k = None, None, None, None
  2308. if 'm' in moments:
  2309. m = xpx.apply_where(c < 1, c,
  2310. lambda xi: 1 / (1 - xi),
  2311. fill_value=np.inf)
  2312. if 'v' in moments:
  2313. v = xpx.apply_where(c < 1/2, c,
  2314. lambda xi: 1 / (1 - xi)**2 / (1 - 2 * xi),
  2315. fill_value=np.nan)
  2316. if 's' in moments:
  2317. s = xpx.apply_where(
  2318. c < 1/3, c,
  2319. lambda xi: 2 * (1 + xi) * np.sqrt(1 - 2*xi) / (1 - 3*xi),
  2320. fill_value=np.nan)
  2321. if 'k' in moments:
  2322. k = xpx.apply_where(
  2323. c < 1/4, c,
  2324. lambda xi: 3 * (1 - 2*xi) * (2*xi**2 + xi + 3)
  2325. / (1 - 3*xi) / (1 - 4*xi) - 3,
  2326. fill_value=np.nan)
  2327. return m, v, s, k
  2328. def _munp(self, n, c):
  2329. def __munp(c):
  2330. val = 0.0
  2331. k = np.arange(0, n + 1)
  2332. for ki, cnk in zip(k, sc.comb(n, k)):
  2333. val = val + cnk * (-1) ** ki / (1.0 - c * ki)
  2334. return np.where(c * n < 1, val * (-1.0 / c) ** n, np.inf)
  2335. return xpx.apply_where(c != 0, c, __munp, fill_value=sc.gamma(n + 1))
  2336. def _entropy(self, c):
  2337. return 1. + c
# Module-level generalized Pareto distribution object.  ``a=0.0`` is the
# lower bound of the support; the upper bound depends on the shape and is
# computed by `genpareto_gen._get_support`.
genpareto = genpareto_gen(a=0.0, name='genpareto')
  2339. class genexpon_gen(rv_continuous):
  2340. r"""A generalized exponential continuous random variable.
  2341. %(before_notes)s
  2342. Notes
  2343. -----
  2344. The probability density function for `genexpon` is:
  2345. .. math::
  2346. f(x, a, b, c) = (a + b (1 - \exp(-c x)))
  2347. \exp(-a x - b x + \frac{b}{c} (1-\exp(-c x)))
  2348. for :math:`x \ge 0`, :math:`a, b, c > 0`.
  2349. `genexpon` takes :math:`a`, :math:`b` and :math:`c` as shape parameters.
  2350. %(after_notes)s
  2351. References
  2352. ----------
  2353. H.K. Ryu, "An Extension of Marshall and Olkin's Bivariate Exponential
  2354. Distribution", Journal of the American Statistical Association, 1993.
  2355. N. Balakrishnan, Asit P. Basu (editors), *The Exponential Distribution:
  2356. Theory, Methods and Applications*, Gordon and Breach, 1995.
  2357. ISBN 10: 2884491929
  2358. %(example)s
  2359. """
  2360. def _shape_info(self):
  2361. ia = _ShapeInfo("a", False, (0, np.inf), (False, False))
  2362. ib = _ShapeInfo("b", False, (0, np.inf), (False, False))
  2363. ic = _ShapeInfo("c", False, (0, np.inf), (False, False))
  2364. return [ia, ib, ic]
  2365. def _pdf(self, x, a, b, c):
  2366. # genexpon.pdf(x, a, b, c) = (a + b * (1 - exp(-c*x))) * \
  2367. # exp(-a*x - b*x + b/c * (1-exp(-c*x)))
  2368. return (a + b*(-sc.expm1(-c*x)))*np.exp((-a-b)*x +
  2369. b*(-sc.expm1(-c*x))/c)
  2370. def _logpdf(self, x, a, b, c):
  2371. return np.log(a+b*(-sc.expm1(-c*x))) + (-a-b)*x+b*(-sc.expm1(-c*x))/c
  2372. def _cdf(self, x, a, b, c):
  2373. return -sc.expm1((-a-b)*x + b*(-sc.expm1(-c*x))/c)
  2374. def _ppf(self, p, a, b, c):
  2375. s = a + b
  2376. t = (b - c*np.log1p(-p))/s
  2377. return (t + sc.lambertw(-b/s * np.exp(-t)).real)/c
  2378. def _sf(self, x, a, b, c):
  2379. return np.exp((-a-b)*x + b*(-sc.expm1(-c*x))/c)
  2380. def _isf(self, p, a, b, c):
  2381. s = a + b
  2382. t = (b - c*np.log(p))/s
  2383. return (t + sc.lambertw(-b/s * np.exp(-t)).real)/c
# Module-level generalized exponential distribution object.  ``a=0.0`` is the
# lower bound of the support, matching ``x >= 0`` in the `genexpon_gen`
# docstring.
genexpon = genexpon_gen(a=0.0, name='genexpon')
class genextreme_gen(rv_continuous):
    r"""A generalized extreme value continuous random variable.

    %(before_notes)s

    See Also
    --------
    gumbel_r

    Notes
    -----
    For :math:`c=0`, `genextreme` is equal to `gumbel_r` with
    probability density function

    .. math::

        f(x) = \exp(-\exp(-x)) \exp(-x),

    where :math:`-\infty < x < \infty`.

    For :math:`c \ne 0`, the probability density function for `genextreme` is:

    .. math::

        f(x, c) = \exp(-(1-c x)^{1/c}) (1-c x)^{1/c-1},

    where :math:`-\infty < x \le 1/c` if :math:`c > 0` and
    :math:`1/c \le x < \infty` if :math:`c < 0`.

    Note that several sources and software packages use the opposite
    convention for the sign of the shape parameter :math:`c`.

    `genextreme` takes ``c`` as a shape parameter for :math:`c`.

    %(after_notes)s

    %(example)s

    """
    def _argcheck(self, c):
        # Any finite shape is accepted, including zero and negative values.
        return np.isfinite(c)

    def _shape_info(self):
        return [_ShapeInfo("c", False, (-np.inf, np.inf), (False, False))]

    def _get_support(self, c):
        # Upper end point is 1/c for c > 0 and +inf otherwise; lower end point
        # is 1/c for c < 0 and -inf otherwise.  c is clamped away from zero
        # with `_XMIN` before the division so the unused branch of `np.where`
        # never divides by zero.
        _b = np.where(c > 0, 1.0 / np.maximum(c, _XMIN), np.inf)
        _a = np.where(c < 0, 1.0 / np.minimum(c, -_XMIN), -np.inf)
        return _a, _b

    def _loglogcdf(self, x, c):
        # Returns log(-log(cdf(x, c)))
        # The general formula is used for c != 0 and non-NaN x (``x == x`` is
        # False for NaN); otherwise the value is -x.
        return xpx.apply_where(
            (x == x) & (c != 0), (x, c),
            lambda x, c: sc.log1p(-c*x)/c,
            fill_value=-x)

    def _pdf(self, x, c):
        # genextreme.pdf(x, c) =
        #     exp(-exp(-x))*exp(-x),                    for c==0
        #     exp(-(1-c*x)**(1/c))*(1-c*x)**(1/c-1),    for x \le 1/c, c > 0
        return np.exp(self._logpdf(x, c))

    def _logpdf(self, x, c):
        # Suppress warnings 0 * inf
        # (the product c*x is only formed where c != 0 and x is not NaN;
        # elsewhere it is taken to be 0)
        cx = xpx.apply_where((x == x) & (c != 0), (c, x),
                             operator.mul, fill_value=0.0)
        logex2 = sc.log1p(-cx)
        logpex2 = self._loglogcdf(x, c)
        pex2 = np.exp(logpex2)
        # Handle special cases
        # NOTE: `pex2` was computed above, before `logpex2` is patched here.
        np.putmask(logpex2, (c == 0) & (x == -np.inf), 0.0)
        # logpdf = -pex2 + logpex2 - logex2, except where cx is 1 or -inf,
        # which is assigned -inf (zero density).
        logpdf = xpx.apply_where(
            ~((cx == 1) | (cx == -np.inf)),
            (pex2, logpex2, logex2),
            lambda pex2, lpex2, lex2: -pex2 + lpex2 - lex2,
            fill_value=-np.inf)
        # The single point c == 1, x == 1 is overridden to log-density 0.
        np.putmask(logpdf, (c == 1) & (x == 1), 0.0)
        return logpdf

    def _logcdf(self, x, c):
        return -np.exp(self._loglogcdf(x, c))

    def _cdf(self, x, c):
        return np.exp(self._logcdf(x, c))

    def _sf(self, x, c):
        # 1 - cdf, written with expm1 on the log-CDF.
        return -sc.expm1(self._logcdf(x, c))

    def _ppf(self, q, c):
        # x is the c == 0 quantile; for c != 0 it is mapped through
        # -expm1(-c*x)/c.
        x = -np.log(-np.log(q))
        return xpx.apply_where(
            (x == x) & (c != 0), (x, c),
            lambda x, c: -sc.expm1(-c * x) / c,
            fill_value=x)

    def _isf(self, q, c):
        # Same as `_ppf`, but log(1 - q) is evaluated with log1p.
        x = -np.log(-sc.log1p(-q))
        return xpx.apply_where(
            (x == x) & (c != 0), (x, c),
            lambda x, c: -sc.expm1(-c * x) / c,
            fill_value=x)

    def _stats(self, c):
        # g(n) = Gamma(n*c + 1) is the building block of all four moments.
        def g(n):
            return sc.gamma(n * c + 1)

        g1 = g(1)
        g2 = g(2)
        g3 = g(3)
        g4 = g(4)
        # g2 - g1**2, replaced by (c*pi)**2/6 for |c| < 1e-7.
        g2mg12 = np.where(abs(c) < 1e-7, (c*np.pi)**2.0/6.0, g2-g1**2.0)

        def gam2k_f(c):
            return sc.expm1(sc.gammaln(2.0*c+1.0)-2*sc.gammaln(c + 1.0))/c**2.0

        gam2k = xpx.apply_where(abs(c) >= 1e-7, c, gam2k_f, fill_value=np.pi**2.0/6.0)

        eps = 1e-14

        def gamk_f(c):
            return sc.expm1(sc.gammaln(c + 1))/c

        # For |c| < eps the value -_EULER is used instead.
        gamk = xpx.apply_where(abs(c) >= eps, c, gamk_f, fill_value=-_EULER)

        # mean
        m = np.where(c < -1.0, np.nan, -gamk)

        # variance
        v = np.where(c < -0.5, np.nan, g1**2.0*gam2k)

        # skewness
        # (NaN for c < -1/3; the constant `sk_fill` for |c| <= eps**0.29)
        def sk1_eval(c, *args):
            def sk1_eval_f(c, g1, g2, g3, g2mg12):
                return np.sign(c)*(-g3 + (g2 + 2*g2mg12)*g1)/g2mg12**1.5

            return xpx.apply_where(c >= -1./3, (c, *args),
                                   sk1_eval_f, fill_value=np.nan)

        sk_fill = 12*np.sqrt(6)*_ZETA3/np.pi**3
        args = (g1, g2, g3, g2mg12)
        sk = xpx.apply_where(abs(c) > eps**0.29, (c, *args),
                             sk1_eval, fill_value=sk_fill)

        # kurtosis
        # (NaN for c < -1/4; the constant 12/5 for |c| <= eps**0.23)
        def ku1_eval(c, *args):
            def ku1_eval_f(g1, g2, g3, g4, g2mg12):
                return (g4 + (-4*g3 + 3*(g2 + g2mg12)*g1)*g1)/g2mg12**2 - 3

            return xpx.apply_where(c >= -1./4, args, ku1_eval_f, fill_value=np.nan)

        args = (g1, g2, g3, g4, g2mg12)
        ku = xpx.apply_where(abs(c) > eps**0.23, (c, *args),
                             ku1_eval, fill_value=12.0/5.0)

        return m, v, sk, ku

    def _fitstart(self, data):
        if isinstance(data, CensoredData):
            data = data._uncensor()
        # This is better than the default shape of (1,).
        # The starting shape is +0.5 for negatively skewed data and -0.5
        # otherwise.
        g = _skew(data)
        if g < 0:
            a = 0.5
        else:
            a = -0.5
        return super()._fitstart(data, args=(a,))

    def _munp(self, n, c):
        # Alternating binomial sum of gamma functions scaled by 1/c**n;
        # infinite where c*n <= -1.
        k = np.arange(0, n+1)
        vals = 1.0/c**n * np.sum(
            sc.comb(n, k) * (-1)**k * sc.gamma(c*k + 1),
            axis=0)
        return np.where(c*n > -1, vals, np.inf)

    def _entropy(self, c):
        return _EULER*(1 - c) + 1
# Module-level generalized extreme value distribution object.  No fixed
# support bounds are passed; they depend on the shape and are computed by
# `genextreme_gen._get_support`.
genextreme = genextreme_gen(name='genextreme')
  2519. def _digammainv(y):
  2520. """Inverse of the digamma function (real positive arguments only).
  2521. This function is used in the `fit` method of `gamma_gen`.
  2522. The function uses either optimize.fsolve or optimize.newton
  2523. to solve `sc.digamma(x) - y = 0`. There is probably room for
  2524. improvement, but currently it works over a wide range of y:
  2525. >>> import numpy as np
  2526. >>> rng = np.random.default_rng()
  2527. >>> y = 64*rng.standard_normal(1000000)
  2528. >>> y.min(), y.max()
  2529. (-311.43592651416662, 351.77388222276869)
  2530. >>> x = [_digammainv(t) for t in y]
  2531. >>> np.abs(sc.digamma(x) - y).max()
  2532. 1.1368683772161603e-13
  2533. """
  2534. _em = 0.5772156649015328606065120
  2535. def func(x):
  2536. return sc.digamma(x) - y
  2537. if y > -0.125:
  2538. x0 = np.exp(y) + 0.5
  2539. if y < 10:
  2540. # Some experimentation shows that newton reliably converges
  2541. # must faster than fsolve in this y range. For larger y,
  2542. # newton sometimes fails to converge.
  2543. value = optimize.newton(func, x0, tol=1e-10)
  2544. return value
  2545. elif y > -3:
  2546. x0 = np.exp(y/2.332) + 0.08661
  2547. else:
  2548. x0 = 1.0 / (-y - _em)
  2549. value, info, ier, mesg = optimize.fsolve(func, x0, xtol=1e-11,
  2550. full_output=True)
  2551. if ier != 1:
  2552. raise RuntimeError(f"_digammainv: fsolve failed, y = {y!r}")
  2553. return value[0]
  2554. ## Gamma (Use MATLAB and MATHEMATICA (b=theta=scale, a=alpha=shape) definition)
  2555. ## gamma(a, loc, scale) with a an integer is the Erlang distribution
  2556. ## gamma(1, loc, scale) is the Exponential distribution
  2557. ## gamma(df/2, 0, 2) is the chi2 distribution with df degrees of freedom.
  2558. class gamma_gen(rv_continuous):
  2559. r"""A gamma continuous random variable.
  2560. %(before_notes)s
  2561. See Also
  2562. --------
  2563. erlang, expon
  2564. Notes
  2565. -----
  2566. The probability density function for `gamma` is:
  2567. .. math::
  2568. f(x, a) = \frac{x^{a-1} e^{-x}}{\Gamma(a)}
  2569. for :math:`x \ge 0`, :math:`a > 0`. Here :math:`\Gamma(a)` refers to the
  2570. gamma function.
  2571. `gamma` takes ``a`` as a shape parameter for :math:`a`.
  2572. When :math:`a` is an integer, `gamma` reduces to the Erlang
  2573. distribution, and when :math:`a=1` to the exponential distribution.
  2574. Gamma distributions are sometimes parameterized with two variables,
  2575. with a probability density function of:
  2576. .. math::
  2577. f(x, \alpha, \beta) =
  2578. \frac{\beta^\alpha x^{\alpha - 1} e^{-\beta x }}{\Gamma(\alpha)}
  2579. Note that this parameterization is equivalent to the above, with
  2580. ``scale = 1 / beta``.
  2581. %(after_notes)s
  2582. %(example)s
  2583. """
  2584. def _shape_info(self):
  2585. return [_ShapeInfo("a", False, (0, np.inf), (False, False))]
  2586. def _rvs(self, a, size=None, random_state=None):
  2587. return random_state.standard_gamma(a, size)
  2588. def _pdf(self, x, a):
  2589. # gamma.pdf(x, a) = x**(a-1) * exp(-x) / gamma(a)
  2590. return np.exp(self._logpdf(x, a))
  2591. def _logpdf(self, x, a):
  2592. return sc.xlogy(a-1.0, x) - x - sc.gammaln(a)
  2593. def _cdf(self, x, a):
  2594. return sc.gammainc(a, x)
  2595. def _sf(self, x, a):
  2596. return sc.gammaincc(a, x)
  2597. def _ppf(self, q, a):
  2598. return sc.gammaincinv(a, q)
  2599. def _isf(self, q, a):
  2600. return sc.gammainccinv(a, q)
  2601. def _stats(self, a):
  2602. return a, a, 2.0/np.sqrt(a), 6.0/a
  2603. def _munp(self, n, a):
  2604. return sc.poch(a, n)
  2605. def _entropy(self, a):
  2606. def regular_formula(a):
  2607. return sc.psi(a) * (1-a) + a + sc.gammaln(a)
  2608. def asymptotic_formula(a):
  2609. # plug in above formula the expansions:
  2610. # psi(a) ~ ln(a) - 1/2a - 1/12a^2 + 1/120a^4
  2611. # gammaln(a) ~ a * ln(a) - a - 1/2 * ln(a) + 1/2 ln(2 * pi) +
  2612. # 1/12a - 1/360a^3
  2613. return (0.5 * (1. + np.log(2*np.pi) + np.log(a)) - 1/(3 * a)
  2614. - (a**-2.)/12 - (a**-3.)/90 + (a**-4.)/120)
  2615. return xpx.apply_where(a < 250, a, regular_formula, asymptotic_formula)
  2616. def _fitstart(self, data):
  2617. # The skewness of the gamma distribution is `2 / np.sqrt(a)`.
  2618. # We invert that to estimate the shape `a` using the skewness
  2619. # of the data. The formula is regularized with 1e-8 in the
  2620. # denominator to allow for degenerate data where the skewness
  2621. # is close to 0.
  2622. if isinstance(data, CensoredData):
  2623. data = data._uncensor()
  2624. sk = _skew(data)
  2625. a = 4 / (1e-8 + sk**2)
  2626. return super()._fitstart(data, args=(a,))
  2627. @extend_notes_in_docstring(rv_continuous, notes="""\
  2628. When the location is fixed by using the argument `floc`
  2629. and `method='MLE'`, this
  2630. function uses explicit formulas or solves a simpler numerical
  2631. problem than the full ML optimization problem. So in that case,
  2632. the `optimizer`, `loc` and `scale` arguments are ignored.
  2633. \n\n""")
  2634. def fit(self, data, *args, **kwds):
  2635. floc = kwds.get('floc', None)
  2636. method = kwds.get('method', 'mle')
  2637. if (isinstance(data, CensoredData) or
  2638. floc is None and method.lower() != 'mm'):
  2639. # loc is not fixed or we're not doing standard MLE.
  2640. # Use the default fit method.
  2641. return super().fit(data, *args, **kwds)
  2642. # We already have this value, so just pop it from kwds.
  2643. kwds.pop('floc', None)
  2644. f0 = _get_fixed_fit_value(kwds, ['f0', 'fa', 'fix_a'])
  2645. fscale = kwds.pop('fscale', None)
  2646. _remove_optimizer_parameters(kwds)
  2647. if f0 is not None and floc is not None and fscale is not None:
  2648. # This check is for consistency with `rv_continuous.fit`.
  2649. # Without this check, this function would just return the
  2650. # parameters that were given.
  2651. raise ValueError("All parameters fixed. There is nothing to "
  2652. "optimize.")
  2653. # Fixed location is handled by shifting the data.
  2654. data = np.asarray(data)
  2655. if not np.isfinite(data).all():
  2656. raise ValueError("The data contains non-finite values.")
  2657. # Use explicit formulas for mm (gh-19884)
  2658. if method.lower() == 'mm':
  2659. m1 = np.mean(data)
  2660. m2 = np.var(data)
  2661. m3 = np.mean((data - m1) ** 3)
  2662. a, loc, scale = f0, floc, fscale
  2663. # Three unknowns
  2664. if a is None and loc is None and scale is None:
  2665. scale = m3 / (2 * m2)
  2666. # Two unknowns
  2667. if loc is None and scale is None:
  2668. scale = np.sqrt(m2 / a)
  2669. if a is None and scale is None:
  2670. scale = m2 / (m1 - loc)
  2671. if a is None and loc is None:
  2672. a = m2 / (scale ** 2)
  2673. # One unknown
  2674. if a is None:
  2675. a = (m1 - loc) / scale
  2676. if loc is None:
  2677. loc = m1 - a * scale
  2678. if scale is None:
  2679. scale = (m1 - loc) / a
  2680. return a, loc, scale
  2681. # Special case: loc is fixed.
  2682. # NB: data == loc is ok if a >= 1; the below check is more strict.
  2683. if np.any(data <= floc):
  2684. raise FitDataError("gamma", lower=floc, upper=np.inf)
  2685. if floc != 0:
  2686. # Don't do the subtraction in-place, because `data` might be a
  2687. # view of the input array.
  2688. data = data - floc
  2689. xbar = data.mean()
  2690. # Three cases to handle:
  2691. # * shape and scale both free
  2692. # * shape fixed, scale free
  2693. # * shape free, scale fixed
  2694. if fscale is None:
  2695. # scale is free
  2696. if f0 is not None:
  2697. # shape is fixed
  2698. a = f0
  2699. else:
  2700. # shape and scale are both free.
  2701. # The MLE for the shape parameter `a` is the solution to:
  2702. # np.log(a) - sc.digamma(a) - np.log(xbar) +
  2703. # np.log(data).mean() = 0
  2704. s = np.log(xbar) - np.log(data).mean()
  2705. aest = (3-s + np.sqrt((s-3)**2 + 24*s)) / (12*s)
  2706. xa = aest*(1-0.4)
  2707. xb = aest*(1+0.4)
  2708. a = optimize.brentq(lambda a: np.log(a) - sc.digamma(a) - s,
  2709. xa, xb, disp=0)
  2710. # The MLE for the scale parameter is just the data mean
  2711. # divided by the shape parameter.
  2712. scale = xbar / a
  2713. else:
  2714. # scale is fixed, shape is free
  2715. # The MLE for the shape parameter `a` is the solution to:
  2716. # sc.digamma(a) - np.log(data).mean() + np.log(fscale) = 0
  2717. c = np.log(data).mean() - np.log(fscale)
  2718. a = _digammainv(c)
  2719. scale = fscale
  2720. return a, floc, scale
  2721. gamma = gamma_gen(a=0.0, name='gamma')
  2722. class erlang_gen(gamma_gen):
  2723. """An Erlang continuous random variable.
  2724. %(before_notes)s
  2725. See Also
  2726. --------
  2727. gamma
  2728. Notes
  2729. -----
  2730. The Erlang distribution is a special case of the Gamma distribution, with
  2731. the shape parameter `a` an integer. Note that this restriction is not
  2732. enforced by `erlang`. It will, however, generate a warning the first time
  2733. a non-integer value is used for the shape parameter.
  2734. Refer to `gamma` for examples.
  2735. """
  2736. def _argcheck(self, a):
  2737. allint = np.all(np.floor(a) == a)
  2738. if not allint:
  2739. # An Erlang distribution shouldn't really have a non-integer
  2740. # shape parameter, so warn the user.
  2741. message = ('The shape parameter of the erlang distribution '
  2742. f'has been given a non-integer value {a!r}.')
  2743. warnings.warn(message, RuntimeWarning, stacklevel=3)
  2744. return a > 0
  2745. def _shape_info(self):
  2746. return [_ShapeInfo("a", True, (1, np.inf), (True, False))]
  2747. def _fitstart(self, data):
  2748. # Override gamma_gen_fitstart so that an integer initial value is
  2749. # used. (Also regularize the division, to avoid issues when
  2750. # _skew(data) is 0 or close to 0.)
  2751. if isinstance(data, CensoredData):
  2752. data = data._uncensor()
  2753. a = int(4.0 / (1e-8 + _skew(data)**2))
  2754. return super(gamma_gen, self)._fitstart(data, args=(a,))
  2755. # Trivial override of the fit method, so we can monkey-patch its
  2756. # docstring.
  2757. @extend_notes_in_docstring(rv_continuous, notes="""\
  2758. The Erlang distribution is generally defined to have integer values
  2759. for the shape parameter. This is not enforced by the `erlang` class.
  2760. When fitting the distribution, it will generally return a non-integer
  2761. value for the shape parameter. By using the keyword argument
  2762. `f0=<integer>`, the fit method can be constrained to fit the data to
  2763. a specific integer shape parameter.""")
  2764. def fit(self, data, *args, **kwds):
  2765. return super().fit(data, *args, **kwds)
  2766. erlang = erlang_gen(a=0.0, name='erlang')
  2767. class gengamma_gen(rv_continuous):
  2768. r"""A generalized gamma continuous random variable.
  2769. %(before_notes)s
  2770. See Also
  2771. --------
  2772. gamma, invgamma, weibull_min
  2773. Notes
  2774. -----
  2775. The probability density function for `gengamma` is ([1]_):
  2776. .. math::
  2777. f(x, a, c) = \frac{|c| x^{c a-1} \exp(-x^c)}{\Gamma(a)}
  2778. for :math:`x \ge 0`, :math:`a > 0`, and :math:`c \ne 0`.
  2779. :math:`\Gamma` is the gamma function (`scipy.special.gamma`).
  2780. `gengamma` takes :math:`a` and :math:`c` as shape parameters.
  2781. %(after_notes)s
  2782. References
  2783. ----------
  2784. .. [1] E.W. Stacy, "A Generalization of the Gamma Distribution",
  2785. Annals of Mathematical Statistics, Vol 33(3), pp. 1187--1192.
  2786. %(example)s
  2787. """
  2788. def _argcheck(self, a, c):
  2789. return (a > 0) & (c != 0)
  2790. def _shape_info(self):
  2791. ia = _ShapeInfo("a", False, (0, np.inf), (False, False))
  2792. ic = _ShapeInfo("c", False, (-np.inf, np.inf), (False, False))
  2793. return [ia, ic]
  2794. def _pdf(self, x, a, c):
  2795. return np.exp(self._logpdf(x, a, c))
  2796. def _logpdf(self, x, a, c):
  2797. return xpx.apply_where(
  2798. (x != 0) | (c > 0), (x, c),
  2799. lambda x, c: (np.log(abs(c)) + sc.xlogy(c*a - 1, x) - x**c - sc.gammaln(a)),
  2800. fill_value=-np.inf)
  2801. def _cdf(self, x, a, c):
  2802. xc = x**c
  2803. val1 = sc.gammainc(a, xc)
  2804. val2 = sc.gammaincc(a, xc)
  2805. return np.where(c > 0, val1, val2)
  2806. def _rvs(self, a, c, size=None, random_state=None):
  2807. r = random_state.standard_gamma(a, size=size)
  2808. return r**(1./c)
  2809. def _sf(self, x, a, c):
  2810. xc = x**c
  2811. val1 = sc.gammainc(a, xc)
  2812. val2 = sc.gammaincc(a, xc)
  2813. return np.where(c > 0, val2, val1)
  2814. def _ppf(self, q, a, c):
  2815. val1 = sc.gammaincinv(a, q)
  2816. val2 = sc.gammainccinv(a, q)
  2817. return np.where(c > 0, val1, val2)**(1.0/c)
  2818. def _isf(self, q, a, c):
  2819. val1 = sc.gammaincinv(a, q)
  2820. val2 = sc.gammainccinv(a, q)
  2821. return np.where(c > 0, val2, val1)**(1.0/c)
  2822. def _munp(self, n, a, c):
  2823. # Pochhammer symbol: sc.pocha,n) = gamma(a+n)/gamma(a)
  2824. return sc.poch(a, n*1.0/c)
  2825. def _entropy(self, a, c):
  2826. def regular(a, c):
  2827. val = sc.psi(a)
  2828. A = a * (1 - val) + val / c
  2829. B = sc.gammaln(a) - np.log(abs(c))
  2830. h = A + B
  2831. return h
  2832. def asymptotic(a, c):
  2833. # using asymptotic expansions for gammaln and psi (see gh-18093)
  2834. return (norm._entropy() - np.log(a)/2
  2835. - np.log(np.abs(c)) + (a**-1.)/6 - (a**-3.)/90
  2836. + (np.log(a) - (a**-1.)/2 - (a**-2.)/12 + (a**-4.)/120)/c)
  2837. return xpx.apply_where(a >= 200, (a, c), asymptotic, regular)
  2838. gengamma = gengamma_gen(a=0.0, name='gengamma')
  2839. class genhalflogistic_gen(rv_continuous):
  2840. r"""A generalized half-logistic continuous random variable.
  2841. %(before_notes)s
  2842. Notes
  2843. -----
  2844. The probability density function for `genhalflogistic` is:
  2845. .. math::
  2846. f(x, c) = \frac{2 (1 - c x)^{1/(c-1)}}{[1 + (1 - c x)^{1/c}]^2}
  2847. for :math:`0 \le x \le 1/c`, and :math:`c > 0`.
  2848. `genhalflogistic` takes ``c`` as a shape parameter for :math:`c`.
  2849. %(after_notes)s
  2850. %(example)s
  2851. """
  2852. def _shape_info(self):
  2853. return [_ShapeInfo("c", False, (0, np.inf), (False, False))]
  2854. def _get_support(self, c):
  2855. return self.a, 1.0/c
  2856. def _pdf(self, x, c):
  2857. # genhalflogistic.pdf(x, c) =
  2858. # 2 * (1-c*x)**(1/c-1) / (1+(1-c*x)**(1/c))**2
  2859. limit = 1.0/c
  2860. tmp = np.asarray(1-c*x)
  2861. tmp0 = tmp**(limit-1)
  2862. tmp2 = tmp0*tmp
  2863. return 2*tmp0 / (1+tmp2)**2
  2864. def _cdf(self, x, c):
  2865. limit = 1.0/c
  2866. tmp = np.asarray(1-c*x)
  2867. tmp2 = tmp**(limit)
  2868. return (1.0-tmp2) / (1+tmp2)
  2869. def _ppf(self, q, c):
  2870. return 1.0/c*(1-((1.0-q)/(1.0+q))**c)
  2871. def _entropy(self, c):
  2872. return 2 - (2*c+1)*np.log(2)
  2873. genhalflogistic = genhalflogistic_gen(a=0.0, name='genhalflogistic')
  2874. class genhyperbolic_gen(rv_continuous):
  2875. r"""A generalized hyperbolic continuous random variable.
  2876. %(before_notes)s
  2877. See Also
  2878. --------
  2879. t, norminvgauss, geninvgauss, laplace, cauchy
  2880. Notes
  2881. -----
  2882. The probability density function for `genhyperbolic` is:
  2883. .. math::
  2884. f(x, p, a, b) =
  2885. \frac{(a^2 - b^2)^{p/2}}
  2886. {\sqrt{2\pi}a^{p-1/2}
  2887. K_p\Big(\sqrt{a^2 - b^2}\Big)}
  2888. e^{bx} \times \frac{K_{p - 1/2}
  2889. (a \sqrt{1 + x^2})}
  2890. {(\sqrt{1 + x^2})^{1/2 - p}}
  2891. for :math:`x, p \in ( - \infty; \infty)`,
  2892. :math:`|b| < a` if :math:`p \ge 0`,
  2893. :math:`|b| \le a` if :math:`p < 0`.
  2894. :math:`K_{p}(.)` denotes the modified Bessel function of the second
  2895. kind and order :math:`p` (`scipy.special.kv`)
  2896. `genhyperbolic` takes ``p`` as a tail parameter,
  2897. ``a`` as a shape parameter,
  2898. ``b`` as a skewness parameter.
  2899. %(after_notes)s
  2900. The original parameterization of the Generalized Hyperbolic Distribution
  2901. is found in [1]_ as follows
  2902. .. math::
  2903. f(x, \lambda, \alpha, \beta, \delta, \mu) =
  2904. \frac{(\gamma/\delta)^\lambda}{\sqrt{2\pi}K_\lambda(\delta \gamma)}
  2905. e^{\beta (x - \mu)} \times \frac{K_{\lambda - 1/2}
  2906. (\alpha \sqrt{\delta^2 + (x - \mu)^2})}
  2907. {(\sqrt{\delta^2 + (x - \mu)^2} / \alpha)^{1/2 - \lambda}}
  2908. for :math:`x \in ( - \infty; \infty)`,
  2909. :math:`\gamma := \sqrt{\alpha^2 - \beta^2}`,
  2910. :math:`\lambda, \mu \in ( - \infty; \infty)`,
  2911. :math:`\delta \ge 0, |\beta| < \alpha` if :math:`\lambda \ge 0`,
  2912. :math:`\delta > 0, |\beta| \le \alpha` if :math:`\lambda < 0`.
  2913. The location-scale-based parameterization implemented in
  2914. SciPy is based on [2]_, where :math:`a = \alpha\delta`,
  2915. :math:`b = \beta\delta`, :math:`p = \lambda`,
  2916. :math:`scale=\delta` and :math:`loc=\mu`
  2917. Moments are implemented based on [3]_ and [4]_.
  2918. For the distributions that are a special case such as Student's t,
  2919. it is not recommended to rely on the implementation of genhyperbolic.
  2920. To avoid potential numerical problems and for performance reasons,
  2921. the methods of the specific distributions should be used.
  2922. References
  2923. ----------
  2924. .. [1] O. Barndorff-Nielsen, "Hyperbolic Distributions and Distributions
  2925. on Hyperbolae", Scandinavian Journal of Statistics, Vol. 5(3),
  2926. pp. 151-157, 1978. https://www.jstor.org/stable/4615705
  2927. .. [2] Eberlein E., Prause K. (2002) The Generalized Hyperbolic Model:
  2928. Financial Derivatives and Risk Measures. In: Geman H., Madan D.,
  2929. Pliska S.R., Vorst T. (eds) Mathematical Finance - Bachelier
  2930. Congress 2000. Springer Finance. Springer, Berlin, Heidelberg.
  2931. :doi:`10.1007/978-3-662-12429-1_12`
  2932. .. [3] Scott, David J, Würtz, Diethelm, Dong, Christine and Tran,
  2933. Thanh Tam, (2009), Moments of the generalized hyperbolic
  2934. distribution, MPRA Paper, University Library of Munich, Germany,
  2935. https://EconPapers.repec.org/RePEc:pra:mprapa:19081.
  2936. .. [4] E. Eberlein and E. A. von Hammerstein. Generalized hyperbolic
  2937. and inverse Gaussian distributions: Limiting cases and approximation
  2938. of processes. FDM Preprint 80, April 2003. University of Freiburg.
  2939. https://freidok.uni-freiburg.de/fedora/objects/freidok:7974/datastreams/FILE1/content
  2940. %(example)s
  2941. """
  2942. def _argcheck(self, p, a, b):
  2943. return (np.logical_and(np.abs(b) < a, p >= 0)
  2944. | np.logical_and(np.abs(b) <= a, p < 0))
  2945. def _shape_info(self):
  2946. ip = _ShapeInfo("p", False, (-np.inf, np.inf), (False, False))
  2947. ia = _ShapeInfo("a", False, (0, np.inf), (True, False))
  2948. ib = _ShapeInfo("b", False, (-np.inf, np.inf), (False, False))
  2949. return [ip, ia, ib]
  2950. def _fitstart(self, data):
  2951. # Arbitrary, but the default p = a = b = 1 is not valid; the
  2952. # distribution requires |b| < a if p >= 0.
  2953. return super()._fitstart(data, args=(1, 1, 0.5))
  2954. def _logpdf(self, x, p, a, b):
  2955. # kve instead of kv works better for large values of p
  2956. # and smaller values of sqrt(a^2 - b^2)
  2957. @np.vectorize
  2958. def _logpdf_single(x, p, a, b):
  2959. return _stats.genhyperbolic_logpdf(x, p, a, b)
  2960. return _logpdf_single(x, p, a, b)
  2961. def _pdf(self, x, p, a, b):
  2962. # kve instead of kv works better for large values of p
  2963. # and smaller values of sqrt(a^2 - b^2)
  2964. @np.vectorize
  2965. def _pdf_single(x, p, a, b):
  2966. return _stats.genhyperbolic_pdf(x, p, a, b)
  2967. return _pdf_single(x, p, a, b)
  2968. # np.vectorize isn't currently designed to be used as a decorator,
  2969. # so use a lambda instead. This allows us to decorate the function
  2970. # with `np.vectorize` and still provide the `otypes` parameter.
  2971. @lambda func: np.vectorize(func, otypes=[np.float64])
  2972. @staticmethod
  2973. def _integrate_pdf(x0, x1, p, a, b):
  2974. """
  2975. Integrate the pdf of the genhyberbolic distribution from x0 to x1.
  2976. This is a private function used by _cdf() and _sf() only; either x0
  2977. will be -inf or x1 will be inf.
  2978. """
  2979. user_data = np.array([p, a, b], float).ctypes.data_as(ctypes.c_void_p)
  2980. llc = LowLevelCallable.from_cython(_stats, '_genhyperbolic_pdf',
  2981. user_data)
  2982. d = np.sqrt((a + b)*(a - b))
  2983. mean = b/d * sc.kv(p + 1, d) / sc.kv(p, d)
  2984. epsrel = 1e-10
  2985. epsabs = 0
  2986. if x0 < mean < x1:
  2987. # If the interval includes the mean, integrate over the two
  2988. # intervals [x0, mean] and [mean, x1] and add. If we try to do
  2989. # the integral in one call of quad and the non-infinite endpoint
  2990. # is far in the tail, quad might return an incorrect result
  2991. # because it does not "see" the peak of the PDF.
  2992. intgrl = (integrate.quad(llc, x0, mean,
  2993. epsrel=epsrel, epsabs=epsabs)[0]
  2994. + integrate.quad(llc, mean, x1,
  2995. epsrel=epsrel, epsabs=epsabs)[0])
  2996. else:
  2997. intgrl = integrate.quad(llc, x0, x1,
  2998. epsrel=epsrel, epsabs=epsabs)[0]
  2999. if np.isnan(intgrl):
  3000. msg = ("Infinite values encountered in scipy.special.kve. "
  3001. "Values replaced by NaN to avoid incorrect results.")
  3002. warnings.warn(msg, RuntimeWarning, stacklevel=3)
  3003. return max(0.0, min(1.0, intgrl))
  3004. def _cdf(self, x, p, a, b):
  3005. return self._integrate_pdf(-np.inf, x, p, a, b)
  3006. def _sf(self, x, p, a, b):
  3007. return self._integrate_pdf(x, np.inf, p, a, b)
  3008. def _rvs(self, p, a, b, size=None, random_state=None):
  3009. # note: X = b * V + sqrt(V) * X has a
  3010. # generalized hyperbolic distribution
  3011. # if X is standard normal and V is
  3012. # geninvgauss(p = p, b = t2, loc = loc, scale = t3)
  3013. t1 = np.float_power(a, 2) - np.float_power(b, 2)
  3014. # b in the GIG
  3015. t2 = np.float_power(t1, 0.5)
  3016. # scale in the GIG
  3017. t3 = np.float_power(t1, - 0.5)
  3018. gig = geninvgauss.rvs(
  3019. p=p,
  3020. b=t2,
  3021. scale=t3,
  3022. size=size,
  3023. random_state=random_state
  3024. )
  3025. normst = norm.rvs(size=size, random_state=random_state)
  3026. return b * gig + np.sqrt(gig) * normst
  3027. def _stats(self, p, a, b):
  3028. # https://mpra.ub.uni-muenchen.de/19081/1/MPRA_paper_19081.pdf
  3029. # https://freidok.uni-freiburg.de/fedora/objects/freidok:7974/datastreams/FILE1/content
  3030. # standardized moments
  3031. p, a, b = np.broadcast_arrays(p, a, b)
  3032. t1 = np.float_power(a, 2) - np.float_power(b, 2)
  3033. t1 = np.float_power(t1, 0.5)
  3034. t2 = np.float_power(1, 2) * np.float_power(t1, - 1)
  3035. integers = np.linspace(0, 4, 5)
  3036. # make integers perpendicular to existing dimensions
  3037. integers = integers.reshape(integers.shape + (1,) * p.ndim)
  3038. b0, b1, b2, b3, b4 = sc.kv(p + integers, t1)
  3039. r1, r2, r3, r4 = (b / b0 for b in (b1, b2, b3, b4))
  3040. m = b * t2 * r1
  3041. v = (
  3042. t2 * r1 + np.float_power(b, 2) * np.float_power(t2, 2) *
  3043. (r2 - np.float_power(r1, 2))
  3044. )
  3045. m3e = (
  3046. np.float_power(b, 3) * np.float_power(t2, 3) *
  3047. (r3 - 3 * b2 * b1 * np.float_power(b0, -2) +
  3048. 2 * np.float_power(r1, 3)) +
  3049. 3 * b * np.float_power(t2, 2) *
  3050. (r2 - np.float_power(r1, 2))
  3051. )
  3052. s = m3e * np.float_power(v, - 3 / 2)
  3053. m4e = (
  3054. np.float_power(b, 4) * np.float_power(t2, 4) *
  3055. (r4 - 4 * b3 * b1 * np.float_power(b0, - 2) +
  3056. 6 * b2 * np.float_power(b1, 2) * np.float_power(b0, - 3) -
  3057. 3 * np.float_power(r1, 4)) +
  3058. np.float_power(b, 2) * np.float_power(t2, 3) *
  3059. (6 * r3 - 12 * b2 * b1 * np.float_power(b0, - 2) +
  3060. 6 * np.float_power(r1, 3)) +
  3061. 3 * np.float_power(t2, 2) * r2
  3062. )
  3063. k = m4e * np.float_power(v, -2) - 3
  3064. return m, v, s, k
  3065. genhyperbolic = genhyperbolic_gen(name='genhyperbolic')
  3066. class gompertz_gen(rv_continuous):
  3067. r"""A Gompertz (or truncated Gumbel) continuous random variable.
  3068. %(before_notes)s
  3069. Notes
  3070. -----
  3071. The probability density function for `gompertz` is:
  3072. .. math::
  3073. f(x, c) = c \exp(x) \exp(-c (e^x-1))
  3074. for :math:`x \ge 0`, :math:`c > 0`.
  3075. `gompertz` takes ``c`` as a shape parameter for :math:`c`.
  3076. %(after_notes)s
  3077. %(example)s
  3078. """
  3079. def _shape_info(self):
  3080. return [_ShapeInfo("c", False, (0, np.inf), (False, False))]
  3081. def _pdf(self, x, c):
  3082. # gompertz.pdf(x, c) = c * exp(x) * exp(-c*(exp(x)-1))
  3083. return np.exp(self._logpdf(x, c))
  3084. def _logpdf(self, x, c):
  3085. return np.log(c) + x - c * sc.expm1(x)
  3086. def _cdf(self, x, c):
  3087. return -sc.expm1(-c * sc.expm1(x))
  3088. def _ppf(self, q, c):
  3089. return sc.log1p(-1.0 / c * sc.log1p(-q))
  3090. def _sf(self, x, c):
  3091. return np.exp(-c * sc.expm1(x))
  3092. def _isf(self, p, c):
  3093. return sc.log1p(-np.log(p)/c)
  3094. def _entropy(self, c):
  3095. return 1.0 - np.log(c) - sc._ufuncs._scaled_exp1(c)/c
  3096. gompertz = gompertz_gen(a=0.0, name='gompertz')
  3097. def _average_with_log_weights(x, logweights):
  3098. x = np.asarray(x)
  3099. logweights = np.asarray(logweights)
  3100. maxlogw = logweights.max()
  3101. weights = np.exp(logweights - maxlogw)
  3102. return np.average(x, weights=weights)
  3103. class gumbel_r_gen(rv_continuous):
  3104. r"""A right-skewed Gumbel continuous random variable.
  3105. %(before_notes)s
  3106. See Also
  3107. --------
  3108. gumbel_l, gompertz, genextreme
  3109. Notes
  3110. -----
  3111. The probability density function for `gumbel_r` is:
  3112. .. math::
  3113. f(x) = \exp(-(x + e^{-x}))
  3114. for real :math:`x`.
  3115. The Gumbel distribution is sometimes referred to as a type I Fisher-Tippett
  3116. distribution. It is also related to the extreme value distribution,
  3117. log-Weibull and Gompertz distributions.
  3118. %(after_notes)s
  3119. %(example)s
  3120. """
  3121. def _shape_info(self):
  3122. return []
  3123. def _pdf(self, x):
  3124. # gumbel_r.pdf(x) = exp(-(x + exp(-x)))
  3125. return np.exp(self._logpdf(x))
  3126. def _logpdf(self, x):
  3127. return -x - np.exp(-x)
  3128. def _cdf(self, x):
  3129. return np.exp(-np.exp(-x))
  3130. def _logcdf(self, x):
  3131. return -np.exp(-x)
  3132. def _ppf(self, q):
  3133. return -np.log(-np.log(q))
  3134. def _sf(self, x):
  3135. return -sc.expm1(-np.exp(-x))
  3136. def _isf(self, p):
  3137. return -np.log(-np.log1p(-p))
  3138. def _stats(self):
  3139. return _EULER, np.pi*np.pi/6.0, 12*np.sqrt(6)/np.pi**3 * _ZETA3, 12.0/5
  3140. def _entropy(self):
  3141. # https://en.wikipedia.org/wiki/Gumbel_distribution
  3142. return _EULER + 1.
  3143. @_call_super_mom
  3144. @inherit_docstring_from(rv_continuous)
  3145. def fit(self, data, *args, **kwds):
  3146. data, floc, fscale = _check_fit_input_parameters(self, data,
  3147. args, kwds)
  3148. # By the method of maximum likelihood, the estimators of the
  3149. # location and scale are the roots of the equations defined in
  3150. # `func` and the value of the expression for `loc` that follows.
  3151. # The first `func` is a first order derivative of the log-likelihood
  3152. # equation and the second is from Source: Statistical Distributions,
  3153. # 3rd Edition. Evans, Hastings, and Peacock (2000), Page 101.
  3154. def get_loc_from_scale(scale):
  3155. return -scale * (sc.logsumexp(-data / scale) - np.log(len(data)))
  3156. if fscale is not None:
  3157. # if the scale is fixed, the location can be analytically
  3158. # determined.
  3159. scale = fscale
  3160. loc = get_loc_from_scale(scale)
  3161. else:
  3162. # A different function is solved depending on whether the location
  3163. # is fixed.
  3164. if floc is not None:
  3165. loc = floc
  3166. # equation to use if the location is fixed.
  3167. # note that one cannot use the equation in Evans, Hastings,
  3168. # and Peacock (2000) (since it assumes that the derivative
  3169. # w.r.t. the log-likelihood is zero). however, it is easy to
  3170. # derive the MLE condition directly if loc is fixed
  3171. def func(scale):
  3172. term1 = (loc - data) * np.exp((loc - data) / scale) + data
  3173. term2 = len(data) * (loc + scale)
  3174. return term1.sum() - term2
  3175. else:
  3176. # equation to use if both location and scale are free
  3177. def func(scale):
  3178. sdata = -data / scale
  3179. wavg = _average_with_log_weights(data, logweights=sdata)
  3180. return data.mean() - wavg - scale
  3181. # set brackets for `root_scalar` to use when optimizing over the
  3182. # scale such that a root is likely between them. Use user supplied
  3183. # guess or default 1.
  3184. brack_start = kwds.get('scale', 1)
  3185. lbrack, rbrack = brack_start / 2, brack_start * 2
  3186. # if a root is not between the brackets, iteratively expand them
  3187. # until they include a sign change, checking after each bracket is
  3188. # modified.
  3189. def interval_contains_root(lbrack, rbrack):
  3190. # return true if the signs disagree.
  3191. return (np.sign(func(lbrack)) !=
  3192. np.sign(func(rbrack)))
  3193. while (not interval_contains_root(lbrack, rbrack)
  3194. and (lbrack > 0 or rbrack < np.inf)):
  3195. lbrack /= 2
  3196. rbrack *= 2
  3197. res = optimize.root_scalar(func, bracket=(lbrack, rbrack),
  3198. rtol=1e-14, xtol=1e-14)
  3199. scale = res.root
  3200. loc = floc if floc is not None else get_loc_from_scale(scale)
  3201. return loc, scale
  3202. gumbel_r = gumbel_r_gen(name='gumbel_r')
  3203. class gumbel_l_gen(rv_continuous):
  3204. r"""A left-skewed Gumbel continuous random variable.
  3205. %(before_notes)s
  3206. See Also
  3207. --------
  3208. gumbel_r, gompertz, genextreme
  3209. Notes
  3210. -----
  3211. The probability density function for `gumbel_l` is:
  3212. .. math::
  3213. f(x) = \exp(x - e^x)
  3214. for real :math:`x`.
  3215. The Gumbel distribution is sometimes referred to as a type I Fisher-Tippett
  3216. distribution. It is also related to the extreme value distribution,
  3217. log-Weibull and Gompertz distributions.
  3218. %(after_notes)s
  3219. %(example)s
  3220. """
  3221. def _shape_info(self):
  3222. return []
  3223. def _pdf(self, x):
  3224. # gumbel_l.pdf(x) = exp(x - exp(x))
  3225. return np.exp(self._logpdf(x))
  3226. def _logpdf(self, x):
  3227. return x - np.exp(x)
  3228. def _cdf(self, x):
  3229. return -sc.expm1(-np.exp(x))
  3230. def _ppf(self, q):
  3231. return np.log(-sc.log1p(-q))
  3232. def _logsf(self, x):
  3233. return -np.exp(x)
  3234. def _sf(self, x):
  3235. return np.exp(-np.exp(x))
  3236. def _isf(self, x):
  3237. return np.log(-np.log(x))
  3238. def _stats(self):
  3239. return -_EULER, np.pi*np.pi/6.0, \
  3240. -12*np.sqrt(6)/np.pi**3 * _ZETA3, 12.0/5
  3241. def _entropy(self):
  3242. return _EULER + 1.
  3243. @_call_super_mom
  3244. @inherit_docstring_from(rv_continuous)
  3245. def fit(self, data, *args, **kwds):
  3246. # The fit method of `gumbel_r` can be used for this distribution with
  3247. # small modifications. The process to do this is
  3248. # 1. pass the sign negated data into `gumbel_r.fit`
  3249. # - if the location is fixed, it should also be negated.
  3250. # 2. negate the sign of the resulting location, leaving the scale
  3251. # unmodified.
  3252. # `gumbel_r.fit` holds necessary input checks.
  3253. if kwds.get('floc') is not None:
  3254. kwds['floc'] = -kwds['floc']
  3255. loc_r, scale_r, = gumbel_r.fit(-np.asarray(data), *args, **kwds)
  3256. return -loc_r, scale_r
  3257. gumbel_l = gumbel_l_gen(name='gumbel_l')
  3258. class halfcauchy_gen(rv_continuous):
  3259. r"""A Half-Cauchy continuous random variable.
  3260. %(before_notes)s
  3261. Notes
  3262. -----
  3263. The probability density function for `halfcauchy` is:
  3264. .. math::
  3265. f(x) = \frac{2}{\pi (1 + x^2)}
  3266. for :math:`x \ge 0`.
  3267. %(after_notes)s
  3268. %(example)s
  3269. """
  3270. def _shape_info(self):
  3271. return []
  3272. def _pdf(self, x):
  3273. # halfcauchy.pdf(x) = 2 / (pi * (1 + x**2))
  3274. return 2.0/np.pi/(1.0+x*x)
  3275. def _logpdf(self, x):
  3276. return np.log(2.0/np.pi) - sc.log1p(x*x)
  3277. def _cdf(self, x):
  3278. return 2.0/np.pi*np.arctan(x)
  3279. def _ppf(self, q):
  3280. return np.tan(np.pi/2*q)
  3281. def _sf(self, x):
  3282. return 2.0/np.pi * np.arctan2(1, x)
  3283. def _isf(self, p):
  3284. return 1.0/np.tan(np.pi*p/2)
  3285. def _stats(self):
  3286. return np.inf, np.inf, np.nan, np.nan
  3287. def _entropy(self):
  3288. return np.log(2*np.pi)
  3289. @_call_super_mom
  3290. @inherit_docstring_from(rv_continuous)
  3291. def fit(self, data, *args, **kwds):
  3292. if kwds.pop('superfit', False):
  3293. return super().fit(data, *args, **kwds)
  3294. data, floc, fscale = _check_fit_input_parameters(self, data,
  3295. args, kwds)
  3296. # location is independent from the scale
  3297. data_min = np.min(data)
  3298. if floc is not None:
  3299. if data_min < floc:
  3300. # There are values that are less than the specified loc.
  3301. raise FitDataError("halfcauchy", lower=floc, upper=np.inf)
  3302. loc = floc
  3303. else:
  3304. # if not provided, location MLE is the minimal data point
  3305. loc = data_min
  3306. # find scale
  3307. def find_scale(loc, data):
  3308. shifted_data = data - loc
  3309. n = data.size
  3310. shifted_data_squared = np.square(shifted_data)
  3311. def fun_to_solve(scale):
  3312. denominator = scale**2 + shifted_data_squared
  3313. return 2 * np.sum(shifted_data_squared/denominator) - n
  3314. small = np.finfo(1.0).tiny**0.5 # avoid underflow
  3315. res = root_scalar(fun_to_solve, bracket=(small, np.max(shifted_data)))
  3316. return res.root
  3317. if fscale is not None:
  3318. scale = fscale
  3319. else:
  3320. scale = find_scale(loc, data)
  3321. return loc, scale
  3322. halfcauchy = halfcauchy_gen(a=0.0, name='halfcauchy')
  3323. class halflogistic_gen(rv_continuous):
  3324. r"""A half-logistic continuous random variable.
  3325. %(before_notes)s
  3326. Notes
  3327. -----
  3328. The probability density function for `halflogistic` is:
  3329. .. math::
  3330. f(x) = \frac{ 2 e^{-x} }{ (1+e^{-x})^2 }
  3331. = \frac{1}{2} \text{sech}(x/2)^2
  3332. for :math:`x \ge 0`.
  3333. %(after_notes)s
  3334. References
  3335. ----------
  3336. .. [1] Asgharzadeh et al (2011). "Comparisons of Methods of Estimation for the
  3337. Half-Logistic Distribution". Selcuk J. Appl. Math. 93-108.
  3338. %(example)s
  3339. """
  3340. def _shape_info(self):
  3341. return []
  3342. def _pdf(self, x):
  3343. # halflogistic.pdf(x) = 2 * exp(-x) / (1+exp(-x))**2
  3344. # = 1/2 * sech(x/2)**2
  3345. return np.exp(self._logpdf(x))
  3346. def _logpdf(self, x):
  3347. return np.log(2) - x - 2. * sc.log1p(np.exp(-x))
  3348. def _cdf(self, x):
  3349. return np.tanh(x/2.0)
  3350. def _ppf(self, q):
  3351. return 2*np.arctanh(q)
  3352. def _sf(self, x):
  3353. return 2 * sc.expit(-x)
  3354. def _isf(self, q):
  3355. return xpx.apply_where(q < 0.5, q,
  3356. lambda q: -sc.logit(0.5 * q),
  3357. lambda q: 2*np.arctanh(1 - q))
  3358. def _munp(self, n):
  3359. if n == 0:
  3360. return 1 # otherwise returns NaN
  3361. if n == 1:
  3362. return 2*np.log(2)
  3363. if n == 2:
  3364. return np.pi*np.pi/3.0
  3365. if n == 3:
  3366. return 9*_ZETA3
  3367. if n == 4:
  3368. return 7*np.pi**4 / 15.0
  3369. return 2*(1-pow(2.0, 1-n))*sc.gamma(n+1)*sc.zeta(n, 1)
  3370. def _entropy(self):
  3371. return 2-np.log(2)
  3372. @_call_super_mom
  3373. @inherit_docstring_from(rv_continuous)
  3374. def fit(self, data, *args, **kwds):
  3375. if kwds.pop('superfit', False):
  3376. return super().fit(data, *args, **kwds)
  3377. data, floc, fscale = _check_fit_input_parameters(self, data,
  3378. args, kwds)
  3379. def find_scale(data, loc):
  3380. # scale is solution to a fix point problem ([1] 2.6)
  3381. # use approximate MLE as starting point ([1] 3.1)
  3382. n_observations = data.shape[0]
  3383. sorted_data = np.sort(data, axis=0)
  3384. p = np.arange(1, n_observations + 1)/(n_observations + 1)
  3385. q = 1 - p
  3386. pp1 = 1 + p
  3387. alpha = p - 0.5 * q * pp1 * np.log(pp1 / q)
  3388. beta = 0.5 * q * pp1
  3389. sorted_data = sorted_data - loc
  3390. B = 2 * np.sum(alpha[1:] * sorted_data[1:])
  3391. C = 2 * np.sum(beta[1:] * sorted_data[1:]**2)
  3392. # starting guess
  3393. scale = ((B + np.sqrt(B**2 + 8 * n_observations * C))
  3394. /(4 * n_observations))
  3395. # relative tolerance of fix point iterator
  3396. rtol = 1e-8
  3397. relative_residual = 1
  3398. shifted_mean = sorted_data.mean() # y_mean - y_min
  3399. # find fix point by repeated application of eq. (2.6)
  3400. # simplify as
  3401. # exp(-x) / (1 + exp(-x)) = 1 / (1 + exp(x))
  3402. # = expit(-x))
  3403. while relative_residual > rtol:
  3404. sum_term = sorted_data * sc.expit(-sorted_data/scale)
  3405. scale_new = shifted_mean - 2/n_observations * sum_term.sum()
  3406. relative_residual = abs((scale - scale_new)/scale)
  3407. scale = scale_new
  3408. return scale
  3409. # location is independent from the scale
  3410. data_min = np.min(data)
  3411. if floc is not None:
  3412. if data_min < floc:
  3413. # There are values that are less than the specified loc.
  3414. raise FitDataError("halflogistic", lower=floc, upper=np.inf)
  3415. loc = floc
  3416. else:
  3417. # if not provided, location MLE is the minimal data point
  3418. loc = data_min
  3419. # scale depends on location
  3420. scale = fscale if fscale is not None else find_scale(data, loc)
  3421. return loc, scale
  3422. halflogistic = halflogistic_gen(a=0.0, name='halflogistic')
  3423. class halfnorm_gen(rv_continuous):
  3424. r"""A half-normal continuous random variable.
  3425. %(before_notes)s
  3426. Notes
  3427. -----
  3428. The probability density function for `halfnorm` is:
  3429. .. math::
  3430. f(x) = \sqrt{2/\pi} \exp(-x^2 / 2)
  3431. for :math:`x >= 0`.
  3432. `halfnorm` is a special case of `chi` with ``df=1``.
  3433. %(after_notes)s
  3434. %(example)s
  3435. """
  3436. def _shape_info(self):
  3437. return []
  3438. def _rvs(self, size=None, random_state=None):
  3439. return abs(random_state.standard_normal(size=size))
  3440. def _pdf(self, x):
  3441. # halfnorm.pdf(x) = sqrt(2/pi) * exp(-x**2/2)
  3442. return np.sqrt(2.0/np.pi)*np.exp(-x*x/2.0)
  3443. def _logpdf(self, x):
  3444. return 0.5 * np.log(2.0/np.pi) - x*x/2.0
  3445. def _cdf(self, x):
  3446. return sc.erf(x / np.sqrt(2))
  3447. def _ppf(self, q):
  3448. return _norm_ppf((1+q)/2.0)
  3449. def _sf(self, x):
  3450. return 2 * _norm_sf(x)
  3451. def _isf(self, p):
  3452. return _norm_isf(p/2)
  3453. def _stats(self):
  3454. return (np.sqrt(2.0/np.pi),
  3455. 1-2.0/np.pi,
  3456. np.sqrt(2)*(4-np.pi)/(np.pi-2)**1.5,
  3457. 8*(np.pi-3)/(np.pi-2)**2)
  3458. def _entropy(self):
  3459. return 0.5*np.log(np.pi/2.0)+0.5
  3460. @_call_super_mom
  3461. @inherit_docstring_from(rv_continuous)
  3462. def fit(self, data, *args, **kwds):
  3463. if kwds.pop('superfit', False):
  3464. return super().fit(data, *args, **kwds)
  3465. data, floc, fscale = _check_fit_input_parameters(self, data,
  3466. args, kwds)
  3467. data_min = np.min(data)
  3468. if floc is not None:
  3469. if data_min < floc:
  3470. # There are values that are less than the specified loc.
  3471. raise FitDataError("halfnorm", lower=floc, upper=np.inf)
  3472. loc = floc
  3473. else:
  3474. loc = data_min
  3475. if fscale is not None:
  3476. scale = fscale
  3477. else:
  3478. scale = stats.moment(data, order=2, center=loc)**0.5
  3479. return loc, scale
  3480. halfnorm = halfnorm_gen(a=0.0, name='halfnorm')
  3481. class hypsecant_gen(rv_continuous):
  3482. r"""A hyperbolic secant continuous random variable.
  3483. %(before_notes)s
  3484. Notes
  3485. -----
  3486. The probability density function for `hypsecant` is:
  3487. .. math::
  3488. f(x) = \frac{1}{\pi} \text{sech}(x)
  3489. for a real number :math:`x`.
  3490. %(after_notes)s
  3491. %(example)s
  3492. """
  3493. def _shape_info(self):
  3494. return []
  3495. def _pdf(self, x):
  3496. # hypsecant.pdf(x) = 1/pi * sech(x)
  3497. return 1.0/(np.pi*np.cosh(x))
  3498. def _cdf(self, x):
  3499. return 2.0/np.pi*np.arctan(np.exp(x))
  3500. def _ppf(self, q):
  3501. return np.log(np.tan(np.pi*q/2.0))
  3502. def _sf(self, x):
  3503. return 2.0/np.pi*np.arctan(np.exp(-x))
  3504. def _isf(self, q):
  3505. return -np.log(np.tan(np.pi*q/2.0))
  3506. def _stats(self):
  3507. return 0, np.pi*np.pi/4, 0, 2
  3508. def _entropy(self):
  3509. return np.log(2*np.pi)
  3510. hypsecant = hypsecant_gen(name='hypsecant')
  3511. class gausshyper_gen(rv_continuous):
  3512. r"""A Gauss hypergeometric continuous random variable.
  3513. %(before_notes)s
  3514. Notes
  3515. -----
  3516. The probability density function for `gausshyper` is:
  3517. .. math::
  3518. f(x, a, b, c, z) = C x^{a-1} (1-x)^{b-1} (1+zx)^{-c}
  3519. for :math:`0 \le x \le 1`, :math:`a,b > 0`, :math:`c` a real number,
  3520. :math:`z > -1`, and :math:`C = \frac{1}{B(a, b) F[2, 1](c, a; a+b; -z)}`.
  3521. :math:`F[2, 1]` is the Gauss hypergeometric function
  3522. `scipy.special.hyp2f1`.
  3523. `gausshyper` takes :math:`a`, :math:`b`, :math:`c` and :math:`z` as shape
  3524. parameters.
  3525. %(after_notes)s
  3526. References
  3527. ----------
  3528. .. [1] Armero, C., and M. J. Bayarri. "Prior Assessments for Prediction in
  3529. Queues." *Journal of the Royal Statistical Society*. Series D (The
  3530. Statistician) 43, no. 1 (1994): 139-53. doi:10.2307/2348939
  3531. %(example)s
  3532. """
  3533. def _argcheck(self, a, b, c, z):
  3534. # z > -1 per gh-10134
  3535. return (a > 0) & (b > 0) & (c == c) & (z > -1)
  3536. def _shape_info(self):
  3537. ia = _ShapeInfo("a", False, (0, np.inf), (False, False))
  3538. ib = _ShapeInfo("b", False, (0, np.inf), (False, False))
  3539. ic = _ShapeInfo("c", False, (-np.inf, np.inf), (False, False))
  3540. iz = _ShapeInfo("z", False, (-1, np.inf), (False, False))
  3541. return [ia, ib, ic, iz]
  3542. def _pdf(self, x, a, b, c, z):
  3543. normalization_constant = sc.beta(a, b) * sc.hyp2f1(c, a, a + b, -z)
  3544. return (1./normalization_constant * x**(a - 1.) * (1. - x)**(b - 1.0)
  3545. / (1.0 + z*x)**c)
  3546. def _munp(self, n, a, b, c, z):
  3547. fac = sc.beta(n+a, b) / sc.beta(a, b)
  3548. num = sc.hyp2f1(c, a+n, a+b+n, -z)
  3549. den = sc.hyp2f1(c, a, a+b, -z)
  3550. return fac*num / den
  3551. gausshyper = gausshyper_gen(a=0.0, b=1.0, name='gausshyper')
  3552. class invgamma_gen(rv_continuous):
  3553. r"""An inverted gamma continuous random variable.
  3554. %(before_notes)s
  3555. Notes
  3556. -----
  3557. The probability density function for `invgamma` is:
  3558. .. math::
  3559. f(x, a) = \frac{x^{-a-1}}{\Gamma(a)} \exp(-\frac{1}{x})
  3560. for :math:`x >= 0`, :math:`a > 0`. :math:`\Gamma` is the gamma function
  3561. (`scipy.special.gamma`).
  3562. `invgamma` takes ``a`` as a shape parameter for :math:`a`.
  3563. `invgamma` is a special case of `gengamma` with ``c=-1``, and it is a
  3564. different parameterization of the scaled inverse chi-squared distribution.
  3565. Specifically, if the scaled inverse chi-squared distribution is
  3566. parameterized with degrees of freedom :math:`\nu` and scaling parameter
  3567. :math:`\tau^2`, then it can be modeled using `invgamma` with
  3568. ``a=`` :math:`\nu/2` and ``scale=`` :math:`\nu \tau^2/2`.
  3569. %(after_notes)s
  3570. %(example)s
  3571. """
  3572. _support_mask = rv_continuous._open_support_mask
  3573. def _shape_info(self):
  3574. return [_ShapeInfo("a", False, (0, np.inf), (False, False))]
  3575. def _pdf(self, x, a):
  3576. # invgamma.pdf(x, a) = x**(-a-1) / gamma(a) * exp(-1/x)
  3577. return np.exp(self._logpdf(x, a))
  3578. def _logpdf(self, x, a):
  3579. return -(a+1) * np.log(x) - sc.gammaln(a) - 1.0/x
  3580. def _cdf(self, x, a):
  3581. return sc.gammaincc(a, 1.0 / x)
  3582. def _ppf(self, q, a):
  3583. return 1.0 / sc.gammainccinv(a, q)
  3584. def _sf(self, x, a):
  3585. return sc.gammainc(a, 1.0 / x)
  3586. def _isf(self, q, a):
  3587. return 1.0 / sc.gammaincinv(a, q)
  3588. def _stats(self, a, moments='mvsk'):
  3589. m1 = xpx.apply_where(a > 1, a,
  3590. lambda x: 1. / (x - 1.),
  3591. fill_value=np.inf)
  3592. m2 = xpx.apply_where(a > 2, a,
  3593. lambda x: 1. / (x - 1.)**2 / (x - 2.),
  3594. fill_value=np.inf)
  3595. g1, g2 = None, None
  3596. if 's' in moments:
  3597. g1 = xpx.apply_where(a > 3, a,
  3598. lambda x: 4. * np.sqrt(x - 2.) / (x - 3.),
  3599. fill_value=np.nan)
  3600. if 'k' in moments:
  3601. g2 = xpx.apply_where(a > 4, a,
  3602. lambda x: 6. * (5. * x - 11.) / (x - 3.) / (x - 4.),
  3603. fill_value=np.nan)
  3604. return m1, m2, g1, g2
  3605. def _entropy(self, a):
  3606. def regular(a):
  3607. h = a - (a + 1.0) * sc.psi(a) + sc.gammaln(a)
  3608. return h
  3609. def asymptotic(a):
  3610. # gammaln(a) ~ a * ln(a) - a - 0.5 * ln(a) + 0.5 * ln(2 * pi)
  3611. # psi(a) ~ ln(a) - 1 / (2 * a)
  3612. h = ((1 - 3*np.log(a) + np.log(2) + np.log(np.pi))/2
  3613. + 2/3*a**-1. + a**-2./12 - a**-3./90 - a**-4./120)
  3614. return h
  3615. h = xpx.apply_where(a >= 200, a, asymptotic, regular)
  3616. return h
  3617. invgamma = invgamma_gen(a=0.0, name='invgamma')
  3618. class invgauss_gen(rv_continuous):
  3619. r"""An inverse Gaussian continuous random variable.
  3620. %(before_notes)s
  3621. Notes
  3622. -----
  3623. The probability density function for `invgauss` is:
  3624. .. math::
  3625. f(x; \mu) = \frac{1}{\sqrt{2 \pi x^3}}
  3626. \exp\left(-\frac{(x-\mu)^2}{2 \mu^2 x}\right)
  3627. for :math:`x \ge 0` and :math:`\mu > 0`.
  3628. `invgauss` takes ``mu`` as a shape parameter for :math:`\mu`.
  3629. %(after_notes)s
  3630. A common shape-scale parameterization of the inverse Gaussian distribution
  3631. has density
  3632. .. math::
  3633. f(x; \nu, \lambda) = \sqrt{\frac{\lambda}{2 \pi x^3}}
  3634. \exp\left( -\frac{\lambda(x-\nu)^2}{2 \nu^2 x}\right)
  3635. Using ``nu`` for :math:`\nu` and ``lam`` for :math:`\lambda`, this
  3636. parameterization is equivalent to the one above with ``mu = nu/lam``,
  3637. ``loc = 0``, and ``scale = lam``.
  3638. This distribution uses routines from the Boost Math C++ library for
  3639. the computation of the ``ppf`` and ``isf`` methods. [1]_
  3640. References
  3641. ----------
  3642. .. [1] The Boost Developers. "Boost C++ Libraries". https://www.boost.org/.
  3643. %(example)s
  3644. """
  3645. _support_mask = rv_continuous._open_support_mask
  3646. def _shape_info(self):
  3647. return [_ShapeInfo("mu", False, (0, np.inf), (False, False))]
  3648. def _rvs(self, mu, size=None, random_state=None):
  3649. return random_state.wald(mu, 1.0, size=size)
  3650. def _pdf(self, x, mu):
  3651. # invgauss.pdf(x, mu) =
  3652. # 1 / sqrt(2*pi*x**3) * exp(-(x-mu)**2/(2*x*mu**2))
  3653. return 1.0/np.sqrt(2*np.pi*x**3.0)*np.exp(-1.0/(2*x)*(x/mu - 1)**2)
  3654. def _logpdf(self, x, mu):
  3655. return -0.5*np.log(2*np.pi) - 1.5*np.log(x) - (x/mu - 1)**2/(2*x)
  3656. # approach adapted from equations in
  3657. # https://journal.r-project.org/archive/2016-1/giner-smyth.pdf,
  3658. # not R code. see gh-13616
  3659. def _logcdf(self, x, mu):
  3660. fac = 1 / np.sqrt(x)
  3661. a = _norm_logcdf(fac * (x/mu - 1))
  3662. b = 2 / mu + _norm_logcdf(-fac * (x/mu + 1))
  3663. return a + np.log1p(np.exp(b - a))
  3664. def _logsf(self, x, mu):
  3665. fac = 1 / np.sqrt(x)
  3666. a = _norm_logsf(fac * (x/mu - 1))
  3667. b = 2 / mu + _norm_logcdf(-fac * (x/mu + 1))
  3668. return a + np.log1p(-np.exp(b - a))
  3669. def _sf(self, x, mu):
  3670. return np.exp(self._logsf(x, mu))
  3671. def _cdf(self, x, mu):
  3672. return np.exp(self._logcdf(x, mu))
  3673. def _ppf(self, x, mu):
  3674. with np.errstate(divide='ignore', over='ignore', invalid='ignore'):
  3675. x, mu = np.broadcast_arrays(x, mu)
  3676. ppf = np.asarray(scu._invgauss_ppf(x, mu, 1))
  3677. i_wt = x > 0.5 # "wrong tail" - sometimes too inaccurate
  3678. ppf[i_wt] = scu._invgauss_isf(1-x[i_wt], mu[i_wt], 1)
  3679. i_nan = np.isnan(ppf)
  3680. ppf[i_nan] = super()._ppf(x[i_nan], mu[i_nan])
  3681. return ppf
  3682. def _isf(self, x, mu):
  3683. with np.errstate(divide='ignore', over='ignore', invalid='ignore'):
  3684. x, mu = np.broadcast_arrays(x, mu)
  3685. isf = scu._invgauss_isf(x, mu, 1)
  3686. i_wt = x > 0.5 # "wrong tail" - sometimes too inaccurate
  3687. isf[i_wt] = scu._invgauss_ppf(1-x[i_wt], mu[i_wt], 1)
  3688. i_nan = np.isnan(isf)
  3689. isf[i_nan] = super()._isf(x[i_nan], mu[i_nan])
  3690. return isf
  3691. def _stats(self, mu):
  3692. return mu, mu**3.0, 3*np.sqrt(mu), 15*mu
  3693. @inherit_docstring_from(rv_continuous)
  3694. def fit(self, data, *args, **kwds):
  3695. method = kwds.get('method', 'mle')
  3696. if (isinstance(data, CensoredData) or isinstance(self, wald_gen)
  3697. or method.lower() == 'mm'):
  3698. return super().fit(data, *args, **kwds)
  3699. data, fshape_s, floc, fscale = _check_fit_input_parameters(self, data,
  3700. args, kwds)
  3701. '''
  3702. Source: Statistical Distributions, 3rd Edition. Evans, Hastings,
  3703. and Peacock (2000), Page 121. Their shape parameter is equivalent to
  3704. SciPy's with the conversion `fshape_s = fshape / scale`.
  3705. MLE formulas are not used in 3 conditions:
  3706. - `loc` is not fixed
  3707. - `mu` is fixed
  3708. These cases fall back on the superclass fit method.
  3709. - `loc` is fixed but translation results in negative data raises
  3710. a `FitDataError`.
  3711. '''
  3712. if floc is None or fshape_s is not None:
  3713. return super().fit(data, *args, **kwds)
  3714. elif np.any(data - floc < 0):
  3715. raise FitDataError("invgauss", lower=0, upper=np.inf)
  3716. else:
  3717. data = data - floc
  3718. fshape_n = np.mean(data)
  3719. if fscale is None:
  3720. fscale = len(data) / (np.sum(data ** -1 - fshape_n ** -1))
  3721. fshape_s = fshape_n / fscale
  3722. return fshape_s, floc, fscale
  3723. def _entropy(self, mu):
  3724. """
  3725. Ref.: https://moser-isi.ethz.ch/docs/papers/smos-2012-10.pdf (eq. 9)
  3726. """
  3727. # a = log(2*pi*e*mu**3)
  3728. # = 1 + log(2*pi) + 3 * log(mu)
  3729. a = 1. + np.log(2 * np.pi) + 3 * np.log(mu)
  3730. # b = exp(2/mu) * exp1(2/mu)
  3731. # = _scaled_exp1(2/mu) / (2/mu)
  3732. r = 2/mu
  3733. b = sc._ufuncs._scaled_exp1(r)/r
  3734. return 0.5 * a - 1.5 * b
  3735. invgauss = invgauss_gen(a=0.0, name='invgauss')
class geninvgauss_gen(rv_continuous):
    r"""A Generalized Inverse Gaussian continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `geninvgauss` is:

    .. math::

        f(x, p, b) = x^{p-1} \exp(-b (x + 1/x) / 2) / (2 K_p(b))

    where ``x > 0``, `p` is a real number and ``b > 0``\([1]_).
    :math:`K_p` is the modified Bessel function of second kind of order `p`
    (`scipy.special.kv`).

    %(after_notes)s

    The inverse Gaussian distribution `stats.invgauss(mu)` is a special case of
    `geninvgauss` with ``p = -1/2``, ``b = 1 / mu`` and ``scale = mu``.

    Generating random variates is challenging for this distribution. The
    implementation is based on [2]_.

    References
    ----------
    .. [1] O. Barndorff-Nielsen, P. Blaesild, C. Halgreen, "First hitting time
       models for the generalized inverse gaussian distribution",
       Stochastic Processes and their Applications 7, pp. 49--54, 1978.

    .. [2] W. Hoermann and J. Leydold, "Generating generalized inverse Gaussian
       random variates", Statistics and Computing, 24(4), p. 547--557, 2014.

    %(example)s

    """
    def _argcheck(self, p, b):
        # ``p == p`` is False only for NaN: any non-NaN p is accepted,
        # while b must be strictly positive.
        return (p == p) & (b > 0)

    def _shape_info(self):
        ip = _ShapeInfo("p", False, (-np.inf, np.inf), (False, False))
        ib = _ShapeInfo("b", False, (0, np.inf), (False, False))
        return [ip, ib]

    def _logpdf(self, x, p, b):
        # kve instead of kv works better for large values of b
        # warn if kve produces infinite values and replace by nan
        # otherwise c = -inf and the results are often incorrect
        def logpdf_single(x, p, b):
            return _stats.geninvgauss_logpdf(x, p, b)

        # apply the scalar compiled routine elementwise
        logpdf_single = np.vectorize(logpdf_single, otypes=[np.float64])

        z = logpdf_single(x, p, b)
        if np.isnan(z).any():
            msg = ("Infinite values encountered in scipy.special.kve(p, b). "
                   "Values replaced by NaN to avoid incorrect results.")
            warnings.warn(msg, RuntimeWarning, stacklevel=3)
        return z

    def _pdf(self, x, p, b):
        # relying on logpdf avoids overflow of x**(p-1) for large x and p
        return np.exp(self._logpdf(x, p, b))

    def _cdf(self, x, p, b):
        # The CDF is obtained by numerical quadrature of the compiled pdf
        # from the lower support bound `_a` up to `x`.
        _a, _b = self._get_support(p, b)

        def _cdf_single(x, p, b):
            # the shape parameters are handed to the low-level callable
            # through a pointer to a small float array
            user_data = np.array([p, b], float).ctypes.data_as(ctypes.c_void_p)
            llc = LowLevelCallable.from_cython(_stats, '_geninvgauss_pdf',
                                               user_data)

            return integrate.quad(llc, _a, x)[0]

        _cdf_single = np.vectorize(_cdf_single, otypes=[np.float64])

        return _cdf_single(x, p, b)

    def _logquasipdf(self, x, p, b):
        # log of the quasi-density (w/o normalizing constant) used in _rvs
        # entries with x <= 0 are filled with -inf
        return xpx.apply_where(x > 0, (x, p, b),
                               lambda x, p, b: (p - 1)*np.log(x) - b*(x + 1/x)/2,
                               fill_value=-np.inf)

    def _rvs(self, p, b, size=None, random_state=None):
        # if p and b are scalar, use _rvs_scalar, otherwise need to create
        # output by iterating over parameters
        if np.isscalar(p) and np.isscalar(b):
            out = self._rvs_scalar(p, b, size, random_state)
        elif p.size == 1 and b.size == 1:
            out = self._rvs_scalar(p.item(), b.item(), size, random_state)
        else:
            # When this method is called, size will be a (possibly empty)
            # tuple of integers.  It will not be None; if `size=None` is passed
            # to `rvs()`, size will be the empty tuple ().

            p, b = np.broadcast_arrays(p, b)
            # p and b now have the same shape.

            # `shp` is the shape of the blocks of random variates that are
            # generated for each combination of parameters associated with
            # broadcasting p and b.
            # bc is a tuple the same length as size.  The values
            # in bc are bools.  If bc[j] is True, it means that
            # entire axis is filled in for a given combination of the
            # broadcast arguments.
            shp, bc = _check_shape(p.shape, size)

            # `numsamples` is the total number of variates to be generated
            # for each combination of the input arguments.
            numsamples = int(np.prod(shp))

            # `out` is the array to be returned.  It is filled in the
            # loop below.
            out = np.empty(size)

            it = np.nditer([p, b],
                           flags=['multi_index'],
                           op_flags=[['readonly'], ['readonly']])
            while not it.finished:
                # Convert the iterator's multi_index into an index into the
                # `out` array where the call to _rvs_scalar() will be stored.
                # Where bc is True, we use a full slice; otherwise we use the
                # index value from it.multi_index.  len(it.multi_index) might
                # be less than len(bc), and in that case we want to align these
                # two sequences to the right, so the loop variable j runs from
                # -len(size) to 0.  This doesn't cause an IndexError, as
                # bc[j] will be True in those cases where it.multi_index[j]
                # would cause an IndexError.
                idx = tuple((it.multi_index[j] if not bc[j] else slice(None))
                            for j in range(-len(size), 0))
                out[idx] = self._rvs_scalar(it[0], it[1], numsamples,
                                            random_state).reshape(shp)
                it.iternext()

        # an empty `size` tuple means a single variate: return a scalar
        if size == ():
            out = out.item()
        return out

    def _rvs_scalar(self, p, b, numsamples, random_state):
        # following [2], the quasi-pdf is used instead of the pdf for the
        # generation of rvs
        invert_res = False
        # a falsy `numsamples` (e.g. None or an empty tuple) yields one variate
        if not numsamples:
            numsamples = 1
        if p < 0:
            # note: if X is geninvgauss(p, b), then 1/X is geninvgauss(-p, b)
            p = -p
            invert_res = True
        m = self._mode(p, b)

        # determine method to be used following [2]
        ratio_unif = True
        if p >= 1 or b > 1:
            # ratio of uniforms with mode shift below
            mode_shift = True
        elif b >= min(0.5, 2 * np.sqrt(1 - p) / 3):
            # ratio of uniforms without mode shift below
            mode_shift = False
        else:
            # new algorithm in [2]
            ratio_unif = False

        # prepare sampling of rvs
        size1d = tuple(np.atleast_1d(numsamples))
        N = np.prod(size1d)  # number of rvs needed, reshape upon return
        x = np.zeros(N)
        simulated = 0

        if ratio_unif:
            # use ratio of uniforms method
            if mode_shift:
                a2 = -2 * (p + 1) / b - m
                a1 = 2 * m * (p - 1) / b - 1
                # find roots of x**3 + a2*x**2 + a1*x + m (Cardano's formula)
                p1 = a1 - a2**2 / 3
                q1 = 2 * a2**3 / 27 - a2 * a1 / 3 + m
                phi = np.arccos(-q1 * np.sqrt(-27 / p1**3) / 2)
                s1 = -np.sqrt(-4 * p1 / 3)
                root1 = s1 * np.cos(phi / 3 + np.pi / 3) - a2 / 3
                root2 = -s1 * np.cos(phi / 3) - a2 / 3
                # root3 = s1 * np.cos(phi / 3 - np.pi / 3) - a2 / 3

                # if g is the quasipdf, rescale: g(x) / g(m) which we can write
                # as exp(log(g(x)) - log(g(m))). This is important
                # since for large values of p and b, g cannot be evaluated.
                # denote the rescaled quasipdf by h
                lm = self._logquasipdf(m, p, b)
                d1 = self._logquasipdf(root1, p, b) - lm
                d2 = self._logquasipdf(root2, p, b) - lm
                # compute the bounding rectangle w.r.t. h. Note that
                # np.exp(0.5*d1) = np.sqrt(g(root1)/g(m)) = np.sqrt(h(root1))
                vmin = (root1 - m) * np.exp(0.5 * d1)
                vmax = (root2 - m) * np.exp(0.5 * d2)
                umax = 1  # umax = sqrt(h(m)) = 1

                def logqpdf(x):
                    return self._logquasipdf(x, p, b) - lm

                # candidates are shifted by the mode: rvs = v / u + c
                c = m
            else:
                # ratio of uniforms without mode shift
                # compute np.sqrt(quasipdf(m))
                umax = np.exp(0.5*self._logquasipdf(m, p, b))
                xplus = ((1 + p) + np.sqrt((1 + p)**2 + b**2))/b
                vmin = 0
                # compute xplus * np.sqrt(quasipdf(xplus))
                vmax = xplus * np.exp(0.5 * self._logquasipdf(xplus, p, b))
                c = 0

                def logqpdf(x):
                    return self._logquasipdf(x, p, b)

            if vmin >= vmax:
                raise ValueError("vmin must be smaller than vmax.")
            if umax <= 0:
                raise ValueError("umax must be positive.")

            # `i` counts the passes of the loop below; it only feeds the
            # "no variate accepted yet" safeguard
            i = 1
            while simulated < N:
                # only the variates still missing are drawn in each pass
                k = N - simulated
                # simulate uniform rvs on [0, umax] and [vmin, vmax]
                u = umax * random_state.uniform(size=k)
                v = random_state.uniform(size=k)
                v = vmin + (vmax - vmin) * v
                rvs = v / u + c
                # rewrite acceptance condition u**2 <= pdf(rvs) by taking logs
                accept = (2*np.log(u) <= logqpdf(rvs))
                num_accept = np.sum(accept)
                if num_accept > 0:
                    x[simulated:(simulated + num_accept)] = rvs[accept]
                    simulated += num_accept

                if (simulated == 0) and (i*N >= 50000):
                    msg = ("Not a single random variate could be generated "
                           f"in {i*N} attempts. Sampling does not appear to "
                           "work for the provided parameters.")
                    raise RuntimeError(msg)
                i += 1
        else:
            # use new algorithm in [2]
            # The hat function is built from three pieces with areas
            # A1, A2, A3 over (0, x0), (x0, 2/b) and (xs, infinity).
            x0 = b / (1 - p)
            xs = np.max((x0, 2 / b))
            k1 = np.exp(self._logquasipdf(m, p, b))
            A1 = k1 * x0
            if x0 < 2 / b:
                k2 = np.exp(-b)
                if p > 0:
                    A2 = k2 * ((2 / b)**p - x0**p) / p
                else:
                    A2 = k2 * np.log(2 / b**2)
            else:
                # the middle piece is empty
                k2, A2 = 0, 0
            k3 = xs**(p - 1)
            A3 = 2 * k3 * np.exp(-xs * b / 2) / b
            A = A1 + A2 + A3

            # [2]: rejection constant is < 2.73; so expected runtime is finite
            while simulated < N:
                k = N - simulated
                h, rvs = np.zeros(k), np.zeros(k)
                # simulate uniform rvs on [x1, x2] and [0, y2]
                u = random_state.uniform(size=k)
                v = A * random_state.uniform(size=k)
                # `v` selects which of the three pieces each candidate uses
                cond1 = v <= A1
                cond2 = np.logical_not(cond1) & (v <= A1 + A2)
                cond3 = np.logical_not(cond1 | cond2)
                # subdomain (0, x0)
                rvs[cond1] = x0 * v[cond1] / A1
                h[cond1] = k1
                # subdomain (x0, 2 / b)
                if p > 0:
                    rvs[cond2] = (x0**p + (v[cond2] - A1) * p / k2)**(1 / p)
                else:
                    rvs[cond2] = b * np.exp((v[cond2] - A1) * np.exp(b))
                h[cond2] = k2 * rvs[cond2]**(p - 1)
                # subdomain (xs, infinity)
                z = np.exp(-xs * b / 2) - b * (v[cond3] - A1 - A2) / (2 * k3)
                rvs[cond3] = -2 / b * np.log(z)
                h[cond3] = k3 * np.exp(-rvs[cond3] * b / 2)
                # apply rejection method
                accept = (np.log(u * h) <= self._logquasipdf(rvs, p, b))
                num_accept = sum(accept)
                if num_accept > 0:
                    x[simulated:(simulated + num_accept)] = rvs[accept]
                    simulated += num_accept

        rvs = np.reshape(x, size1d)
        # undo the sign flip of p applied at the top (1/X has parameter -p)
        if invert_res:
            rvs = 1 / rvs
        return rvs

    def _mode(self, p, b):
        # distinguish cases to avoid catastrophic cancellation (see [2])
        if p < 1:
            return b / (np.sqrt((p - 1)**2 + b**2) + 1 - p)
        else:
            return (np.sqrt((1 - p)**2 + b**2) - (1 - p)) / b

    def _munp(self, n, p, b):
        # n-th raw moment as the ratio kve(p + n, b) / kve(p, b)
        num = sc.kve(p + n, b)
        denom = sc.kve(p, b)
        inf_vals = np.isinf(num) | np.isinf(denom)
        if inf_vals.any():
            msg = ("Infinite values encountered in the moment calculation "
                   "involving scipy.special.kve. Values replaced by NaN to "
                   "avoid incorrect results.")
            warnings.warn(msg, RuntimeWarning, stacklevel=3)
            # only entries where both Bessel values are finite are computed
            m = np.full_like(num, np.nan, dtype=np.float64)
            m[~inf_vals] = num[~inf_vals] / denom[~inf_vals]
        else:
            m = num / denom
        return m


geninvgauss = geninvgauss_gen(a=0.0, name="geninvgauss")
  4006. class norminvgauss_gen(rv_continuous):
  4007. r"""A Normal Inverse Gaussian continuous random variable.
  4008. %(before_notes)s
  4009. Notes
  4010. -----
  4011. The probability density function for `norminvgauss` is:
  4012. .. math::
  4013. f(x, a, b) = \frac{a \, K_1(a \sqrt{1 + x^2})}{\pi \sqrt{1 + x^2}} \,
  4014. \exp(\sqrt{a^2 - b^2} + b x)
  4015. where :math:`x` is a real number, the parameter :math:`a` is the tail
  4016. heaviness and :math:`b` is the asymmetry parameter satisfying
  4017. :math:`a > 0` and :math:`|b| <= a`.
  4018. :math:`K_1` is the modified Bessel function of second kind
  4019. (`scipy.special.k1`).
  4020. %(after_notes)s
  4021. A normal inverse Gaussian random variable `Y` with parameters `a` and `b`
  4022. can be expressed as a normal mean-variance mixture:
  4023. ``Y = b * V + sqrt(V) * X`` where `X` is ``norm(0,1)`` and `V` is
  4024. ``invgauss(mu=1/sqrt(a**2 - b**2))``. This representation is used
  4025. to generate random variates.
  4026. Another common parametrization of the distribution (see Equation 2.1 in
  4027. [2]_) is given by the following expression of the pdf:
  4028. .. math::
  4029. g(x, \alpha, \beta, \delta, \mu) =
  4030. \frac{\alpha\delta K_1\left(\alpha\sqrt{\delta^2 + (x - \mu)^2}\right)}
  4031. {\pi \sqrt{\delta^2 + (x - \mu)^2}} \,
  4032. e^{\delta \sqrt{\alpha^2 - \beta^2} + \beta (x - \mu)}
  4033. In SciPy, this corresponds to
  4034. :math:`a=\alpha \delta, b=\beta \delta, \text{loc}=\mu, \text{scale}=\delta`.
  4035. References
  4036. ----------
  4037. .. [1] O. Barndorff-Nielsen, "Hyperbolic Distributions and Distributions on
  4038. Hyperbolae", Scandinavian Journal of Statistics, Vol. 5(3),
  4039. pp. 151-157, 1978.
  4040. .. [2] O. Barndorff-Nielsen, "Normal Inverse Gaussian Distributions and
  4041. Stochastic Volatility Modelling", Scandinavian Journal of
  4042. Statistics, Vol. 24, pp. 1-13, 1997.
  4043. %(example)s
  4044. """
  4045. _support_mask = rv_continuous._open_support_mask
  4046. def _argcheck(self, a, b):
  4047. return (a > 0) & (np.absolute(b) < a)
  4048. def _shape_info(self):
  4049. ia = _ShapeInfo("a", False, (0, np.inf), (False, False))
  4050. ib = _ShapeInfo("b", False, (-np.inf, np.inf), (False, False))
  4051. return [ia, ib]
  4052. def _fitstart(self, data):
  4053. # Arbitrary, but the default a = b = 1 is not valid; the distribution
  4054. # requires |b| < a.
  4055. return super()._fitstart(data, args=(1, 0.5))
  4056. def _pdf(self, x, a, b):
  4057. gamma = np.sqrt(a**2 - b**2)
  4058. fac1 = a / np.pi
  4059. sq = np.hypot(1, x) # reduce overflows
  4060. return fac1 * sc.k1e(a * sq) * np.exp(b*x - a*sq + gamma) / sq
  4061. def _sf(self, x, a, b):
  4062. if np.isscalar(x):
  4063. # If x is a scalar, then so are a and b.
  4064. return integrate.quad(self._pdf, x, np.inf, args=(a, b))[0]
  4065. else:
  4066. a = np.atleast_1d(a)
  4067. b = np.atleast_1d(b)
  4068. result = []
  4069. for (x0, a0, b0) in zip(x, a, b):
  4070. result.append(integrate.quad(self._pdf, x0, np.inf,
  4071. args=(a0, b0))[0])
  4072. return np.array(result)
  4073. def _isf(self, q, a, b):
  4074. def _isf_scalar(q, a, b):
  4075. def eq(x, a, b, q):
  4076. # Solve eq(x, a, b, q) = 0 to obtain isf(x, a, b) = q.
  4077. return self._sf(x, a, b) - q
  4078. # Find a bracketing interval for the root.
  4079. # Start at the mean, and grow the length of the interval
  4080. # by 2 each iteration until there is a sign change in eq.
  4081. xm = self.mean(a, b)
  4082. em = eq(xm, a, b, q)
  4083. if em == 0:
  4084. # Unlikely, but might as well check.
  4085. return xm
  4086. if em > 0:
  4087. delta = 1
  4088. left = xm
  4089. right = xm + delta
  4090. while eq(right, a, b, q) > 0:
  4091. delta = 2*delta
  4092. right = xm + delta
  4093. else:
  4094. # em < 0
  4095. delta = 1
  4096. right = xm
  4097. left = xm - delta
  4098. while eq(left, a, b, q) < 0:
  4099. delta = 2*delta
  4100. left = xm - delta
  4101. result = optimize.brentq(eq, left, right, args=(a, b, q),
  4102. xtol=self.xtol)
  4103. return result
  4104. if np.isscalar(q):
  4105. return _isf_scalar(q, a, b)
  4106. else:
  4107. result = []
  4108. for (q0, a0, b0) in zip(q, a, b):
  4109. result.append(_isf_scalar(q0, a0, b0))
  4110. return np.array(result)
  4111. def _rvs(self, a, b, size=None, random_state=None):
  4112. # note: X = b * V + sqrt(V) * X is norminvgaus(a,b) if X is standard
  4113. # normal and V is invgauss(mu=1/sqrt(a**2 - b**2))
  4114. gamma = np.sqrt(a**2 - b**2)
  4115. ig = invgauss.rvs(mu=1/gamma, size=size, random_state=random_state)
  4116. return b * ig + np.sqrt(ig) * norm.rvs(size=size,
  4117. random_state=random_state)
  4118. def _stats(self, a, b):
  4119. gamma = np.sqrt(a**2 - b**2)
  4120. mean = b / gamma
  4121. variance = a**2 / gamma**3
  4122. skewness = 3.0 * b / (a * np.sqrt(gamma))
  4123. kurtosis = 3.0 * (1 + 4 * b**2 / a**2) / gamma
  4124. return mean, variance, skewness, kurtosis
  4125. norminvgauss = norminvgauss_gen(name="norminvgauss")
  4126. class invweibull_gen(rv_continuous):
  4127. """An inverted Weibull continuous random variable.
  4128. This distribution is also known as the Fréchet distribution or the
  4129. type II extreme value distribution.
  4130. %(before_notes)s
  4131. Notes
  4132. -----
  4133. The probability density function for `invweibull` is:
  4134. .. math::
  4135. f(x, c) = c x^{-c-1} \\exp(-x^{-c})
  4136. for :math:`x > 0`, :math:`c > 0`.
  4137. `invweibull` takes ``c`` as a shape parameter for :math:`c`.
  4138. %(after_notes)s
  4139. References
  4140. ----------
  4141. F.R.S. de Gusmao, E.M.M Ortega and G.M. Cordeiro, "The generalized inverse
  4142. Weibull distribution", Stat. Papers, vol. 52, pp. 591-619, 2011.
  4143. %(example)s
  4144. """
  4145. _support_mask = rv_continuous._open_support_mask
  4146. def _shape_info(self):
  4147. return [_ShapeInfo("c", False, (0, np.inf), (False, False))]
  4148. def _pdf(self, x, c):
  4149. # invweibull.pdf(x, c) = c * x**(-c-1) * exp(-x**(-c))
  4150. xc1 = np.power(x, -c - 1.0)
  4151. xc2 = np.power(x, -c)
  4152. xc2 = np.exp(-xc2)
  4153. return c * xc1 * xc2
  4154. def _cdf(self, x, c):
  4155. xc1 = np.power(x, -c)
  4156. return np.exp(-xc1)
  4157. def _sf(self, x, c):
  4158. return -np.expm1(-x**-c)
  4159. def _ppf(self, q, c):
  4160. return np.power(-np.log(q), -1.0/c)
  4161. def _isf(self, p, c):
  4162. return (-np.log1p(-p))**(-1/c)
  4163. def _munp(self, n, c):
  4164. return sc.gamma(1 - n / c)
  4165. def _entropy(self, c):
  4166. return 1+_EULER + _EULER / c - np.log(c)
  4167. def _fitstart(self, data, args=None):
  4168. # invweibull requires c > 1 for the first moment to exist, so use 2.0
  4169. args = (2.0,) if args is None else args
  4170. return super()._fitstart(data, args=args)
  4171. invweibull = invweibull_gen(a=0, name='invweibull')
  4172. class jf_skew_t_gen(rv_continuous):
  4173. r"""Jones and Faddy skew-t distribution.
  4174. %(before_notes)s
  4175. Notes
  4176. -----
  4177. The probability density function for `jf_skew_t` is:
  4178. .. math::
  4179. f(x; a, b) = C_{a,b}^{-1}
  4180. \left(1+\frac{x}{\left(a+b+x^2\right)^{1/2}}\right)^{a+1/2}
  4181. \left(1-\frac{x}{\left(a+b+x^2\right)^{1/2}}\right)^{b+1/2}
  4182. for real numbers :math:`a>0` and :math:`b>0`, where
  4183. :math:`C_{a,b} = 2^{a+b-1}B(a,b)(a+b)^{1/2}`, and :math:`B` denotes the
  4184. beta function (`scipy.special.beta`).
  4185. When :math:`a<b`, the distribution is negatively skewed, and when
  4186. :math:`a>b`, the distribution is positively skewed. If :math:`a=b`, then
  4187. we recover the `t` distribution with :math:`2a` degrees of freedom.
  4188. `jf_skew_t` takes :math:`a` and :math:`b` as shape parameters.
  4189. %(after_notes)s
  4190. References
  4191. ----------
  4192. .. [1] M.C. Jones and M.J. Faddy. "A skew extension of the t distribution,
  4193. with applications" *Journal of the Royal Statistical Society*.
  4194. Series B (Statistical Methodology) 65, no. 1 (2003): 159-174.
  4195. :doi:`10.1111/1467-9868.00378`
  4196. %(example)s
  4197. """
  4198. def _shape_info(self):
  4199. ia = _ShapeInfo("a", False, (0, np.inf), (False, False))
  4200. ib = _ShapeInfo("b", False, (0, np.inf), (False, False))
  4201. return [ia, ib]
  4202. def _pdf(self, x, a, b):
  4203. c = 2 ** (a + b - 1) * sc.beta(a, b) * np.sqrt(a + b)
  4204. d1 = (1 + x / np.sqrt(a + b + x ** 2)) ** (a + 0.5)
  4205. d2 = (1 - x / np.sqrt(a + b + x ** 2)) ** (b + 0.5)
  4206. return d1 * d2 / c
  4207. def _rvs(self, a, b, size=None, random_state=None):
  4208. d1 = random_state.beta(a, b, size)
  4209. d2 = (2 * d1 - 1) * np.sqrt(a + b)
  4210. d3 = 2 * np.sqrt(d1 * (1 - d1))
  4211. return d2 / d3
  4212. def _cdf(self, x, a, b):
  4213. y = (1 + x / np.sqrt(a + b + x ** 2)) * 0.5
  4214. return sc.betainc(a, b, y)
  4215. def _sf(self, x, a, b):
  4216. y = (1 + x / np.sqrt(a + b + x ** 2)) * 0.5
  4217. return sc.betaincc(a, b, y)
  4218. def _ppf(self, q, a, b):
  4219. d1 = beta.ppf(q, a, b)
  4220. d2 = (2 * d1 - 1) * np.sqrt(a + b)
  4221. d3 = 2 * np.sqrt(d1 * (1 - d1))
  4222. return d2 / d3
  4223. def _munp(self, n, a, b):
  4224. """Returns the n-th moment(s) where all the following hold:
  4225. - n >= 0
  4226. - a > n / 2
  4227. - b > n / 2
  4228. The result is np.nan in all other cases.
  4229. """
  4230. def nth_moment(n_k, a_k, b_k):
  4231. """Computes E[T^(n_k)] where T is skew-t distributed with
  4232. parameters a_k and b_k.
  4233. """
  4234. num = (a_k + b_k) ** (0.5 * n_k)
  4235. denom = 2 ** n_k * sc.beta(a_k, b_k)
  4236. indices = np.arange(n_k + 1)
  4237. sgn = np.where(indices % 2 > 0, -1, 1)
  4238. d = sc.beta(a_k + 0.5 * n_k - indices, b_k - 0.5 * n_k + indices)
  4239. sum_terms = sc.comb(n_k, indices) * sgn * d
  4240. return num / denom * sum_terms.sum()
  4241. nth_moment_valid = (a > 0.5 * n) & (b > 0.5 * n) & (n >= 0)
  4242. return xpx.apply_where(
  4243. nth_moment_valid,
  4244. (n, a, b),
  4245. np.vectorize(nth_moment, otypes=[np.float64]),
  4246. fill_value=np.nan,
  4247. )
  4248. jf_skew_t = jf_skew_t_gen(name='jf_skew_t')
  4249. class johnsonsb_gen(rv_continuous):
  4250. r"""A Johnson SB continuous random variable.
  4251. %(before_notes)s
  4252. See Also
  4253. --------
  4254. johnsonsu
  4255. Notes
  4256. -----
  4257. The probability density function for `johnsonsb` is:
  4258. .. math::
  4259. f(x, a, b) = \frac{b}{x(1-x)} \phi(a + b \log \frac{x}{1-x} )
  4260. where :math:`x`, :math:`a`, and :math:`b` are real scalars; :math:`b > 0`
  4261. and :math:`x \in [0,1]`. :math:`\phi` is the pdf of the normal
  4262. distribution.
  4263. `johnsonsb` takes :math:`a` and :math:`b` as shape parameters.
  4264. %(after_notes)s
  4265. %(example)s
  4266. """
  4267. _support_mask = rv_continuous._open_support_mask
  4268. def _argcheck(self, a, b):
  4269. return (b > 0) & (a == a)
  4270. def _shape_info(self):
  4271. ia = _ShapeInfo("a", False, (-np.inf, np.inf), (False, False))
  4272. ib = _ShapeInfo("b", False, (0, np.inf), (False, False))
  4273. return [ia, ib]
  4274. def _pdf(self, x, a, b):
  4275. # johnsonsb.pdf(x, a, b) = b / (x*(1-x)) * phi(a + b * log(x/(1-x)))
  4276. trm = _norm_pdf(a + b*sc.logit(x))
  4277. return b*1.0/(x*(1-x))*trm
  4278. def _cdf(self, x, a, b):
  4279. return _norm_cdf(a + b*sc.logit(x))
  4280. def _ppf(self, q, a, b):
  4281. return sc.expit(1.0 / b * (_norm_ppf(q) - a))
  4282. def _sf(self, x, a, b):
  4283. return _norm_sf(a + b*sc.logit(x))
  4284. def _isf(self, q, a, b):
  4285. return sc.expit(1.0 / b * (_norm_isf(q) - a))
  4286. johnsonsb = johnsonsb_gen(a=0.0, b=1.0, name='johnsonsb')
  4287. class johnsonsu_gen(rv_continuous):
  4288. r"""A Johnson SU continuous random variable.
  4289. %(before_notes)s
  4290. See Also
  4291. --------
  4292. johnsonsb
  4293. Notes
  4294. -----
  4295. The probability density function for `johnsonsu` is:
  4296. .. math::
  4297. f(x, a, b) = \frac{b}{\sqrt{x^2 + 1}}
  4298. \phi(a + b \log(x + \sqrt{x^2 + 1}))
  4299. where :math:`x`, :math:`a`, and :math:`b` are real scalars; :math:`b > 0`.
  4300. :math:`\phi` is the pdf of the normal distribution.
  4301. `johnsonsu` takes :math:`a` and :math:`b` as shape parameters.
  4302. The first four central moments are calculated according to the formulas
  4303. in [1]_.
  4304. %(after_notes)s
  4305. References
  4306. ----------
  4307. .. [1] Taylor Enterprises. "Johnson Family of Distributions".
  4308. https://variation.com/wp-content/distribution_analyzer_help/hs126.htm
  4309. %(example)s
  4310. """
  4311. def _argcheck(self, a, b):
  4312. return (b > 0) & (a == a)
  4313. def _shape_info(self):
  4314. ia = _ShapeInfo("a", False, (-np.inf, np.inf), (False, False))
  4315. ib = _ShapeInfo("b", False, (0, np.inf), (False, False))
  4316. return [ia, ib]
  4317. def _pdf(self, x, a, b):
  4318. # johnsonsu.pdf(x, a, b) = b / sqrt(x**2 + 1) *
  4319. # phi(a + b * log(x + sqrt(x**2 + 1)))
  4320. x2 = x*x
  4321. trm = _norm_pdf(a + b * np.arcsinh(x))
  4322. return b*1.0/np.sqrt(x2+1.0)*trm
  4323. def _cdf(self, x, a, b):
  4324. return _norm_cdf(a + b * np.arcsinh(x))
  4325. def _ppf(self, q, a, b):
  4326. return np.sinh((_norm_ppf(q) - a) / b)
  4327. def _sf(self, x, a, b):
  4328. return _norm_sf(a + b * np.arcsinh(x))
  4329. def _isf(self, x, a, b):
  4330. return np.sinh((_norm_isf(x) - a) / b)
  4331. def _stats(self, a, b, moments='mv'):
  4332. # Naive implementation of first and second moment to address gh-18071.
  4333. # https://variation.com/wp-content/distribution_analyzer_help/hs126.htm
  4334. # Numerical improvements left to future enhancements.
  4335. mu, mu2, g1, g2 = None, None, None, None
  4336. bn2 = b**-2.
  4337. expbn2 = np.exp(bn2)
  4338. a_b = a / b
  4339. if 'm' in moments:
  4340. mu = -expbn2**0.5 * np.sinh(a_b)
  4341. if 'v' in moments:
  4342. mu2 = 0.5*sc.expm1(bn2)*(expbn2*np.cosh(2*a_b) + 1)
  4343. if 's' in moments:
  4344. t1 = expbn2**.5 * sc.expm1(bn2)**0.5
  4345. t2 = 3*np.sinh(a_b)
  4346. t3 = expbn2 * (expbn2 + 2) * np.sinh(3*a_b)
  4347. denom = np.sqrt(2) * (1 + expbn2 * np.cosh(2*a_b))**(3/2)
  4348. g1 = -t1 * (t2 + t3) / denom
  4349. if 'k' in moments:
  4350. t1 = 3 + 6*expbn2
  4351. t2 = 4*expbn2**2 * (expbn2 + 2) * np.cosh(2*a_b)
  4352. t3 = expbn2**2 * np.cosh(4*a_b)
  4353. t4 = -3 + 3*expbn2**2 + 2*expbn2**3 + expbn2**4
  4354. denom = 2*(1 + expbn2*np.cosh(2*a_b))**2
  4355. g2 = (t1 + t2 + t3*t4) / denom - 3
  4356. return mu, mu2, g1, g2
  4357. johnsonsu = johnsonsu_gen(name='johnsonsu')
  4358. class landau_gen(rv_continuous):
  4359. r"""A Landau continuous random variable.
  4360. %(before_notes)s
  4361. Notes
  4362. -----
  4363. The probability density function for `landau` ([1]_, [2]_) is:
  4364. .. math::
  4365. f(x) = \frac{1}{\pi}\int_0^\infty \exp(-t \log t - xt)\sin(\pi t) dt
  4366. for a real number :math:`x`.
  4367. %(after_notes)s
  4368. Often (e.g. [2]_), the Landau distribution is parameterized in terms of a
  4369. location parameter :math:`\mu` and scale parameter :math:`c`, the latter of
  4370. which *also* introduces a location shift. If ``mu`` and ``c`` are used to
  4371. represent these parameters, this corresponds with SciPy's parameterization
  4372. with ``loc = mu + 2*c / np.pi * np.log(c)`` and ``scale = c``.
  4373. This distribution uses routines from the Boost Math C++ library for
  4374. the computation of the ``pdf``, ``cdf``, ``ppf``, ``sf`` and ``isf``
  4375. methods. [1]_
  4376. References
  4377. ----------
  4378. .. [1] Landau, L. (1944). "On the energy loss of fast particles by
  4379. ionization". J. Phys. (USSR). 8: 201.
  4380. .. [2] "Landau Distribution", Wikipedia,
  4381. https://en.wikipedia.org/wiki/Landau_distribution
  4382. .. [3] Chambers, J. M., Mallows, C. L., & Stuck, B. (1976).
  4383. "A method for simulating stable random variables."
  4384. Journal of the American Statistical Association, 71(354), 340-344.
  4385. .. [4] The Boost Developers. "Boost C++ Libraries". https://www.boost.org/.
  4386. .. [5] Yoshimura, T. "Numerical Evaluation and High Precision Approximation
  4387. Formula for Landau Distribution".
  4388. :doi:`10.36227/techrxiv.171822215.53612870/v2`
  4389. %(example)s
  4390. """
  4391. def _shape_info(self):
  4392. return []
  4393. def _entropy(self):
  4394. # Computed with mpmath - see gh-19145
  4395. return 2.37263644000448182
  4396. def _pdf(self, x):
  4397. return scu._landau_pdf(x, 0, 1)
  4398. def _cdf(self, x):
  4399. return scu._landau_cdf(x, 0, 1)
  4400. def _sf(self, x):
  4401. return scu._landau_sf(x, 0, 1)
  4402. def _ppf(self, p):
  4403. return scu._landau_ppf(p, 0, 1)
  4404. def _isf(self, p):
  4405. return scu._landau_isf(p, 0, 1)
  4406. def _stats(self):
  4407. return np.nan, np.nan, np.nan, np.nan
  4408. def _munp(self, n):
  4409. return np.nan if n > 0 else 1
  4410. def _fitstart(self, data, args=None):
  4411. # Initialize ML guesses using quartiles instead of moments.
  4412. if isinstance(data, CensoredData):
  4413. data = data._uncensor()
  4414. p25, p50, p75 = np.percentile(data, [25, 50, 75])
  4415. return p50, (p75 - p25)/2
  4416. def _rvs(self, size=None, random_state=None):
  4417. # Method from https://www.jstor.org/stable/2285309 Eq. 2.4
  4418. pi_2 = np.pi / 2
  4419. U = random_state.uniform(-np.pi / 2, np.pi / 2, size=size)
  4420. W = random_state.standard_exponential(size=size)
  4421. S = 2 / np.pi * ((pi_2 + U) * np.tan(U)
  4422. - np.log((pi_2 * W * np.cos(U)) / (pi_2 + U)))
  4423. return S
  4424. landau = landau_gen(name='landau')
  4425. class laplace_gen(rv_continuous):
  4426. r"""A Laplace continuous random variable.
  4427. %(before_notes)s
  4428. Notes
  4429. -----
  4430. The probability density function for `laplace` is
  4431. .. math::
  4432. f(x) = \frac{1}{2} \exp(-|x|)
  4433. for a real number :math:`x`.
  4434. %(after_notes)s
  4435. %(example)s
  4436. """
  4437. def _shape_info(self):
  4438. return []
  4439. def _rvs(self, size=None, random_state=None):
  4440. return random_state.laplace(0, 1, size=size)
  4441. def _pdf(self, x):
  4442. # laplace.pdf(x) = 1/2 * exp(-abs(x))
  4443. return 0.5*np.exp(-abs(x))
  4444. def _cdf(self, x):
  4445. with np.errstate(over='ignore'):
  4446. return np.where(x > 0, 1.0 - 0.5*np.exp(-x), 0.5*np.exp(x))
  4447. def _sf(self, x):
  4448. # By symmetry...
  4449. return self._cdf(-x)
  4450. def _ppf(self, q):
  4451. return np.where(q > 0.5, -np.log(2*(1-q)), np.log(2*q))
  4452. def _isf(self, q):
  4453. # By symmetry...
  4454. return -self._ppf(q)
  4455. def _stats(self):
  4456. return 0, 2, 0, 3
  4457. def _entropy(self):
  4458. return np.log(2)+1
  4459. @_call_super_mom
  4460. @replace_notes_in_docstring(rv_continuous, notes="""\
  4461. This function uses explicit formulas for the maximum likelihood
  4462. estimation of the Laplace distribution parameters, so the keyword
  4463. arguments `loc`, `scale`, and `optimizer` are ignored.\n\n""")
  4464. def fit(self, data, *args, **kwds):
  4465. data, floc, fscale = _check_fit_input_parameters(self, data,
  4466. args, kwds)
  4467. # Source: Statistical Distributions, 3rd Edition. Evans, Hastings,
  4468. # and Peacock (2000), Page 124
  4469. if floc is None:
  4470. floc = np.median(data)
  4471. if fscale is None:
  4472. fscale = (np.sum(np.abs(data - floc))) / len(data)
  4473. return floc, fscale
  4474. laplace = laplace_gen(name='laplace')
  4475. class laplace_asymmetric_gen(rv_continuous):
  4476. r"""An asymmetric Laplace continuous random variable.
  4477. %(before_notes)s
  4478. See Also
  4479. --------
  4480. laplace : Laplace distribution
  4481. Notes
  4482. -----
  4483. The probability density function for `laplace_asymmetric` is
  4484. .. math::
  4485. f(x, \kappa) &= \frac{1}{\kappa+\kappa^{-1}}\exp(-x\kappa),\quad x\ge0\\
  4486. &= \frac{1}{\kappa+\kappa^{-1}}\exp(x/\kappa),\quad x<0\\
  4487. for :math:`-\infty < x < \infty`, :math:`\kappa > 0`.
  4488. `laplace_asymmetric` takes ``kappa`` as a shape parameter for
  4489. :math:`\kappa`. For :math:`\kappa = 1`, it is identical to a
  4490. Laplace distribution.
  4491. %(after_notes)s
  4492. Note that the scale parameter of some references is the reciprocal of
  4493. SciPy's ``scale``. For example, :math:`\lambda = 1/2` in the
  4494. parameterization of [1]_ is equivalent to ``scale = 2`` with
  4495. `laplace_asymmetric`.
  4496. References
  4497. ----------
  4498. .. [1] "Asymmetric Laplace distribution", Wikipedia
  4499. https://en.wikipedia.org/wiki/Asymmetric_Laplace_distribution
  4500. .. [2] Kozubowski TJ and Podgórski K. A Multivariate and
  4501. Asymmetric Generalization of Laplace Distribution,
  4502. Computational Statistics 15, 531--540 (2000).
  4503. :doi:`10.1007/PL00022717`
  4504. %(example)s
  4505. """
  4506. def _shape_info(self):
  4507. return [_ShapeInfo("kappa", False, (0, np.inf), (False, False))]
  4508. def _pdf(self, x, kappa):
  4509. return np.exp(self._logpdf(x, kappa))
  4510. def _logpdf(self, x, kappa):
  4511. kapinv = 1/kappa
  4512. lPx = x * np.where(x >= 0, -kappa, kapinv)
  4513. lPx -= np.log(kappa+kapinv)
  4514. return lPx
  4515. def _cdf(self, x, kappa):
  4516. kapinv = 1/kappa
  4517. kappkapinv = kappa+kapinv
  4518. return np.where(x >= 0,
  4519. 1 - np.exp(-x*kappa)*(kapinv/kappkapinv),
  4520. np.exp(x*kapinv)*(kappa/kappkapinv))
  4521. def _sf(self, x, kappa):
  4522. kapinv = 1/kappa
  4523. kappkapinv = kappa+kapinv
  4524. return np.where(x >= 0,
  4525. np.exp(-x*kappa)*(kapinv/kappkapinv),
  4526. 1 - np.exp(x*kapinv)*(kappa/kappkapinv))
  4527. def _ppf(self, q, kappa):
  4528. kapinv = 1/kappa
  4529. kappkapinv = kappa+kapinv
  4530. return np.where(q >= kappa/kappkapinv,
  4531. -np.log((1 - q)*kappkapinv*kappa)*kapinv,
  4532. np.log(q*kappkapinv/kappa)*kappa)
  4533. def _isf(self, q, kappa):
  4534. kapinv = 1/kappa
  4535. kappkapinv = kappa+kapinv
  4536. return np.where(q <= kapinv/kappkapinv,
  4537. -np.log(q*kappkapinv*kappa)*kapinv,
  4538. np.log((1 - q)*kappkapinv/kappa)*kappa)
  4539. def _stats(self, kappa):
  4540. kapinv = 1/kappa
  4541. mn = kapinv - kappa
  4542. var = kapinv*kapinv + kappa*kappa
  4543. g1 = 2.0*(1-np.power(kappa, 6))/np.power(1+np.power(kappa, 4), 1.5)
  4544. g2 = 6.0*(1+np.power(kappa, 8))/np.power(1+np.power(kappa, 4), 2)
  4545. return mn, var, g1, g2
  4546. def _entropy(self, kappa):
  4547. return 1 + np.log(kappa+1/kappa)
  4548. laplace_asymmetric = laplace_asymmetric_gen(name='laplace_asymmetric')
  4549. def _check_fit_input_parameters(dist, data, args, kwds):
  4550. if not isinstance(data, CensoredData):
  4551. data = np.asarray(data)
  4552. floc = kwds.get('floc', None)
  4553. fscale = kwds.get('fscale', None)
  4554. num_shapes = len(dist.shapes.split(",")) if dist.shapes else 0
  4555. fshape_keys = []
  4556. fshapes = []
  4557. # user has many options for fixing the shape, so here we standardize it
  4558. # into 'f' + the number of the shape.
  4559. # Adapted from `_reduce_func` in `_distn_infrastructure.py`:
  4560. if dist.shapes:
  4561. shapes = dist.shapes.replace(',', ' ').split()
  4562. for j, s in enumerate(shapes):
  4563. key = 'f' + str(j)
  4564. names = [key, 'f' + s, 'fix_' + s]
  4565. val = _get_fixed_fit_value(kwds, names)
  4566. fshape_keys.append(key)
  4567. fshapes.append(val)
  4568. if val is not None:
  4569. kwds[key] = val
  4570. # determine if there are any unknown arguments in kwds
  4571. known_keys = {'loc', 'scale', 'optimizer', 'method',
  4572. 'floc', 'fscale', *fshape_keys}
  4573. unknown_keys = set(kwds).difference(known_keys)
  4574. if unknown_keys:
  4575. raise TypeError(f"Unknown keyword arguments: {unknown_keys}.")
  4576. if len(args) > num_shapes:
  4577. raise TypeError("Too many positional arguments.")
  4578. if None not in {floc, fscale, *fshapes}:
  4579. # This check is for consistency with `rv_continuous.fit`.
  4580. # Without this check, this function would just return the
  4581. # parameters that were given.
  4582. raise RuntimeError("All parameters fixed. There is nothing to "
  4583. "optimize.")
  4584. uncensored = data._uncensor() if isinstance(data, CensoredData) else data
  4585. if not np.isfinite(uncensored).all():
  4586. raise ValueError("The data contains non-finite values.")
  4587. return (data, *fshapes, floc, fscale)
  4588. class levy_gen(rv_continuous):
  4589. r"""A Levy continuous random variable.
  4590. %(before_notes)s
  4591. See Also
  4592. --------
  4593. levy_stable, levy_l
  4594. Notes
  4595. -----
  4596. The probability density function for `levy` is:
  4597. .. math::
  4598. f(x) = \frac{1}{\sqrt{2\pi x^3}} \exp\left(-\frac{1}{2x}\right)
  4599. for :math:`x > 0`.
  4600. This is the same as the Levy-stable distribution with :math:`a=1/2` and
  4601. :math:`b=1`.
  4602. %(after_notes)s
  4603. Examples
  4604. --------
  4605. >>> import numpy as np
  4606. >>> from scipy.stats import levy
  4607. >>> import matplotlib.pyplot as plt
  4608. >>> fig, ax = plt.subplots(1, 1)
  4609. Calculate the first four moments:
  4610. >>> mean, var, skew, kurt = levy.stats(moments='mvsk')
  4611. Display the probability density function (``pdf``):
  4612. >>> # `levy` is very heavy-tailed.
  4613. >>> # To show a nice plot, let's cut off the upper 40 percent.
  4614. >>> a, b = levy.ppf(0), levy.ppf(0.6)
  4615. >>> x = np.linspace(a, b, 100)
  4616. >>> ax.plot(x, levy.pdf(x),
  4617. ... 'r-', lw=5, alpha=0.6, label='levy pdf')
  4618. Alternatively, the distribution object can be called (as a function)
  4619. to fix the shape, location and scale parameters. This returns a "frozen"
  4620. RV object holding the given parameters fixed.
  4621. Freeze the distribution and display the frozen ``pdf``:
  4622. >>> rv = levy()
  4623. >>> ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')
  4624. Check accuracy of ``cdf`` and ``ppf``:
  4625. >>> vals = levy.ppf([0.001, 0.5, 0.999])
  4626. >>> np.allclose([0.001, 0.5, 0.999], levy.cdf(vals))
  4627. True
  4628. Generate random numbers:
  4629. >>> r = levy.rvs(size=1000)
  4630. And compare the histogram:
  4631. >>> # manual binning to ignore the tail
  4632. >>> bins = np.concatenate((np.linspace(a, b, 20), [np.max(r)]))
  4633. >>> ax.hist(r, bins=bins, density=True, histtype='stepfilled', alpha=0.2)
  4634. >>> ax.set_xlim([x[0], x[-1]])
  4635. >>> ax.legend(loc='best', frameon=False)
  4636. >>> plt.show()
  4637. """
  4638. _support_mask = rv_continuous._open_support_mask
  4639. def _shape_info(self):
  4640. return []
  4641. def _pdf(self, x):
  4642. # levy.pdf(x) = 1 / (x * sqrt(2*pi*x)) * exp(-1/(2*x))
  4643. return 1 / np.sqrt(2*np.pi*x) / x * np.exp(-1/(2*x))
  4644. def _cdf(self, x):
  4645. # Equivalent to 2*norm.sf(np.sqrt(1/x))
  4646. return sc.erfc(np.sqrt(0.5 / x))
  4647. def _sf(self, x):
  4648. return sc.erf(np.sqrt(0.5 / x))
  4649. def _ppf(self, q):
  4650. # Equivalent to 1.0/(norm.isf(q/2)**2) or 0.5/(erfcinv(q)**2)
  4651. val = _norm_isf(q/2)
  4652. return 1.0 / (val * val)
  4653. def _isf(self, p):
  4654. return 1/(2*sc.erfinv(p)**2)
  4655. def _stats(self):
  4656. return np.inf, np.inf, np.nan, np.nan
  4657. levy = levy_gen(a=0.0, name="levy")
  4658. class levy_l_gen(rv_continuous):
  4659. r"""A left-skewed Levy continuous random variable.
  4660. %(before_notes)s
  4661. See Also
  4662. --------
  4663. levy, levy_stable
  4664. Notes
  4665. -----
  4666. The probability density function for `levy_l` is:
  4667. .. math::
  4668. f(x) = \frac{1}{|x| \sqrt{2\pi |x|}} \exp{ \left(-\frac{1}{2|x|} \right)}
  4669. for :math:`x < 0`.
  4670. This is the same as the Levy-stable distribution with :math:`a=1/2` and
  4671. :math:`b=-1`.
  4672. %(after_notes)s
  4673. Examples
  4674. --------
  4675. >>> import numpy as np
  4676. >>> from scipy.stats import levy_l
  4677. >>> import matplotlib.pyplot as plt
  4678. >>> fig, ax = plt.subplots(1, 1)
  4679. Calculate the first four moments:
  4680. >>> mean, var, skew, kurt = levy_l.stats(moments='mvsk')
  4681. Display the probability density function (``pdf``):
  4682. >>> # `levy_l` is very heavy-tailed.
  4683. >>> # To show a nice plot, let's cut off the lower 40 percent.
  4684. >>> a, b = levy_l.ppf(0.4), levy_l.ppf(1)
  4685. >>> x = np.linspace(a, b, 100)
  4686. >>> ax.plot(x, levy_l.pdf(x),
  4687. ... 'r-', lw=5, alpha=0.6, label='levy_l pdf')
  4688. Alternatively, the distribution object can be called (as a function)
  4689. to fix the shape, location and scale parameters. This returns a "frozen"
  4690. RV object holding the given parameters fixed.
  4691. Freeze the distribution and display the frozen ``pdf``:
  4692. >>> rv = levy_l()
  4693. >>> ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')
  4694. Check accuracy of ``cdf`` and ``ppf``:
  4695. >>> vals = levy_l.ppf([0.001, 0.5, 0.999])
  4696. >>> np.allclose([0.001, 0.5, 0.999], levy_l.cdf(vals))
  4697. True
  4698. Generate random numbers:
  4699. >>> r = levy_l.rvs(size=1000)
  4700. And compare the histogram:
  4701. >>> # manual binning to ignore the tail
  4702. >>> bins = np.concatenate(([np.min(r)], np.linspace(a, b, 20)))
  4703. >>> ax.hist(r, bins=bins, density=True, histtype='stepfilled', alpha=0.2)
  4704. >>> ax.set_xlim([x[0], x[-1]])
  4705. >>> ax.legend(loc='best', frameon=False)
  4706. >>> plt.show()
  4707. """
  4708. _support_mask = rv_continuous._open_support_mask
  4709. def _shape_info(self):
  4710. return []
  4711. def _pdf(self, x):
  4712. # levy_l.pdf(x) = 1 / (abs(x) * sqrt(2*pi*abs(x))) * exp(-1/(2*abs(x)))
  4713. ax = abs(x)
  4714. return 1/np.sqrt(2*np.pi*ax)/ax*np.exp(-1/(2*ax))
  4715. def _cdf(self, x):
  4716. ax = abs(x)
  4717. return 2 * _norm_cdf(1 / np.sqrt(ax)) - 1
  4718. def _sf(self, x):
  4719. ax = abs(x)
  4720. return 2 * _norm_sf(1 / np.sqrt(ax))
  4721. def _ppf(self, q):
  4722. val = _norm_ppf((q + 1.0) / 2)
  4723. return -1.0 / (val * val)
  4724. def _isf(self, p):
  4725. return -1/_norm_isf(p/2)**2
  4726. def _stats(self):
  4727. return np.inf, np.inf, np.nan, np.nan
  4728. levy_l = levy_l_gen(b=0.0, name="levy_l")
  4729. class logistic_gen(rv_continuous):
  4730. r"""A logistic (or Sech-squared) continuous random variable.
  4731. %(before_notes)s
  4732. Notes
  4733. -----
  4734. The probability density function for `logistic` is:
  4735. .. math::
  4736. f(x) = \frac{\exp(-x)}
  4737. {(1+\exp(-x))^2}
  4738. `logistic` is a special case of `genlogistic` with ``c=1``.
  4739. Remark that the survival function (``logistic.sf``) is equal to the
  4740. Fermi-Dirac distribution describing fermionic statistics.
  4741. %(after_notes)s
  4742. %(example)s
  4743. """
  4744. def _shape_info(self):
  4745. return []
  4746. def _rvs(self, size=None, random_state=None):
  4747. return random_state.logistic(size=size)
  4748. def _pdf(self, x):
  4749. # logistic.pdf(x) = exp(-x) / (1+exp(-x))**2
  4750. return np.exp(self._logpdf(x))
  4751. def _logpdf(self, x):
  4752. y = -np.abs(x)
  4753. return y - 2. * sc.log1p(np.exp(y))
  4754. def _cdf(self, x):
  4755. return sc.expit(x)
  4756. def _logcdf(self, x):
  4757. return sc.log_expit(x)
  4758. def _ppf(self, q):
  4759. return sc.logit(q)
  4760. def _sf(self, x):
  4761. return sc.expit(-x)
  4762. def _logsf(self, x):
  4763. return sc.log_expit(-x)
  4764. def _isf(self, q):
  4765. return -sc.logit(q)
  4766. def _stats(self):
  4767. return 0, np.pi*np.pi/3.0, 0, 6.0/5.0
  4768. def _entropy(self):
  4769. # https://en.wikipedia.org/wiki/Logistic_distribution
  4770. return 2.0
  4771. @_call_super_mom
  4772. @inherit_docstring_from(rv_continuous)
  4773. def fit(self, data, *args, **kwds):
  4774. if kwds.pop('superfit', False):
  4775. return super().fit(data, *args, **kwds)
  4776. data, floc, fscale = _check_fit_input_parameters(self, data,
  4777. args, kwds)
  4778. n = len(data)
  4779. # rv_continuous provided guesses
  4780. loc, scale = self._fitstart(data)
  4781. # these are trumped by user-provided guesses
  4782. loc, scale = kwds.get('loc', loc), kwds.get('scale', scale)
  4783. # the maximum likelihood estimators `a` and `b` of the location and
  4784. # scale parameters are roots of the two equations described in `func`.
  4785. # Source: Statistical Distributions, 3rd Edition. Evans, Hastings, and
  4786. # Peacock (2000), Page 130
  4787. def dl_dloc(loc, scale=fscale):
  4788. c = (data - loc) / scale
  4789. return np.sum(sc.expit(c)) - n/2
  4790. def dl_dscale(scale, loc=floc):
  4791. c = (data - loc) / scale
  4792. return np.sum(c*np.tanh(c/2)) - n
  4793. def func(params):
  4794. loc, scale = params
  4795. return dl_dloc(loc, scale), dl_dscale(scale, loc)
  4796. if fscale is not None and floc is None:
  4797. res = optimize.root(dl_dloc, (loc,))
  4798. loc = res.x[0]
  4799. scale = fscale
  4800. elif floc is not None and fscale is None:
  4801. res = optimize.root(dl_dscale, (scale,))
  4802. scale = res.x[0]
  4803. loc = floc
  4804. else:
  4805. res = optimize.root(func, (loc, scale))
  4806. loc, scale = res.x
  4807. # Note: gh-18176 reported data for which the reported MLE had
  4808. # `scale < 0`. To fix the bug, we return abs(scale). This is OK because
  4809. # `dl_dscale` and `dl_dloc` are even and odd functions of `scale`,
  4810. # respectively, so if `-scale` is a solution, so is `scale`.
  4811. scale = abs(scale)
  4812. return ((loc, scale) if res.success
  4813. else super().fit(data, *args, **kwds))
  4814. logistic = logistic_gen(name='logistic')
  4815. class loggamma_gen(rv_continuous):
  4816. r"""A log gamma continuous random variable.
  4817. %(before_notes)s
  4818. Notes
  4819. -----
  4820. The probability density function for `loggamma` is:
  4821. .. math::
  4822. f(x, c) = \frac{\exp(c x - \exp(x))}
  4823. {\Gamma(c)}
  4824. for all :math:`x, c > 0`. Here, :math:`\Gamma` is the
  4825. gamma function (`scipy.special.gamma`).
  4826. `loggamma` takes ``c`` as a shape parameter for :math:`c`.
  4827. %(after_notes)s
  4828. %(example)s
  4829. """
  4830. def _shape_info(self):
  4831. return [_ShapeInfo("c", False, (0, np.inf), (False, False))]
  4832. def _rvs(self, c, size=None, random_state=None):
  4833. # Use the property of the gamma distribution Gamma(c)
  4834. # Gamma(c) ~ Gamma(c + 1)*U**(1/c),
  4835. # where U is uniform on [0, 1]. (See, e.g.,
  4836. # G. Marsaglia and W.W. Tsang, "A simple method for generating gamma
  4837. # variables", https://doi.org/10.1145/358407.358414)
  4838. # So
  4839. # log(Gamma(c)) ~ log(Gamma(c + 1)) + log(U)/c
  4840. # Generating a sample with this formulation is a bit slower
  4841. # than the more obvious log(Gamma(c)), but it avoids loss
  4842. # of precision when c << 1.
  4843. return (np.log(random_state.gamma(c + 1, size=size))
  4844. + np.log(random_state.uniform(size=size))/c)
  4845. def _pdf(self, x, c):
  4846. # loggamma.pdf(x, c) = exp(c*x-exp(x)) / gamma(c)
  4847. return np.exp(c*x-np.exp(x)-sc.gammaln(c))
  4848. def _logpdf(self, x, c):
  4849. return c*x - np.exp(x) - sc.gammaln(c)
  4850. def _cdf(self, x, c):
  4851. # This function is gammainc(c, exp(x)), where gammainc(c, z) is
  4852. # the regularized incomplete gamma function.
  4853. # The first term in a series expansion of gamminc(c, z) is
  4854. # z**c/Gamma(c+1); see 6.5.29 of Abramowitz & Stegun (and refer
  4855. # back to 6.5.1, 6.5.2 and 6.5.4 for the relevant notation).
  4856. # This can also be found in the wikipedia article
  4857. # https://en.wikipedia.org/wiki/Incomplete_gamma_function.
  4858. # Here we use that formula when x is sufficiently negative that
  4859. # exp(x) will result in subnormal numbers and lose precision.
  4860. # We evaluate the log of the expression first to allow the possible
  4861. # cancellation of the terms in the division, and then exponentiate.
  4862. # That is,
  4863. # exp(x)**c/Gamma(c+1) = exp(log(exp(x)**c/Gamma(c+1)))
  4864. # = exp(c*x - gammaln(c+1))
  4865. return xpx.apply_where(
  4866. x < _LOGXMIN, (x, c),
  4867. lambda x, c: np.exp(c*x - sc.gammaln(c+1)),
  4868. lambda x, c: sc.gammainc(c, np.exp(x)))
  4869. def _ppf(self, q, c):
  4870. # The expression used when g < _XMIN inverts the one term expansion
  4871. # given in the comments of _cdf().
  4872. g = sc.gammaincinv(c, q)
  4873. return xpx.apply_where(
  4874. g < _XMIN, (g, q, c),
  4875. lambda g, q, c: (np.log(q) + sc.gammaln(c+1))/c,
  4876. lambda g, q, c: np.log(g))
  4877. def _sf(self, x, c):
  4878. # See the comments for _cdf() for how x < _LOGXMIN is handled.
  4879. return xpx.apply_where(
  4880. x < _LOGXMIN, (x, c),
  4881. lambda x, c: -np.expm1(c*x - sc.gammaln(c+1)),
  4882. lambda x, c: sc.gammaincc(c, np.exp(x)))
  4883. def _isf(self, q, c):
  4884. # The expression used when g < _XMIN inverts the complement of
  4885. # the one term expansion given in the comments of _cdf().
  4886. g = sc.gammainccinv(c, q)
  4887. return xpx.apply_where(
  4888. g < _XMIN, (g, q, c),
  4889. lambda g, q, c: (np.log1p(-q) + sc.gammaln(c+1))/c,
  4890. lambda g, q, c: np.log(g))
  4891. def _stats(self, c):
  4892. # See, for example, "A Statistical Study of Log-Gamma Distribution", by
  4893. # Ping Shing Chan (thesis, McMaster University, 1993).
  4894. mean = sc.digamma(c)
  4895. var = sc.polygamma(1, c)
  4896. skewness = sc.polygamma(2, c) / np.power(var, 1.5)
  4897. excess_kurtosis = sc.polygamma(3, c) / (var*var)
  4898. return mean, var, skewness, excess_kurtosis
  4899. def _entropy(self, c):
  4900. def regular(c):
  4901. h = sc.gammaln(c) - c * sc.digamma(c) + c
  4902. return h
  4903. def asymptotic(c):
  4904. # using asymptotic expansions for gammaln and psi (see gh-18093)
  4905. term = -0.5*np.log(c) + c**-1./6 - c**-3./90 + c**-5./210
  4906. h = norm._entropy() + term
  4907. return h
  4908. return xpx.apply_where(c >= 45, c, asymptotic, regular)
# Module-level instance of loggamma_gen, registered under the name 'loggamma'.
loggamma = loggamma_gen(name='loggamma')
  4910. class loglaplace_gen(rv_continuous):
  4911. r"""A log-Laplace continuous random variable.
  4912. %(before_notes)s
  4913. Notes
  4914. -----
  4915. The probability density function for `loglaplace` is:
  4916. .. math::
  4917. f(x, c) = \begin{cases}\frac{c}{2} x^{ c-1} &\text{for } 0 < x < 1\\
  4918. \frac{c}{2} x^{-c-1} &\text{for } x \ge 1
  4919. \end{cases}
  4920. for :math:`c > 0`.
  4921. `loglaplace` takes ``c`` as a shape parameter for :math:`c`.
  4922. %(after_notes)s
  4923. Suppose a random variable ``X`` follows the Laplace distribution with
  4924. location ``a`` and scale ``b``. Then ``Y = exp(X)`` follows the
  4925. log-Laplace distribution with ``c = 1 / b`` and ``scale = exp(a)``.
  4926. References
  4927. ----------
  4928. T.J. Kozubowski and K. Podgorski, "A log-Laplace growth rate model",
  4929. The Mathematical Scientist, vol. 28, pp. 49-60, 2003.
  4930. %(example)s
  4931. """
  4932. def _shape_info(self):
  4933. return [_ShapeInfo("c", False, (0, np.inf), (False, False))]
  4934. def _pdf(self, x, c):
  4935. # loglaplace.pdf(x, c) = c / 2 * x**(c-1), for 0 < x < 1
  4936. # = c / 2 * x**(-c-1), for x >= 1
  4937. cd2 = c/2.0
  4938. c = np.where(x < 1, c, -c)
  4939. return cd2*x**(c-1)
  4940. def _cdf(self, x, c):
  4941. return np.where(x < 1, 0.5*x**c, 1-0.5*x**(-c))
  4942. def _sf(self, x, c):
  4943. return np.where(x < 1, 1 - 0.5*x**c, 0.5*x**(-c))
  4944. def _ppf(self, q, c):
  4945. return np.where(q < 0.5, (2.0*q)**(1.0/c), (2*(1.0-q))**(-1.0/c))
  4946. def _isf(self, q, c):
  4947. return np.where(q > 0.5, (2.0*(1.0 - q))**(1.0/c), (2*q)**(-1.0/c))
  4948. def _munp(self, n, c):
  4949. with np.errstate(divide='ignore'):
  4950. c2, n2 = c**2, n**2
  4951. return np.where(n2 < c2, c2 / (c2 - n2), np.inf)
  4952. def _entropy(self, c):
  4953. return np.log(2.0/c) + 1.0
  4954. @_call_super_mom
  4955. @inherit_docstring_from(rv_continuous)
  4956. def fit(self, data, *args, **kwds):
  4957. data, fc, floc, fscale = _check_fit_input_parameters(self, data,
  4958. args, kwds)
  4959. # Specialize MLE only when location is known.
  4960. if floc is None:
  4961. return super(type(self), self).fit(data, *args, **kwds)
  4962. # Raise an error if any observation has zero likelihood.
  4963. if np.any(data <= floc):
  4964. raise FitDataError("loglaplace", lower=floc, upper=np.inf)
  4965. # Remove location from data.
  4966. if floc != 0:
  4967. data = data - floc
  4968. # When location is zero, the log-Laplace distribution is related to
  4969. # the Laplace distribution in that if X ~ Laplace(loc=a, scale=b),
  4970. # then Y = exp(X) ~ LogLaplace(c=1/b, loc=0, scale=exp(a)). It can
  4971. # be shown that the MLE for Y is the same as the MLE for X = ln(Y).
  4972. # Therefore, we reuse the formulas from laplace.fit() and transform
  4973. # the result back into log-laplace's parameter space.
  4974. a, b = laplace.fit(np.log(data),
  4975. floc=np.log(fscale) if fscale is not None else None,
  4976. fscale=1/fc if fc is not None else None,
  4977. method='mle')
  4978. loc = floc
  4979. scale = np.exp(a) if fscale is None else fscale
  4980. c = 1 / b if fc is None else fc
  4981. return c, loc, scale
  4982. loglaplace = loglaplace_gen(a=0.0, name='loglaplace')
  4983. def _lognorm_logpdf(x, s):
  4984. return xpx.apply_where(
  4985. x != 0, (x, s),
  4986. lambda x, s: (-np.log(x)**2 / (2 * s**2)
  4987. - np.log(s * x * np.sqrt(2 * np.pi))),
  4988. fill_value=-np.inf)
class lognorm_gen(rv_continuous):
    r"""A lognormal continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `lognorm` is:

    .. math::

        f(x, s) = \frac{1}{s x \sqrt{2\pi}}
                  \exp\left(-\frac{\log^2(x)}{2s^2}\right)

    for :math:`x > 0`, :math:`s > 0`.

    `lognorm` takes ``s`` as a shape parameter for :math:`s`.

    %(after_notes)s

    Suppose a normally distributed random variable ``X`` has mean ``mu`` and
    standard deviation ``sigma``. Then ``Y = exp(X)`` is lognormally
    distributed with ``s = sigma`` and ``scale = exp(mu)``.

    %(example)s

    The logarithm of a log-normally distributed random variable is
    normally distributed:

    >>> import numpy as np
    >>> import matplotlib.pyplot as plt
    >>> from scipy import stats
    >>> fig, ax = plt.subplots(1, 1)
    >>> mu, sigma = 2, 0.5
    >>> X = stats.norm(loc=mu, scale=sigma)
    >>> Y = stats.lognorm(s=sigma, scale=np.exp(mu))
    >>> x = np.linspace(*X.interval(0.999))
    >>> y = Y.rvs(size=10000)
    >>> ax.plot(x, X.pdf(x), label='X (pdf)')
    >>> ax.hist(np.log(y), density=True, bins=x, label='log(Y) (histogram)')
    >>> ax.legend()
    >>> plt.show()

    """
    _support_mask = rv_continuous._open_support_mask

    def _shape_info(self):
        return [_ShapeInfo("s", False, (0, np.inf), (False, False))]

    def _rvs(self, s, size=None, random_state=None):
        # exp of a scaled standard normal draw
        return np.exp(s * random_state.standard_normal(size))

    def _pdf(self, x, s):
        # lognorm.pdf(x, s) = 1 / (s*x*sqrt(2*pi)) * exp(-1/2*(log(x)/s)**2)
        return np.exp(self._logpdf(x, s))

    def _logpdf(self, x, s):
        return _lognorm_logpdf(x, s)

    # The distribution functions below apply the standard normal helpers to
    # log(x)/s, and the inverses exponentiate s times the normal quantile.
    def _cdf(self, x, s):
        return _norm_cdf(np.log(x) / s)

    def _logcdf(self, x, s):
        return _norm_logcdf(np.log(x) / s)

    def _ppf(self, q, s):
        return np.exp(s * _norm_ppf(q))

    def _sf(self, x, s):
        return _norm_sf(np.log(x) / s)

    def _logsf(self, x, s):
        return _norm_logsf(np.log(x) / s)

    def _isf(self, q, s):
        return np.exp(s * _norm_isf(q))

    def _stats(self, s):
        # All four returned quantities are expressed through p = exp(s**2).
        p = np.exp(s*s)
        mu = np.sqrt(p)
        mu2 = p*(p-1)
        g1 = np.sqrt(p-1)*(2+p)
        # polynomial p**4 + 2*p**3 + 3*p**2 - 6
        g2 = np.polyval([1, 2, 3, 0, -6.0], p)
        return mu, mu2, g1, g2

    def _entropy(self, s):
        return 0.5 * (1 + np.log(2*np.pi) + 2 * np.log(s))

    @_call_super_mom
    @extend_notes_in_docstring(rv_continuous, notes="""\
        When `method='MLE'` and
        the location parameter is fixed by using the `floc` argument,
        this function uses explicit formulas for the maximum likelihood
        estimation of the log-normal shape and scale parameters, so the
        `optimizer`, `loc` and `scale` keyword arguments are ignored.
        If the location is free, a likelihood maximum is found by
        setting its partial derivative wrt to location to 0, and
        solving by substituting the analytical expressions of shape
        and scale (or provided parameters).
        See, e.g., equation 3.1 in
        A. Clifford Cohen & Betty Jones Whitten (1980)
        Estimation in the Three-Parameter Lognormal Distribution,
        Journal of the American Statistical Association, 75:370, 399-404
        https://doi.org/10.2307/2287466
        \n\n""")
    def fit(self, data, *args, **kwds):
        # `superfit=True` bypasses the specialised code below entirely and
        # delegates to the parent implementation.
        if kwds.pop('superfit', False):
            return super().fit(data, *args, **kwds)

        parameters = _check_fit_input_parameters(self, data, args, kwds)
        data, fshape, floc, fscale = parameters
        data_min = np.min(data)

        def get_shape_scale(loc):
            # Calculate maximum likelihood scale and shape with analytical
            # formulas unless provided by the user
            if fshape is None or fscale is None:
                # log of the shifted data is only needed when at least one
                # of shape/scale has to be estimated
                lndata = np.log(data - loc)
            scale = fscale or np.exp(lndata.mean())
            shape = fshape or np.sqrt(np.mean((lndata - np.log(scale))**2))
            return shape, scale

        def dL_dLoc(loc):
            # Derivative of (positive) LL w.r.t. loc
            shape, scale = get_shape_scale(loc)
            shifted = data - loc
            return np.sum((1 + np.log(shifted/scale)/shape**2)/shifted)

        def ll(loc):
            # (Positive) log-likelihood
            shape, scale = get_shape_scale(loc)
            return -self.nnlf((shape, loc, scale), data)

        if floc is None:
            # The location must be less than the minimum of the data.
            # Back off a bit to avoid numerical issues.
            spacing = np.spacing(data_min)
            rbrack = data_min - spacing

            # Find the right end of the bracket by successive doubling of the
            # distance to data_min. We're interested in a maximum LL, so the
            # slope dL_dLoc_rbrack should be negative at the right end.
            # optimization for later: share shape, scale
            dL_dLoc_rbrack = dL_dLoc(rbrack)
            # LL at the initial right end, kept for the comparison with the
            # root found below
            ll_rbrack = ll(rbrack)
            delta = 2 * spacing  # 2 * (data_min - rbrack)
            while dL_dLoc_rbrack >= -1e-6:
                rbrack = data_min - delta
                dL_dLoc_rbrack = dL_dLoc(rbrack)
                delta *= 2

            if not np.isfinite(rbrack) or not np.isfinite(dL_dLoc_rbrack):
                # If we never find a negative slope, either we missed it or the
                # slope is always positive. It's usually the latter,
                # which means
                #   loc = data_min - spacing
                # But sometimes when shape and/or scale are fixed there are
                # other issues, so be cautious.
                return super().fit(data, *args, **kwds)

            # Now find the left end of the bracket. Guess is `rbrack-1`
            # unless that is too small of a difference to resolve. Double
            # the size of the interval until the left end is found.
            lbrack = np.minimum(np.nextafter(rbrack, -np.inf), rbrack-1)
            dL_dLoc_lbrack = dL_dLoc(lbrack)
            delta = 2 * (rbrack - lbrack)
            # keep widening while the slope has the same sign at both ends
            while (np.isfinite(lbrack) and np.isfinite(dL_dLoc_lbrack)
                   and np.sign(dL_dLoc_lbrack) == np.sign(dL_dLoc_rbrack)):
                lbrack = rbrack - delta
                dL_dLoc_lbrack = dL_dLoc(lbrack)
                delta *= 2

            # I don't recall observing this, but just in case...
            if not np.isfinite(lbrack) or not np.isfinite(dL_dLoc_lbrack):
                return super().fit(data, *args, **kwds)

            # If we have a valid bracket, find the root
            res = root_scalar(dL_dLoc, bracket=(lbrack, rbrack))
            if not res.converged:
                return super().fit(data, *args, **kwds)

            # If the slope was positive near the minimum of the data,
            # the maximum LL could be there instead of at the root. Compare
            # the LL of the two points to decide.
            ll_root = ll(res.root)
            loc = res.root if ll_root > ll_rbrack else data_min-spacing

        else:
            # A fixed location must lie strictly below every observation.
            if floc >= data_min:
                raise FitDataError("lognorm", lower=0., upper=np.inf)
            loc = floc

        shape, scale = get_shape_scale(loc)
        # Fall back to the parent fit if the analytical result is invalid.
        if not (self._argcheck(shape) and scale > 0):
            return super().fit(data, *args, **kwds)
        return shape, loc, scale


lognorm = lognorm_gen(a=0.0, name='lognorm')
  5148. class gibrat_gen(rv_continuous):
  5149. r"""A Gibrat continuous random variable.
  5150. %(before_notes)s
  5151. Notes
  5152. -----
  5153. The probability density function for `gibrat` is:
  5154. .. math::
  5155. f(x) = \frac{1}{x \sqrt{2\pi}} \exp(-\frac{1}{2} (\log(x))^2)
  5156. for :math:`x >= 0`.
  5157. `gibrat` is a special case of `lognorm` with ``s=1``.
  5158. %(after_notes)s
  5159. %(example)s
  5160. """
  5161. _support_mask = rv_continuous._open_support_mask
  5162. def _shape_info(self):
  5163. return []
  5164. def _rvs(self, size=None, random_state=None):
  5165. return np.exp(random_state.standard_normal(size))
  5166. def _pdf(self, x):
  5167. # gibrat.pdf(x) = 1/(x*sqrt(2*pi)) * exp(-1/2*(log(x))**2)
  5168. return np.exp(self._logpdf(x))
  5169. def _logpdf(self, x):
  5170. return _lognorm_logpdf(x, 1.0)
  5171. def _cdf(self, x):
  5172. return _norm_cdf(np.log(x))
  5173. def _ppf(self, q):
  5174. return np.exp(_norm_ppf(q))
  5175. def _sf(self, x):
  5176. return _norm_sf(np.log(x))
  5177. def _isf(self, p):
  5178. return np.exp(_norm_isf(p))
  5179. def _stats(self):
  5180. p = np.e
  5181. mu = np.sqrt(p)
  5182. mu2 = p * (p - 1)
  5183. g1 = np.sqrt(p - 1) * (2 + p)
  5184. g2 = np.polyval([1, 2, 3, 0, -6.0], p)
  5185. return mu, mu2, g1, g2
  5186. def _entropy(self):
  5187. return 0.5 * np.log(2 * np.pi) + 0.5
  5188. gibrat = gibrat_gen(a=0.0, name='gibrat')
  5189. class maxwell_gen(rv_continuous):
  5190. r"""A Maxwell continuous random variable.
  5191. %(before_notes)s
  5192. Notes
  5193. -----
  5194. A special case of a `chi` distribution, with ``df=3``, ``loc=0.0``,
  5195. and given ``scale = a``, where ``a`` is the parameter used in the
  5196. Mathworld description [1]_.
  5197. The probability density function for `maxwell` is:
  5198. .. math::
  5199. f(x) = \sqrt{2/\pi}x^2 \exp(-x^2/2)
  5200. for :math:`x >= 0`.
  5201. %(after_notes)s
  5202. References
  5203. ----------
  5204. .. [1] http://mathworld.wolfram.com/MaxwellDistribution.html
  5205. %(example)s
  5206. """
  5207. def _shape_info(self):
  5208. return []
  5209. def _rvs(self, size=None, random_state=None):
  5210. return chi.rvs(3.0, size=size, random_state=random_state)
  5211. def _pdf(self, x):
  5212. # maxwell.pdf(x) = sqrt(2/pi)x**2 * exp(-x**2/2)
  5213. return _SQRT_2_OVER_PI*x*x*np.exp(-x*x/2.0)
  5214. def _logpdf(self, x):
  5215. # Allow x=0 without 'divide by zero' warnings
  5216. with np.errstate(divide='ignore'):
  5217. return _LOG_SQRT_2_OVER_PI + 2*np.log(x) - 0.5*x*x
  5218. def _cdf(self, x):
  5219. return sc.gammainc(1.5, x*x/2.0)
  5220. def _ppf(self, q):
  5221. return np.sqrt(2*sc.gammaincinv(1.5, q))
  5222. def _sf(self, x):
  5223. return sc.gammaincc(1.5, x*x/2.0)
  5224. def _isf(self, q):
  5225. return np.sqrt(2*sc.gammainccinv(1.5, q))
  5226. def _stats(self):
  5227. val = 3*np.pi-8
  5228. return (2*np.sqrt(2.0/np.pi),
  5229. 3-8/np.pi,
  5230. np.sqrt(2)*(32-10*np.pi)/val**1.5,
  5231. (-12*np.pi*np.pi + 160*np.pi - 384) / val**2.0)
  5232. def _entropy(self):
  5233. return _EULER + 0.5*np.log(2*np.pi)-0.5
  5234. maxwell = maxwell_gen(a=0.0, name='maxwell')
  5235. class mielke_gen(rv_continuous):
  5236. r"""A Mielke Beta-Kappa / Dagum continuous random variable.
  5237. %(before_notes)s
  5238. Notes
  5239. -----
  5240. The probability density function for `mielke` is:
  5241. .. math::
  5242. f(x, k, s) = \frac{k x^{k-1}}{(1+x^s)^{1+k/s}}
  5243. for :math:`x > 0` and :math:`k, s > 0`. The distribution is sometimes
  5244. called Dagum distribution ([2]_). It was already defined in [3]_, called
  5245. a Burr Type III distribution (`burr` with parameters ``c=s`` and
  5246. ``d=k/s``).
  5247. `mielke` takes ``k`` and ``s`` as shape parameters.
  5248. %(after_notes)s
  5249. References
  5250. ----------
  5251. .. [1] Mielke, P.W., 1973 "Another Family of Distributions for Describing
  5252. and Analyzing Precipitation Data." J. Appl. Meteor., 12, 275-280
  5253. .. [2] Dagum, C., 1977 "A new model for personal income distribution."
  5254. Economie Appliquee, 33, 327-367.
  5255. .. [3] Burr, I. W. "Cumulative frequency functions", Annals of
  5256. Mathematical Statistics, 13(2), pp 215-232 (1942).
  5257. %(example)s
  5258. """
  5259. def _shape_info(self):
  5260. ik = _ShapeInfo("k", False, (0, np.inf), (False, False))
  5261. i_s = _ShapeInfo("s", False, (0, np.inf), (False, False))
  5262. return [ik, i_s]
  5263. def _pdf(self, x, k, s):
  5264. return k*x**(k-1.0) / (1.0+x**s)**(1.0+k*1.0/s)
  5265. def _logpdf(self, x, k, s):
  5266. # Allow x=0 without 'divide by zero' warnings.
  5267. with np.errstate(divide='ignore'):
  5268. return np.log(k) + np.log(x)*(k - 1) - np.log1p(x**s)*(1 + k/s)
  5269. def _cdf(self, x, k, s):
  5270. return x**k / (1.0+x**s)**(k*1.0/s)
  5271. def _ppf(self, q, k, s):
  5272. qsk = pow(q, s*1.0/k)
  5273. return pow(qsk/(1.0-qsk), 1.0/s)
  5274. def _munp(self, n, k, s):
  5275. def nth_moment(n, k, s):
  5276. # n-th moment is defined for -k < n < s
  5277. return sc.gamma((k+n)/s)*sc.gamma(1-n/s)/sc.gamma(k/s)
  5278. return xpx.apply_where(n < s, (n, k, s), nth_moment, fill_value=np.inf)
  5279. mielke = mielke_gen(a=0.0, name='mielke')
  5280. class kappa4_gen(rv_continuous):
  5281. r"""Kappa 4 parameter distribution.
  5282. %(before_notes)s
  5283. Notes
  5284. -----
  5285. The probability density function for kappa4 is:
  5286. .. math::
  5287. f(x, h, k) = (1 - k x)^{1/k - 1} (1 - h (1 - k x)^{1/k})^{1/h-1}
  5288. if :math:`h` and :math:`k` are not equal to 0.
  5289. If :math:`h` or :math:`k` are zero then the pdf can be simplified:
  5290. :math:`h = 0` and :math:`k \neq 0`::
  5291. kappa4.pdf(x, h, k) = (1.0 - k*x)**(1.0/k - 1.0)*
  5292. exp(-(1.0 - k*x)**(1.0/k))
  5293. :math:`h \neq 0` and :math:`k = 0`::
  5294. kappa4.pdf(x, h, k) = exp(-x)*(1.0 - h*exp(-x))**(1.0/h - 1.0)
  5295. :math:`h = 0` and :math:`k = 0`::
  5296. kappa4.pdf(x, h, k) = exp(-x)*exp(-exp(-x))
  5297. kappa4 takes :math:`h` and :math:`k` as shape parameters.
  5298. The kappa4 distribution returns other distributions when certain
  5299. :math:`h` and :math:`k` values are used.
  5300. +------+-------------+----------------+------------------+
  5301. | h | k=0.0 | k=1.0 | -inf<=k<=inf |
  5302. +======+=============+================+==================+
  5303. | -1.0 | Logistic | | Generalized |
  5304. | | | | Logistic(1) |
  5305. | | | | |
  5306. | | logistic(x) | | |
  5307. +------+-------------+----------------+------------------+
  5308. | 0.0 | Gumbel | Reverse | Generalized |
  5309. | | | Exponential(2) | Extreme Value |
  5310. | | | | |
  5311. | | gumbel_r(x) | | genextreme(x, k) |
  5312. +------+-------------+----------------+------------------+
  5313. | 1.0 | Exponential | Uniform | Generalized |
  5314. | | | | Pareto |
  5315. | | | | |
  5316. | | expon(x) | uniform(x) | genpareto(x, -k) |
  5317. +------+-------------+----------------+------------------+
  5318. (1) There are at least five generalized logistic distributions.
  5319. Four are described here:
  5320. https://en.wikipedia.org/wiki/Generalized_logistic_distribution
  5321. The "fifth" one is the one kappa4 should match which currently
  5322. isn't implemented in scipy:
  5323. https://en.wikipedia.org/wiki/Talk:Generalized_logistic_distribution
  5324. https://www.mathwave.com/help/easyfit/html/analyses/distributions/gen_logistic.html
  5325. (2) This distribution is currently not in scipy.
  5326. References
  5327. ----------
  5328. J.C. Finney, "Optimization of a Skewed Logistic Distribution With Respect
  5329. to the Kolmogorov-Smirnov Test", A Dissertation Submitted to the Graduate
  5330. Faculty of the Louisiana State University and Agricultural and Mechanical
  5331. College, (August, 2004),
  5332. https://digitalcommons.lsu.edu/gradschool_dissertations/3672
  5333. J.R.M. Hosking, "The four-parameter kappa distribution". IBM J. Res.
  5334. Develop. 38 (3), 25 1-258 (1994).
  5335. B. Kumphon, A. Kaew-Man, P. Seenoi, "A Rainfall Distribution for the Lampao
  5336. Site in the Chi River Basin, Thailand", Journal of Water Resource and
  5337. Protection, vol. 4, 866-869, (2012).
  5338. :doi:`10.4236/jwarp.2012.410101`
  5339. C. Winchester, "On Estimation of the Four-Parameter Kappa Distribution", A
  5340. Thesis Submitted to Dalhousie University, Halifax, Nova Scotia, (March
  5341. 2000).
  5342. http://www.nlc-bnc.ca/obj/s4/f2/dsk2/ftp01/MQ57336.pdf
  5343. %(after_notes)s
  5344. %(example)s
  5345. """
  5346. def _argcheck(self, h, k):
  5347. shape = np.broadcast_arrays(h, k)[0].shape
  5348. return np.full(shape, fill_value=True)
  5349. def _shape_info(self):
  5350. ih = _ShapeInfo("h", False, (-np.inf, np.inf), (False, False))
  5351. ik = _ShapeInfo("k", False, (-np.inf, np.inf), (False, False))
  5352. return [ih, ik]
  5353. def _get_support(self, h, k):
  5354. condlist = [np.logical_and(h > 0, k > 0),
  5355. np.logical_and(h > 0, k == 0),
  5356. np.logical_and(h > 0, k < 0),
  5357. np.logical_and(h <= 0, k > 0),
  5358. np.logical_and(h <= 0, k == 0),
  5359. np.logical_and(h <= 0, k < 0)]
  5360. def f0(h, k):
  5361. return (1.0 - np.float_power(h, -k))/k
  5362. def f1(h, k):
  5363. return np.log(h)
  5364. def f3(h, k):
  5365. a = np.empty(np.shape(h))
  5366. a[:] = -np.inf
  5367. return a
  5368. def f5(h, k):
  5369. return 1.0/k
  5370. _a = _lazyselect(condlist,
  5371. [f0, f1, f0, f3, f3, f5],
  5372. [h, k],
  5373. default=np.nan)
  5374. def f0(h, k):
  5375. return 1.0/k
  5376. def f1(h, k):
  5377. a = np.empty(np.shape(h))
  5378. a[:] = np.inf
  5379. return a
  5380. _b = _lazyselect(condlist,
  5381. [f0, f1, f1, f0, f1, f1],
  5382. [h, k],
  5383. default=np.nan)
  5384. return _a, _b
  5385. def _pdf(self, x, h, k):
  5386. # kappa4.pdf(x, h, k) = (1.0 - k*x)**(1.0/k - 1.0)*
  5387. # (1.0 - h*(1.0 - k*x)**(1.0/k))**(1.0/h-1)
  5388. return np.exp(self._logpdf(x, h, k))
  5389. def _logpdf(self, x, h, k):
  5390. condlist = [np.logical_and(h != 0, k != 0),
  5391. np.logical_and(h == 0, k != 0),
  5392. np.logical_and(h != 0, k == 0),
  5393. np.logical_and(h == 0, k == 0)]
  5394. def f0(x, h, k):
  5395. '''pdf = (1.0 - k*x)**(1.0/k - 1.0)*(
  5396. 1.0 - h*(1.0 - k*x)**(1.0/k))**(1.0/h-1.0)
  5397. logpdf = ...
  5398. '''
  5399. return (sc.xlog1py(1.0/k - 1.0, -k*x) +
  5400. sc.xlog1py(1.0/h - 1.0, -h*(1.0 - k*x)**(1.0/k)))
  5401. def f1(x, h, k):
  5402. '''pdf = (1.0 - k*x)**(1.0/k - 1.0)*np.exp(-(
  5403. 1.0 - k*x)**(1.0/k))
  5404. logpdf = ...
  5405. '''
  5406. return sc.xlog1py(1.0/k - 1.0, -k*x) - (1.0 - k*x)**(1.0/k)
  5407. def f2(x, h, k):
  5408. '''pdf = np.exp(-x)*(1.0 - h*np.exp(-x))**(1.0/h - 1.0)
  5409. logpdf = ...
  5410. '''
  5411. return -x + sc.xlog1py(1.0/h - 1.0, -h*np.exp(-x))
  5412. def f3(x, h, k):
  5413. '''pdf = np.exp(-x-np.exp(-x))
  5414. logpdf = ...
  5415. '''
  5416. return -x - np.exp(-x)
  5417. return _lazyselect(condlist,
  5418. [f0, f1, f2, f3],
  5419. [x, h, k],
  5420. default=np.nan)
  5421. def _cdf(self, x, h, k):
  5422. return np.exp(self._logcdf(x, h, k))
  5423. def _logcdf(self, x, h, k):
  5424. condlist = [np.logical_and(h != 0, k != 0),
  5425. np.logical_and(h == 0, k != 0),
  5426. np.logical_and(h != 0, k == 0),
  5427. np.logical_and(h == 0, k == 0)]
  5428. def f0(x, h, k):
  5429. '''cdf = (1.0 - h*(1.0 - k*x)**(1.0/k))**(1.0/h)
  5430. logcdf = ...
  5431. '''
  5432. return (1.0/h)*sc.log1p(-h*(1.0 - k*x)**(1.0/k))
  5433. def f1(x, h, k):
  5434. '''cdf = np.exp(-(1.0 - k*x)**(1.0/k))
  5435. logcdf = ...
  5436. '''
  5437. return -(1.0 - k*x)**(1.0/k)
  5438. def f2(x, h, k):
  5439. '''cdf = (1.0 - h*np.exp(-x))**(1.0/h)
  5440. logcdf = ...
  5441. '''
  5442. return (1.0/h)*sc.log1p(-h*np.exp(-x))
  5443. def f3(x, h, k):
  5444. '''cdf = np.exp(-np.exp(-x))
  5445. logcdf = ...
  5446. '''
  5447. return -np.exp(-x)
  5448. return _lazyselect(condlist,
  5449. [f0, f1, f2, f3],
  5450. [x, h, k],
  5451. default=np.nan)
  5452. def _ppf(self, q, h, k):
  5453. condlist = [np.logical_and(h != 0, k != 0),
  5454. np.logical_and(h == 0, k != 0),
  5455. np.logical_and(h != 0, k == 0),
  5456. np.logical_and(h == 0, k == 0)]
  5457. def f0(q, h, k):
  5458. return 1.0/k*(1.0 - ((1.0 - (q**h))/h)**k)
  5459. def f1(q, h, k):
  5460. return 1.0/k*(1.0 - (-np.log(q))**k)
  5461. def f2(q, h, k):
  5462. '''ppf = -np.log((1.0 - (q**h))/h)
  5463. '''
  5464. return -sc.log1p(-(q**h)) + np.log(h)
  5465. def f3(q, h, k):
  5466. return -np.log(-np.log(q))
  5467. return _lazyselect(condlist,
  5468. [f0, f1, f2, f3],
  5469. [q, h, k],
  5470. default=np.nan)
  5471. def _get_stats_info(self, h, k):
  5472. condlist = [
  5473. np.logical_and(h < 0, k >= 0),
  5474. k < 0,
  5475. ]
  5476. def f0(h, k):
  5477. return (-1.0/h*k).astype(int)
  5478. def f1(h, k):
  5479. return (-1.0/k).astype(int)
  5480. return _lazyselect(condlist, [f0, f1], [h, k], default=5)
  5481. def _stats(self, h, k):
  5482. maxr = self._get_stats_info(h, k)
  5483. outputs = [None if np.any(r < maxr) else np.nan for r in range(1, 5)]
  5484. return outputs[:]
  5485. def _mom1_sc(self, m, *args):
  5486. maxr = self._get_stats_info(args[0], args[1])
  5487. if m >= maxr:
  5488. return np.nan
  5489. return integrate.quad(self._mom_integ1, 0, 1, args=(m,)+args)[0]
  5490. kappa4 = kappa4_gen(name='kappa4')
  5491. class kappa3_gen(rv_continuous):
  5492. r"""Kappa 3 parameter distribution.
  5493. %(before_notes)s
  5494. Notes
  5495. -----
  5496. The probability density function for `kappa3` is:
  5497. .. math::
  5498. f(x, a) = a (a + x^a)^{-(a + 1)/a}
  5499. for :math:`x > 0` and :math:`a > 0`.
  5500. `kappa3` takes ``a`` as a shape parameter for :math:`a`.
  5501. References
  5502. ----------
  5503. P.W. Mielke and E.S. Johnson, "Three-Parameter Kappa Distribution Maximum
  5504. Likelihood and Likelihood Ratio Tests", Methods in Weather Research,
  5505. 701-707, (September, 1973),
  5506. :doi:`10.1175/1520-0493(1973)101<0701:TKDMLE>2.3.CO;2`
  5507. B. Kumphon, "Maximum Entropy and Maximum Likelihood Estimation for the
  5508. Three-Parameter Kappa Distribution", Open Journal of Statistics, vol 2,
  5509. 415-419 (2012), :doi:`10.4236/ojs.2012.24050`
  5510. %(after_notes)s
  5511. %(example)s
  5512. """
  5513. def _shape_info(self):
  5514. return [_ShapeInfo("a", False, (0, np.inf), (False, False))]
  5515. def _pdf(self, x, a):
  5516. # kappa3.pdf(x, a) = a*(a + x**a)**(-(a + 1)/a), for x > 0
  5517. return a*(a + x**a)**(-1.0/a-1)
  5518. def _cdf(self, x, a):
  5519. return x*(a + x**a)**(-1.0/a)
  5520. def _sf(self, x, a):
  5521. x, a = np.broadcast_arrays(x, a) # some code paths pass scalars
  5522. sf = super()._sf(x, a)
  5523. # When the SF is small, another formulation is typically more accurate.
  5524. # However, it blows up for large `a`, so use it only if it also returns
  5525. # a small value of the SF.
  5526. cutoff = 0.01
  5527. i = sf < cutoff
  5528. sf2 = -sc.expm1(sc.xlog1py(-1.0 / a[i], a[i] * x[i]**-a[i]))
  5529. i2 = sf2 > cutoff
  5530. sf2[i2] = sf[i][i2] # replace bad values with original values
  5531. sf[i] = sf2
  5532. return sf
  5533. def _ppf(self, q, a):
  5534. return (a/(q**-a - 1.0))**(1.0/a)
  5535. def _isf(self, q, a):
  5536. lg = sc.xlog1py(-a, -q)
  5537. denom = sc.expm1(lg)
  5538. return (a / denom)**(1.0 / a)
  5539. def _stats(self, a):
  5540. outputs = [None if np.any(i < a) else np.nan for i in range(1, 5)]
  5541. return outputs[:]
  5542. def _mom1_sc(self, m, *args):
  5543. if np.any(m >= args[0]):
  5544. return np.nan
  5545. return integrate.quad(self._mom_integ1, 0, 1, args=(m,)+args)[0]
  5546. kappa3 = kappa3_gen(a=0.0, name='kappa3')
  5547. class moyal_gen(rv_continuous):
  5548. r"""A Moyal continuous random variable.
  5549. %(before_notes)s
  5550. Notes
  5551. -----
  5552. The probability density function for `moyal` is:
  5553. .. math::
  5554. f(x) = \exp(-(x + \exp(-x))/2) / \sqrt{2\pi}
  5555. for a real number :math:`x`.
  5556. %(after_notes)s
  5557. This distribution has utility in high-energy physics and radiation
  5558. detection. It describes the energy loss of a charged relativistic
  5559. particle due to ionization of the medium [1]_. It also provides an
  5560. approximation for the Landau distribution. For an in depth description
  5561. see [2]_. For additional description, see [3]_.
  5562. References
  5563. ----------
  5564. .. [1] J.E. Moyal, "XXX. Theory of ionization fluctuations",
  5565. The London, Edinburgh, and Dublin Philosophical Magazine
  5566. and Journal of Science, vol 46, 263-280, (1955).
  5567. :doi:`10.1080/14786440308521076` (gated)
  5568. .. [2] G. Cordeiro et al., "The beta Moyal: A useful skew distribution",
  5569. International Journal of Research and Reviews in Applied Sciences,
  5570. vol 10, 171-192, (2012).
  5571. https://www.arpapress.com/files/volumes/vol10issue2/ijrras_10_2_02.pdf
  5572. .. [3] C. Walck, "Handbook on Statistical Distributions for
  5573. Experimentalists; International Report SUF-PFY/96-01", Chapter 26,
  5574. University of Stockholm: Stockholm, Sweden, (2007).
  5575. http://www.stat.rice.edu/~dobelman/textfiles/DistributionsHandbook.pdf
  5576. .. versionadded:: 1.1.0
  5577. %(example)s
  5578. """
  5579. def _shape_info(self):
  5580. return []
  5581. def _rvs(self, size=None, random_state=None):
  5582. u1 = gamma.rvs(a=0.5, scale=2, size=size,
  5583. random_state=random_state)
  5584. return -np.log(u1)
  5585. def _pdf(self, x):
  5586. return np.exp(-0.5 * (x + np.exp(-x))) / np.sqrt(2*np.pi)
  5587. def _cdf(self, x):
  5588. return sc.erfc(np.exp(-0.5 * x) / np.sqrt(2))
  5589. def _sf(self, x):
  5590. return sc.erf(np.exp(-0.5 * x) / np.sqrt(2))
  5591. def _ppf(self, x):
  5592. return -np.log(2 * sc.erfcinv(x)**2)
  5593. def _stats(self):
  5594. mu = np.log(2) + np.euler_gamma
  5595. mu2 = np.pi**2 / 2
  5596. g1 = 28 * np.sqrt(2) * sc.zeta(3) / np.pi**3
  5597. g2 = 4.
  5598. return mu, mu2, g1, g2
  5599. def _munp(self, n):
  5600. if n == 1.0:
  5601. return np.log(2) + np.euler_gamma
  5602. elif n == 2.0:
  5603. return np.pi**2 / 2 + (np.log(2) + np.euler_gamma)**2
  5604. elif n == 3.0:
  5605. tmp1 = 1.5 * np.pi**2 * (np.log(2)+np.euler_gamma)
  5606. tmp2 = (np.log(2)+np.euler_gamma)**3
  5607. tmp3 = 14 * sc.zeta(3)
  5608. return tmp1 + tmp2 + tmp3
  5609. elif n == 4.0:
  5610. tmp1 = 4 * 14 * sc.zeta(3) * (np.log(2) + np.euler_gamma)
  5611. tmp2 = 3 * np.pi**2 * (np.log(2) + np.euler_gamma)**2
  5612. tmp3 = (np.log(2) + np.euler_gamma)**4
  5613. tmp4 = 7 * np.pi**4 / 4
  5614. return tmp1 + tmp2 + tmp3 + tmp4
  5615. else:
  5616. # return generic for higher moments
  5617. # return rv_continuous._mom1_sc(self, n, b)
  5618. return self._mom1_sc(n)
  5619. moyal = moyal_gen(name="moyal")
  5620. class nakagami_gen(rv_continuous):
  5621. r"""A Nakagami continuous random variable.
  5622. %(before_notes)s
  5623. Notes
  5624. -----
  5625. The probability density function for `nakagami` is:
  5626. .. math::
  5627. f(x, \nu) = \frac{2 \nu^\nu}{\Gamma(\nu)} x^{2\nu-1} \exp(-\nu x^2)
  5628. for :math:`x >= 0`, :math:`\nu > 0`. The distribution was introduced in
  5629. [2]_, see also [1]_ for further information.
  5630. `nakagami` takes ``nu`` as a shape parameter for :math:`\nu`.
  5631. %(after_notes)s
  5632. References
  5633. ----------
  5634. .. [1] "Nakagami distribution", Wikipedia
  5635. https://en.wikipedia.org/wiki/Nakagami_distribution
  5636. .. [2] M. Nakagami, "The m-distribution - A general formula of intensity
  5637. distribution of rapid fading", Statistical methods in radio wave
  5638. propagation, Pergamon Press, 1960, 3-36.
  5639. :doi:`10.1016/B978-0-08-009306-2.50005-4`
  5640. %(example)s
  5641. """
  5642. def _argcheck(self, nu):
  5643. return nu > 0
  5644. def _shape_info(self):
  5645. return [_ShapeInfo("nu", False, (0, np.inf), (False, False))]
  5646. def _pdf(self, x, nu):
  5647. return np.exp(self._logpdf(x, nu))
  5648. def _logpdf(self, x, nu):
  5649. # nakagami.pdf(x, nu) = 2 * nu**nu / gamma(nu) *
  5650. # x**(2*nu-1) * exp(-nu*x**2)
  5651. return (np.log(2) + sc.xlogy(nu, nu) - sc.gammaln(nu) +
  5652. sc.xlogy(2*nu - 1, x) - nu*x**2)
  5653. def _cdf(self, x, nu):
  5654. return sc.gammainc(nu, nu*x*x)
  5655. def _ppf(self, q, nu):
  5656. return np.sqrt(1.0/nu*sc.gammaincinv(nu, q))
  5657. def _sf(self, x, nu):
  5658. return sc.gammaincc(nu, nu*x*x)
  5659. def _isf(self, p, nu):
  5660. return np.sqrt(1/nu * sc.gammainccinv(nu, p))
  5661. def _stats(self, nu):
  5662. mu = sc.poch(nu, 0.5)/np.sqrt(nu)
  5663. mu2 = 1.0-mu*mu
  5664. g1 = mu * (1 - 4*nu*mu2) / 2.0 / nu / np.power(mu2, 1.5)
  5665. g2 = -6*mu**4*nu + (8*nu-2)*mu**2-2*nu + 1
  5666. g2 /= nu*mu2**2.0
  5667. return mu, mu2, g1, g2
  5668. def _entropy(self, nu):
  5669. shape = np.shape(nu)
  5670. # because somehow this isn't taken care of by the infrastructure...
  5671. nu = np.atleast_1d(nu)
  5672. A = sc.gammaln(nu)
  5673. B = nu - (nu - 0.5) * sc.digamma(nu)
  5674. C = -0.5 * np.log(nu) - np.log(2)
  5675. h = A + B + C
  5676. # This is the asymptotic sum of A and B (see gh-17868)
  5677. norm_entropy = stats.norm._entropy()
  5678. # Above, this is lost to rounding error for large nu, so use the
  5679. # asymptotic sum when the approximation becomes accurate
  5680. i = nu > 5e4 # roundoff error ~ approximation error
  5681. # -1 / (12 * nu) is the O(1/nu) term; see gh-17929
  5682. h[i] = C[i] + norm_entropy - 1/(12*nu[i])
  5683. return h.reshape(shape)[()]
  5684. def _rvs(self, nu, size=None, random_state=None):
  5685. # this relationship can be found in [1] or by a direct calculation
  5686. return np.sqrt(random_state.standard_gamma(nu, size=size) / nu)
  5687. def _fitstart(self, data, args=None):
  5688. if isinstance(data, CensoredData):
  5689. data = data._uncensor()
  5690. if args is None:
  5691. args = (1.0,) * self.numargs
  5692. # Analytical justified estimates
  5693. # see: https://docs.scipy.org/doc/scipy/reference/tutorial/stats/continuous_nakagami.html
  5694. loc = np.min(data)
  5695. scale = np.sqrt(np.sum((data - loc)**2) / len(data))
  5696. return args + (loc, scale)
  5697. nakagami = nakagami_gen(a=0.0, name="nakagami")
  5698. # The function name ncx2 is an abbreviation for noncentral chi squared.
  5699. def _ncx2_log_pdf(x, df, nc):
  5700. # We use (xs**2 + ns**2)/2 = (xs - ns)**2/2 + xs*ns, and include the
  5701. # factor of exp(-xs*ns) into the ive function to improve numerical
  5702. # stability at large values of xs. See also `rice.pdf`.
  5703. df2 = df/2.0 - 1.0
  5704. xs, ns = np.sqrt(x), np.sqrt(nc)
  5705. res = sc.xlogy(df2/2.0, x/nc) - 0.5*(xs - ns)**2
  5706. corr = sc.ive(df2, xs*ns) / 2.0
  5707. # Return res + np.log(corr) avoiding np.log(0)
  5708. return xpx.apply_where(
  5709. corr > 0,
  5710. (res, corr),
  5711. lambda r, c: r + np.log(c),
  5712. fill_value=-np.inf)
  5713. class ncx2_gen(rv_continuous):
  5714. r"""A non-central chi-squared continuous random variable.
  5715. %(before_notes)s
  5716. Notes
  5717. -----
  5718. The probability density function for `ncx2` is:
  5719. .. math::
  5720. f(x, k, \lambda) = \frac{1}{2} \exp(-(\lambda+x)/2)
  5721. (x/\lambda)^{(k-2)/4} I_{(k-2)/2}(\sqrt{\lambda x})
  5722. for :math:`x >= 0`, :math:`k > 0` and :math:`\lambda \ge 0`.
  5723. :math:`k` specifies the degrees of freedom (denoted ``df`` in the
  5724. implementation) and :math:`\lambda` is the non-centrality parameter
  5725. (denoted ``nc`` in the implementation). :math:`I_\nu` denotes the
  5726. modified Bessel function of first order of degree :math:`\nu`
  5727. (`scipy.special.iv`).
  5728. `ncx2` takes ``df`` and ``nc`` as shape parameters.
  5729. This distribution uses routines from the Boost Math C++ library for
  5730. the computation of the ``pdf``, ``cdf``, ``ppf``, ``sf`` and ``isf``
  5731. methods. [1]_
  5732. %(after_notes)s
  5733. References
  5734. ----------
  5735. .. [1] The Boost Developers. "Boost C++ Libraries". https://www.boost.org/.
  5736. %(example)s
  5737. """
  5738. def _argcheck(self, df, nc):
  5739. return (df > 0) & np.isfinite(df) & (nc >= 0)
  5740. def _shape_info(self):
  5741. idf = _ShapeInfo("df", False, (0, np.inf), (False, False))
  5742. inc = _ShapeInfo("nc", False, (0, np.inf), (True, False))
  5743. return [idf, inc]
  5744. def _rvs(self, df, nc, size=None, random_state=None):
  5745. return random_state.noncentral_chisquare(df, nc, size)
  5746. def _logpdf(self, x, df, nc):
  5747. return xpx.apply_where(nc != 0, (x, df, nc), _ncx2_log_pdf,
  5748. lambda x, df, _: chi2._logpdf(x, df))
  5749. def _pdf(self, x, df, nc):
  5750. with np.errstate(over='ignore'): # see gh-17432
  5751. return xpx.apply_where(nc != 0, (x, df, nc), scu._ncx2_pdf,
  5752. lambda x, df, _: chi2._pdf(x, df))
  5753. def _cdf(self, x, df, nc):
  5754. with np.errstate(over='ignore'): # see gh-17432
  5755. return xpx.apply_where(nc != 0, (x, df, nc), sc.chndtr,
  5756. lambda x, df, _: chi2._cdf(x, df))
  5757. def _ppf(self, q, df, nc):
  5758. with np.errstate(over='ignore'): # see gh-17432
  5759. return xpx.apply_where(nc != 0, (q, df, nc), sc.chndtrix,
  5760. lambda x, df, _: chi2._ppf(x, df))
  5761. def _sf(self, x, df, nc):
  5762. with np.errstate(over='ignore'): # see gh-17432
  5763. return xpx.apply_where(nc != 0, (x, df, nc), scu._ncx2_sf,
  5764. lambda x, df, _: chi2._sf(x, df))
  5765. def _isf(self, x, df, nc):
  5766. with np.errstate(over='ignore'): # see gh-17432
  5767. return xpx.apply_where(nc != 0, (x, df, nc), scu._ncx2_isf,
  5768. lambda x, df, _: chi2._isf(x, df))
  5769. def _stats(self, df, nc):
  5770. _ncx2_mean = df + nc
  5771. def k_plus_cl(k, l, c):
  5772. return k + c*l
  5773. _ncx2_variance = 2.0 * k_plus_cl(df, nc, 2.0)
  5774. _ncx2_skewness = (np.sqrt(8.0) * k_plus_cl(df, nc, 3) /
  5775. np.sqrt(k_plus_cl(df, nc, 2.0)**3))
  5776. _ncx2_kurtosis_excess = (12.0 * k_plus_cl(df, nc, 4.0) /
  5777. k_plus_cl(df, nc, 2.0)**2)
  5778. return (
  5779. _ncx2_mean,
  5780. _ncx2_variance,
  5781. _ncx2_skewness,
  5782. _ncx2_kurtosis_excess,
  5783. )
  5784. ncx2 = ncx2_gen(a=0.0, name='ncx2')
  5785. class ncf_gen(rv_continuous):
  5786. r"""A non-central F distribution continuous random variable.
  5787. %(before_notes)s
  5788. See Also
  5789. --------
  5790. scipy.stats.f : Fisher distribution
  5791. Notes
  5792. -----
  5793. The probability density function for `ncf` is:
  5794. .. math::
  5795. f(x, n_1, n_2, \lambda) =
  5796. \exp\left(\frac{\lambda}{2} +
  5797. \lambda n_1 \frac{x}{2(n_1 x + n_2)}
  5798. \right)
  5799. n_1^{n_1/2} n_2^{n_2/2} x^{n_1/2 - 1} \\
  5800. (n_2 + n_1 x)^{-(n_1 + n_2)/2}
  5801. \gamma(n_1/2) \gamma(1 + n_2/2) \\
  5802. \frac{L^{\frac{n_1}{2}-1}_{n_2/2}
  5803. \left(-\lambda n_1 \frac{x}{2(n_1 x + n_2)}\right)}
  5804. {B(n_1/2, n_2/2)
  5805. \gamma\left(\frac{n_1 + n_2}{2}\right)}
  5806. for :math:`n_1, n_2 > 0`, :math:`\lambda \ge 0`. Here :math:`n_1` is the
  5807. degrees of freedom in the numerator, :math:`n_2` the degrees of freedom in
  5808. the denominator, :math:`\lambda` the non-centrality parameter,
  5809. :math:`\gamma` is the logarithm of the Gamma function, :math:`L_n^k` is a
  5810. generalized Laguerre polynomial and :math:`B` is the beta function.
  5811. `ncf` takes ``dfn``, ``dfd`` and ``nc`` as shape parameters. If ``nc=0``,
  5812. the distribution becomes equivalent to the Fisher distribution.
  5813. This distribution uses routines from the Boost Math C++ library for
  5814. the computation of the ``pdf``, ``cdf``, ``ppf``, ``stats``, ``sf`` and
  5815. ``isf`` methods. [1]_
  5816. %(after_notes)s
  5817. References
  5818. ----------
  5819. .. [1] The Boost Developers. "Boost C++ Libraries". https://www.boost.org/.
  5820. %(example)s
  5821. """
  5822. def _argcheck(self, dfn, dfd, nc):
  5823. return (dfn > 0) & (dfd > 0) & (nc >= 0)
  5824. def _shape_info(self):
  5825. idf1 = _ShapeInfo("dfn", False, (0, np.inf), (False, False))
  5826. idf2 = _ShapeInfo("dfd", False, (0, np.inf), (False, False))
  5827. inc = _ShapeInfo("nc", False, (0, np.inf), (True, False))
  5828. return [idf1, idf2, inc]
  5829. def _rvs(self, dfn, dfd, nc, size=None, random_state=None):
  5830. return random_state.noncentral_f(dfn, dfd, nc, size)
  5831. def _pdf(self, x, dfn, dfd, nc):
  5832. return scu._ncf_pdf(x, dfn, dfd, nc)
  5833. def _cdf(self, x, dfn, dfd, nc):
  5834. return sc.ncfdtr(dfn, dfd, nc, x)
  5835. def _ppf(self, q, dfn, dfd, nc):
  5836. with np.errstate(over='ignore'): # see gh-17432
  5837. return sc.ncfdtri(dfn, dfd, nc, q)
  5838. def _sf(self, x, dfn, dfd, nc):
  5839. return scu._ncf_sf(x, dfn, dfd, nc)
  5840. def _isf(self, x, dfn, dfd, nc):
  5841. with np.errstate(over='ignore'): # see gh-17432
  5842. return scu._ncf_isf(x, dfn, dfd, nc)
  5843. # # Produces bogus values as written - maybe it's close, though?
  5844. # def _munp(self, n, dfn, dfd, nc):
  5845. # val = (dfn * 1.0/dfd)**n
  5846. # term = sc.gammaln(n+0.5*dfn) + sc.gammaln(0.5*dfd-n) - sc.gammaln(dfd*0.5)
  5847. # val *= np.exp(-nc / 2.0+term)
  5848. # val *= sc.hyp1f1(n+0.5*dfn, 0.5*dfn, 0.5*nc)
  5849. # return val
  5850. def _stats(self, dfn, dfd, nc, moments='mv'):
  5851. mu = scu._ncf_mean(dfn, dfd, nc)
  5852. mu2 = scu._ncf_variance(dfn, dfd, nc)
  5853. g1 = scu._ncf_skewness(dfn, dfd, nc) if 's' in moments else None
  5854. g2 = scu._ncf_kurtosis_excess( # isn't really excess kurtosis!
  5855. dfn, dfd, nc) - 3 if 'k' in moments else None
  5856. # Mathematica: Kurtosis[NoncentralFRatioDistribution[27, 27, 0.415784417992261]]
  5857. return mu, mu2, g1, g2
  5858. ncf = ncf_gen(a=0.0, name='ncf')
  5859. class t_gen(rv_continuous):
  5860. r"""A Student's t continuous random variable.
  5861. For the noncentral t distribution, see `nct`.
  5862. %(before_notes)s
  5863. See Also
  5864. --------
  5865. nct
  5866. Notes
  5867. -----
  5868. The probability density function for `t` is:
  5869. .. math::
  5870. f(x, \nu) = \frac{\Gamma((\nu+1)/2)}
  5871. {\sqrt{\pi \nu} \Gamma(\nu/2)}
  5872. (1+x^2/\nu)^{-(\nu+1)/2}
  5873. where :math:`x` is a real number and the degrees of freedom parameter
  5874. :math:`\nu` (denoted ``df`` in the implementation) satisfies
  5875. :math:`\nu > 0`. :math:`\Gamma` is the gamma function
  5876. (`scipy.special.gamma`).
  5877. %(after_notes)s
  5878. %(example)s
  5879. """
  5880. def _shape_info(self):
  5881. return [_ShapeInfo("df", False, (0, np.inf), (False, False))]
  5882. def _rvs(self, df, size=None, random_state=None):
  5883. return random_state.standard_t(df, size=size)
  5884. def _pdf(self, x, df):
  5885. return xpx.apply_where(
  5886. df == np.inf, (x, df),
  5887. lambda x, df: norm._pdf(x),
  5888. lambda x, df: np.exp(self._logpdf(x, df)))
  5889. def _logpdf(self, x, df):
  5890. def t_logpdf(x, df):
  5891. return (np.log(sc.poch(0.5 * df, 0.5))
  5892. - 0.5 * (np.log(df) + np.log(np.pi))
  5893. - (df + 1)/2*np.log1p(x * x/df))
  5894. def norm_logpdf(x, df):
  5895. return norm._logpdf(x)
  5896. return xpx.apply_where(df == np.inf, (x, df), norm_logpdf, t_logpdf)
  5897. def _cdf(self, x, df):
  5898. return sc.stdtr(df, x)
  5899. def _sf(self, x, df):
  5900. return sc.stdtr(df, -x)
  5901. def _ppf(self, q, df):
  5902. return sc.stdtrit(df, q)
  5903. def _isf(self, q, df):
  5904. return -sc.stdtrit(df, q)
  5905. def _stats(self, df):
  5906. # infinite df -> normal distribution (0.0, 1.0, 0.0, 0.0)
  5907. infinite_df = np.isposinf(df)
  5908. mu = np.where(df > 1, 0.0, np.inf)
  5909. condlist = ((df > 1) & (df <= 2),
  5910. (df > 2) & np.isfinite(df),
  5911. infinite_df)
  5912. choicelist = (lambda df: np.broadcast_to(np.inf, df.shape),
  5913. lambda df: df / (df-2.0),
  5914. lambda df: np.broadcast_to(1, df.shape))
  5915. mu2 = _lazyselect(condlist, choicelist, (df,), np.nan)
  5916. g1 = np.where(df > 3, 0.0, np.nan)
  5917. condlist = ((df > 2) & (df <= 4),
  5918. (df > 4) & np.isfinite(df),
  5919. infinite_df)
  5920. choicelist = (lambda df: np.broadcast_to(np.inf, df.shape),
  5921. lambda df: 6.0 / (df-4.0),
  5922. lambda df: np.broadcast_to(0, df.shape))
  5923. g2 = _lazyselect(condlist, choicelist, (df,), np.nan)
  5924. return mu, mu2, g1, g2
  5925. def _entropy(self, df):
  5926. if df == np.inf:
  5927. return norm._entropy()
  5928. def regular(df):
  5929. half = df/2
  5930. half1 = (df + 1)/2
  5931. return (half1*(sc.digamma(half1) - sc.digamma(half))
  5932. + np.log(np.sqrt(df)*sc.beta(half, 0.5)))
  5933. def asymptotic(df):
  5934. # Formula from Wolfram Alpha:
  5935. # "asymptotic expansion (d+1)/2 * (digamma((d+1)/2) - digamma(d/2))
  5936. # + log(sqrt(d) * beta(d/2, 1/2))"
  5937. h = (norm._entropy() + 1/df + (df**-2.)/4 - (df**-3.)/6
  5938. - (df**-4.)/8 + 3/10*(df**-5.) + (df**-6.)/4)
  5939. return h
  5940. return xpx.apply_where(df >= 100, df, asymptotic, regular)
  5941. t = t_gen(name='t')
  5942. class nct_gen(rv_continuous):
  5943. r"""A non-central Student's t continuous random variable.
  5944. %(before_notes)s
  5945. Notes
  5946. -----
  5947. If :math:`Y` is a standard normal random variable and :math:`V` is
  5948. an independent chi-square random variable (`chi2`) with :math:`k` degrees
  5949. of freedom, then
  5950. .. math::
  5951. X = \frac{Y + c}{\sqrt{V/k}}
  5952. has a non-central Student's t distribution on the real line.
  5953. The degrees of freedom parameter :math:`k` (denoted ``df`` in the
  5954. implementation) satisfies :math:`k > 0` and the noncentrality parameter
  5955. :math:`c` (denoted ``nc`` in the implementation) is a real number.
  5956. This distribution uses routines from the Boost Math C++ library for
  5957. the computation of the ``pdf``, ``cdf``, ``ppf``, ``sf`` and ``isf``
  5958. methods. [1]_
  5959. %(after_notes)s
  5960. References
  5961. ----------
  5962. .. [1] The Boost Developers. "Boost C++ Libraries". https://www.boost.org/.
  5963. %(example)s
  5964. """
  5965. def _argcheck(self, df, nc):
  5966. return (df > 0) & (nc == nc)
  5967. def _shape_info(self):
  5968. idf = _ShapeInfo("df", False, (0, np.inf), (False, False))
  5969. inc = _ShapeInfo("nc", False, (-np.inf, np.inf), (False, False))
  5970. return [idf, inc]
  5971. def _rvs(self, df, nc, size=None, random_state=None):
  5972. n = norm.rvs(loc=nc, size=size, random_state=random_state)
  5973. c2 = chi2.rvs(df, size=size, random_state=random_state)
  5974. return n * np.sqrt(df) / np.sqrt(c2)
  5975. def _pdf(self, x, df, nc):
  5976. return scu._nct_pdf(x, df, nc)
  5977. def _cdf(self, x, df, nc):
  5978. return sc.nctdtr(df, nc, x)
  5979. def _ppf(self, q, df, nc):
  5980. return sc.nctdtrit(df, nc, q)
  5981. def _sf(self, x, df, nc):
  5982. with np.errstate(over='ignore'): # see gh-17432
  5983. return np.clip(scu._nct_sf(x, df, nc), 0, 1)
  5984. def _isf(self, x, df, nc):
  5985. with np.errstate(over='ignore'): # see gh-17432
  5986. return scu._nct_isf(x, df, nc)
  5987. def _stats(self, df, nc, moments='mv'):
  5988. mu = scu._nct_mean(df, nc)
  5989. mu2 = scu._nct_variance(df, nc)
  5990. g1 = scu._nct_skewness(df, nc) if 's' in moments else None
  5991. g2 = scu._nct_kurtosis_excess(df, nc) if 'k' in moments else None
  5992. return mu, mu2, g1, g2
  5993. nct = nct_gen(name="nct")
  5994. class pareto_gen(rv_continuous):
  5995. r"""A Pareto continuous random variable.
  5996. %(before_notes)s
  5997. Notes
  5998. -----
  5999. The probability density function for `pareto` is:
  6000. .. math::
  6001. f(x, b) = \frac{b}{x^{b+1}}
  6002. for :math:`x \ge 1`, :math:`b > 0`.
  6003. `pareto` takes ``b`` as a shape parameter for :math:`b`.
  6004. %(after_notes)s
  6005. %(example)s
  6006. """
  6007. def _shape_info(self):
  6008. return [_ShapeInfo("b", False, (0, np.inf), (False, False))]
  6009. def _pdf(self, x, b):
  6010. # pareto.pdf(x, b) = b / x**(b+1)
  6011. return b * x**(-b-1)
  6012. def _cdf(self, x, b):
  6013. return 1 - x**(-b)
  6014. def _ppf(self, q, b):
  6015. return pow(1-q, -1.0/b)
  6016. def _sf(self, x, b):
  6017. return x**(-b)
  6018. def _isf(self, q, b):
  6019. return np.power(q, -1.0 / b)
  6020. def _stats(self, b, moments='mv'):
  6021. mu, mu2, g1, g2 = None, None, None, None
  6022. if 'm' in moments:
  6023. mask = b > 1
  6024. bt = np.extract(mask, b)
  6025. mu = np.full(np.shape(b), fill_value=np.inf)
  6026. np.place(mu, mask, bt / (bt-1.0))
  6027. if 'v' in moments:
  6028. mask = b > 2
  6029. bt = np.extract(mask, b)
  6030. mu2 = np.full(np.shape(b), fill_value=np.inf)
  6031. np.place(mu2, mask, bt / (bt-2.0) / (bt-1.0)**2)
  6032. if 's' in moments:
  6033. mask = b > 3
  6034. bt = np.extract(mask, b)
  6035. g1 = np.full(np.shape(b), fill_value=np.nan)
  6036. vals = 2 * (bt + 1.0) * np.sqrt(bt - 2.0) / ((bt - 3.0) * np.sqrt(bt))
  6037. np.place(g1, mask, vals)
  6038. if 'k' in moments:
  6039. mask = b > 4
  6040. bt = np.extract(mask, b)
  6041. g2 = np.full(np.shape(b), fill_value=np.nan)
  6042. vals = (6.0*np.polyval([1.0, 1.0, -6, -2], bt) /
  6043. np.polyval([1.0, -7.0, 12.0, 0.0], bt))
  6044. np.place(g2, mask, vals)
  6045. return mu, mu2, g1, g2
  6046. def _entropy(self, b):
  6047. return 1 + 1.0/b - np.log(b)
  6048. @_call_super_mom
  6049. @inherit_docstring_from(rv_continuous)
  6050. def fit(self, data, *args, **kwds):
  6051. parameters = _check_fit_input_parameters(self, data, args, kwds)
  6052. data, fshape, floc, fscale = parameters
  6053. # ensure that any fixed parameters don't violate constraints of the
  6054. # distribution before continuing.
  6055. if floc is not None and np.min(data) - floc < (fscale or 0):
  6056. raise FitDataError("pareto", lower=1, upper=np.inf)
  6057. ndata = data.shape[0]
  6058. def get_shape(scale, location):
  6059. # The first-order necessary condition on `shape` can be solved in
  6060. # closed form
  6061. return ndata / np.sum(np.log((data - location) / scale))
  6062. if floc is fscale is None:
  6063. # The support of the distribution is `(x - loc)/scale > 0`.
  6064. # The method of Lagrange multipliers turns this constraint
  6065. # into an equation that can be solved numerically.
  6066. # See gh-12545 for details.
  6067. def dL_dScale(shape, scale):
  6068. # The partial derivative of the log-likelihood function w.r.t.
  6069. # the scale.
  6070. return ndata * shape / scale
  6071. def dL_dLocation(shape, location):
  6072. # The partial derivative of the log-likelihood function w.r.t.
  6073. # the location.
  6074. return (shape + 1) * np.sum(1 / (data - location))
  6075. def fun_to_solve(scale):
  6076. # optimize the scale by setting the partial derivatives
  6077. # w.r.t. to location and scale equal and solving.
  6078. location = np.min(data) - scale
  6079. shape = fshape or get_shape(scale, location)
  6080. return dL_dLocation(shape, location) - dL_dScale(shape, scale)
  6081. def interval_contains_root(lbrack, rbrack):
  6082. # return true if the signs disagree.
  6083. return (np.sign(fun_to_solve(lbrack)) !=
  6084. np.sign(fun_to_solve(rbrack)))
  6085. # set brackets for `root_scalar` to use when optimizing over the
  6086. # scale such that a root is likely between them. Use user supplied
  6087. # guess or default 1.
  6088. brack_start = float(kwds.get('scale', 1))
  6089. lbrack, rbrack = brack_start / 2, brack_start * 2
  6090. # if a root is not between the brackets, iteratively expand them
  6091. # until they include a sign change, checking after each bracket is
  6092. # modified.
  6093. while (not interval_contains_root(lbrack, rbrack)
  6094. and (lbrack > 0 or rbrack < np.inf)):
  6095. lbrack /= 2
  6096. rbrack *= 2
  6097. res = root_scalar(fun_to_solve, bracket=[lbrack, rbrack])
  6098. if res.converged:
  6099. scale = res.root
  6100. loc = np.min(data) - scale
  6101. shape = fshape or get_shape(scale, loc)
  6102. # The Pareto distribution requires that its parameters satisfy
  6103. # the condition `fscale + floc <= min(data)`. However, to
  6104. # avoid numerical issues, we require that `fscale + floc`
  6105. # is strictly less than `min(data)`. If this condition
  6106. # is not satisfied, reduce the scale with `np.nextafter` to
  6107. # ensure that data does not fall outside of the support.
  6108. if not (scale + loc) < np.min(data):
  6109. scale = np.min(data) - loc
  6110. scale = np.nextafter(scale, 0)
  6111. return shape, loc, scale
  6112. else:
  6113. return super().fit(data, **kwds)
  6114. elif floc is None:
  6115. loc = np.min(data) - fscale
  6116. else:
  6117. loc = floc
  6118. # Source: Evans, Hastings, and Peacock (2000), Statistical
  6119. # Distributions, 3rd. Ed., John Wiley and Sons. Page 149.
  6120. scale = fscale or np.min(data) - loc
  6121. shape = fshape or get_shape(scale, loc)
  6122. return shape, loc, scale
  6123. pareto = pareto_gen(a=1.0, name="pareto")
  6124. class lomax_gen(rv_continuous):
  6125. r"""A Lomax (Pareto of the second kind) continuous random variable.
  6126. %(before_notes)s
  6127. Notes
  6128. -----
  6129. The probability density function for `lomax` is:
  6130. .. math::
  6131. f(x, c) = \frac{c}{(1+x)^{c+1}}
  6132. for :math:`x \ge 0`, :math:`c > 0`.
  6133. `lomax` takes ``c`` as a shape parameter for :math:`c`.
  6134. `lomax` is a special case of `pareto` with ``loc=-1.0``.
  6135. %(after_notes)s
  6136. %(example)s
  6137. """
  6138. def _shape_info(self):
  6139. return [_ShapeInfo("c", False, (0, np.inf), (False, False))]
  6140. def _pdf(self, x, c):
  6141. # lomax.pdf(x, c) = c / (1+x)**(c+1)
  6142. return c*1.0/(1.0+x)**(c+1.0)
  6143. def _logpdf(self, x, c):
  6144. return np.log(c) - (c+1)*sc.log1p(x)
  6145. def _cdf(self, x, c):
  6146. return -sc.expm1(-c*sc.log1p(x))
  6147. def _sf(self, x, c):
  6148. return np.exp(-c*sc.log1p(x))
  6149. def _logsf(self, x, c):
  6150. return -c*sc.log1p(x)
  6151. def _ppf(self, q, c):
  6152. return sc.expm1(-sc.log1p(-q)/c)
  6153. def _isf(self, q, c):
  6154. return q**(-1.0 / c) - 1
  6155. def _stats(self, c):
  6156. mu, mu2, g1, g2 = pareto.stats(c, loc=-1.0, moments='mvsk')
  6157. return mu, mu2, g1, g2
  6158. def _entropy(self, c):
  6159. return 1+1.0/c-np.log(c)
  6160. lomax = lomax_gen(a=0.0, name="lomax")
  6161. class pearson3_gen(rv_continuous):
  6162. r"""A pearson type III continuous random variable.
  6163. %(before_notes)s
  6164. Notes
  6165. -----
  6166. The probability density function for `pearson3` is:
  6167. .. math::
  6168. f(x, \kappa) = \frac{|\beta|}{\Gamma(\alpha)}
  6169. (\beta (x - \zeta))^{\alpha - 1}
  6170. \exp(-\beta (x - \zeta))
  6171. where:
  6172. .. math::
  6173. \beta = \frac{2}{\kappa}
  6174. \alpha = \beta^2 = \frac{4}{\kappa^2}
  6175. \zeta = -\frac{\alpha}{\beta} = -\beta
  6176. :math:`\Gamma` is the gamma function (`scipy.special.gamma`).
  6177. Pass the skew :math:`\kappa` into `pearson3` as the shape parameter
  6178. ``skew``.
  6179. %(after_notes)s
  6180. %(example)s
  6181. References
  6182. ----------
  6183. R.W. Vogel and D.E. McMartin, "Probability Plot Goodness-of-Fit and
  6184. Skewness Estimation Procedures for the Pearson Type 3 Distribution", Water
  6185. Resources Research, Vol.27, 3149-3158 (1991).
  6186. L.R. Salvosa, "Tables of Pearson's Type III Function", Ann. Math. Statist.,
  6187. Vol.1, 191-198 (1930).
  6188. "Using Modern Computing Tools to Fit the Pearson Type III Distribution to
  6189. Aviation Loads Data", Office of Aviation Research (2003).
  6190. """
  6191. def _preprocess(self, x, skew):
  6192. # The real 'loc' and 'scale' are handled in the calling pdf(...). The
  6193. # local variables 'loc' and 'scale' within pearson3._pdf are set to
  6194. # the defaults just to keep them as part of the equations for
  6195. # documentation.
  6196. loc = 0.0
  6197. scale = 1.0
  6198. # If skew is small, return _norm_pdf. The divide between pearson3
  6199. # and norm was found by brute force and is approximately a skew of
  6200. # 0.000016. No one, I hope, would actually use a skew value even
  6201. # close to this small.
  6202. norm2pearson_transition = 0.000016
  6203. ans, x, skew = np.broadcast_arrays(1.0, x, skew)
  6204. ans = ans.copy()
  6205. # mask is True where skew is small enough to use the normal approx.
  6206. mask = np.absolute(skew) < norm2pearson_transition
  6207. invmask = ~mask
  6208. beta = 2.0 / (skew[invmask] * scale)
  6209. alpha = (scale * beta)**2
  6210. zeta = loc - alpha / beta
  6211. transx = beta * (x[invmask] - zeta)
  6212. return ans, x, transx, mask, invmask, beta, alpha, zeta
  6213. def _argcheck(self, skew):
  6214. # The _argcheck function in rv_continuous only allows positive
  6215. # arguments. The skew argument for pearson3 can be zero (which I want
  6216. # to handle inside pearson3._pdf) or negative. So just return True
  6217. # for all skew args.
  6218. return np.isfinite(skew)
  6219. def _shape_info(self):
  6220. return [_ShapeInfo("skew", False, (-np.inf, np.inf), (False, False))]
  6221. def _stats(self, skew):
  6222. m = 0.0
  6223. v = 1.0
  6224. s = skew
  6225. k = 1.5*skew**2
  6226. return m, v, s, k
  6227. def _pdf(self, x, skew):
  6228. # pearson3.pdf(x, skew) = abs(beta) / gamma(alpha) *
  6229. # (beta * (x - zeta))**(alpha - 1) * exp(-beta*(x - zeta))
  6230. # Do the calculation in _logpdf since helps to limit
  6231. # overflow/underflow problems
  6232. ans = np.exp(self._logpdf(x, skew))
  6233. if ans.ndim == 0:
  6234. if np.isnan(ans):
  6235. return 0.0
  6236. return ans
  6237. ans[np.isnan(ans)] = 0.0
  6238. return ans
  6239. def _logpdf(self, x, skew):
  6240. # PEARSON3 logpdf GAMMA logpdf
  6241. # np.log(abs(beta))
  6242. # + (alpha - 1)*np.log(beta*(x - zeta)) + (a - 1)*np.log(x)
  6243. # - beta*(x - zeta) - x
  6244. # - sc.gammalnalpha) - sc.gammalna)
  6245. ans, x, transx, mask, invmask, beta, alpha, _ = (
  6246. self._preprocess(x, skew))
  6247. ans[mask] = np.log(_norm_pdf(x[mask]))
  6248. # use logpdf instead of _logpdf to fix issue mentioned in gh-12640
  6249. # (_logpdf does not return correct result for alpha = 1)
  6250. ans[invmask] = np.log(abs(beta)) + gamma.logpdf(transx, alpha)
  6251. return ans
  6252. def _cdf(self, x, skew):
  6253. ans, x, transx, mask, invmask, _, alpha, _ = (
  6254. self._preprocess(x, skew))
  6255. ans[mask] = _norm_cdf(x[mask])
  6256. skew = np.broadcast_to(skew, invmask.shape)
  6257. invmask1a = np.logical_and(invmask, skew > 0)
  6258. invmask1b = skew[invmask] > 0
  6259. # use cdf instead of _cdf to fix issue mentioned in gh-12640
  6260. # (_cdf produces NaNs for inputs outside support)
  6261. ans[invmask1a] = gamma.cdf(transx[invmask1b], alpha[invmask1b])
  6262. # The gamma._cdf approach wasn't working with negative skew.
  6263. # Note that multiplying the skew by -1 reflects about x=0.
  6264. # So instead of evaluating the CDF with negative skew at x,
  6265. # evaluate the SF with positive skew at -x.
  6266. invmask2a = np.logical_and(invmask, skew < 0)
  6267. invmask2b = skew[invmask] < 0
  6268. # gamma._sf produces NaNs when transx < 0, so use gamma.sf
  6269. ans[invmask2a] = gamma.sf(transx[invmask2b], alpha[invmask2b])
  6270. return ans
  6271. def _sf(self, x, skew):
  6272. ans, x, transx, mask, invmask, _, alpha, _ = (
  6273. self._preprocess(x, skew))
  6274. ans[mask] = _norm_sf(x[mask])
  6275. skew = np.broadcast_to(skew, invmask.shape)
  6276. invmask1a = np.logical_and(invmask, skew > 0)
  6277. invmask1b = skew[invmask] > 0
  6278. ans[invmask1a] = gamma.sf(transx[invmask1b], alpha[invmask1b])
  6279. invmask2a = np.logical_and(invmask, skew < 0)
  6280. invmask2b = skew[invmask] < 0
  6281. ans[invmask2a] = gamma.cdf(transx[invmask2b], alpha[invmask2b])
  6282. return ans
  6283. def _rvs(self, skew, size=None, random_state=None):
  6284. skew = np.broadcast_to(skew, size)
  6285. ans, _, _, mask, invmask, beta, alpha, zeta = (
  6286. self._preprocess([0], skew))
  6287. nsmall = mask.sum()
  6288. nbig = mask.size - nsmall
  6289. ans[mask] = random_state.standard_normal(nsmall)
  6290. ans[invmask] = random_state.standard_gamma(alpha, nbig)/beta + zeta
  6291. if size == ():
  6292. ans = ans[0]
  6293. return ans
  6294. def _ppf(self, q, skew):
  6295. ans, q, _, mask, invmask, beta, alpha, zeta = (
  6296. self._preprocess(q, skew))
  6297. ans[mask] = _norm_ppf(q[mask])
  6298. q = q[invmask]
  6299. q[beta < 0] = 1 - q[beta < 0] # for negative skew; see gh-17050
  6300. ans[invmask] = sc.gammaincinv(alpha, q)/beta + zeta
  6301. return ans
  6302. @_call_super_mom
  6303. @extend_notes_in_docstring(rv_continuous, notes="""\
  6304. Note that method of moments (`method='MM'`) is not
  6305. available for this distribution.\n\n""")
  6306. def fit(self, data, *args, **kwds):
  6307. if kwds.get("method", None) == 'MM':
  6308. raise NotImplementedError("Fit `method='MM'` is not available for "
  6309. "the Pearson3 distribution. Please try "
  6310. "the default `method='MLE'`.")
  6311. else:
  6312. return super(type(self), self).fit(data, *args, **kwds)
  6313. pearson3 = pearson3_gen(name="pearson3")
  6314. class powerlaw_gen(rv_continuous):
  6315. r"""A power-function continuous random variable.
  6316. %(before_notes)s
  6317. See Also
  6318. --------
  6319. pareto
  6320. Notes
  6321. -----
  6322. The probability density function for `powerlaw` is:
  6323. .. math::
  6324. f(x, a) = a x^{a-1}
  6325. for :math:`0 \le x \le 1`, :math:`a > 0`.
  6326. `powerlaw` takes ``a`` as a shape parameter for :math:`a`.
  6327. %(after_notes)s
  6328. For example, the support of `powerlaw` can be adjusted from the default
  6329. interval ``[0, 1]`` to the interval ``[c, c+d]`` by setting ``loc=c`` and
  6330. ``scale=d``. For a power-law distribution with infinite support, see
  6331. `pareto`. For a power-law distribution described by PDF:
  6332. .. math::
  6333. f(x; a, l, h) = \frac{a}{h^a - l^2} x^{a-1}
  6334. with :math:`a \neq 0` and :math:`0 < l < x < h`, see `truncpareto`.
  6335. `powerlaw` is a special case of `beta` with ``b=1``.
  6336. %(example)s
  6337. """
  6338. def _shape_info(self):
  6339. return [_ShapeInfo("a", False, (0, np.inf), (False, False))]
  6340. def _pdf(self, x, a):
  6341. # powerlaw.pdf(x, a) = a * x**(a-1)
  6342. return a*x**(a-1.0)
  6343. def _logpdf(self, x, a):
  6344. return np.log(a) + sc.xlogy(a - 1, x)
  6345. def _cdf(self, x, a):
  6346. return x**(a*1.0)
  6347. def _logcdf(self, x, a):
  6348. return a*np.log(x)
  6349. def _ppf(self, q, a):
  6350. return pow(q, 1.0/a)
  6351. def _sf(self, p, a):
  6352. return -sc.powm1(p, a)
  6353. def _munp(self, n, a):
  6354. # The following expression is correct for all real n (provided a > 0).
  6355. return a / (a + n)
  6356. def _stats(self, a):
  6357. return (a / (a + 1.0),
  6358. a / (a + 2.0) / (a + 1.0) ** 2,
  6359. -2.0 * ((a - 1.0) / (a + 3.0)) * np.sqrt((a + 2.0) / a),
  6360. 6 * np.polyval([1, -1, -6, 2], a) / (a * (a + 3.0) * (a + 4)))
  6361. def _entropy(self, a):
  6362. return 1 - 1.0/a - np.log(a)
  6363. def _support_mask(self, x, a):
  6364. return (super()._support_mask(x, a)
  6365. & ((x != 0) | (a >= 1)))
    @_call_super_mom
    @extend_notes_in_docstring(rv_continuous, notes="""\
        Notes specifically for ``powerlaw.fit``: If the location is a free
        parameter and the value returned for the shape parameter is less than
        one, the true maximum likelihood approaches infinity. This causes
        numerical difficulties, and the resulting estimates are approximate.
        \n\n""")
    def fit(self, data, *args, **kwds):
        # Fit shape, location and scale, using closed-form expressions where
        # they exist and a one-dimensional root search otherwise.
        #
        # Summary of the strategy:
        #
        # 1) If the scale and location are fixed, return the shape according
        #    to a formula.
        #
        # 2) If the scale is fixed, there are two possibilities for the other
        #    parameters - one corresponding with shape less than one, and
        #    another with shape greater than one. Calculate both, and return
        #    whichever has the better log-likelihood.
        #
        # At this point, the scale is known to be free.
        #
        # 3) If the location is fixed, return the scale and shape according to
        #    formulas (or, if the shape is fixed, the fixed shape).
        #
        # At this point, the location and scale are both free. There are
        # separate equations depending on whether the shape is less than one or
        # greater than one.
        #
        # 4a) If the shape is less than one, there are formulas for shape,
        #     location, and scale.
        # 4b) If the shape is greater than one, there are formulas for shape
        #     and scale, but there is a condition for location to be solved
        #     numerically.
        #
        # If the shape is fixed and less than one, we use 4a.
        # If the shape is fixed and greater than one, we use 4b.
        # If the shape is also free, we calculate fits using both 4a and 4b
        # and choose the one that results in a better log-likelihood.
        #
        # In many cases, `np.nextafter` is used to avoid numerical issues.

        # `superfit=True` bypasses everything below and defers to the parent
        # class's `fit`.
        if kwds.pop('superfit', False):
            return super().fit(data, *args, **kwds)

        # Data consisting of a single distinct value is also handed to the
        # parent implementation.
        if len(np.unique(data)) == 1:
            return super().fit(data, *args, **kwds)

        data, fshape, floc, fscale = _check_fit_input_parameters(self, data,
                                                                 args, kwds)
        # Second element returned by `_reduce_func`; used below as the
        # objective for comparing candidate fits (smaller is preferred).
        penalized_nllf_args = [data, (self._fitstart(data),)]
        penalized_nllf = self._reduce_func(penalized_nllf_args, {})[1]

        # ensure that any fixed parameters don't violate constraints of the
        # distribution before continuing. The support of the distribution
        # is `0 < (x - loc)/scale < 1`.
        if floc is not None:
            if not data.min() > floc:
                raise FitDataError('powerlaw', 0, 1)
            if fscale is not None and not data.max() <= floc + fscale:
                raise FitDataError('powerlaw', 0, 1)

        if fscale is not None:
            if fscale <= 0:
                raise ValueError("Negative or zero `fscale` is outside the "
                                 "range allowed by the distribution.")
            if fscale <= np.ptp(data):
                msg = "`fscale` must be greater than the range of data."
                raise ValueError(msg)

        def get_shape(data, loc, scale):
            # The first-order necessary condition on `shape` can be solved in
            # closed form. It can be used no matter the assumption of the
            # value of the shape.
            N = len(data)
            return - N / (np.sum(np.log(data - loc)) - N*np.log(scale))

        def get_scale(data, loc):
            # analytical solution for `scale` based on the location.
            # It can be used no matter the assumption of the value of the
            # shape.
            return data.max() - loc

        # 1) The location and scale are both fixed. Analytically determine the
        # shape.
        if fscale is not None and floc is not None:
            return get_shape(data, floc, fscale), floc, fscale

        # 2) The scale is fixed. There are two possibilities for the other
        # parameters. Choose the option with better log-likelihood.
        if fscale is not None:
            # using `data.min()` as the optimal location (nudged one float
            # below so that the smallest observation stays inside the support)
            loc_lt1 = np.nextafter(data.min(), -np.inf)
            shape_lt1 = fshape or get_shape(data, loc_lt1, fscale)
            ll_lt1 = penalized_nllf((shape_lt1, loc_lt1, fscale), data)

            # using `data.max() - scale` as the optimal location (nudged one
            # float above)
            loc_gt1 = np.nextafter(data.max() - fscale, np.inf)
            shape_gt1 = fshape or get_shape(data, loc_gt1, fscale)
            ll_gt1 = penalized_nllf((shape_gt1, loc_gt1, fscale), data)

            # The candidate with the smaller objective value wins.
            if ll_lt1 < ll_gt1:
                return shape_lt1, loc_lt1, fscale
            else:
                return shape_gt1, loc_gt1, fscale

        # 3) The location is fixed. Return the analytical scale and the
        # analytical (or fixed) shape.
        if floc is not None:
            scale = get_scale(data, floc)
            shape = fshape or get_shape(data, floc, scale)
            return shape, floc, scale

        # 4) Location and scale are both free
        # 4a) Use formulas that assume `shape <= 1`.

        def fit_loc_scale_w_shape_lt_1():
            loc = np.nextafter(data.min(), -np.inf)
            # Keep `loc` at least `tiny` in magnitude, preserving its sign.
            if np.abs(loc) < np.finfo(loc.dtype).tiny:
                loc = np.sign(loc) * np.finfo(loc.dtype).tiny
            scale = np.nextafter(get_scale(data, loc), np.inf)
            shape = fshape or get_shape(data, loc, scale)
            return shape, loc, scale

        # 4b) Fit under the assumption that `shape > 1`. The support
        # of the distribution is `(x - loc)/scale <= 1`. The method of Lagrange
        # multipliers turns this constraint into the condition that
        # dL_dScale - dL_dLocation must be zero, which is solved numerically.
        # (Alternatively, substitute the constraint into the objective
        # function before deriving the likelihood equation for location.)

        def dL_dScale(data, shape, scale):
            # The partial derivative of the log-likelihood function w.r.t.
            # the scale.
            return -data.shape[0] * shape / scale

        def dL_dLocation(data, shape, loc):
            # The partial derivative of the log-likelihood function w.r.t.
            # the location.
            return (shape - 1) * np.sum(1 / (loc - data))  # -1/(data-loc)

        def dL_dLocation_star(loc):
            # The derivative of the log-likelihood function w.r.t.
            # the location, given optimal shape and scale
            scale = np.nextafter(get_scale(data, loc), -np.inf)
            shape = fshape or get_shape(data, loc, scale)
            return dL_dLocation(data, shape, loc)

        def fun_to_solve(loc):
            # optimize the location by setting the partial derivatives
            # w.r.t. to location and scale equal and solving.
            scale = np.nextafter(get_scale(data, loc), -np.inf)
            shape = fshape or get_shape(data, loc, scale)
            return (dL_dScale(data, shape, scale)
                    - dL_dLocation(data, shape, loc))

        def fit_loc_scale_w_shape_gt_1():
            # set brackets for `root_scalar` to use when optimizing over the
            # location such that a root is likely between them.
            rbrack = np.nextafter(data.min(), -np.inf)

            # if the sign of `dL_dLocation_star` is positive at rbrack,
            # we're not going to find the root we're looking for, so step the
            # right bracket away from `data.min()`, doubling the step each time
            delta = (data.min() - rbrack)
            while dL_dLocation_star(rbrack) > 0:
                rbrack = data.min() - delta
                delta *= 2

            def interval_contains_root(lbrack, rbrack):
                # Check if the interval (lbrack, rbrack) contains the root.
                return (np.sign(fun_to_solve(lbrack))
                        != np.sign(fun_to_solve(rbrack)))

            lbrack = rbrack - 1

            # if the sign doesn't change between the brackets, move the left
            # bracket until it does. (The right bracket remains fixed at the
            # maximum permissible value.)
            i = 1.0
            while (not interval_contains_root(lbrack, rbrack)
                   and lbrack != -np.inf):
                lbrack = (data.min() - i)
                i *= 2

            root = optimize.root_scalar(fun_to_solve, bracket=(lbrack, rbrack))

            # Nudge the solution outward by one float on each side before
            # computing the shape.
            loc = np.nextafter(root.root, -np.inf)
            scale = np.nextafter(get_scale(data, loc), np.inf)
            shape = fshape or get_shape(data, loc, scale)
            return shape, loc, scale

        # Shape is fixed - choose 4a or 4b accordingly.
        if fshape is not None and fshape <= 1:
            return fit_loc_scale_w_shape_lt_1()
        elif fshape is not None and fshape > 1:
            return fit_loc_scale_w_shape_gt_1()

        # Shape is free
        fit_shape_lt1 = fit_loc_scale_w_shape_lt_1()
        ll_lt1 = self.nnlf(fit_shape_lt1, data)

        fit_shape_gt1 = fit_loc_scale_w_shape_gt_1()
        ll_gt1 = self.nnlf(fit_shape_gt1, data)

        # Accept a candidate only when its shape agrees with the assumption
        # it was derived under; otherwise fall back to the parent's `fit`.
        if ll_lt1 <= ll_gt1 and fit_shape_lt1[0] <= 1:
            return fit_shape_lt1
        elif ll_lt1 > ll_gt1 and fit_shape_gt1[0] > 1:
            return fit_shape_gt1
        else:
            return super().fit(data, *args, **kwds)
# Module-level `powerlaw` distribution object, built with `a=0.0` and
# `b=1.0` (presumably the standardized support bounds - confirm against
# `rv_continuous`).
powerlaw = powerlaw_gen(a=0.0, b=1.0, name="powerlaw")
  6546. class powerlognorm_gen(rv_continuous):
  6547. r"""A power log-normal continuous random variable.
  6548. %(before_notes)s
  6549. Notes
  6550. -----
  6551. The probability density function for `powerlognorm` is:
  6552. .. math::
  6553. f(x, c, s) = \frac{c}{x s} \phi(\log(x)/s)
  6554. (\Phi(-\log(x)/s))^{c-1}
  6555. where :math:`\phi` is the normal pdf, and :math:`\Phi` is the normal cdf,
  6556. and :math:`x > 0`, :math:`s, c > 0`.
  6557. `powerlognorm` takes :math:`c` and :math:`s` as shape parameters.
  6558. %(after_notes)s
  6559. %(example)s
  6560. """
  6561. _support_mask = rv_continuous._open_support_mask
  6562. def _shape_info(self):
  6563. ic = _ShapeInfo("c", False, (0, np.inf), (False, False))
  6564. i_s = _ShapeInfo("s", False, (0, np.inf), (False, False))
  6565. return [ic, i_s]
  6566. def _pdf(self, x, c, s):
  6567. return np.exp(self._logpdf(x, c, s))
  6568. def _logpdf(self, x, c, s):
  6569. return (np.log(c) - np.log(x) - np.log(s) +
  6570. _norm_logpdf(np.log(x) / s) +
  6571. _norm_logcdf(-np.log(x) / s) * (c - 1.))
  6572. def _cdf(self, x, c, s):
  6573. return -sc.expm1(self._logsf(x, c, s))
  6574. def _ppf(self, q, c, s):
  6575. return self._isf(1 - q, c, s)
  6576. def _sf(self, x, c, s):
  6577. return np.exp(self._logsf(x, c, s))
  6578. def _logsf(self, x, c, s):
  6579. return _norm_logcdf(-np.log(x) / s) * c
  6580. def _isf(self, q, c, s):
  6581. return np.exp(-_norm_ppf(q**(1/c)) * s)
  6582. powerlognorm = powerlognorm_gen(a=0.0, name="powerlognorm")
  6583. class powernorm_gen(rv_continuous):
  6584. r"""A power normal continuous random variable.
  6585. %(before_notes)s
  6586. Notes
  6587. -----
  6588. The probability density function for `powernorm` is:
  6589. .. math::
  6590. f(x, c) = c \phi(x) (\Phi(-x))^{c-1}
  6591. where :math:`\phi` is the normal pdf, :math:`\Phi` is the normal cdf,
  6592. :math:`x` is any real, and :math:`c > 0` [1]_.
  6593. `powernorm` takes ``c`` as a shape parameter for :math:`c`.
  6594. %(after_notes)s
  6595. References
  6596. ----------
  6597. .. [1] NIST Engineering Statistics Handbook, Section 1.3.6.6.13,
  6598. https://www.itl.nist.gov/div898/handbook//eda/section3/eda366d.htm
  6599. %(example)s
  6600. """
  6601. def _shape_info(self):
  6602. return [_ShapeInfo("c", False, (0, np.inf), (False, False))]
  6603. def _pdf(self, x, c):
  6604. # powernorm.pdf(x, c) = c * phi(x) * (Phi(-x))**(c-1)
  6605. return c*_norm_pdf(x) * (_norm_cdf(-x)**(c-1.0))
  6606. def _logpdf(self, x, c):
  6607. return np.log(c) + _norm_logpdf(x) + (c-1)*_norm_logcdf(-x)
  6608. def _cdf(self, x, c):
  6609. return -sc.expm1(self._logsf(x, c))
  6610. def _ppf(self, q, c):
  6611. return -_norm_ppf(pow(1.0 - q, 1.0 / c))
  6612. def _sf(self, x, c):
  6613. return np.exp(self._logsf(x, c))
  6614. def _logsf(self, x, c):
  6615. return c * _norm_logcdf(-x)
  6616. def _isf(self, q, c):
  6617. return -_norm_ppf(np.exp(np.log(q) / c))
  6618. powernorm = powernorm_gen(name='powernorm')
  6619. class rdist_gen(rv_continuous):
  6620. r"""An R-distributed (symmetric beta) continuous random variable.
  6621. %(before_notes)s
  6622. Notes
  6623. -----
  6624. The probability density function for `rdist` is:
  6625. .. math::
  6626. f(x, c) = \frac{(1-x^2)^{c/2-1}}{B(1/2, c/2)}
  6627. for :math:`-1 \le x \le 1`, :math:`c > 0`. `rdist` is also called the
  6628. symmetric beta distribution: if B has a `beta` distribution with
  6629. parameters (c/2, c/2), then X = 2*B - 1 follows a R-distribution with
  6630. parameter c.
  6631. `rdist` takes ``c`` as a shape parameter for :math:`c`.
  6632. This distribution includes the following distribution kernels as
  6633. special cases::
  6634. c = 2: uniform
  6635. c = 3: `semicircular`
  6636. c = 4: Epanechnikov (parabolic)
  6637. c = 6: quartic (biweight)
  6638. c = 8: triweight
  6639. %(after_notes)s
  6640. %(example)s
  6641. """
  6642. def _shape_info(self):
  6643. return [_ShapeInfo("c", False, (0, np.inf), (False, False))]
  6644. # use relation to the beta distribution for pdf, cdf, etc
  6645. def _pdf(self, x, c):
  6646. return np.exp(self._logpdf(x, c))
  6647. def _logpdf(self, x, c):
  6648. return -np.log(2) + beta._logpdf((x + 1)/2, c/2, c/2)
  6649. def _cdf(self, x, c):
  6650. return beta._cdf((x + 1)/2, c/2, c/2)
  6651. def _sf(self, x, c):
  6652. return beta._sf((x + 1)/2, c/2, c/2)
  6653. def _ppf(self, q, c):
  6654. return 2*beta._ppf(q, c/2, c/2) - 1
  6655. def _rvs(self, c, size=None, random_state=None):
  6656. return 2 * random_state.beta(c/2, c/2, size) - 1
  6657. def _munp(self, n, c):
  6658. numerator = (1 - (n % 2)) * sc.beta((n + 1.0) / 2, c / 2.0)
  6659. return numerator / sc.beta(1. / 2, c / 2.)
  6660. rdist = rdist_gen(a=-1.0, b=1.0, name="rdist")
  6661. class rayleigh_gen(rv_continuous):
  6662. r"""A Rayleigh continuous random variable.
  6663. %(before_notes)s
  6664. Notes
  6665. -----
  6666. The probability density function for `rayleigh` is:
  6667. .. math::
  6668. f(x) = x \exp(-x^2/2)
  6669. for :math:`x \ge 0`.
  6670. `rayleigh` is a special case of `chi` with ``df=2``.
  6671. %(after_notes)s
  6672. %(example)s
  6673. """
  6674. _support_mask = rv_continuous._open_support_mask
  6675. def _shape_info(self):
  6676. return []
  6677. def _rvs(self, size=None, random_state=None):
  6678. return chi.rvs(2, size=size, random_state=random_state)
  6679. def _pdf(self, r):
  6680. # rayleigh.pdf(r) = r * exp(-r**2/2)
  6681. return np.exp(self._logpdf(r))
  6682. def _logpdf(self, r):
  6683. return np.log(r) - 0.5 * r * r
  6684. def _cdf(self, r):
  6685. return -sc.expm1(-0.5 * r**2)
  6686. def _ppf(self, q):
  6687. return np.sqrt(-2 * sc.log1p(-q))
  6688. def _sf(self, r):
  6689. return np.exp(self._logsf(r))
  6690. def _logsf(self, r):
  6691. return -0.5 * r * r
  6692. def _isf(self, q):
  6693. return np.sqrt(-2 * np.log(q))
  6694. def _stats(self):
  6695. val = 4 - np.pi
  6696. return (np.sqrt(np.pi/2),
  6697. val/2,
  6698. 2*(np.pi-3)*np.sqrt(np.pi)/val**1.5,
  6699. 6*np.pi/val-16/val**2)
  6700. def _entropy(self):
  6701. return _EULER/2.0 + 1 - 0.5*np.log(2)
  6702. @_call_super_mom
  6703. @extend_notes_in_docstring(rv_continuous, notes="""\
  6704. Notes specifically for ``rayleigh.fit``: If the location is fixed with
  6705. the `floc` parameter, this method uses an analytical formula to find
  6706. the scale. Otherwise, this function uses a numerical root finder on
  6707. the first order conditions of the log-likelihood function to find the
  6708. MLE. Only the (optional) `loc` parameter is used as the initial guess
  6709. for the root finder; the `scale` parameter and any other parameters
  6710. for the optimizer are ignored.\n\n""")
  6711. def fit(self, data, *args, **kwds):
  6712. if kwds.pop('superfit', False):
  6713. return super().fit(data, *args, **kwds)
  6714. data, floc, fscale = _check_fit_input_parameters(self, data,
  6715. args, kwds)
  6716. def scale_mle(loc):
  6717. # Source: Statistical Distributions, 3rd Edition. Evans, Hastings,
  6718. # and Peacock (2000), Page 175
  6719. return (np.sum((data - loc) ** 2) / (2 * len(data))) ** .5
  6720. def loc_mle(loc):
  6721. # This implicit equation for `loc` is used when
  6722. # both `loc` and `scale` are free.
  6723. xm = data - loc
  6724. s1 = xm.sum()
  6725. s2 = (xm**2).sum()
  6726. s3 = (1/xm).sum()
  6727. return s1 - s2/(2*len(data))*s3
  6728. def loc_mle_scale_fixed(loc, scale=fscale):
  6729. # This implicit equation for `loc` is used when
  6730. # `scale` is fixed but `loc` is not.
  6731. xm = data - loc
  6732. return xm.sum() - scale**2 * (1/xm).sum()
  6733. if floc is not None:
  6734. # `loc` is fixed, analytically determine `scale`.
  6735. if np.any(data - floc <= 0):
  6736. raise FitDataError("rayleigh", lower=1, upper=np.inf)
  6737. else:
  6738. return floc, scale_mle(floc)
  6739. # Account for user provided guess of `loc`.
  6740. loc0 = kwds.get('loc')
  6741. if loc0 is None:
  6742. # Use _fitstart to estimate loc; ignore the returned scale.
  6743. loc0 = self._fitstart(data)[0]
  6744. fun = loc_mle if fscale is None else loc_mle_scale_fixed
  6745. rbrack = np.nextafter(np.min(data), -np.inf)
  6746. lbrack = _get_left_bracket(fun, rbrack)
  6747. res = optimize.root_scalar(fun, bracket=(lbrack, rbrack))
  6748. if not res.converged:
  6749. raise FitSolverError(res.flag)
  6750. loc = res.root
  6751. scale = fscale or scale_mle(loc)
  6752. return loc, scale
  6753. rayleigh = rayleigh_gen(a=0.0, name="rayleigh")
  6754. class reciprocal_gen(rv_continuous):
  6755. r"""A loguniform or reciprocal continuous random variable.
  6756. %(before_notes)s
  6757. Notes
  6758. -----
  6759. The probability density function for this class is:
  6760. .. math::
  6761. f(x, a, b) = \frac{1}{x \log(b/a)}
  6762. for :math:`a \le x \le b`, :math:`b > a > 0`. This class takes
  6763. :math:`a` and :math:`b` as shape parameters.
  6764. %(after_notes)s
  6765. %(example)s
  6766. This doesn't show the equal probability of ``0.01``, ``0.1`` and
  6767. ``1``. This is best when the x-axis is log-scaled:
  6768. >>> import numpy as np
  6769. >>> import matplotlib.pyplot as plt
  6770. >>> fig, ax = plt.subplots(1, 1)
  6771. >>> ax.hist(np.log10(r))
  6772. >>> ax.set_ylabel("Frequency")
  6773. >>> ax.set_xlabel("Value of random variable")
  6774. >>> ax.xaxis.set_major_locator(plt.FixedLocator([-2, -1, 0]))
  6775. >>> ticks = ["$10^{{ {} }}$".format(i) for i in [-2, -1, 0]]
  6776. >>> ax.set_xticklabels(ticks) # doctest: +SKIP
  6777. >>> plt.show()
  6778. This random variable will be log-uniform regardless of the base chosen for
  6779. ``a`` and ``b``. Let's specify with base ``2`` instead:
  6780. >>> rvs = %(name)s(2**-2, 2**0).rvs(size=1000)
  6781. Values of ``1/4``, ``1/2`` and ``1`` are equally likely with this random
  6782. variable. Here's the histogram:
  6783. >>> fig, ax = plt.subplots(1, 1)
  6784. >>> ax.hist(np.log2(rvs))
  6785. >>> ax.set_ylabel("Frequency")
  6786. >>> ax.set_xlabel("Value of random variable")
  6787. >>> ax.xaxis.set_major_locator(plt.FixedLocator([-2, -1, 0]))
  6788. >>> ticks = ["$2^{{ {} }}$".format(i) for i in [-2, -1, 0]]
  6789. >>> ax.set_xticklabels(ticks) # doctest: +SKIP
  6790. >>> plt.show()
  6791. """
  6792. def _argcheck(self, a, b):
  6793. return (a > 0) & (b > a)
  6794. def _shape_info(self):
  6795. ia = _ShapeInfo("a", False, (0, np.inf), (False, False))
  6796. ib = _ShapeInfo("b", False, (0, np.inf), (False, False))
  6797. return [ia, ib]
  6798. def _fitstart(self, data):
  6799. if isinstance(data, CensoredData):
  6800. data = data._uncensor()
  6801. # Reasonable, since support is [a, b]
  6802. return super()._fitstart(data, args=(np.min(data), np.max(data)))
  6803. def _get_support(self, a, b):
  6804. return a, b
  6805. def _pdf(self, x, a, b):
  6806. # reciprocal.pdf(x, a, b) = 1 / (x*(log(b) - log(a)))
  6807. return np.exp(self._logpdf(x, a, b))
  6808. def _logpdf(self, x, a, b):
  6809. return -np.log(x) - np.log(np.log(b) - np.log(a))
  6810. def _cdf(self, x, a, b):
  6811. return (np.log(x)-np.log(a)) / (np.log(b) - np.log(a))
  6812. def _ppf(self, q, a, b):
  6813. return np.exp(np.log(a) + q*(np.log(b) - np.log(a)))
  6814. def _munp(self, n, a, b):
  6815. if n == 0:
  6816. return 1.0
  6817. t1 = 1 / (np.log(b) - np.log(a)) / n
  6818. t2 = np.real(np.exp(_log_diff(n * np.log(b), n*np.log(a))))
  6819. return t1 * t2
  6820. def _entropy(self, a, b):
  6821. return 0.5*(np.log(a) + np.log(b)) + np.log(np.log(b) - np.log(a))
  6822. fit_note = """\
  6823. `loguniform`/`reciprocal` is over-parameterized. `fit` automatically
  6824. fixes `scale` to 1 unless `fscale` is provided by the user.\n\n"""
  6825. @extend_notes_in_docstring(rv_continuous, notes=fit_note)
  6826. def fit(self, data, *args, **kwds):
  6827. fscale = kwds.pop('fscale', 1)
  6828. return super().fit(data, *args, fscale=fscale, **kwds)
  6829. # Details related to the decision of not defining
  6830. # the survival function for this distribution can be
  6831. # found in the PR: https://github.com/scipy/scipy/pull/18614
  6832. loguniform = reciprocal_gen(name="loguniform")
  6833. reciprocal = reciprocal_gen(name="reciprocal")
  6834. loguniform._support = ('a', 'b')
  6835. reciprocal._support = ('a', 'b')
  6836. class rice_gen(rv_continuous):
  6837. r"""A Rice continuous random variable.
  6838. %(before_notes)s
  6839. Notes
  6840. -----
  6841. The probability density function for `rice` is:
  6842. .. math::
  6843. f(x, b) = x \exp(- \frac{x^2 + b^2}{2}) I_0(x b)
  6844. for :math:`x >= 0`, :math:`b > 0`. :math:`I_0` is the modified Bessel
  6845. function of order zero (`scipy.special.i0`).
  6846. `rice` takes ``b`` as a shape parameter for :math:`b`.
  6847. %(after_notes)s
  6848. The Rice distribution describes the length, :math:`r`, of a 2-D vector with
  6849. components :math:`(U+u, V+v)`, where :math:`U, V` are constant, :math:`u,
  6850. v` are independent Gaussian random variables with standard deviation
  6851. :math:`s`. Let :math:`R = \sqrt{U^2 + V^2}`. Then the pdf of :math:`r` is
  6852. ``rice.pdf(x, R/s, scale=s)``.
  6853. %(example)s
  6854. """
  6855. def _argcheck(self, b):
  6856. return b >= 0
  6857. def _shape_info(self):
  6858. return [_ShapeInfo("b", False, (0, np.inf), (True, False))]
  6859. def _rvs(self, b, size=None, random_state=None):
  6860. # https://en.wikipedia.org/wiki/Rice_distribution
  6861. t = b/np.sqrt(2) + random_state.standard_normal(size=(2,) + size)
  6862. return np.sqrt((t*t).sum(axis=0))
  6863. def _cdf(self, x, b):
  6864. return sc.chndtr(np.square(x), 2, np.square(b))
  6865. def _ppf(self, q, b):
  6866. return np.sqrt(sc.chndtrix(q, 2, np.square(b)))
  6867. def _pdf(self, x, b):
  6868. # rice.pdf(x, b) = x * exp(-(x**2+b**2)/2) * I[0](x*b)
  6869. #
  6870. # We use (x**2 + b**2)/2 = ((x-b)**2)/2 + xb.
  6871. # The factor of np.exp(-xb) is then included in the i0e function
  6872. # in place of the modified Bessel function, i0, improving
  6873. # numerical stability for large values of xb.
  6874. return x * np.exp(-(x-b)*(x-b)/2.0) * sc.i0e(x*b)
  6875. def _munp(self, n, b):
  6876. nd2 = n/2.0
  6877. n1 = 1 + nd2
  6878. b2 = b*b/2.0
  6879. return (2.0**(nd2) * np.exp(-b2) * sc.gamma(n1) *
  6880. sc.hyp1f1(n1, 1, b2))
  6881. rice = rice_gen(a=0.0, name="rice")
  6882. class irwinhall_gen(rv_continuous):
  6883. r"""An Irwin-Hall (Uniform Sum) continuous random variable.
  6884. An `Irwin-Hall <https://en.wikipedia.org/wiki/Irwin-Hall_distribution/>`_
  6885. continuous random variable is the sum of :math:`n` independent
  6886. standard uniform random variables [1]_ [2]_.
  6887. %(before_notes)s
  6888. Notes
  6889. -----
  6890. Applications include `Rao's Spacing Test
  6891. <https://jammalam.faculty.pstat.ucsb.edu/html/favorite/test.htm>`_,
  6892. a more powerful alternative to the Rayleigh test
  6893. when the data are not unimodal, and radar [3]_.
  6894. Conveniently, the pdf and cdf are the :math:`n`-fold convolution of
  6895. the ones for the standard uniform distribution, which is also the
  6896. definition of the cardinal B-splines of degree :math:`n-1`
  6897. having knots evenly spaced from :math:`1` to :math:`n` [4]_ [5]_.
  6898. The Bates distribution, which represents the *mean* of statistically
  6899. independent, uniformly distributed random variables, is simply the
  6900. Irwin-Hall distribution scaled by :math:`1/n`. For example, the frozen
  6901. distribution ``bates = irwinhall(10, scale=1/10)`` represents the
  6902. distribution of the mean of 10 uniformly distributed random variables.
  6903. %(after_notes)s
  6904. References
  6905. ----------
  6906. .. [1] P. Hall, "The distribution of means for samples of size N drawn
  6907. from a population in which the variate takes values between 0 and 1,
  6908. all such values being equally probable",
  6909. Biometrika, Volume 19, Issue 3-4, December 1927, Pages 240-244,
  6910. :doi:`10.1093/biomet/19.3-4.240`.
  6911. .. [2] J. O. Irwin, "On the frequency distribution of the means of samples
  6912. from a population having any law of frequency with finite moments,
  6913. with special reference to Pearson's Type II,
  6914. Biometrika, Volume 19, Issue 3-4, December 1927, Pages 225-239,
  6915. :doi:`0.1093/biomet/19.3-4.225`.
  6916. .. [3] K. Buchanan, T. Adeyemi, C. Flores-Molina, S. Wheeland and D. Overturf,
  6917. "Sidelobe behavior and bandwidth characteristics
  6918. of distributed antenna arrays,"
  6919. 2018 United States National Committee of
  6920. URSI National Radio Science Meeting (USNC-URSI NRSM),
  6921. Boulder, CO, USA, 2018, pp. 1-2.
  6922. https://www.usnc-ursi-archive.org/nrsm/2018/papers/B15-9.pdf.
  6923. .. [4] Amos Ron, "Lecture 1: Cardinal B-splines and convolution operators", p. 1
  6924. https://pages.cs.wisc.edu/~deboor/887/lec1new.pdf.
  6925. .. [5] Trefethen, N. (2012, July). B-splines and convolution. Chebfun.
  6926. Retrieved April 30, 2024, from http://www.chebfun.org/examples/approx/BSplineConv.html.
  6927. %(example)s
  6928. """ # noqa: E501
  6929. @replace_notes_in_docstring(rv_continuous, notes="""\
  6930. Raises a ``NotImplementedError`` for the Irwin-Hall distribution because
  6931. the generic `fit` implementation is unreliable and no custom implementation
  6932. is available. Consider using `scipy.stats.fit`.\n\n""")
  6933. def fit(self, data, *args, **kwds):
  6934. fit_notes = ("The generic `fit` implementation is unreliable for this "
  6935. "distribution, and no custom implementation is available. "
  6936. "Consider using `scipy.stats.fit`.")
  6937. raise NotImplementedError(fit_notes)
  6938. def _argcheck(self, n):
  6939. return (n > 0) & _isintegral(n) & np.isrealobj(n)
  6940. def _get_support(self, n):
  6941. return 0, n
  6942. def _shape_info(self):
  6943. return [_ShapeInfo("n", True, (1, np.inf), (True, False))]
  6944. def _munp(self, order, n):
  6945. # see https://link.springer.com/content/pdf/10.1007/s10959-020-01050-9.pdf
  6946. # page 640, with m=n, j=n+order
  6947. def vmunp(order, n):
  6948. n = np.asarray(n, dtype=np.int64)
  6949. return (sc.stirling2(n+order, n, exact=True)
  6950. / sc.comb(n+order, n, exact=True))
  6951. # exact rationals, but we convert to float anyway
  6952. return np.vectorize(vmunp, otypes=[np.float64])(order, n)
  6953. @staticmethod
  6954. def _cardbspl(n):
  6955. t = np.arange(n+1)
  6956. return BSpline.basis_element(t)
  6957. def _pdf(self, x, n):
  6958. def vpdf(x, n):
  6959. return self._cardbspl(n)(x)
  6960. return np.vectorize(vpdf, otypes=[np.float64])(x, n)
  6961. def _cdf(self, x, n):
  6962. def vcdf(x, n):
  6963. return self._cardbspl(n).antiderivative()(x)
  6964. return np.vectorize(vcdf, otypes=[np.float64])(x, n)
  6965. def _sf(self, x, n):
  6966. def vsf(x, n):
  6967. return self._cardbspl(n).antiderivative()(n-x)
  6968. return np.vectorize(vsf, otypes=[np.float64])(x, n)
  6969. def _rvs(self, n, size=None, random_state=None, *args):
  6970. @_vectorize_rvs_over_shapes
  6971. def _rvs1(n, size=None, random_state=None):
  6972. n = np.floor(n).astype(int)
  6973. usize = (n,) if size is None else (n, *size)
  6974. return random_state.uniform(size=usize).sum(axis=0)
  6975. return _rvs1(n, size=size, random_state=random_state)
  6976. def _stats(self, n):
  6977. # mgf = ((exp(t) - 1)/t)**n
  6978. # m'th derivative follows from the generalized Leibniz rule
  6979. # Moments follow directly from the definition as the sum of n iid unif(0,1)
  6980. # and the summation rules for moments of a sum of iid random variables
  6981. # E(IH((n))) = n*E(U(0,1)) = n/2
  6982. # Var(IH((n))) = n*Var(U(0,1)) = n/12
  6983. # Skew(IH((n))) = Skew(U(0,1))/sqrt(n) = 0
  6984. # Kurt(IH((n))) = Kurt(U(0,1))/n = -6/(5*n) -- Fisher's excess kurtosis
  6985. # See e.g. https://en.wikipedia.org/wiki/Irwin%E2%80%93Hall_distribution
  6986. return n/2, n/12, 0, -6/(5*n)
  6987. irwinhall = irwinhall_gen(name="irwinhall")
  6988. irwinhall._support = (0.0, 'n')
  6989. class recipinvgauss_gen(rv_continuous):
  6990. r"""A reciprocal inverse Gaussian continuous random variable.
  6991. %(before_notes)s
  6992. Notes
  6993. -----
  6994. The probability density function for `recipinvgauss` is:
  6995. .. math::
  6996. f(x, \mu) = \frac{1}{\sqrt{2\pi x}}
  6997. \exp\left(\frac{-(1-\mu x)^2}{2\mu^2x}\right)
  6998. for :math:`x \ge 0`.
  6999. `recipinvgauss` takes ``mu`` as a shape parameter for :math:`\mu`.
  7000. %(after_notes)s
  7001. %(example)s
  7002. """
  7003. def _shape_info(self):
  7004. return [_ShapeInfo("mu", False, (0, np.inf), (False, False))]
  7005. def _pdf(self, x, mu):
  7006. # recipinvgauss.pdf(x, mu) =
  7007. # 1/sqrt(2*pi*x) * exp(-(1-mu*x)**2/(2*x*mu**2))
  7008. return np.exp(self._logpdf(x, mu))
  7009. def _logpdf(self, x, mu):
  7010. return xpx.apply_where(
  7011. x > 0, (x, mu),
  7012. lambda x, mu: (-(1 - mu*x)**2.0 / (2*x*mu**2.0)
  7013. - 0.5*np.log(2*np.pi*x)),
  7014. fill_value=-np.inf)
  7015. def _cdf(self, x, mu):
  7016. trm1 = 1.0/mu - x
  7017. trm2 = 1.0/mu + x
  7018. isqx = 1.0/np.sqrt(x)
  7019. return _norm_cdf(-isqx*trm1) - np.exp(2.0/mu)*_norm_cdf(-isqx*trm2)
  7020. def _sf(self, x, mu):
  7021. trm1 = 1.0/mu - x
  7022. trm2 = 1.0/mu + x
  7023. isqx = 1.0/np.sqrt(x)
  7024. return _norm_cdf(isqx*trm1) + np.exp(2.0/mu)*_norm_cdf(-isqx*trm2)
  7025. def _rvs(self, mu, size=None, random_state=None):
  7026. return 1.0/random_state.wald(mu, 1.0, size=size)
  7027. recipinvgauss = recipinvgauss_gen(a=0.0, name='recipinvgauss')
  7028. class semicircular_gen(rv_continuous):
  7029. r"""A semicircular continuous random variable.
  7030. %(before_notes)s
  7031. See Also
  7032. --------
  7033. rdist
  7034. Notes
  7035. -----
  7036. The probability density function for `semicircular` is:
  7037. .. math::
  7038. f(x) = \frac{2}{\pi} \sqrt{1-x^2}
  7039. for :math:`-1 \le x \le 1`.
  7040. The distribution is a special case of `rdist` with ``c = 3``.
  7041. %(after_notes)s
  7042. References
  7043. ----------
  7044. .. [1] "Wigner semicircle distribution",
  7045. https://en.wikipedia.org/wiki/Wigner_semicircle_distribution
  7046. %(example)s
  7047. """
  7048. def _shape_info(self):
  7049. return []
  7050. def _pdf(self, x):
  7051. return 2.0/np.pi*np.sqrt(1-x*x)
  7052. def _logpdf(self, x):
  7053. return np.log(2/np.pi) + 0.5*sc.log1p(-x*x)
  7054. def _cdf(self, x):
  7055. return 0.5+1.0/np.pi*(x*np.sqrt(1-x*x) + np.arcsin(x))
  7056. def _ppf(self, q):
  7057. return rdist._ppf(q, 3)
  7058. def _rvs(self, size=None, random_state=None):
  7059. # generate values uniformly distributed on the area under the pdf
  7060. # (semi-circle) by randomly generating the radius and angle
  7061. r = np.sqrt(random_state.uniform(size=size))
  7062. a = np.cos(np.pi * random_state.uniform(size=size))
  7063. return r * a
  7064. def _stats(self):
  7065. return 0, 0.25, 0, -1.0
  7066. def _entropy(self):
  7067. return 0.64472988584940017414
  7068. semicircular = semicircular_gen(a=-1.0, b=1.0, name="semicircular")
  7069. class skewcauchy_gen(rv_continuous):
  7070. r"""A skewed Cauchy random variable.
  7071. %(before_notes)s
  7072. See Also
  7073. --------
  7074. cauchy : Cauchy distribution
  7075. Notes
  7076. -----
  7077. The probability density function for `skewcauchy` is:
  7078. .. math::
  7079. f(x) = \frac{1}{\pi \left(\frac{x^2}{\left(a\, \text{sign}(x) + 1
  7080. \right)^2} + 1 \right)}
  7081. for a real number :math:`x` and skewness parameter :math:`-1 < a < 1`.
  7082. When :math:`a=0`, the distribution reduces to the usual Cauchy
  7083. distribution.
  7084. %(after_notes)s
  7085. References
  7086. ----------
  7087. .. [1] "Skewed generalized *t* distribution", Wikipedia
  7088. https://en.wikipedia.org/wiki/Skewed_generalized_t_distribution#Skewed_Cauchy_distribution
  7089. %(example)s
  7090. """
  7091. def _argcheck(self, a):
  7092. return np.abs(a) < 1
  7093. def _shape_info(self):
  7094. return [_ShapeInfo("a", False, (-1.0, 1.0), (False, False))]
  7095. def _pdf(self, x, a):
  7096. return 1 / (np.pi * (x**2 / (a * np.sign(x) + 1)**2 + 1))
  7097. def _cdf(self, x, a):
  7098. return np.where(x <= 0,
  7099. (1 - a) / 2 + (1 - a) / np.pi * np.arctan(x / (1 - a)),
  7100. (1 - a) / 2 + (1 + a) / np.pi * np.arctan(x / (1 + a)))
  7101. def _ppf(self, x, a):
  7102. i = x < self._cdf(0, a)
  7103. return np.where(i,
  7104. np.tan(np.pi / (1 - a) * (x - (1 - a) / 2)) * (1 - a),
  7105. np.tan(np.pi / (1 + a) * (x - (1 - a) / 2)) * (1 + a))
  7106. def _stats(self, a, moments='mvsk'):
  7107. return np.nan, np.nan, np.nan, np.nan
  7108. def _fitstart(self, data):
  7109. # Use 0 as the initial guess of the skewness shape parameter.
  7110. # For the location and scale, estimate using the median and
  7111. # quartiles.
  7112. if isinstance(data, CensoredData):
  7113. data = data._uncensor()
  7114. p25, p50, p75 = np.percentile(data, [25, 50, 75])
  7115. return 0.0, p50, (p75 - p25)/2
# Module-level distribution instance.
skewcauchy = skewcauchy_gen(name='skewcauchy')
class skewnorm_gen(rv_continuous):
    r"""A skew-normal random variable.

    %(before_notes)s

    Notes
    -----
    The pdf is::

        skewnorm.pdf(x, a) = 2 * norm.pdf(x) * norm.cdf(a*x)

    `skewnorm` takes a real number :math:`a` as a skewness parameter.
    When ``a = 0`` the distribution is identical to a normal distribution
    (`norm`). `rvs` implements the method of [1]_.

    This distribution uses routines from the Boost Math C++ library for
    the computation of ``cdf``, ``ppf`` and ``isf`` methods. [2]_

    %(after_notes)s

    References
    ----------
    .. [1] A. Azzalini and A. Capitanio (1999). Statistical applications of
        the multivariate skew-normal distribution. J. Roy. Statist. Soc.,
        B 61, 579-602. :arxiv:`0911.2093`
    .. [2] The Boost Developers. "Boost C++ Libraries". https://www.boost.org/.

    %(example)s

    """
    def _argcheck(self, a):
        # Any finite real value is accepted as the skewness parameter.
        return np.isfinite(a)

    def _shape_info(self):
        return [_ShapeInfo("a", False, (-np.inf, np.inf), (False, False))]

    def _pdf(self, x, a):
        # Where a == 0 the plain normal pdf is used; elsewhere the
        # skew-normal formula 2*pdf(x)*cdf(a*x) from the class docstring.
        return xpx.apply_where(
            a == 0, (x, a),
            lambda x, a: _norm_pdf(x),
            lambda x, a: 2.*_norm_pdf(x)*_norm_cdf(a*x))

    def _logpdf(self, x, a):
        # Log-space counterpart of `_pdf`, with the same a == 0 split.
        return xpx.apply_where(
            a == 0, (x, a),
            lambda x, a: _norm_logpdf(x),
            lambda x, a: np.log(2)+_norm_logpdf(x)+_norm_logcdf(a*x))

    def _cdf(self, x, a):
        a = np.atleast_1d(a)
        cdf = scu._skewnorm_cdf(x, 0.0, 1.0, a)
        # for some reason, a isn't broadcasted if some of x are invalid
        a = np.broadcast_to(a, cdf.shape)
        # Boost is not accurate in left tail when a > 0
        i_small_cdf = (cdf < 1e-6) & (a > 0)
        # Recompute only the flagged entries with the parent-class `_cdf`.
        cdf[i_small_cdf] = super()._cdf(x[i_small_cdf], a[i_small_cdf])
        return np.clip(cdf, 0, 1)

    def _ppf(self, x, a):
        return scu._skewnorm_ppf(x, 0.0, 1.0, a)

    def _sf(self, x, a):
        # Boost's SF is implemented this way. Use whatever customizations
        # we made in the _cdf.
        return self._cdf(-x, -a)

    def _isf(self, x, a):
        return scu._skewnorm_isf(x, 0.0, 1.0, a)

    def _rvs(self, a, size=None, random_state=None):
        # Two independent standard normal draws are combined using
        # d = a/sqrt(1 + a**2); the sign of the result is flipped wherever
        # the first draw is negative.
        u0 = random_state.normal(size=size)
        v = random_state.normal(size=size)
        d = a/np.sqrt(1 + a**2)
        u1 = d*u0 + v*np.sqrt(1 - d**2)
        return np.where(u0 >= 0, u1, -u1)

    def _stats(self, a, moments='mvsk'):
        # Only the statistics requested in `moments` are filled in; the
        # others stay None.
        output = [None, None, None, None]
        const = np.sqrt(2/np.pi) * a/np.sqrt(1 + a**2)

        if 'm' in moments:
            output[0] = const
        if 'v' in moments:
            output[1] = 1 - const**2
        if 's' in moments:
            output[2] = ((4 - np.pi)/2) * (const/np.sqrt(1 - const**2))**3
        if 'k' in moments:
            output[3] = (2*(np.pi - 3)) * (const**4/(1 - const**2)**2)

        return output

    # For odd order, each noncentral moment of the skew-normal distribution
    # with location 0 and scale 1 can be expressed as a polynomial in delta,
    # where delta = a/sqrt(1 + a**2) and `a` is the skew-normal shape
    # parameter.  The dictionary _skewnorm_odd_moments defines those
    # polynomials for orders up to 19.  The dict is implemented as a cached
    # property to reduce the impact of the creation of the dict on import time.
    @cached_property
    def _skewnorm_odd_moments(self):
        skewnorm_odd_moments = {
            1: Polynomial([1]),
            3: Polynomial([3, -1]),
            5: Polynomial([15, -10, 3]),
            7: Polynomial([105, -105, 63, -15]),
            9: Polynomial([945, -1260, 1134, -540, 105]),
            11: Polynomial([10395, -17325, 20790, -14850, 5775, -945]),
            13: Polynomial([135135, -270270, 405405, -386100, 225225, -73710,
                            10395]),
            15: Polynomial([2027025, -4729725, 8513505, -10135125, 7882875,
                            -3869775, 1091475, -135135]),
            17: Polynomial([34459425, -91891800, 192972780, -275675400,
                            268017750, -175429800, 74220300, -18378360,
                            2027025]),
            19: Polynomial([654729075, -1964187225, 4714049340, -7856748900,
                            9166207050, -7499623950, 4230557100, -1571349780,
                            346621275, -34459425]),
        }
        return skewnorm_odd_moments

    def _munp(self, order, a):
        if order % 2:
            if order > 19:
                raise NotImplementedError("skewnorm noncentral moments not "
                                          "implemented for odd orders greater "
                                          "than 19.")
            # Use the precomputed polynomials that were derived from the
            # moment generating function.
            delta = a/np.sqrt(1 + a**2)
            return (delta * self._skewnorm_odd_moments[order](delta**2)
                    * _SQRT_2_OVER_PI)
        else:
            # For even order, the moment is just (order-1)!!, where !! is the
            # notation for the double factorial; for an odd integer m, m!! is
            # m*(m-2)*...*3*1.
            # We could use special.factorial2, but we know the argument is odd,
            # so avoid the overhead of that function and compute the result
            # directly here.
            return sc.gamma((order + 1)/2) * 2**(order/2) / _SQRT_PI

    @extend_notes_in_docstring(rv_continuous, notes="""\
        If ``method='mm'``, parameters fixed by the user are respected, and the
        remaining parameters are used to match distribution and sample moments
        where possible. For example, if the user fixes the location with
        ``floc``, the parameters will only match the distribution skewness and
        variance to the sample skewness and variance; no attempt will be made
        to match the means or minimize a norm of the errors.
        Note that the maximum possible skewness magnitude of a
        `scipy.stats.skewnorm` distribution is approximately 0.9952717; if the
        magnitude of the data's sample skewness exceeds this, the returned
        shape parameter ``a`` will be infinite.
        \n\n""")
    def fit(self, data, *args, **kwds):
        # The "superfit" keyword bypasses everything below and goes straight
        # to the parent-class fit.
        if kwds.pop("superfit", False):
            return super().fit(data, *args, **kwds)
        if isinstance(data, CensoredData):
            # Data wrapped as CensoredData but with no censored values is
            # unwrapped; genuinely censored data goes to the parent-class fit.
            if data.num_censored() == 0:
                data = data._uncensor()
            else:
                return super().fit(data, *args, **kwds)

        # this extracts fixed shape, location, and scale however they
        # are specified, and also leaves them in `kwds`
        data, fa, floc, fscale = _check_fit_input_parameters(self, data,
                                                             args, kwds)
        method = kwds.get("method", "mle").lower()

        # See https://en.wikipedia.org/wiki/Skew_normal_distribution for
        # moment formulas.
        def skew_d(d):  # skewness in terms of delta
            return (4-np.pi)/2 * ((d * np.sqrt(2 / np.pi))**3
                                  / (1 - 2*d**2 / np.pi)**(3/2))
        def d_skew(skew):  # delta in terms of skewness
            s_23 = np.abs(skew)**(2/3)
            return np.sign(skew) * np.sqrt(
                np.pi/2 * s_23 / (s_23 + ((4 - np.pi)/2)**(2/3))
            )

        # If method is method of moments, we don't need the user's guesses.
        # Otherwise, extract the guesses from args and kwds.
        if method == "mm":
            a, loc, scale = None, None, None
        else:
            a = args[0] if len(args) else None
            # NOTE: popping removes the guesses from `kwds`, so they are not
            # passed twice in the final parent-class call below.
            loc = kwds.pop('loc', None)
            scale = kwds.pop('scale', None)

        if fa is None and a is None:  # not fixed and no guess: use MoM
            # Solve for a that matches sample distribution skewness to sample
            # skewness.
            s = stats.skew(data)
            if method == 'mle':
                # For MLE initial conditions, clip skewness to a large but
                # reasonable value in case the data skewness is out-of-range.
                s = np.clip(s, -0.99, 0.99)
            else:
                # skew_d(1) is the skewness at delta = 1, used here as the
                # clipping bound.
                s_max = skew_d(1)
                s = np.clip(s, -s_max, s_max)
            d = d_skew(s)
            # d**2 == 1 divides by zero here; the warning is suppressed and
            # the resulting `a` is infinite.
            with np.errstate(divide='ignore'):
                a = np.sqrt(np.divide(d**2, (1-d**2)))*np.sign(s)
        else:
            # A fixed value takes precedence over a user-supplied guess.
            a = fa if fa is not None else a
            d = a / np.sqrt(1 + a**2)

        if fscale is None and scale is None:
            # Match the sample variance given delta.
            v = np.var(data)
            scale = np.sqrt(v / (1 - 2*d**2/np.pi))
        elif fscale is not None:
            scale = fscale

        if floc is None and loc is None:
            # Match the sample mean given delta and scale.
            m = np.mean(data)
            loc = m - scale*d*np.sqrt(2/np.pi)
        elif floc is not None:
            loc = floc

        if method == 'mm':
            return a, loc, scale
        else:
            # At this point, parameter "guesses" may equal the fixed parameters
            # in kwds. No harm in passing them as guesses, too.
            return super().fit(data, a, loc=loc, scale=scale, **kwds)
# Module-level distribution instance.
skewnorm = skewnorm_gen(name='skewnorm')
  7310. class trapezoid_gen(rv_continuous):
  7311. r"""A trapezoidal continuous random variable.
  7312. %(before_notes)s
  7313. Notes
  7314. -----
  7315. The trapezoidal distribution can be represented with an up-sloping line
  7316. from ``loc`` to ``(loc + c*scale)``, then constant to ``(loc + d*scale)``
  7317. and then downsloping from ``(loc + d*scale)`` to ``(loc+scale)``. This
  7318. defines the trapezoid base from ``loc`` to ``(loc+scale)`` and the flat
  7319. top from ``c`` to ``d`` proportional to the position along the base
  7320. with ``0 <= c <= d <= 1``. When ``c=d``, this is equivalent to `triang`
  7321. with the same values for `loc`, `scale` and `c`.
  7322. The method of [1]_ is used for computing moments.
  7323. `trapezoid` takes :math:`c` and :math:`d` as shape parameters.
  7324. %(after_notes)s
  7325. The standard form is in the range [0, 1] with c the mode.
  7326. The location parameter shifts the start to `loc`.
  7327. The scale parameter changes the width from 1 to `scale`.
  7328. %(example)s
  7329. References
  7330. ----------
  7331. .. [1] Kacker, R.N. and Lawrence, J.F. (2007). Trapezoidal and triangular
  7332. distributions for Type B evaluation of standard uncertainty.
  7333. Metrologia 44, 117-127. :doi:`10.1088/0026-1394/44/2/003`
  7334. """
  7335. def _argcheck(self, c, d):
  7336. return (c >= 0) & (c <= 1) & (d >= 0) & (d <= 1) & (d >= c)
  7337. def _shape_info(self):
  7338. ic = _ShapeInfo("c", False, (0, 1.0), (True, True))
  7339. id = _ShapeInfo("d", False, (0, 1.0), (True, True))
  7340. return [ic, id]
  7341. def _pdf(self, x, c, d):
  7342. u = 2 / (d-c+1)
  7343. return _lazyselect([x < c,
  7344. (c <= x) & (x <= d),
  7345. x > d],
  7346. [lambda x, c, d, u: u * x / c,
  7347. lambda x, c, d, u: u,
  7348. lambda x, c, d, u: u * (1-x) / (1-d)],
  7349. (x, c, d, u))
  7350. def _cdf(self, x, c, d):
  7351. return _lazyselect([x < c,
  7352. (c <= x) & (x <= d),
  7353. x > d],
  7354. [lambda x, c, d: x**2 / c / (d-c+1),
  7355. lambda x, c, d: (c + 2 * (x-c)) / (d-c+1),
  7356. lambda x, c, d: 1-((1-x) ** 2
  7357. / (d-c+1) / (1-d))],
  7358. (x, c, d))
  7359. def _ppf(self, q, c, d):
  7360. qc, qd = self._cdf(c, c, d), self._cdf(d, c, d)
  7361. condlist = [q < qc, q <= qd, q > qd]
  7362. choicelist = [np.sqrt(q * c * (1 + d - c)),
  7363. 0.5 * q * (1 + d - c) + 0.5 * c,
  7364. 1 - np.sqrt((1 - q) * (d - c + 1) * (1 - d))]
  7365. return np.select(condlist, choicelist)
  7366. def _munp(self, n, c, d):
  7367. # Using the parameterization from Kacker, 2007, with
  7368. # a=bottom left, c=top left, d=top right, b=bottom right, then
  7369. # E[X^n] = h/(n+1)/(n+2) [(b^{n+2}-d^{n+2})/(b-d)
  7370. # - ((c^{n+2} - a^{n+2})/(c-a)]
  7371. # with h = 2/((b-a) - (d-c)). The corresponding parameterization
  7372. # in scipy, has a'=loc, c'=loc+c*scale, d'=loc+d*scale, b'=loc+scale,
  7373. # which for standard form reduces to a'=0, b'=1, c'=c, d'=d.
  7374. # Substituting into E[X^n] gives the bd' term as (1 - d^{n+2})/(1 - d)
  7375. # and the ac' term as c^{n-1} for the standard form. The bd' term has
  7376. # numerical difficulties near d=1, so replace (1 - d^{n+2})/(1-d)
  7377. # with expm1((n+2)*log(d))/(d-1).
  7378. # Testing with n=18 for c=(1e-30,1-eps) shows that this is stable.
  7379. # We still require an explicit test for d=1 to prevent divide by zero,
  7380. # and now a test for d=0 to prevent log(0).
  7381. ab_term = c**(n+1)
  7382. dc_term = _lazyselect(
  7383. [d == 0.0, (0.0 < d) & (d < 1.0), d == 1.0],
  7384. [lambda d: 1.0,
  7385. lambda d: np.expm1((n+2) * np.log(d)) / (d-1.0),
  7386. lambda d: n+2],
  7387. [d])
  7388. val = 2.0 / (1.0+d-c) * (dc_term - ab_term) / ((n+1) * (n+2))
  7389. return val
  7390. def _entropy(self, c, d):
  7391. # Using the parameterization from Wikipedia (van Dorp, 2003)
  7392. # with a=bottom left, c=top left, d=top right, b=bottom right
  7393. # gives a'=loc, b'=loc+c*scale, c'=loc+d*scale, d'=loc+scale,
  7394. # which for loc=0, scale=1 is a'=0, b'=c, c'=d, d'=1.
  7395. # Substituting into the entropy formula from Wikipedia gives
  7396. # the following result.
  7397. return 0.5 * (1.0-d+c) / (1.0+d-c) + np.log(0.5 * (1.0+d-c))
  7398. def _fitstart(self, data, args=None):
  7399. # Arbitrary, but c=d=1 fails due to being on edge of bounds
  7400. if args is None:
  7401. args = (0.33, 0.66)
  7402. return super()._fitstart(data, args=args)
# Module-level distribution instance; ``a``/``b`` are passed as 0 and 1,
# matching the documented standard-form range [0, 1].
trapezoid = trapezoid_gen(a=0.0, b=1.0, name="trapezoid")
  7404. class triang_gen(rv_continuous):
  7405. r"""A triangular continuous random variable.
  7406. %(before_notes)s
  7407. Notes
  7408. -----
  7409. The triangular distribution can be represented with an up-sloping line from
  7410. ``loc`` to ``(loc + c*scale)`` and then downsloping for ``(loc + c*scale)``
  7411. to ``(loc + scale)``.
  7412. `triang` takes ``c`` as a shape parameter for :math:`0 \le c \le 1`.
  7413. %(after_notes)s
  7414. The standard form is in the range [0, 1] with c the mode.
  7415. The location parameter shifts the start to `loc`.
  7416. The scale parameter changes the width from 1 to `scale`.
  7417. %(example)s
  7418. """
  7419. def _rvs(self, c, size=None, random_state=None):
  7420. return random_state.triangular(0, c, 1, size)
  7421. def _argcheck(self, c):
  7422. return (c >= 0) & (c <= 1)
  7423. def _shape_info(self):
  7424. return [_ShapeInfo("c", False, (0, 1.0), (True, True))]
  7425. def _pdf(self, x, c):
  7426. # 0: edge case where c=0
  7427. # 1: generalised case for x < c, don't use x <= c, as it doesn't cope
  7428. # with c = 0.
  7429. # 2: generalised case for x >= c, but doesn't cope with c = 1
  7430. # 3: edge case where c=1
  7431. r = _lazyselect([c == 0,
  7432. x < c,
  7433. (x >= c) & (c != 1),
  7434. c == 1],
  7435. [lambda x, c: 2 - 2 * x,
  7436. lambda x, c: 2 * x / c,
  7437. lambda x, c: 2 * (1 - x) / (1 - c),
  7438. lambda x, c: 2 * x],
  7439. (x, c))
  7440. return r
  7441. def _cdf(self, x, c):
  7442. r = _lazyselect([c == 0,
  7443. x < c,
  7444. (x >= c) & (c != 1),
  7445. c == 1],
  7446. [lambda x, c: 2*x - x*x,
  7447. lambda x, c: x * x / c,
  7448. lambda x, c: (x*x - 2*x + c) / (c-1),
  7449. lambda x, c: x * x],
  7450. (x, c))
  7451. return r
  7452. def _ppf(self, q, c):
  7453. return np.where(q < c, np.sqrt(c * q), 1-np.sqrt((1-c) * (1-q)))
  7454. def _stats(self, c):
  7455. return ((c+1.0)/3.0,
  7456. (1.0-c+c*c)/18,
  7457. np.sqrt(2)*(2*c-1)*(c+1)*(c-2) / (5*np.power((1.0-c+c*c), 1.5)),
  7458. -3.0/5.0)
  7459. def _entropy(self, c):
  7460. return 0.5-np.log(2)
# Module-level distribution instance; ``a``/``b`` are passed as 0 and 1,
# matching the documented standard-form range [0, 1].
triang = triang_gen(a=0.0, b=1.0, name="triang")
  7462. class truncexpon_gen(rv_continuous):
  7463. r"""A truncated exponential continuous random variable.
  7464. %(before_notes)s
  7465. Notes
  7466. -----
  7467. The probability density function for `truncexpon` is:
  7468. .. math::
  7469. f(x, b) = \frac{\exp(-x)}{1 - \exp(-b)}
  7470. for :math:`0 <= x <= b`.
  7471. `truncexpon` takes ``b`` as a shape parameter for :math:`b`.
  7472. %(after_notes)s
  7473. %(example)s
  7474. """
  7475. def _shape_info(self):
  7476. return [_ShapeInfo("b", False, (0, np.inf), (False, False))]
  7477. def _get_support(self, b):
  7478. return self.a, b
  7479. def _pdf(self, x, b):
  7480. # truncexpon.pdf(x, b) = exp(-x) / (1-exp(-b))
  7481. return np.exp(-x)/(-sc.expm1(-b))
  7482. def _logpdf(self, x, b):
  7483. return -x - np.log(-sc.expm1(-b))
  7484. def _cdf(self, x, b):
  7485. return sc.expm1(-x)/sc.expm1(-b)
  7486. def _ppf(self, q, b):
  7487. return -sc.log1p(q*sc.expm1(-b))
  7488. def _sf(self, x, b):
  7489. return (np.exp(-b) - np.exp(-x))/sc.expm1(-b)
  7490. def _isf(self, q, b):
  7491. return -np.log(np.exp(-b) - q * sc.expm1(-b))
  7492. def _munp(self, n, b):
  7493. # wrong answer with formula, same as in continuous.pdf
  7494. # return sc.gamman+1)-sc.gammainc1+n, b)
  7495. if n == 1:
  7496. return (1-(b+1)*np.exp(-b))/(-sc.expm1(-b))
  7497. elif n == 2:
  7498. return 2*(1-0.5*(b*b+2*b+2)*np.exp(-b))/(-sc.expm1(-b))
  7499. else:
  7500. # return generic for higher moments
  7501. return super()._munp(n, b)
  7502. def _entropy(self, b):
  7503. eB = np.exp(b)
  7504. return np.log(eB-1)+(1+eB*(b-1.0))/(1.0-eB)
# Module-level distribution instance with lower bound a=0.
truncexpon = truncexpon_gen(a=0.0, name='truncexpon')
# Records the support as (0.0, shape parameter 'b').
# NOTE(review): the consumer of `_support` is not visible in this part of the
# file -- confirm how it is used.
truncexpon._support = (0.0, 'b')
  7507. # logsumexp trick for log(p + q) with only log(p) and log(q)
  7508. def _log_sum(log_p, log_q):
  7509. return sc.logsumexp([log_p, log_q], axis=0)
  7510. # same as above, but using -exp(x) = exp(x + πi)
  7511. def _log_diff(log_p, log_q):
  7512. return sc.logsumexp([log_p, log_q+np.pi*1j], axis=0)
  7513. def _log_gauss_mass(a, b):
  7514. """Log of Gaussian probability mass within an interval"""
  7515. a, b = np.broadcast_arrays(a, b)
  7516. # Calculations in right tail are inaccurate, so we'll exploit the
  7517. # symmetry and work only in the left tail
  7518. case_left = b <= 0
  7519. case_right = a > 0
  7520. case_central = ~(case_left | case_right)
  7521. def mass_case_left(a, b):
  7522. return _log_diff(_norm_logcdf(b), _norm_logcdf(a))
  7523. def mass_case_right(a, b):
  7524. return mass_case_left(-b, -a)
  7525. def mass_case_central(a, b):
  7526. # Previously, this was implemented as:
  7527. # left_mass = mass_case_left(a, 0)
  7528. # right_mass = mass_case_right(0, b)
  7529. # return _log_sum(left_mass, right_mass)
  7530. # Catastrophic cancellation occurs as np.exp(log_mass) approaches 1.
  7531. # Correct for this with an alternative formulation.
  7532. # We're not concerned with underflow here: if only one term
  7533. # underflows, it was insignificant; if both terms underflow,
  7534. # the result can't accurately be represented in logspace anyway
  7535. # because sc.log1p(x) ~ x for small x.
  7536. return sc.log1p(-_norm_cdf(a) - _norm_cdf(-b))
  7537. # _lazyselect not working; don't care to debug it
  7538. out = np.full_like(a, fill_value=np.nan, dtype=np.complex128)
  7539. if a[case_left].size:
  7540. out[case_left] = mass_case_left(a[case_left], b[case_left])
  7541. if a[case_right].size:
  7542. out[case_right] = mass_case_right(a[case_right], b[case_right])
  7543. if a[case_central].size:
  7544. out[case_central] = mass_case_central(a[case_central], b[case_central])
  7545. return np.real(out) # discard ~0j
  7546. class truncnorm_gen(rv_continuous):
  7547. r"""A truncated normal continuous random variable.
  7548. %(before_notes)s
  7549. Notes
  7550. -----
  7551. This distribution is the normal distribution centered on ``loc`` (default
  7552. 0), with standard deviation ``scale`` (default 1), and truncated at ``a``
  7553. and ``b`` *standard deviations* from ``loc``. For arbitrary ``loc`` and
  7554. ``scale``, ``a`` and ``b`` are *not* the abscissae at which the shifted
  7555. and scaled distribution is truncated.
  7556. .. note::
  7557. If ``a_trunc`` and ``b_trunc`` are the abscissae at which we wish
  7558. to truncate the distribution (as opposed to the number of standard
  7559. deviations from ``loc``), then we can calculate the distribution
  7560. parameters ``a`` and ``b`` as follows::
  7561. a, b = (a_trunc - loc) / scale, (b_trunc - loc) / scale
  7562. This is a common point of confusion. For additional clarification,
  7563. please see the example below.
  7564. %(example)s
  7565. In the examples above, ``loc=0`` and ``scale=1``, so the plot is truncated
  7566. at ``a`` on the left and ``b`` on the right. However, suppose we were to
  7567. produce the same histogram with ``loc = 1`` and ``scale=0.5``.
  7568. >>> loc, scale = 1, 0.5
  7569. >>> rv = truncnorm(a, b, loc=loc, scale=scale)
  7570. >>> x = np.linspace(truncnorm.ppf(0.01, a, b),
  7571. ... truncnorm.ppf(0.99, a, b), 100)
  7572. >>> r = rv.rvs(size=1000)
  7573. >>> fig, ax = plt.subplots(1, 1)
  7574. >>> ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')
  7575. >>> ax.hist(r, density=True, bins='auto', histtype='stepfilled', alpha=0.2)
  7576. >>> ax.set_xlim(a, b)
  7577. >>> ax.legend(loc='best', frameon=False)
  7578. >>> plt.show()
  7579. Note that the distribution is no longer appears to be truncated at
  7580. abscissae ``a`` and ``b``. That is because the *standard* normal
  7581. distribution is first truncated at ``a`` and ``b``, *then* the resulting
  7582. distribution is scaled by ``scale`` and shifted by ``loc``. If we instead
  7583. want the shifted and scaled distribution to be truncated at ``a`` and
  7584. ``b``, we need to transform these values before passing them as the
  7585. distribution parameters.
  7586. >>> a_transformed, b_transformed = (a - loc) / scale, (b - loc) / scale
  7587. >>> rv = truncnorm(a_transformed, b_transformed, loc=loc, scale=scale)
  7588. >>> x = np.linspace(truncnorm.ppf(0.01, a, b),
  7589. ... truncnorm.ppf(0.99, a, b), 100)
  7590. >>> r = rv.rvs(size=10000)
  7591. >>> fig, ax = plt.subplots(1, 1)
  7592. >>> ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')
  7593. >>> ax.hist(r, density=True, bins='auto', histtype='stepfilled', alpha=0.2)
  7594. >>> ax.set_xlim(a-0.1, b+0.1)
  7595. >>> ax.legend(loc='best', frameon=False)
  7596. >>> plt.show()
  7597. """
  7598. def _argcheck(self, a, b):
  7599. return a < b
  7600. def _shape_info(self):
  7601. ia = _ShapeInfo("a", False, (-np.inf, np.inf), (True, False))
  7602. ib = _ShapeInfo("b", False, (-np.inf, np.inf), (False, True))
  7603. return [ia, ib]
  7604. def _fitstart(self, data):
  7605. # Reasonable, since support is [a, b]
  7606. if isinstance(data, CensoredData):
  7607. data = data._uncensor()
  7608. return super()._fitstart(data, args=(np.min(data), np.max(data)))
  7609. def _get_support(self, a, b):
  7610. return a, b
  7611. def _pdf(self, x, a, b):
  7612. return np.exp(self._logpdf(x, a, b))
  7613. def _logpdf(self, x, a, b):
  7614. return _norm_logpdf(x) - _log_gauss_mass(a, b)
  7615. def _cdf(self, x, a, b):
  7616. return np.exp(self._logcdf(x, a, b))
  7617. def _logcdf(self, x, a, b):
  7618. x, a, b = np.broadcast_arrays(x, a, b)
  7619. logcdf = np.asarray(_log_gauss_mass(a, x) - _log_gauss_mass(a, b))
  7620. i = logcdf > -0.1 # avoid catastrophic cancellation
  7621. if np.any(i):
  7622. logcdf[i] = np.log1p(-np.exp(self._logsf(x[i], a[i], b[i])))
  7623. return logcdf
  7624. def _sf(self, x, a, b):
  7625. return np.exp(self._logsf(x, a, b))
  7626. def _logsf(self, x, a, b):
  7627. x, a, b = np.broadcast_arrays(x, a, b)
  7628. logsf = np.asarray(_log_gauss_mass(x, b) - _log_gauss_mass(a, b))
  7629. i = logsf > -0.1 # avoid catastrophic cancellation
  7630. if np.any(i):
  7631. logsf[i] = np.log1p(-np.exp(self._logcdf(x[i], a[i], b[i])))
  7632. return logsf
  7633. def _entropy(self, a, b):
  7634. A = _norm_cdf(a)
  7635. B = _norm_cdf(b)
  7636. Z = B - A
  7637. C = np.log(np.sqrt(2 * np.pi * np.e) * Z)
  7638. D = (a * _norm_pdf(a) - b * _norm_pdf(b)) / (2 * Z)
  7639. h = C + D
  7640. return h
  7641. def _ppf(self, q, a, b):
  7642. q, a, b = np.broadcast_arrays(q, a, b)
  7643. case_left = a < 0
  7644. case_right = ~case_left
  7645. def ppf_left(q, a, b):
  7646. log_Phi_x = _log_sum(_norm_logcdf(a),
  7647. np.log(q) + _log_gauss_mass(a, b))
  7648. return sc.ndtri_exp(log_Phi_x)
  7649. def ppf_right(q, a, b):
  7650. log_Phi_x = _log_sum(_norm_logcdf(-b),
  7651. np.log1p(-q) + _log_gauss_mass(a, b))
  7652. return -sc.ndtri_exp(log_Phi_x)
  7653. out = np.empty_like(q)
  7654. q_left = q[case_left]
  7655. q_right = q[case_right]
  7656. if q_left.size:
  7657. out[case_left] = ppf_left(q_left, a[case_left], b[case_left])
  7658. if q_right.size:
  7659. out[case_right] = ppf_right(q_right, a[case_right], b[case_right])
  7660. return out
  7661. def _isf(self, q, a, b):
  7662. # Mostly copy-paste of _ppf, but I think this is simpler than combining
  7663. q, a, b = np.broadcast_arrays(q, a, b)
  7664. case_left = b < 0
  7665. case_right = ~case_left
  7666. def isf_left(q, a, b):
  7667. log_Phi_x = _log_diff(_norm_logcdf(b),
  7668. np.log(q) + _log_gauss_mass(a, b))
  7669. return sc.ndtri_exp(np.real(log_Phi_x))
  7670. def isf_right(q, a, b):
  7671. log_Phi_x = _log_diff(_norm_logcdf(-a),
  7672. np.log1p(-q) + _log_gauss_mass(a, b))
  7673. return -sc.ndtri_exp(np.real(log_Phi_x))
  7674. out = np.empty_like(q)
  7675. q_left = q[case_left]
  7676. q_right = q[case_right]
  7677. if q_left.size:
  7678. out[case_left] = isf_left(q_left, a[case_left], b[case_left])
  7679. if q_right.size:
  7680. out[case_right] = isf_right(q_right, a[case_right], b[case_right])
  7681. return out
  7682. def _munp(self, n, a, b):
  7683. def n_th_moment(n, a, b):
  7684. """
  7685. Returns n-th moment. Defined only if n >= 0.
  7686. Function cannot broadcast due to the loop over n
  7687. """
  7688. ab = np.asarray([a, b])
  7689. pA, pB = self._pdf(ab, a, b)
  7690. probs = np.asarray([pA, -pB])
  7691. cond = probs != 0
  7692. moments = [0, 1]
  7693. for k in range(1, n+1):
  7694. # a or b might be infinite, and the corresponding pdf value
  7695. # is 0 in that case, but nan is returned for the
  7696. # multiplication. However, as b->infinity, pdf(b)*b**k -> 0.
  7697. # So it is safe to use xpx.apply_where to avoid the nan.
  7698. vals = xpx.apply_where(cond, (probs, ab),
  7699. lambda x, y: x * y**(k-1),
  7700. fill_value=0)
  7701. mk = np.sum(vals) + (k-1) * moments[-2]
  7702. moments.append(mk)
  7703. return moments[-1]
  7704. return xpx.apply_where((n >= 0) & (a == a) & (b == b), (n, a, b),
  7705. np.vectorize(n_th_moment, otypes=[np.float64]),
  7706. fill_value=np.nan)
  7707. def _stats(self, a, b, moments='mv'):
  7708. pA, pB = self.pdf(np.array([a, b]), a, b)
  7709. def _truncnorm_stats_scalar(a, b, pA, pB):
  7710. ab = np.asarray([a, b])
  7711. m1 = pA - pB
  7712. mu = m1
  7713. # use xpx.apply_where to avoid nan (See detailed comment in _munp)
  7714. probs = np.asarray([pA, -pB])
  7715. cond = probs != 0
  7716. vals = xpx.apply_where(cond, (probs, ab), lambda x, y: x*y,
  7717. fill_value=0)
  7718. m2 = 1 + np.sum(vals)
  7719. vals = xpx.apply_where(cond, (probs, ab - mu), lambda x, y: x*y,
  7720. fill_value=0)
  7721. # mu2 = m2 - mu**2, but not as numerically stable as:
  7722. # mu2 = (a-mu)*pA - (b-mu)*pB + 1
  7723. mu2 = 1 + np.sum(vals)
  7724. vals = xpx.apply_where(cond, (probs, ab), lambda x, y: x*y**2,
  7725. fill_value=0)
  7726. m3 = 2*m1 + np.sum(vals)
  7727. vals = xpx.apply_where(cond, (probs, ab), lambda x, y: x*y**3,
  7728. fill_value=0)
  7729. m4 = 3*m2 + np.sum(vals)
  7730. mu3 = m3 + m1 * (-3*m2 + 2*m1**2)
  7731. g1 = mu3 / np.power(mu2, 1.5)
  7732. mu4 = m4 + m1*(-4*m3 + 3*m1*(2*m2 - m1**2))
  7733. g2 = mu4 / mu2**2 - 3
  7734. return mu, mu2, g1, g2
  7735. _truncnorm_stats = np.vectorize(_truncnorm_stats_scalar)
  7736. return _truncnorm_stats(a, b, pA, pB)
# Module-level distribution instance, constructed with ``momtype=1``.
truncnorm = truncnorm_gen(name='truncnorm', momtype=1)
# Records the support as the two shape parameters ('a', 'b').
# NOTE(review): the consumer of `_support` is not visible in this part of the
# file -- confirm how it is used.
truncnorm._support = ('a', 'b')
  7739. class truncpareto_gen(rv_continuous):
  7740. r"""An upper truncated Pareto continuous random variable.
  7741. %(before_notes)s
  7742. See Also
  7743. --------
  7744. pareto : Pareto distribution
  7745. Notes
  7746. -----
  7747. The probability density function for `truncpareto` is:
  7748. .. math::
  7749. f(x, b, c) = \frac{b}{1 - c^{-b}} \frac{1}{x^{b+1}}
  7750. for :math:`b \neq 0`, :math:`c > 1` and :math:`1 \le x \le c`.
  7751. `truncpareto` takes `b` and `c` as shape parameters for :math:`b` and
  7752. :math:`c`.
  7753. Notice that the upper truncation value :math:`c` is defined in
  7754. standardized form so that random values of an unscaled, unshifted variable
  7755. are within the range ``[1, c]``.
  7756. If ``u_r`` is the upper bound to a scaled and/or shifted variable,
  7757. then ``c = (u_r - loc) / scale``. In other words, the support of the
  7758. distribution becomes ``(scale + loc) <= x <= (c*scale + loc)`` when
  7759. `scale` and/or `loc` are provided.
  7760. The ``fit`` method assumes that :math:`b` is positive; it does not produce
  7761. good results when the data is more consistent with negative :math:`b`.
  7762. `truncpareto` can also be used to model a general power law distribution
  7763. with PDF:
  7764. .. math::
  7765. f(x; a, l, h) = \frac{a}{h^a - l^a} x^{a-1}
  7766. for :math:`a \neq 0` and :math:`0 < l < x < h`. Suppose :math:`a`,
  7767. :math:`l`, and :math:`h` are represented in code as ``a``, ``l``, and
  7768. ``h``, respectively. In this case, use `truncpareto` with parameters
  7769. ``b = -a``, ``c = h / l``, ``scale = l``, and ``loc = 0``.
  7770. %(after_notes)s
  7771. References
  7772. ----------
  7773. .. [1] Burroughs, S. M., and Tebbens S. F.
  7774. "Upper-truncated power laws in natural systems."
  7775. Pure and Applied Geophysics 158.4 (2001): 741-757.
  7776. %(example)s
  7777. """
  7778. def _shape_info(self):
  7779. ib = _ShapeInfo("b", False, (-np.inf, np.inf), (False, False))
  7780. ic = _ShapeInfo("c", False, (1.0, np.inf), (False, False))
  7781. return [ib, ic]
  7782. def _argcheck(self, b, c):
  7783. return (b != 0.) & (c > 1.)
  7784. def _get_support(self, b, c):
  7785. return self.a, c
  7786. def _pdf(self, x, b, c):
  7787. # here and below, avoid int to negative int power
  7788. x, b, c = xp_promote(x, b, c, force_floating=True, xp=np)
  7789. return b * x**-(b+1) / (1 - 1/c**b)
  7790. def _logpdf(self, x, b, c):
  7791. x, b, c = xp_promote(x, b, c, force_floating=True, xp=np)
  7792. return xpx.apply_where(b > 0, (x, b, c), self._logpdf_pos_b, super()._logpdf)
  7793. def _logpdf_pos_b(self, x, b, c):
  7794. return np.log(b) - np.log(-np.expm1(-b*np.log(c))) - (b+1)*np.log(x)
  7795. def _cdf(self, x, b, c):
  7796. x, b, c = xp_promote(x, b, c, force_floating=True, xp=np)
  7797. return (1 - x**-b) / (1 - 1/c**b)
  7798. def _logcdf(self, x, b, c):
  7799. x, b, c = xp_promote(x, b, c, force_floating=True, xp=np)
  7800. return xpx.apply_where(b > 0, (x, b, c), self._logcdf_pos_b, super()._logcdf)
  7801. def _logcdf_pos_b(self, x, b, c):
  7802. return np.log1p(-x**-b) - np.log1p(-1/c**b)
  7803. def _ppf(self, q, b, c):
  7804. q, b, c = xp_promote(q, b, c, force_floating=True, xp=np)
  7805. return pow(1 - (1 - 1/c**b)*q, -1/b)
  7806. def _sf(self, x, b, c):
  7807. x, b, c = xp_promote(x, b, c, force_floating=True, xp=np)
  7808. return (x**-b - 1/c**b) / (1 - 1/c**b)
  7809. def _logsf(self, x, b, c):
  7810. x, b, c = xp_promote(x, b, c, force_floating=True, xp=np)
  7811. return xpx.apply_where(b > 0, (x, b, c), self._logsf_pos_b, super()._logsf)
  7812. def _logsf_pos_b(self, x, b, c):
  7813. return np.log(x**-b - 1/c**b) - np.log1p(-1/c**b)
  7814. def _isf(self, q, b, c):
  7815. q, b, c = xp_promote(q, b, c, force_floating=True, xp=np)
  7816. return pow(1/c**b + (1 - 1/c**b)*q, -1/b)
  7817. def _entropy(self, b, c):
  7818. return -(np.log(b/(1 - 1/c**b))
  7819. + (b+1)*(np.log(c)/(c**b - 1) - 1/b))
  7820. def _munp(self, n, b, c):
  7821. n, b, c = xp_promote(n, b, c, force_floating=True, xp=np)
  7822. if (n == b).all():
  7823. return b*np.log(c) / (1 - 1/c**b)
  7824. else:
  7825. return b / (b-n) * (c**b - c**n) / (c**b - 1)
  7826. def _fitstart(self, data):
  7827. if isinstance(data, CensoredData):
  7828. data = data._uncensor()
  7829. b, loc, scale = pareto.fit(data)
  7830. c = (max(data) - loc)/scale
  7831. return b, c, loc, scale
  7832. @_call_super_mom
  7833. @inherit_docstring_from(rv_continuous)
  7834. def fit(self, data, *args, **kwds):
  7835. if kwds.pop("superfit", False):
  7836. return super().fit(data, *args, **kwds)
  7837. def log_mean(x):
  7838. return np.mean(np.log(x))
  7839. def harm_mean(x):
  7840. return 1/np.mean(1/x)
  7841. def get_b(c, loc, scale):
  7842. u = (data-loc)/scale
  7843. harm_m = harm_mean(u)
  7844. log_m = log_mean(u)
  7845. quot = (harm_m-1)/log_m
  7846. return (1 - (quot-1) / (quot - (1 - 1/c)*harm_m/np.log(c)))/log_m
  7847. def get_c(loc, scale):
  7848. return (mx - loc)/scale
  7849. def get_loc(fc, fscale):
  7850. if fscale: # (fscale and fc) or (fscale and not fc)
  7851. loc = mn - fscale
  7852. return loc
  7853. if fc:
  7854. loc = (fc*mn - mx)/(fc - 1)
  7855. return loc
  7856. def get_scale(loc):
  7857. return mn - loc
  7858. # Functions used for optimisation; partial derivatives of
  7859. # the Lagrangian, set to equal 0.
  7860. def dL_dLoc(loc, b_=None):
  7861. # Partial derivative wrt location.
  7862. # Optimised upon when no parameters, or only b, are fixed.
  7863. scale = get_scale(loc)
  7864. c = get_c(loc, scale)
  7865. b = get_b(c, loc, scale) if b_ is None else b_
  7866. harm_m = harm_mean((data - loc)/scale)
  7867. return 1 - (1 + (c - 1)/(c**(b+1) - c)) * (1 - 1/(b+1)) * harm_m
  7868. def dL_dB(b, logc, logm):
  7869. # Partial derivative wrt b.
  7870. # Optimised upon whenever at least one parameter but b is fixed,
  7871. # and b is free.
  7872. return b - np.log1p(b*logc / (1 - b*logm)) / logc
  7873. def fallback(data, *args, **kwargs):
  7874. # Should any issue arise, default to the general fit method.
  7875. return super(truncpareto_gen, self).fit(data, *args, **kwargs)
  7876. parameters = _check_fit_input_parameters(self, data, args, kwds)
  7877. data, fb, fc, floc, fscale = parameters
  7878. mn, mx = data.min(), data.max()
  7879. mn_inf = np.nextafter(mn, -np.inf)
  7880. if (fb is not None
  7881. and fc is not None
  7882. and floc is not None
  7883. and fscale is not None):
  7884. raise ValueError("All parameters fixed."
  7885. "There is nothing to optimize.")
  7886. elif fc is None and floc is None and fscale is None:
  7887. if fb is None:
  7888. def cond_b(loc):
  7889. # b is positive only if this function is positive
  7890. scale = get_scale(loc)
  7891. c = get_c(loc, scale)
  7892. harm_m = harm_mean((data - loc)/scale)
  7893. return (1 + 1/(c-1)) * np.log(c) / harm_m - 1
  7894. # This gives an upper bound on loc allowing for a positive b.
  7895. # Iteratively look for a bracket for root_scalar.
  7896. mn_inf = np.nextafter(mn, -np.inf)
  7897. rbrack = mn_inf
  7898. i = 0
  7899. lbrack = rbrack - 1
  7900. while ((lbrack > -np.inf)
  7901. and (cond_b(lbrack)*cond_b(rbrack) >= 0)):
  7902. i += 1
  7903. lbrack = rbrack - np.power(2., i)
  7904. if not lbrack > -np.inf:
  7905. return fallback(data, *args, **kwds)
  7906. res = root_scalar(cond_b, bracket=(lbrack, rbrack))
  7907. if not res.converged:
  7908. return fallback(data, *args, **kwds)
  7909. # Determine the MLE for loc.
  7910. # Iteratively look for a bracket for root_scalar.
  7911. rbrack = res.root - 1e-3 # grad_loc is numerically ill-behaved
  7912. lbrack = rbrack - 1
  7913. i = 0
  7914. while ((lbrack > -np.inf)
  7915. and (dL_dLoc(lbrack)*dL_dLoc(rbrack) >= 0)):
  7916. i += 1
  7917. lbrack = rbrack - np.power(2., i)
  7918. if not lbrack > -np.inf:
  7919. return fallback(data, *args, **kwds)
  7920. res = root_scalar(dL_dLoc, bracket=(lbrack, rbrack))
  7921. if not res.converged:
  7922. return fallback(data, *args, **kwds)
  7923. loc = res.root
  7924. scale = get_scale(loc)
  7925. c = get_c(loc, scale)
  7926. b = get_b(c, loc, scale)
  7927. std_data = (data - loc)/scale
  7928. # The expression of b relies on b being bounded above.
  7929. up_bound_b = min(1/log_mean(std_data),
  7930. 1/(harm_mean(std_data)-1))
  7931. if not (b < up_bound_b):
  7932. return fallback(data, *args, **kwds)
  7933. else:
  7934. # We know b is positive (or a FitError will be triggered)
  7935. # so we let loc get close to min(data).
  7936. rbrack = mn_inf
  7937. lbrack = mn_inf - 1
  7938. i = 0
  7939. # Iteratively look for a bracket for root_scalar.
  7940. while (lbrack > -np.inf
  7941. and (dL_dLoc(lbrack, fb)
  7942. * dL_dLoc(rbrack, fb) >= 0)):
  7943. i += 1
  7944. lbrack = rbrack - 2**i
  7945. if not lbrack > -np.inf:
  7946. return fallback(data, *args, **kwds)
  7947. res = root_scalar(dL_dLoc, (fb,),
  7948. bracket=(lbrack, rbrack))
  7949. if not res.converged:
  7950. return fallback(data, *args, **kwds)
  7951. loc = res.root
  7952. scale = get_scale(loc)
  7953. c = get_c(loc, scale)
  7954. b = fb
  7955. else:
  7956. # At least one of the parameters determining the support is fixed;
  7957. # the others then have analytical expressions from the constraints.
  7958. # The completely determined case (fixed c, loc and scale)
  7959. # has to be checked for not overflowing the support.
  7960. # If not fixed, b has to be determined numerically.
  7961. loc = floc if floc is not None else get_loc(fc, fscale)
  7962. scale = fscale or get_scale(loc)
  7963. c = fc or get_c(loc, scale)
  7964. # Unscaled, translated values should be positive when the location
  7965. # is fixed. If it is not the case, we end up with negative `scale`
  7966. # and `c`, which would trigger a FitError before exiting the
  7967. # method.
  7968. if floc is not None and data.min() - floc < 0:
  7969. raise FitDataError("truncpareto", lower=1, upper=c)
  7970. # Standardised values should be within the distribution support
  7971. # when all parameters controlling it are fixed. If it not the case,
  7972. # `fc` is overridden by `c` determined from `floc` and `fscale` when
  7973. # raising the exception.
  7974. if fc and (floc is not None) and fscale:
  7975. if data.max() > fc*fscale + floc:
  7976. raise FitDataError("truncpareto", lower=1,
  7977. upper=get_c(loc, scale))
  7978. # The other constraints should be automatically satisfied
  7979. # from the analytical expressions of the parameters.
  7980. # If fc or fscale are respectively less than one or less than 0,
  7981. # a FitError is triggered before exiting the method.
  7982. if fb is None:
  7983. std_data = (data - loc)/scale
  7984. logm = log_mean(std_data)
  7985. logc = np.log(c)
  7986. # Condition for a positive root to exist.
  7987. if not (2*logm < logc):
  7988. return fallback(data, *args, **kwds)
  7989. lbrack = 1/logm + 1/(logm - logc)
  7990. rbrack = np.nextafter(1/logm, 0)
  7991. try:
  7992. res = root_scalar(dL_dB, (logc, logm),
  7993. bracket=(lbrack, rbrack))
  7994. # we should then never get there
  7995. if not res.converged:
  7996. return fallback(data, *args, **kwds)
  7997. b = res.root
  7998. except ValueError:
  7999. b = rbrack
  8000. else:
  8001. b = fb
  8002. # The distribution requires that `scale+loc <= data <= c*scale+loc`.
  8003. # To avoid numerical issues, some tuning may be necessary.
  8004. # We adjust `scale` to satisfy the lower bound, and we adjust
  8005. # `c` to satisfy the upper bound.
  8006. if not (scale+loc) < mn:
  8007. if fscale:
  8008. loc = np.nextafter(loc, -np.inf)
  8009. else:
  8010. scale = get_scale(loc)
  8011. scale = np.nextafter(scale, 0)
  8012. if not (c*scale+loc) > mx:
  8013. c = get_c(loc, scale)
  8014. c = np.nextafter(c, np.inf)
  8015. if not (np.all(self._argcheck(b, c)) and (scale > 0)):
  8016. return fallback(data, *args, **kwds)
  8017. params_override = b, c, loc, scale
  8018. if floc is None and fscale is None:
  8019. # Based on testing in gh-16782, the following methods are only
  8020. # reliable if either `floc` or `fscale` are provided. They are
  8021. # fast, though, so might as well see if they are better than the
  8022. # generic method.
  8023. params_super = fallback(data, *args, **kwds)
  8024. nllf_override = self.nnlf(params_override, data)
  8025. nllf_super = self.nnlf(params_super, data)
  8026. if nllf_super < nllf_override:
  8027. return params_super
  8028. return params_override
# Module-level instance of the distribution.  ``a=1.0`` is the value that
# `truncpareto_gen._get_support` returns as the lower support endpoint.
truncpareto = truncpareto_gen(a=1.0, name='truncpareto')
# NOTE(review): `_support` looks like a (lower, upper) description of the
# support with the upper end named after shape `c` -- confirm against the
# code that consumes this attribute.
truncpareto._support = (1.0, 'c')
  8031. class tukeylambda_gen(rv_continuous):
  8032. r"""A Tukey-Lamdba continuous random variable.
  8033. %(before_notes)s
  8034. Notes
  8035. -----
  8036. A flexible distribution, able to represent and interpolate between the
  8037. following distributions:
  8038. - Cauchy (:math:`lambda = -1`)
  8039. - logistic (:math:`lambda = 0`)
  8040. - approx Normal (:math:`lambda = 0.14`)
  8041. - uniform from -1 to 1 (:math:`lambda = 1`)
  8042. `tukeylambda` takes a real number :math:`lambda` (denoted ``lam``
  8043. in the implementation) as a shape parameter.
  8044. %(after_notes)s
  8045. %(example)s
  8046. """
  8047. _support_mask = rv_continuous._open_support_mask
  8048. def _argcheck(self, lam):
  8049. return np.isfinite(lam)
  8050. def _shape_info(self):
  8051. return [_ShapeInfo("lam", False, (-np.inf, np.inf), (False, False))]
  8052. def _get_support(self, lam):
  8053. b = xpx.apply_where(lam > 0, lam,
  8054. lambda lam: 1/lam,
  8055. fill_value=np.inf)
  8056. return -b, b
  8057. def _pdf(self, x, lam):
  8058. Fx = np.asarray(sc.tklmbda(x, lam))
  8059. Px = Fx**(lam-1.0) + (np.asarray(1-Fx))**(lam-1.0)
  8060. with np.errstate(divide='ignore'):
  8061. Px = 1.0/np.asarray(Px)
  8062. return np.where((lam <= 0) | (abs(x) < 1.0/np.asarray(lam)), Px, 0.0)
  8063. def _cdf(self, x, lam):
  8064. return sc.tklmbda(x, lam)
  8065. def _ppf(self, q, lam):
  8066. return sc.boxcox(q, lam) - sc.boxcox1p(-q, lam)
  8067. def _stats(self, lam):
  8068. return 0, _tlvar(lam), 0, _tlkurt(lam)
  8069. def _entropy(self, lam):
  8070. def integ(p):
  8071. return np.log(pow(p, lam-1)+pow(1-p, lam-1))
  8072. return integrate.quad(integ, 0, 1)[0]
  8073. tukeylambda = tukeylambda_gen(name='tukeylambda')
  8074. class FitUniformFixedScaleDataError(FitDataError):
  8075. def __init__(self, ptp, fscale):
  8076. self.args = (
  8077. "Invalid values in `data`. Maximum likelihood estimation with "
  8078. "the uniform distribution and fixed scale requires that "
  8079. f"np.ptp(data) <= fscale, but np.ptp(data) = {ptp} and "
  8080. f"fscale = {fscale}."
  8081. )
  8082. class uniform_gen(rv_continuous):
  8083. r"""A uniform continuous random variable.
  8084. In the standard form, the distribution is uniform on ``[0, 1]``. Using
  8085. the parameters ``loc`` and ``scale``, one obtains the uniform distribution
  8086. on ``[loc, loc + scale]``.
  8087. %(before_notes)s
  8088. %(example)s
  8089. """
  8090. def _shape_info(self):
  8091. return []
  8092. def _rvs(self, size=None, random_state=None):
  8093. return random_state.uniform(0.0, 1.0, size)
  8094. def _pdf(self, x):
  8095. return 1.0*(x == x)
  8096. def _cdf(self, x):
  8097. return x
  8098. def _ppf(self, q):
  8099. return q
  8100. def _stats(self):
  8101. return 0.5, 1.0/12, 0, -1.2
  8102. def _entropy(self):
  8103. return 0.0
  8104. @_call_super_mom
  8105. def fit(self, data, *args, **kwds):
  8106. """
  8107. Maximum likelihood estimate for the location and scale parameters.
  8108. `uniform.fit` uses only the following parameters. Because exact
  8109. formulas are used, the parameters related to optimization that are
  8110. available in the `fit` method of other distributions are ignored
  8111. here. The only positional argument accepted is `data`.
  8112. Parameters
  8113. ----------
  8114. data : array_like
  8115. Data to use in calculating the maximum likelihood estimate.
  8116. floc : float, optional
  8117. Hold the location parameter fixed to the specified value.
  8118. fscale : float, optional
  8119. Hold the scale parameter fixed to the specified value.
  8120. Returns
  8121. -------
  8122. loc, scale : float
  8123. Maximum likelihood estimates for the location and scale.
  8124. Notes
  8125. -----
  8126. An error is raised if `floc` is given and any values in `data` are
  8127. less than `floc`, or if `fscale` is given and `fscale` is less
  8128. than ``data.max() - data.min()``. An error is also raised if both
  8129. `floc` and `fscale` are given.
  8130. Examples
  8131. --------
  8132. >>> import numpy as np
  8133. >>> from scipy.stats import uniform
  8134. We'll fit the uniform distribution to `x`:
  8135. >>> x = np.array([2, 2.5, 3.1, 9.5, 13.0])
  8136. For a uniform distribution MLE, the location is the minimum of the
  8137. data, and the scale is the maximum minus the minimum.
  8138. >>> loc, scale = uniform.fit(x)
  8139. >>> loc
  8140. 2.0
  8141. >>> scale
  8142. 11.0
  8143. If we know the data comes from a uniform distribution where the support
  8144. starts at 0, we can use ``floc=0``:
  8145. >>> loc, scale = uniform.fit(x, floc=0)
  8146. >>> loc
  8147. 0.0
  8148. >>> scale
  8149. 13.0
  8150. Alternatively, if we know the length of the support is 12, we can use
  8151. ``fscale=12``:
  8152. >>> loc, scale = uniform.fit(x, fscale=12)
  8153. >>> loc
  8154. 1.5
  8155. >>> scale
  8156. 12.0
  8157. In that last example, the support interval is [1.5, 13.5]. This
  8158. solution is not unique. For example, the distribution with ``loc=2``
  8159. and ``scale=12`` has the same likelihood as the one above. When
  8160. `fscale` is given and it is larger than ``data.max() - data.min()``,
  8161. the parameters returned by the `fit` method center the support over
  8162. the interval ``[data.min(), data.max()]``.
  8163. """
  8164. if len(args) > 0:
  8165. raise TypeError("Too many arguments.")
  8166. floc = kwds.pop('floc', None)
  8167. fscale = kwds.pop('fscale', None)
  8168. _remove_optimizer_parameters(kwds)
  8169. if floc is not None and fscale is not None:
  8170. # This check is for consistency with `rv_continuous.fit`.
  8171. raise ValueError("All parameters fixed. There is nothing to "
  8172. "optimize.")
  8173. data = np.asarray(data)
  8174. if not np.isfinite(data).all():
  8175. raise ValueError("The data contains non-finite values.")
  8176. # MLE for the uniform distribution
  8177. # --------------------------------
  8178. # The PDF is
  8179. #
  8180. # f(x, loc, scale) = {1/scale for loc <= x <= loc + scale
  8181. # {0 otherwise}
  8182. #
  8183. # The likelihood function is
  8184. # L(x, loc, scale) = (1/scale)**n
  8185. # where n is len(x), assuming loc <= x <= loc + scale for all x.
  8186. # The log-likelihood is
  8187. # l(x, loc, scale) = -n*log(scale)
  8188. # The log-likelihood is maximized by making scale as small as possible,
  8189. # while keeping loc <= x <= loc + scale. So if neither loc nor scale
  8190. # are fixed, the log-likelihood is maximized by choosing
  8191. # loc = x.min()
  8192. # scale = np.ptp(x)
  8193. # If loc is fixed, it must be less than or equal to x.min(), and then
  8194. # the scale is
  8195. # scale = x.max() - loc
  8196. # If scale is fixed, it must not be less than np.ptp(x). If scale is
  8197. # greater than np.ptp(x), the solution is not unique. Note that the
  8198. # likelihood does not depend on loc, except for the requirement that
  8199. # loc <= x <= loc + scale. All choices of loc for which
  8200. # x.max() - scale <= loc <= x.min()
  8201. # have the same log-likelihood. In this case, we choose loc such that
  8202. # the support is centered over the interval [data.min(), data.max()]:
  8203. # loc = x.min() = 0.5*(scale - np.ptp(x))
  8204. if fscale is None:
  8205. # scale is not fixed.
  8206. if floc is None:
  8207. # loc is not fixed, scale is not fixed.
  8208. loc = data.min()
  8209. scale = np.ptp(data)
  8210. else:
  8211. # loc is fixed, scale is not fixed.
  8212. loc = floc
  8213. scale = data.max() - loc
  8214. if data.min() < loc:
  8215. raise FitDataError("uniform", lower=loc, upper=loc + scale)
  8216. else:
  8217. # loc is not fixed, scale is fixed.
  8218. ptp = np.ptp(data)
  8219. if ptp > fscale:
  8220. raise FitUniformFixedScaleDataError(ptp=ptp, fscale=fscale)
  8221. # If ptp < fscale, the ML estimate is not unique; see the comments
  8222. # above. We choose the distribution for which the support is
  8223. # centered over the interval [data.min(), data.max()].
  8224. loc = data.min() - 0.5*(fscale - ptp)
  8225. scale = fscale
  8226. # We expect the return values to be floating point, so ensure it
  8227. # by explicitly converting to float.
  8228. return float(loc), float(scale)
  8229. uniform = uniform_gen(a=0.0, b=1.0, name='uniform')
  8230. class vonmises_gen(rv_continuous):
  8231. r"""A Von Mises continuous random variable.
  8232. %(before_notes)s
  8233. See Also
  8234. --------
  8235. scipy.stats.vonmises_fisher : Von-Mises Fisher distribution on a
  8236. hypersphere
  8237. Notes
  8238. -----
  8239. The probability density function for `vonmises` and `vonmises_line` is:
  8240. .. math::
  8241. f(x, \kappa) = \frac{ \exp(\kappa \cos(x)) }{ 2 \pi I_0(\kappa) }
  8242. for :math:`-\pi \le x \le \pi`, :math:`\kappa \ge 0`. :math:`I_0` is the
  8243. modified Bessel function of order zero (`scipy.special.i0`).
  8244. `vonmises` is a circular distribution which does not restrict the
  8245. distribution to a fixed interval. Currently, there is no circular
  8246. distribution framework in SciPy. The ``cdf`` is implemented such that
  8247. ``cdf(x + 2*np.pi) == cdf(x) + 1``.
  8248. `vonmises_line` is the same distribution, defined on :math:`[-\pi, \pi]`
  8249. on the real line. This is a regular (i.e. non-circular) distribution.
  8250. Note about distribution parameters: `vonmises` and `vonmises_line` take
  8251. ``kappa`` as a shape parameter (concentration) and ``loc`` as the location
  8252. (circular mean). A ``scale`` parameter is accepted but does not have any
  8253. effect.
  8254. Examples
  8255. --------
  8256. Import the necessary modules.
  8257. >>> import numpy as np
  8258. >>> import matplotlib.pyplot as plt
  8259. >>> from scipy.stats import vonmises
  8260. Define distribution parameters.
  8261. >>> loc = 0.5 * np.pi # circular mean
  8262. >>> kappa = 1 # concentration
  8263. Compute the probability density at ``x=0`` via the ``pdf`` method.
  8264. >>> vonmises.pdf(0, loc=loc, kappa=kappa)
  8265. 0.12570826359722018
  8266. Verify that the percentile function ``ppf`` inverts the cumulative
  8267. distribution function ``cdf`` up to floating point accuracy.
  8268. >>> x = 1
  8269. >>> cdf_value = vonmises.cdf(x, loc=loc, kappa=kappa)
  8270. >>> ppf_value = vonmises.ppf(cdf_value, loc=loc, kappa=kappa)
  8271. >>> x, cdf_value, ppf_value
  8272. (1, 0.31489339900904967, 1.0000000000000004)
  8273. Draw 1000 random variates by calling the ``rvs`` method.
  8274. >>> sample_size = 1000
  8275. >>> sample = vonmises(loc=loc, kappa=kappa).rvs(sample_size)
  8276. Plot the von Mises density on a Cartesian and polar grid to emphasize
  8277. that it is a circular distribution.
  8278. >>> fig = plt.figure(figsize=(12, 6))
  8279. >>> left = plt.subplot(121)
  8280. >>> right = plt.subplot(122, projection='polar')
  8281. >>> x = np.linspace(-np.pi, np.pi, 500)
  8282. >>> vonmises_pdf = vonmises.pdf(x, loc=loc, kappa=kappa)
  8283. >>> ticks = [0, 0.15, 0.3]
  8284. The left image contains the Cartesian plot.
  8285. >>> left.plot(x, vonmises_pdf)
  8286. >>> left.set_yticks(ticks)
  8287. >>> number_of_bins = int(np.sqrt(sample_size))
  8288. >>> left.hist(sample, density=True, bins=number_of_bins)
  8289. >>> left.set_title("Cartesian plot")
  8290. >>> left.set_xlim(-np.pi, np.pi)
  8291. >>> left.grid(True)
  8292. The right image contains the polar plot.
  8293. >>> right.plot(x, vonmises_pdf, label="PDF")
  8294. >>> right.set_yticks(ticks)
  8295. >>> right.hist(sample, density=True, bins=number_of_bins,
  8296. ... label="Histogram")
  8297. >>> right.set_title("Polar plot")
  8298. >>> right.legend(bbox_to_anchor=(0.15, 1.06))
  8299. """
  8300. def _shape_info(self):
  8301. return [_ShapeInfo("kappa", False, (0, np.inf), (True, False))]
  8302. def _argcheck(self, kappa):
  8303. return kappa >= 0
  8304. def _rvs(self, kappa, size=None, random_state=None):
  8305. return random_state.vonmises(0.0, kappa, size=size)
  8306. @inherit_docstring_from(rv_continuous)
  8307. def rvs(self, *args, **kwds):
  8308. rvs = super().rvs(*args, **kwds)
  8309. return np.mod(rvs + np.pi, 2*np.pi) - np.pi
  8310. def _pdf(self, x, kappa):
  8311. # vonmises.pdf(x, kappa) = exp(kappa * cos(x)) / (2*pi*I[0](kappa))
  8312. # = exp(kappa * (cos(x) - 1)) /
  8313. # (2*pi*exp(-kappa)*I[0](kappa))
  8314. # = exp(kappa * cosm1(x)) / (2*pi*i0e(kappa))
  8315. return np.exp(kappa*sc.cosm1(x)) / (2*np.pi*sc.i0e(kappa))
  8316. def _logpdf(self, x, kappa):
  8317. # vonmises.pdf(x, kappa) = exp(kappa * cosm1(x)) / (2*pi*i0e(kappa))
  8318. return kappa * sc.cosm1(x) - np.log(2*np.pi) - np.log(sc.i0e(kappa))
  8319. def _cdf(self, x, kappa):
  8320. return _stats.von_mises_cdf(kappa, x)
  8321. def _stats_skip(self, kappa):
  8322. return 0, None, 0, None
  8323. def _entropy(self, kappa):
  8324. # vonmises.entropy(kappa) = -kappa * I[1](kappa) / I[0](kappa) +
  8325. # log(2 * np.pi * I[0](kappa))
  8326. # = -kappa * I[1](kappa) * exp(-kappa) /
  8327. # (I[0](kappa) * exp(-kappa)) +
  8328. # log(2 * np.pi *
  8329. # I[0](kappa) * exp(-kappa) / exp(-kappa))
  8330. # = -kappa * sc.i1e(kappa) / sc.i0e(kappa) +
  8331. # log(2 * np.pi * i0e(kappa)) + kappa
  8332. return (-kappa * sc.i1e(kappa) / sc.i0e(kappa) +
  8333. np.log(2 * np.pi * sc.i0e(kappa)) + kappa)
  8334. @extend_notes_in_docstring(rv_continuous, notes="""\
  8335. The default limits of integration are endpoints of the interval
  8336. of width ``2*pi`` centered at `loc` (e.g. ``[-pi, pi]`` when
  8337. ``loc=0``).\n\n""")
  8338. def expect(self, func=None, args=(), loc=0, scale=1, lb=None, ub=None,
  8339. conditional=False, **kwds):
  8340. _a, _b = -np.pi, np.pi
  8341. if lb is None:
  8342. lb = loc + _a
  8343. if ub is None:
  8344. ub = loc + _b
  8345. return super().expect(func, args, loc,
  8346. scale, lb, ub, conditional, **kwds)
  8347. @_call_super_mom
  8348. @extend_notes_in_docstring(rv_continuous, notes="""\
  8349. Fit data is assumed to represent angles and will be wrapped onto the
  8350. unit circle. `f0` and `fscale` are ignored; the returned shape is
  8351. always the maximum likelihood estimate and the scale is always
  8352. 1. Initial guesses are ignored.\n\n""")
  8353. def fit(self, data, *args, **kwds):
  8354. if kwds.pop('superfit', False):
  8355. return super().fit(data, *args, **kwds)
  8356. data, fshape, floc, fscale = _check_fit_input_parameters(self, data,
  8357. args, kwds)
  8358. if self.a == -np.pi:
  8359. # vonmises line case, here the default fit method will be used
  8360. return super().fit(data, *args, **kwds)
  8361. # wrap data to interval [0, 2*pi]
  8362. data = np.mod(data, 2 * np.pi)
  8363. def find_mu(data):
  8364. return stats.circmean(data)
  8365. def find_kappa(data, loc):
  8366. # Usually, sources list the following as the equation to solve for
  8367. # the MLE of the shape parameter:
  8368. # r = I[1](kappa)/I[0](kappa), where r = mean resultant length
  8369. # This is valid when the location is the MLE of location.
  8370. # More generally, when the location may be fixed at an arbitrary
  8371. # value, r should be defined as follows:
  8372. r = np.sum(np.cos(loc - data))/len(data)
  8373. # See gh-18128 for more information.
  8374. # The function r[0](kappa) := I[1](kappa)/I[0](kappa) is monotonic
  8375. # increasing from r[0](0) = 0 to r[0](+inf) = 1. The partial
  8376. # derivative of the log likelihood function with respect to kappa
  8377. # is monotonic decreasing in kappa.
  8378. if r == 1:
  8379. # All observations are (almost) equal to the mean. Return
  8380. # some large kappa such that r[0](kappa) = 1.0 numerically.
  8381. return 1e16
  8382. elif r > 0:
  8383. def solve_for_kappa(kappa):
  8384. return sc.i1e(kappa)/sc.i0e(kappa) - r
  8385. # The bounds of the root of r[0](kappa) = r are derived from
  8386. # selected bounds of r[0](x) given in [1, Eq. 11 & 16]. See
  8387. # gh-20102 for details.
  8388. #
  8389. # [1] Amos, D. E. (1973). Computation of Modified Bessel
  8390. # Functions and Their Ratios. Mathematics of Computation,
  8391. # 28(125): 239-251.
  8392. lower_bound = r/(1-r)/(1+r)
  8393. upper_bound = 2*lower_bound
  8394. # The bounds are violated numerically for certain values of r,
  8395. # where solve_for_kappa evaluated at the bounds have the same
  8396. # sign. This indicates numerical imprecision of i1e()/i0e().
  8397. # Return the violated bound in this case as it's more accurate.
  8398. if solve_for_kappa(lower_bound) >= 0:
  8399. return lower_bound
  8400. elif solve_for_kappa(upper_bound) <= 0:
  8401. return upper_bound
  8402. else:
  8403. root_res = root_scalar(solve_for_kappa, method="brentq",
  8404. bracket=(lower_bound, upper_bound))
  8405. return root_res.root
  8406. else:
  8407. # if the provided floc is very far from the circular mean,
  8408. # the mean resultant length r can become negative.
  8409. # In that case, the equation
  8410. # I[1](kappa)/I[0](kappa) = r does not have a solution.
  8411. # The maximum likelihood kappa is then 0 which practically
  8412. # results in the uniform distribution on the circle. As
  8413. # vonmises is defined for kappa > 0, return instead the
  8414. # smallest floating point value.
  8415. # See gh-18190 for more information
  8416. return np.finfo(float).tiny
  8417. # location likelihood equation has a solution independent of kappa
  8418. loc = floc if floc is not None else find_mu(data)
  8419. # shape likelihood equation depends on location
  8420. shape = fshape if fshape is not None else find_kappa(data, loc)
  8421. loc = np.mod(loc + np.pi, 2 * np.pi) - np.pi # ensure in [-pi, pi]
  8422. return shape, loc, 1 # scale is not handled
  8423. vonmises = vonmises_gen(name='vonmises')
  8424. vonmises_line = vonmises_gen(a=-np.pi, b=np.pi, name='vonmises_line')
  8425. class wald_gen(invgauss_gen):
  8426. r"""A Wald continuous random variable.
  8427. %(before_notes)s
  8428. Notes
  8429. -----
  8430. The probability density function for `wald` is:
  8431. .. math::
  8432. f(x) = \frac{1}{\sqrt{2\pi x^3}} \exp(- \frac{ (x-1)^2 }{ 2x })
  8433. for :math:`x >= 0`.
  8434. `wald` is a special case of `invgauss` with ``mu=1``.
  8435. %(after_notes)s
  8436. %(example)s
  8437. """
  8438. _support_mask = rv_continuous._open_support_mask
  8439. def _shape_info(self):
  8440. return []
  8441. def _rvs(self, size=None, random_state=None):
  8442. return random_state.wald(1.0, 1.0, size=size)
  8443. def _pdf(self, x):
  8444. # wald.pdf(x) = 1/sqrt(2*pi*x**3) * exp(-(x-1)**2/(2*x))
  8445. return invgauss._pdf(x, 1.0)
  8446. def _cdf(self, x):
  8447. return invgauss._cdf(x, 1.0)
  8448. def _sf(self, x):
  8449. return invgauss._sf(x, 1.0)
  8450. def _ppf(self, x):
  8451. return invgauss._ppf(x, 1.0)
  8452. def _isf(self, x):
  8453. return invgauss._isf(x, 1.0)
  8454. def _logpdf(self, x):
  8455. return invgauss._logpdf(x, 1.0)
  8456. def _logcdf(self, x):
  8457. return invgauss._logcdf(x, 1.0)
  8458. def _logsf(self, x):
  8459. return invgauss._logsf(x, 1.0)
  8460. def _stats(self):
  8461. return 1.0, 1.0, 3.0, 15.0
  8462. def _entropy(self):
  8463. return invgauss._entropy(1.0)
  8464. wald = wald_gen(a=0.0, name="wald")
  8465. class wrapcauchy_gen(rv_continuous):
  8466. r"""A wrapped Cauchy continuous random variable.
  8467. %(before_notes)s
  8468. Notes
  8469. -----
  8470. The probability density function for `wrapcauchy` is:
  8471. .. math::
  8472. f(x, c) = \frac{1-c^2}{2\pi (1+c^2 - 2c \cos(x))}
  8473. for :math:`0 \le x \le 2\pi`, :math:`0 < c < 1`.
  8474. `wrapcauchy` takes ``c`` as a shape parameter for :math:`c`.
  8475. %(after_notes)s
  8476. %(example)s
  8477. """
  8478. def _argcheck(self, c):
  8479. return (c > 0) & (c < 1)
  8480. def _shape_info(self):
  8481. return [_ShapeInfo("c", False, (0, 1), (False, False))]
  8482. def _pdf(self, x, c):
  8483. # wrapcauchy.pdf(x, c) = (1-c**2) / (2*pi*(1+c**2-2*c*cos(x)))
  8484. return (1.0-c*c)/(2*np.pi*(1+c*c-2*c*np.cos(x)))
  8485. def _cdf(self, x, c):
  8486. def f1(x, cr):
  8487. # CDF for 0 <= x < pi
  8488. return 1/np.pi * np.arctan(cr*np.tan(x/2))
  8489. def f2(x, cr):
  8490. # CDF for pi <= x <= 2*pi
  8491. return 1 - 1/np.pi * np.arctan(cr*np.tan((2*np.pi - x)/2))
  8492. cr = (1 + c)/(1 - c)
  8493. return xpx.apply_where(x < np.pi, (x, cr), f1, f2)
  8494. def _ppf(self, q, c):
  8495. val = (1.0-c)/(1.0+c)
  8496. rcq = 2*np.arctan(val*np.tan(np.pi*q))
  8497. rcmq = 2*np.pi-2*np.arctan(val*np.tan(np.pi*(1-q)))
  8498. return np.where(q < 1.0/2, rcq, rcmq)
  8499. def _entropy(self, c):
  8500. return np.log(2*np.pi*(1-c*c))
  8501. def _fitstart(self, data):
  8502. # Use 0.5 as the initial guess of the shape parameter.
  8503. # For the location and scale, use the minimum and
  8504. # peak-to-peak/(2*pi), respectively.
  8505. if isinstance(data, CensoredData):
  8506. data = data._uncensor()
  8507. return 0.5, np.min(data), np.ptp(data)/(2*np.pi)
  8508. @inherit_docstring_from(rv_continuous)
  8509. def rvs(self, *args, **kwds):
  8510. rvs = super().rvs(*args, **kwds)
  8511. return np.mod(rvs, 2*np.pi)
  8512. wrapcauchy = wrapcauchy_gen(a=0.0, b=2*np.pi, name='wrapcauchy')
  8513. class gennorm_gen(rv_continuous):
  8514. r"""A generalized normal continuous random variable.
  8515. %(before_notes)s
  8516. See Also
  8517. --------
  8518. laplace : Laplace distribution
  8519. norm : normal distribution
  8520. Notes
  8521. -----
  8522. The probability density function for `gennorm` is [1]_:
  8523. .. math::
  8524. f(x, \beta) = \frac{\beta}{2 \Gamma(1/\beta)} \exp(-|x|^\beta),
  8525. where :math:`x` is a real number, :math:`\beta > 0` and
  8526. :math:`\Gamma` is the gamma function (`scipy.special.gamma`).
  8527. `gennorm` takes ``beta`` as a shape parameter for :math:`\beta`.
  8528. For :math:`\beta = 1`, it is identical to a Laplace distribution.
  8529. For :math:`\beta = 2`, it is identical to a normal distribution
  8530. (with ``scale=1/sqrt(2)``).
  8531. References
  8532. ----------
  8533. .. [1] "Generalized normal distribution, Version 1",
  8534. https://en.wikipedia.org/wiki/Generalized_normal_distribution#Version_1
  8535. .. [2] Nardon, Martina, and Paolo Pianca. "Simulation techniques for
  8536. generalized Gaussian densities." Journal of Statistical
  8537. Computation and Simulation 79.11 (2009): 1317-1329
  8538. .. [3] Wicklin, Rick. "Simulate data from a generalized Gaussian
  8539. distribution" in The DO Loop blog, September 21, 2016,
  8540. https://blogs.sas.com/content/iml/2016/09/21/simulate-generalized-gaussian-sas.html
  8541. %(example)s
  8542. """
  8543. def _shape_info(self):
  8544. return [_ShapeInfo("beta", False, (0, np.inf), (False, False))]
  8545. def _pdf(self, x, beta):
  8546. return np.exp(self._logpdf(x, beta))
  8547. def _logpdf(self, x, beta):
  8548. return np.log(0.5*beta) - sc.gammaln(1.0/beta) - abs(x)**beta
  8549. def _cdf(self, x, beta):
  8550. c = 0.5 * np.sign(x)
  8551. # evaluating (.5 + c) first prevents numerical cancellation
  8552. return (0.5 + c) - c * sc.gammaincc(1.0/beta, abs(x)**beta)
  8553. def _ppf(self, x, beta):
  8554. c = np.sign(x - 0.5)
  8555. # evaluating (1. + c) first prevents numerical cancellation
  8556. return c * sc.gammainccinv(1.0/beta, (1.0 + c) - 2.0*c*x)**(1.0/beta)
  8557. def _sf(self, x, beta):
  8558. return self._cdf(-x, beta)
  8559. def _isf(self, x, beta):
  8560. return -self._ppf(x, beta)
  8561. def _munp(self, n, beta):
  8562. if n == 0:
  8563. return 1.
  8564. if n % 2 == 0:
  8565. c1, cn = sc.gammaln([1.0/beta, (n + 1.0)/beta])
  8566. return np.exp(cn - c1)
  8567. else:
  8568. return 0.
  8569. def _stats(self, beta):
  8570. c1, c3, c5 = sc.gammaln([1.0/beta, 3.0/beta, 5.0/beta])
  8571. return 0., np.exp(c3 - c1), 0., np.exp(c5 + c1 - 2.0*c3) - 3.
  8572. def _entropy(self, beta):
  8573. return 1. / beta - np.log(.5 * beta) + sc.gammaln(1. / beta)
  8574. def _rvs(self, beta, size=None, random_state=None):
  8575. # see [2]_ for the algorithm
  8576. # see [3]_ for reference implementation in SAS
  8577. z = random_state.gamma(1/beta, size=size)
  8578. y = z ** (1/beta)
  8579. # convert y to array to ensure masking support
  8580. y = np.asarray(y)
  8581. mask = random_state.random(size=y.shape) < 0.5
  8582. y[mask] = -y[mask]
  8583. return y
  8584. gennorm = gennorm_gen(name='gennorm')
  8585. class halfgennorm_gen(rv_continuous):
  8586. r"""The upper half of a generalized normal continuous random variable.
  8587. %(before_notes)s
  8588. See Also
  8589. --------
  8590. gennorm : generalized normal distribution
  8591. expon : exponential distribution
  8592. halfnorm : half normal distribution
  8593. Notes
  8594. -----
  8595. The probability density function for `halfgennorm` is:
  8596. .. math::
  8597. f(x, \beta) = \frac{\beta}{\Gamma(1/\beta)} \exp(-|x|^\beta)
  8598. for :math:`x, \beta > 0`. :math:`\Gamma` is the gamma function
  8599. (`scipy.special.gamma`).
  8600. `halfgennorm` takes ``beta`` as a shape parameter for :math:`\beta`.
  8601. For :math:`\beta = 1`, it is identical to an exponential distribution.
  8602. For :math:`\beta = 2`, it is identical to a half normal distribution
  8603. (with ``scale=1/sqrt(2)``).
  8604. References
  8605. ----------
  8606. .. [1] "Generalized normal distribution, Version 1",
  8607. https://en.wikipedia.org/wiki/Generalized_normal_distribution#Version_1
  8608. %(example)s
  8609. """
  8610. def _shape_info(self):
  8611. return [_ShapeInfo("beta", False, (0, np.inf), (False, False))]
  8612. def _pdf(self, x, beta):
  8613. # beta
  8614. # halfgennorm.pdf(x, beta) = ------------- exp(-|x|**beta)
  8615. # gamma(1/beta)
  8616. return np.exp(self._logpdf(x, beta))
  8617. def _logpdf(self, x, beta):
  8618. return np.log(beta) - sc.gammaln(1.0/beta) - x**beta
  8619. def _cdf(self, x, beta):
  8620. return sc.gammainc(1.0/beta, x**beta)
  8621. def _ppf(self, x, beta):
  8622. return sc.gammaincinv(1.0/beta, x)**(1.0/beta)
  8623. def _sf(self, x, beta):
  8624. return sc.gammaincc(1.0/beta, x**beta)
  8625. def _isf(self, x, beta):
  8626. return sc.gammainccinv(1.0/beta, x)**(1.0/beta)
  8627. def _entropy(self, beta):
  8628. return 1.0/beta - np.log(beta) + sc.gammaln(1.0/beta)
  8629. halfgennorm = halfgennorm_gen(a=0, name='halfgennorm')
  8630. class crystalball_gen(rv_continuous):
  8631. r"""
  8632. Crystalball distribution
  8633. %(before_notes)s
  8634. Notes
  8635. -----
  8636. The probability density function for `crystalball` is:
  8637. .. math::
  8638. f(x, \beta, m) = \begin{cases}
  8639. N \exp(-x^2 / 2), &\text{for } x > -\beta\\
  8640. N A (B - x)^{-m} &\text{for } x \le -\beta
  8641. \end{cases}
  8642. where :math:`A = (m / |\beta|)^m \exp(-\beta^2 / 2)`,
  8643. :math:`B = m/|\beta| - |\beta|` and :math:`N` is a normalisation constant.
  8644. `crystalball` takes :math:`\beta > 0` and :math:`m > 1` as shape
  8645. parameters. :math:`\beta` defines the point where the pdf changes
  8646. from a power-law to a Gaussian distribution. :math:`m` is the power
  8647. of the power-law tail.
  8648. %(after_notes)s
  8649. .. versionadded:: 0.19.0
  8650. References
  8651. ----------
  8652. .. [1] "Crystal Ball Function",
  8653. https://en.wikipedia.org/wiki/Crystal_Ball_function
  8654. %(example)s
  8655. """
  8656. def _argcheck(self, beta, m):
  8657. """
  8658. Shape parameter bounds are m > 1 and beta > 0.
  8659. """
  8660. return (m > 1) & (beta > 0)
  8661. def _shape_info(self):
  8662. ibeta = _ShapeInfo("beta", False, (0, np.inf), (False, False))
  8663. im = _ShapeInfo("m", False, (1, np.inf), (False, False))
  8664. return [ibeta, im]
  8665. def _fitstart(self, data):
  8666. # Arbitrary, but the default m=1 is not valid
  8667. return super()._fitstart(data, args=(1, 1.5))
  8668. def _pdf(self, x, beta, m):
  8669. """
  8670. Return PDF of the crystalball function.
  8671. --
  8672. | exp(-x**2 / 2), for x > -beta
  8673. crystalball.pdf(x, beta, m) = N * |
  8674. | A * (B - x)**(-m), for x <= -beta
  8675. --
  8676. """
  8677. N = 1.0 / (m/beta / (m-1) * np.exp(-beta**2 / 2.0) +
  8678. _norm_pdf_C * _norm_cdf(beta))
  8679. def rhs(x, beta, m):
  8680. return np.exp(-x**2 / 2)
  8681. def lhs(x, beta, m):
  8682. return ((m/beta)**m * np.exp(-beta**2 / 2.0) *
  8683. (m/beta - beta - x)**(-m))
  8684. return N * xpx.apply_where(x > -beta, (x, beta, m), rhs, lhs)
  8685. def _logpdf(self, x, beta, m):
  8686. """
  8687. Return the log of the PDF of the crystalball function.
  8688. """
  8689. N = 1.0 / (m/beta / (m-1) * np.exp(-beta**2 / 2.0) +
  8690. _norm_pdf_C * _norm_cdf(beta))
  8691. def rhs(x, beta, m):
  8692. return -x**2/2
  8693. def lhs(x, beta, m):
  8694. return m*np.log(m/beta) - beta**2/2 - m*np.log(m/beta - beta - x)
  8695. return np.log(N) + xpx.apply_where(x > -beta, (x, beta, m), rhs, lhs)
  8696. def _cdf(self, x, beta, m):
  8697. """
  8698. Return CDF of the crystalball function
  8699. """
  8700. N = 1.0 / (m/beta / (m-1) * np.exp(-beta**2 / 2.0) +
  8701. _norm_pdf_C * _norm_cdf(beta))
  8702. def rhs(x, beta, m):
  8703. return ((m/beta) * np.exp(-beta**2 / 2.0) / (m-1) +
  8704. _norm_pdf_C * (_norm_cdf(x) - _norm_cdf(-beta)))
  8705. def lhs(x, beta, m):
  8706. return ((m/beta)**m * np.exp(-beta**2 / 2.0) *
  8707. (m/beta - beta - x)**(-m+1) / (m-1))
  8708. return N * xpx.apply_where(x > -beta, (x, beta, m), rhs, lhs)
  8709. def _sf(self, x, beta, m):
  8710. """
  8711. Survival function of the crystalball distribution.
  8712. """
  8713. def rhs(x, beta, m):
  8714. # M is the same as 1/N used elsewhere.
  8715. M = m/beta/(m - 1)*np.exp(-beta**2/2) + _norm_pdf_C*_norm_cdf(beta)
  8716. return _norm_pdf_C*_norm_sf(x)/M
  8717. def lhs(x, beta, m):
  8718. # Default behavior is OK in the left tail of the SF.
  8719. return 1 - self._cdf(x, beta, m)
  8720. return xpx.apply_where(x > -beta, (x, beta, m), rhs, lhs)
  8721. def _ppf(self, p, beta, m):
  8722. N = 1.0 / (m/beta / (m-1) * np.exp(-beta**2 / 2.0) +
  8723. _norm_pdf_C * _norm_cdf(beta))
  8724. pbeta = N * (m/beta) * np.exp(-beta**2/2) / (m - 1)
  8725. def ppf_less(p, beta, m):
  8726. eb2 = np.exp(-beta**2/2)
  8727. C = (m/beta) * eb2 / (m-1)
  8728. N = 1/(C + _norm_pdf_C * _norm_cdf(beta))
  8729. return (m/beta - beta -
  8730. ((m - 1)*(m/beta)**(-m)/eb2*p/N)**(1/(1-m)))
  8731. def ppf_greater(p, beta, m):
  8732. eb2 = np.exp(-beta**2/2)
  8733. C = (m/beta) * eb2 / (m-1)
  8734. N = 1/(C + _norm_pdf_C * _norm_cdf(beta))
  8735. return _norm_ppf(_norm_cdf(-beta) + (1/_norm_pdf_C)*(p/N - C))
  8736. return xpx.apply_where(p < pbeta, (p, beta, m), ppf_less, ppf_greater)
  8737. def _munp(self, n, beta, m):
  8738. """
  8739. Returns the n-th non-central moment of the crystalball function.
  8740. """
  8741. N = 1.0 / (m/beta / (m-1) * np.exp(-beta**2 / 2.0) +
  8742. _norm_pdf_C * _norm_cdf(beta))
  8743. def n_th_moment(n, beta, m):
  8744. """
  8745. Returns n-th moment. Defined only if n+1 < m
  8746. Function cannot broadcast due to the loop over n
  8747. """
  8748. A = (m/beta)**m * np.exp(-beta**2 / 2.0)
  8749. B = m/beta - beta
  8750. rhs = (2**((n-1)/2.0) * sc.gamma((n+1)/2) *
  8751. (1.0 + (-1)**n * sc.gammainc((n+1)/2, beta**2 / 2)))
  8752. lhs = np.zeros(rhs.shape)
  8753. for k in range(int(n) + 1):
  8754. lhs += (sc.binom(n, k) * B**(n-k) * (-1)**k / (m - k - 1) *
  8755. (m/beta)**(-m + k + 1))
  8756. return A * lhs + rhs
  8757. return N * xpx.apply_where(n + 1 < m, (n, beta, m),
  8758. np.vectorize(n_th_moment, otypes=[np.float64]),
  8759. fill_value=np.inf)
  8760. crystalball = crystalball_gen(name='crystalball', longname="A Crystalball Function")
  8761. def _argus_phi(chi):
  8762. """
  8763. Utility function for the argus distribution used in the pdf, sf and
  8764. moment calculation.
  8765. Note that for all x > 0:
  8766. gammainc(1.5, x**2/2) = 2 * (_norm_cdf(x) - x * _norm_pdf(x) - 0.5).
  8767. This can be verified directly by noting that the cdf of Gamma(1.5) can
  8768. be written as erf(sqrt(x)) - 2*sqrt(x)*exp(-x)/sqrt(Pi).
  8769. We use gammainc instead of the usual definition because it is more precise
  8770. for small chi.
  8771. """
  8772. return sc.gammainc(1.5, chi**2/2) / 2
  8773. class argus_gen(rv_continuous):
  8774. r"""
  8775. Argus distribution
  8776. %(before_notes)s
  8777. Notes
  8778. -----
  8779. The probability density function for `argus` is:
  8780. .. math::
  8781. f(x, \chi) = \frac{\chi^3}{\sqrt{2\pi} \Psi(\chi)} x \sqrt{1-x^2}
  8782. \exp(-\chi^2 (1 - x^2)/2)
  8783. for :math:`0 < x < 1` and :math:`\chi > 0`, where
  8784. .. math::
  8785. \Psi(\chi) = \Phi(\chi) - \chi \phi(\chi) - 1/2
  8786. with :math:`\Phi` and :math:`\phi` being the CDF and PDF of a standard
  8787. normal distribution, respectively.
  8788. `argus` takes :math:`\chi` as shape a parameter. Details about sampling
  8789. from the ARGUS distribution can be found in [2]_.
  8790. %(after_notes)s
  8791. References
  8792. ----------
  8793. .. [1] "ARGUS distribution",
  8794. https://en.wikipedia.org/wiki/ARGUS_distribution
  8795. .. [2] Christoph Baumgarten "Random variate generation by fast numerical
  8796. inversion in the varying parameter case." Research in Statistics,
  8797. vol. 1, 2023. :doi:`10.1080/27684520.2023.2279060`
  8798. .. versionadded:: 0.19.0
  8799. %(example)s
  8800. """
  8801. def _shape_info(self):
  8802. return [_ShapeInfo("chi", False, (0, np.inf), (False, False))]
  8803. def _logpdf(self, x, chi):
  8804. # for x = 0 or 1, logpdf returns -np.inf
  8805. with np.errstate(divide='ignore'):
  8806. y = 1.0 - x*x
  8807. A = 3*np.log(chi) - _norm_pdf_logC - np.log(_argus_phi(chi))
  8808. return A + np.log(x) + 0.5*np.log1p(-x*x) - chi**2 * y / 2
  8809. def _pdf(self, x, chi):
  8810. return np.exp(self._logpdf(x, chi))
  8811. def _cdf(self, x, chi):
  8812. return 1.0 - self._sf(x, chi)
  8813. def _sf(self, x, chi):
  8814. return _argus_phi(chi * np.sqrt((1 - x)*(1 + x))) / _argus_phi(chi)
  8815. def _rvs(self, chi, size=None, random_state=None):
  8816. chi = np.asarray(chi)
  8817. if chi.size == 1:
  8818. out = self._rvs_scalar(chi, numsamples=size,
  8819. random_state=random_state)
  8820. else:
  8821. shp, bc = _check_shape(chi.shape, size)
  8822. numsamples = int(np.prod(shp))
  8823. out = np.empty(size)
  8824. it = np.nditer([chi],
  8825. flags=['multi_index'],
  8826. op_flags=[['readonly']])
  8827. while not it.finished:
  8828. idx = tuple((it.multi_index[j] if not bc[j] else slice(None))
  8829. for j in range(-len(size), 0))
  8830. r = self._rvs_scalar(it[0], numsamples=numsamples,
  8831. random_state=random_state)
  8832. out[idx] = r.reshape(shp)
  8833. it.iternext()
  8834. if size == ():
  8835. out = out[()]
  8836. return out
  8837. def _rvs_scalar(self, chi, numsamples=None, random_state=None):
  8838. # if chi <= 1.8:
  8839. # use rejection method, see Devroye:
  8840. # Non-Uniform Random Variate Generation, 1986, section II.3.2.
  8841. # write: PDF f(x) = c * g(x) * h(x), where
  8842. # h is [0,1]-valued and g is a density
  8843. # we use two ways to write f
  8844. #
  8845. # Case 1:
  8846. # write g(x) = 3*x*sqrt(1-x**2), h(x) = exp(-chi**2 (1-x**2) / 2)
  8847. # If X has a distribution with density g its ppf G_inv is given by:
  8848. # G_inv(u) = np.sqrt(1 - u**(2/3))
  8849. #
  8850. # Case 2:
  8851. # g(x) = chi**2 * x * exp(-chi**2 * (1-x**2)/2) / (1 - exp(-chi**2 /2))
  8852. # h(x) = sqrt(1 - x**2), 0 <= x <= 1
  8853. # one can show that
  8854. # G_inv(u) = np.sqrt(2*np.log(u*(np.exp(chi**2/2)-1)+1))/chi
  8855. # = np.sqrt(1 + 2*np.log(np.exp(-chi**2/2)*(1-u)+u)/chi**2)
  8856. # the latter expression is used for precision with small chi
  8857. #
  8858. # In both cases, the inverse cdf of g can be written analytically, and
  8859. # we can apply the rejection method:
  8860. #
  8861. # REPEAT
  8862. # Generate U uniformly distributed on [0, 1]
  8863. # Generate X with density g (e.g. via inverse transform sampling:
  8864. # X = G_inv(V) with V uniformly distributed on [0, 1])
  8865. # UNTIL X <= h(X)
  8866. # RETURN X
  8867. #
  8868. # We use case 1 for chi <= 0.5 as it maintains precision for small chi
  8869. # and case 2 for 0.5 < chi <= 1.8 due to its speed for moderate chi.
  8870. #
  8871. # if chi > 1.8:
  8872. # use relation to the Gamma distribution: if X is ARGUS with parameter
  8873. # chi), then Y = chi**2 * (1 - X**2) / 2 has density proportional to
  8874. # sqrt(u) * exp(-u) on [0, chi**2 / 2], i.e. a Gamma(3/2) distribution
  8875. # conditioned on [0, chi**2 / 2]). Therefore, to sample X from the
  8876. # ARGUS distribution, we sample Y from the gamma distribution, keeping
  8877. # only samples on [0, chi**2 / 2], and apply the inverse
  8878. # transformation X = (1 - 2*Y/chi**2)**(1/2). Since we only
  8879. # look at chi > 1.8, gamma(1.5).cdf(chi**2/2) is large enough such
  8880. # Y falls in the interval [0, chi**2 / 2] with a high probability:
  8881. # stats.gamma(1.5).cdf(1.8**2/2) = 0.644...
  8882. #
  8883. # The points to switch between the different methods are determined
  8884. # by a comparison of the runtime of the different methods. However,
  8885. # the runtime is platform-dependent. The implemented values should
  8886. # ensure a good overall performance and are supported by an analysis
  8887. # of the rejection constants of different methods.
  8888. size1d = tuple(np.atleast_1d(numsamples))
  8889. N = int(np.prod(size1d))
  8890. x = np.zeros(N)
  8891. simulated = 0
  8892. chi2 = chi * chi
  8893. if chi <= 0.5:
  8894. d = -chi2 / 2
  8895. while simulated < N:
  8896. k = N - simulated
  8897. u = random_state.uniform(size=k)
  8898. v = random_state.uniform(size=k)
  8899. z = v**(2/3)
  8900. # acceptance condition: u <= h(G_inv(v)). This simplifies to
  8901. accept = (np.log(u) <= d * z)
  8902. num_accept = np.sum(accept)
  8903. if num_accept > 0:
  8904. # we still need to transform z=v**(2/3) to X = G_inv(v)
  8905. rvs = np.sqrt(1 - z[accept])
  8906. x[simulated:(simulated + num_accept)] = rvs
  8907. simulated += num_accept
  8908. elif chi <= 1.8:
  8909. echi = np.exp(-chi2 / 2)
  8910. while simulated < N:
  8911. k = N - simulated
  8912. u = random_state.uniform(size=k)
  8913. v = random_state.uniform(size=k)
  8914. z = 2 * np.log(echi * (1 - v) + v) / chi2
  8915. # as in case one, simplify u <= h(G_inv(v)) and then transform
  8916. # z to the target distribution X = G_inv(v)
  8917. accept = (u**2 + z <= 0)
  8918. num_accept = np.sum(accept)
  8919. if num_accept > 0:
  8920. rvs = np.sqrt(1 + z[accept])
  8921. x[simulated:(simulated + num_accept)] = rvs
  8922. simulated += num_accept
  8923. else:
  8924. # conditional Gamma for chi > 1.8
  8925. while simulated < N:
  8926. k = N - simulated
  8927. g = random_state.standard_gamma(1.5, size=k)
  8928. accept = (g <= chi2 / 2)
  8929. num_accept = np.sum(accept)
  8930. if num_accept > 0:
  8931. x[simulated:(simulated + num_accept)] = g[accept]
  8932. simulated += num_accept
  8933. x = np.sqrt(1 - 2 * x / chi2)
  8934. return np.reshape(x, size1d)
  8935. def _stats(self, chi):
  8936. # need to ensure that dtype is float
  8937. # otherwise the mask below does not work for integers
  8938. chi = np.asarray(chi, dtype=float)
  8939. phi = _argus_phi(chi)
  8940. m = np.sqrt(np.pi/8) * chi * sc.ive(1, chi**2/4) / phi
  8941. # compute second moment, use Taylor expansion for small chi (<= 0.1)
  8942. mu2 = np.empty_like(chi)
  8943. mask = chi > 0.1
  8944. c = chi[mask]
  8945. mu2[mask] = 1 - 3 / c**2 + c * _norm_pdf(c) / phi[mask]
  8946. c = chi[~mask]
  8947. coef = [-358/65690625, 0, -94/1010625, 0, 2/2625, 0, 6/175, 0, 0.4]
  8948. mu2[~mask] = np.polyval(coef, c)
  8949. return m, mu2 - m**2, None, None
  8950. argus = argus_gen(name='argus', longname="An Argus Function", a=0.0, b=1.0)
  8951. class rv_histogram(rv_continuous):
  8952. """
  8953. Generates a distribution given by a histogram.
  8954. This is useful to generate a template distribution from a binned
  8955. datasample.
  8956. As a subclass of the `rv_continuous` class, `rv_histogram` inherits from it
  8957. a collection of generic methods (see `rv_continuous` for the full list),
  8958. and implements them based on the properties of the provided binned
  8959. datasample.
  8960. Parameters
  8961. ----------
  8962. histogram : tuple of array_like
  8963. Tuple containing two array_like objects.
  8964. The first containing the content of n bins,
  8965. the second containing the (n+1) bin boundaries.
  8966. In particular, the return value of `numpy.histogram` is accepted.
  8967. density : bool, optional
  8968. If False, assumes the histogram is proportional to counts per bin;
  8969. otherwise, assumes it is proportional to a density.
  8970. For constant bin widths, these are equivalent, but the distinction
  8971. is important when bin widths vary (see Notes).
  8972. If None (default), sets ``density=True`` for backwards compatibility,
  8973. but warns if the bin widths are variable. Set `density` explicitly
  8974. to silence the warning.
  8975. .. versionadded:: 1.10.0
  8976. Notes
  8977. -----
  8978. When a histogram has unequal bin widths, there is a distinction between
  8979. histograms that are proportional to counts per bin and histograms that are
  8980. proportional to probability density over a bin. If `numpy.histogram` is
  8981. called with its default ``density=False``, the resulting histogram is the
  8982. number of counts per bin, so ``density=False`` should be passed to
  8983. `rv_histogram`. If `numpy.histogram` is called with ``density=True``, the
  8984. resulting histogram is in terms of probability density, so ``density=True``
  8985. should be passed to `rv_histogram`. To avoid warnings, always pass
  8986. ``density`` explicitly when the input histogram has unequal bin widths.
  8987. There are no additional shape parameters except for the loc and scale.
  8988. The pdf is defined as a stepwise function from the provided histogram.
  8989. The cdf is a linear interpolation of the pdf.
  8990. .. versionadded:: 0.19.0
  8991. Examples
  8992. --------
  8993. Create a scipy.stats distribution from a numpy histogram
  8994. >>> import scipy.stats
  8995. >>> import numpy as np
  8996. >>> data = scipy.stats.norm.rvs(size=100000, loc=0, scale=1.5,
  8997. ... random_state=123)
  8998. >>> hist = np.histogram(data, bins=100)
  8999. >>> hist_dist = scipy.stats.rv_histogram(hist, density=False)
  9000. Behaves like an ordinary scipy rv_continuous distribution
  9001. >>> hist_dist.pdf(1.0)
  9002. 0.20538577847618705
  9003. >>> hist_dist.cdf(2.0)
  9004. 0.90818568543056499
  9005. PDF is zero above (below) the highest (lowest) bin of the histogram,
  9006. defined by the max (min) of the original dataset
  9007. >>> hist_dist.pdf(np.max(data))
  9008. 0.0
  9009. >>> hist_dist.cdf(np.max(data))
  9010. 1.0
  9011. >>> hist_dist.pdf(np.min(data))
  9012. 7.7591907244498314e-05
  9013. >>> hist_dist.cdf(np.min(data))
  9014. 0.0
  9015. PDF and CDF follow the histogram
  9016. >>> import matplotlib.pyplot as plt
  9017. >>> X = np.linspace(-5.0, 5.0, 100)
  9018. >>> fig, ax = plt.subplots()
  9019. >>> ax.set_title("PDF from Template")
  9020. >>> ax.hist(data, density=True, bins=100)
  9021. >>> ax.plot(X, hist_dist.pdf(X), label='PDF')
  9022. >>> ax.plot(X, hist_dist.cdf(X), label='CDF')
  9023. >>> ax.legend()
  9024. >>> fig.show()
  9025. """
  9026. _support_mask = rv_continuous._support_mask
  9027. def __init__(self, histogram, *args, density=None, **kwargs):
  9028. """
  9029. Create a new distribution using the given histogram
  9030. Parameters
  9031. ----------
  9032. histogram : tuple of array_like
  9033. Tuple containing two array_like objects.
  9034. The first containing the content of n bins,
  9035. the second containing the (n+1) bin boundaries.
  9036. In particular, the return value of np.histogram is accepted.
  9037. density : bool, optional
  9038. If False, assumes the histogram is proportional to counts per bin;
  9039. otherwise, assumes it is proportional to a density.
  9040. For constant bin widths, these are equivalent.
  9041. If None (default), sets ``density=True`` for backward
  9042. compatibility, but warns if the bin widths are variable. Set
  9043. `density` explicitly to silence the warning.
  9044. """
  9045. self._histogram = histogram
  9046. self._density = density
  9047. if len(histogram) != 2:
  9048. raise ValueError("Expected length 2 for parameter histogram")
  9049. self._hpdf = np.asarray(histogram[0])
  9050. self._hbins = np.asarray(histogram[1])
  9051. if len(self._hpdf) + 1 != len(self._hbins):
  9052. raise ValueError("Number of elements in histogram content "
  9053. "and histogram boundaries do not match, "
  9054. "expected n and n+1.")
  9055. self._hbin_widths = self._hbins[1:] - self._hbins[:-1]
  9056. bins_vary = not np.allclose(self._hbin_widths, self._hbin_widths[0])
  9057. if density is None and bins_vary:
  9058. message = ("Bin widths are not constant. Assuming `density=True`."
  9059. "Specify `density` explicitly to silence this warning.")
  9060. warnings.warn(message, RuntimeWarning, stacklevel=2)
  9061. density = True
  9062. elif not density:
  9063. self._hpdf = self._hpdf / self._hbin_widths
  9064. self._hpdf = self._hpdf / float(np.sum(self._hpdf * self._hbin_widths))
  9065. self._hcdf = np.cumsum(self._hpdf * self._hbin_widths)
  9066. self._hpdf = np.hstack([0.0, self._hpdf, 0.0])
  9067. self._hcdf = np.hstack([0.0, self._hcdf])
  9068. # Set support
  9069. kwargs['a'] = self.a = self._hbins[0]
  9070. kwargs['b'] = self.b = self._hbins[-1]
  9071. super().__init__(*args, **kwargs)
  9072. def _pdf(self, x):
  9073. """
  9074. PDF of the histogram
  9075. """
  9076. return self._hpdf[np.searchsorted(self._hbins, x, side='right')]
  9077. def _cdf(self, x):
  9078. """
  9079. CDF calculated from the histogram
  9080. """
  9081. return np.interp(x, self._hbins, self._hcdf)
  9082. def _ppf(self, x):
  9083. """
  9084. Percentile function calculated from the histogram
  9085. """
  9086. return np.interp(x, self._hcdf, self._hbins)
  9087. def _munp(self, n):
  9088. """Compute the n-th non-central moment."""
  9089. integrals = (self._hbins[1:]**(n+1) - self._hbins[:-1]**(n+1)) / (n+1)
  9090. return np.sum(self._hpdf[1:-1] * integrals)
  9091. def _entropy(self):
  9092. """Compute entropy of distribution"""
  9093. hpdf = self._hpdf[1:-1]
  9094. res = xpx.apply_where(hpdf > 0.0, hpdf, np.log, fill_value=0.0)
  9095. return -np.sum(hpdf * res * self._hbin_widths)
  9096. def _updated_ctor_param(self):
  9097. """
  9098. Set the histogram as additional constructor argument
  9099. """
  9100. dct = super()._updated_ctor_param()
  9101. dct['histogram'] = self._histogram
  9102. dct['density'] = self._density
  9103. return dct
  9104. class studentized_range_gen(rv_continuous):
  9105. r"""A studentized range continuous random variable.
  9106. %(before_notes)s
  9107. See Also
  9108. --------
  9109. t: Student's t distribution
  9110. Notes
  9111. -----
  9112. The probability density function for `studentized_range` is:
  9113. .. math::
  9114. f(x; k, \nu) = \frac{k(k-1)\nu^{\nu/2}}{\Gamma(\nu/2)
  9115. 2^{\nu/2-1}} \int_{0}^{\infty} \int_{-\infty}^{\infty}
  9116. s^{\nu} e^{-\nu s^2/2} \phi(z) \phi(sx + z)
  9117. [\Phi(sx + z) - \Phi(z)]^{k-2} \,dz \,ds
  9118. for :math:`x ≥ 0`, :math:`k > 1`, and :math:`\nu > 0`.
  9119. `studentized_range` takes ``k`` for :math:`k` and ``df`` for :math:`\nu`
  9120. as shape parameters.
  9121. When :math:`\nu` exceeds 100,000, an asymptotic approximation (infinite
  9122. degrees of freedom) is used to compute the cumulative distribution
  9123. function [4]_ and probability distribution function.
  9124. %(after_notes)s
  9125. References
  9126. ----------
  9127. .. [1] "Studentized range distribution",
  9128. https://en.wikipedia.org/wiki/Studentized_range_distribution
  9129. .. [2] Batista, Ben Dêivide, et al. "Externally Studentized Normal Midrange
  9130. Distribution." Ciência e Agrotecnologia, vol. 41, no. 4, 2017, pp.
  9131. 378-389., doi:10.1590/1413-70542017414047716.
  9132. .. [3] Harter, H. Leon. "Tables of Range and Studentized Range." The Annals
  9133. of Mathematical Statistics, vol. 31, no. 4, 1960, pp. 1122-1147.
  9134. JSTOR, www.jstor.org/stable/2237810. Accessed 18 Feb. 2021.
  9135. .. [4] Lund, R. E., and J. R. Lund. "Algorithm AS 190: Probabilities and
  9136. Upper Quantiles for the Studentized Range." Journal of the Royal
  9137. Statistical Society. Series C (Applied Statistics), vol. 32, no. 2,
  9138. 1983, pp. 204-210. JSTOR, www.jstor.org/stable/2347300. Accessed 18
  9139. Feb. 2021.
  9140. Examples
  9141. --------
  9142. >>> import numpy as np
  9143. >>> from scipy.stats import studentized_range
  9144. >>> import matplotlib.pyplot as plt
  9145. >>> fig, ax = plt.subplots(1, 1)
  9146. Display the probability density function (``pdf``):
  9147. >>> k, df = 3, 10
  9148. >>> x = np.linspace(studentized_range.ppf(0.01, k, df),
  9149. ... studentized_range.ppf(0.99, k, df), 100)
  9150. >>> ax.plot(x, studentized_range.pdf(x, k, df),
  9151. ... 'r-', lw=5, alpha=0.6, label='studentized_range pdf')
  9152. Alternatively, the distribution object can be called (as a function)
  9153. to fix the shape, location and scale parameters. This returns a "frozen"
  9154. RV object holding the given parameters fixed.
  9155. Freeze the distribution and display the frozen ``pdf``:
  9156. >>> rv = studentized_range(k, df)
  9157. >>> ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')
  9158. Check accuracy of ``cdf`` and ``ppf``:
  9159. >>> vals = studentized_range.ppf([0.001, 0.5, 0.999], k, df)
  9160. >>> np.allclose([0.001, 0.5, 0.999], studentized_range.cdf(vals, k, df))
  9161. True
  9162. Rather than using (``studentized_range.rvs``) to generate random variates,
  9163. which is very slow for this distribution, we can approximate the inverse
  9164. CDF using an interpolator, and then perform inverse transform sampling
  9165. with this approximate inverse CDF.
  9166. This distribution has an infinite but thin right tail, so we focus our
  9167. attention on the leftmost 99.9 percent.
  9168. >>> a, b = studentized_range.ppf([0, .999], k, df)
  9169. >>> a, b
  9170. 0, 7.41058083802274
  9171. >>> from scipy.interpolate import interp1d
  9172. >>> rng = np.random.default_rng()
  9173. >>> xs = np.linspace(a, b, 50)
  9174. >>> cdf = studentized_range.cdf(xs, k, df)
  9175. # Create an interpolant of the inverse CDF
  9176. >>> ppf = interp1d(cdf, xs, fill_value='extrapolate')
  9177. # Perform inverse transform sampling using the interpolant
  9178. >>> r = ppf(rng.uniform(size=1000))
  9179. And compare the histogram:
  9180. >>> ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
  9181. >>> ax.legend(loc='best', frameon=False)
  9182. >>> plt.show()
  9183. """
  9184. def _argcheck(self, k, df):
  9185. return (k > 1) & (df > 0)
  9186. def _shape_info(self):
  9187. ik = _ShapeInfo("k", False, (1, np.inf), (False, False))
  9188. idf = _ShapeInfo("df", False, (0, np.inf), (False, False))
  9189. return [ik, idf]
  9190. def _fitstart(self, data):
  9191. # Default is k=1, but that is not a valid value of the parameter.
  9192. return super()._fitstart(data, args=(2, 1))
  9193. def _munp(self, K, k, df):
  9194. cython_symbol = '_studentized_range_moment'
  9195. _a, _b = self._get_support()
  9196. # all three of these are used to create a numpy array so they must
  9197. # be the same shape.
  9198. def _single_moment(K, k, df):
  9199. log_const = _stats._studentized_range_pdf_logconst(k, df)
  9200. arg = [K, k, df, log_const]
  9201. usr_data = np.array(arg, float).ctypes.data_as(ctypes.c_void_p)
  9202. llc = LowLevelCallable.from_cython(_stats, cython_symbol, usr_data)
  9203. ranges = [(-np.inf, np.inf), (0, np.inf), (_a, _b)]
  9204. opts = dict(epsabs=1e-11, epsrel=1e-12)
  9205. return integrate.nquad(llc, ranges=ranges, opts=opts)[0]
  9206. ufunc = np.frompyfunc(_single_moment, 3, 1)
  9207. return np.asarray(ufunc(K, k, df), dtype=np.float64)[()]
  9208. def _pdf(self, x, k, df):
  9209. def _single_pdf(q, k, df):
  9210. # The infinite form of the PDF is derived from the infinite
  9211. # CDF.
  9212. if df < 100000:
  9213. cython_symbol = '_studentized_range_pdf'
  9214. log_const = _stats._studentized_range_pdf_logconst(k, df)
  9215. arg = [q, k, df, log_const]
  9216. usr_data = np.array(arg, float).ctypes.data_as(ctypes.c_void_p)
  9217. ranges = [(-np.inf, np.inf), (0, np.inf)]
  9218. else:
  9219. cython_symbol = '_studentized_range_pdf_asymptotic'
  9220. arg = [q, k]
  9221. usr_data = np.array(arg, float).ctypes.data_as(ctypes.c_void_p)
  9222. ranges = [(-np.inf, np.inf)]
  9223. llc = LowLevelCallable.from_cython(_stats, cython_symbol, usr_data)
  9224. opts = dict(epsabs=1e-11, epsrel=1e-12)
  9225. return integrate.nquad(llc, ranges=ranges, opts=opts)[0]
  9226. ufunc = np.frompyfunc(_single_pdf, 3, 1)
  9227. return np.asarray(ufunc(x, k, df), dtype=np.float64)[()]
  9228. def _cdf(self, x, k, df):
  9229. def _single_cdf(q, k, df):
  9230. # "When the degrees of freedom V are infinite the probability
  9231. # integral takes [on a] simpler form," and a single asymptotic
  9232. # integral is evaluated rather than the standard double integral.
  9233. # (Lund, Lund, page 205)
  9234. if df < 100000:
  9235. cython_symbol = '_studentized_range_cdf'
  9236. log_const = _stats._studentized_range_cdf_logconst(k, df)
  9237. arg = [q, k, df, log_const]
  9238. usr_data = np.array(arg, float).ctypes.data_as(ctypes.c_void_p)
  9239. ranges = [(-np.inf, np.inf), (0, np.inf)]
  9240. else:
  9241. cython_symbol = '_studentized_range_cdf_asymptotic'
  9242. arg = [q, k]
  9243. usr_data = np.array(arg, float).ctypes.data_as(ctypes.c_void_p)
  9244. ranges = [(-np.inf, np.inf)]
  9245. llc = LowLevelCallable.from_cython(_stats, cython_symbol, usr_data)
  9246. opts = dict(epsabs=1e-11, epsrel=1e-12)
  9247. return integrate.nquad(llc, ranges=ranges, opts=opts)[0]
  9248. ufunc = np.frompyfunc(_single_cdf, 3, 1)
  9249. # clip p-values to ensure they are in [0, 1].
  9250. return np.clip(np.asarray(ufunc(x, k, df), dtype=np.float64)[()], 0, 1)
  9251. studentized_range = studentized_range_gen(name='studentized_range', a=0,
  9252. b=np.inf)
  9253. class rel_breitwigner_gen(rv_continuous):
  9254. r"""A relativistic Breit-Wigner random variable.
  9255. %(before_notes)s
  9256. See Also
  9257. --------
  9258. cauchy: Cauchy distribution, also known as the Breit-Wigner distribution.
  9259. Notes
  9260. -----
  9261. The probability density function for `rel_breitwigner` is
  9262. .. math::
  9263. f(x, \rho) = \frac{k}{(x^2 - \rho^2)^2 + \rho^2}
  9264. where
  9265. .. math::
  9266. k = \frac{2\sqrt{2}\rho^2\sqrt{\rho^2 + 1}}
  9267. {\pi\sqrt{\rho^2 + \rho\sqrt{\rho^2 + 1}}}
  9268. The relativistic Breit-Wigner distribution is used in high energy physics
  9269. to model resonances [1]_. It gives the uncertainty in the invariant mass,
  9270. :math:`M` [2]_, of a resonance with characteristic mass :math:`M_0` and
  9271. decay-width :math:`\Gamma`, where :math:`M`, :math:`M_0` and :math:`\Gamma`
  9272. are expressed in natural units. In SciPy's parametrization, the shape
  9273. parameter :math:`\rho` is equal to :math:`M_0/\Gamma` and takes values in
  9274. :math:`(0, \infty)`.
  9275. Equivalently, the relativistic Breit-Wigner distribution is said to give
  9276. the uncertainty in the center-of-mass energy :math:`E_{\text{cm}}`. In
  9277. natural units, the speed of light :math:`c` is equal to 1 and the invariant
  9278. mass :math:`M` is equal to the rest energy :math:`Mc^2`. In the
  9279. center-of-mass frame, the rest energy is equal to the total energy [3]_.
  9280. %(after_notes)s
  9281. :math:`\rho = M/\Gamma` and :math:`\Gamma` is the scale parameter. For
  9282. example, if one seeks to model the :math:`Z^0` boson with :math:`M_0
  9283. \approx 91.1876 \text{ GeV}` and :math:`\Gamma \approx 2.4952\text{ GeV}`
  9284. [4]_ one can set ``rho=91.1876/2.4952`` and ``scale=2.4952``.
  9285. To ensure a physically meaningful result when using the `fit` method, one
  9286. should set ``floc=0`` to fix the location parameter to 0.
  9287. References
  9288. ----------
  9289. .. [1] Relativistic Breit-Wigner distribution, Wikipedia,
  9290. https://en.wikipedia.org/wiki/Relativistic_Breit-Wigner_distribution
  9291. .. [2] Invariant mass, Wikipedia,
  9292. https://en.wikipedia.org/wiki/Invariant_mass
  9293. .. [3] Center-of-momentum frame, Wikipedia,
  9294. https://en.wikipedia.org/wiki/Center-of-momentum_frame
  9295. .. [4] M. Tanabashi et al. (Particle Data Group) Phys. Rev. D 98, 030001 -
  9296. Published 17 August 2018
  9297. %(example)s
  9298. """
  9299. def _argcheck(self, rho):
  9300. return rho > 0
  9301. def _shape_info(self):
  9302. return [_ShapeInfo("rho", False, (0, np.inf), (False, False))]
  9303. def _pdf(self, x, rho):
  9304. # C = k / rho**2
  9305. C = np.sqrt(
  9306. 2 * (1 + 1/rho**2) / (1 + np.sqrt(1 + 1/rho**2))
  9307. ) * 2 / np.pi
  9308. with np.errstate(over='ignore'):
  9309. return C / (((x - rho)*(x + rho)/rho)**2 + 1)
  9310. def _cdf(self, x, rho):
  9311. # C = k / (2 * rho**2) / np.sqrt(1 + 1/rho**2)
  9312. C = np.sqrt(2/(1 + np.sqrt(1 + 1/rho**2)))/np.pi
  9313. result = (
  9314. np.sqrt(-1 + 1j/rho)
  9315. * np.arctan(x/np.sqrt(-rho*(rho + 1j)))
  9316. )
  9317. result = C * 2 * np.imag(result)
  9318. # Sometimes above formula produces values greater than 1.
  9319. return np.clip(result, None, 1)
  9320. def _munp(self, n, rho):
  9321. if n == 0:
  9322. return 1.
  9323. if n == 1:
  9324. # C = k / (2 * rho)
  9325. C = np.sqrt(
  9326. 2 * (1 + 1/rho**2) / (1 + np.sqrt(1 + 1/rho**2))
  9327. ) / np.pi * rho
  9328. return C * (np.pi/2 + np.arctan(rho))
  9329. if n == 2:
  9330. # C = pi * k / (4 * rho)
  9331. C = np.sqrt(
  9332. (1 + 1/rho**2) / (2 * (1 + np.sqrt(1 + 1/rho**2)))
  9333. ) * rho
  9334. result = (1 - rho * 1j) / np.sqrt(-1 - 1j/rho)
  9335. return 2 * C * np.real(result)
  9336. else:
  9337. return np.inf
  9338. def _stats(self, rho):
  9339. # Returning None from stats makes public stats use _munp.
  9340. # nan values will be omitted from public stats. Skew and
  9341. # kurtosis are actually infinite.
  9342. return None, None, np.nan, np.nan
  9343. @inherit_docstring_from(rv_continuous)
  9344. def fit(self, data, *args, **kwds):
  9345. # Override rv_continuous.fit to better handle case where floc is set.
  9346. data, _, floc, fscale = _check_fit_input_parameters(
  9347. self, data, args, kwds
  9348. )
  9349. censored = isinstance(data, CensoredData)
  9350. if censored:
  9351. if data.num_censored() == 0:
  9352. # There are no censored values in data, so replace the
  9353. # CensoredData instance with a regular array.
  9354. data = data._uncensored
  9355. censored = False
  9356. if floc is None or censored:
  9357. return super().fit(data, *args, **kwds)
  9358. if fscale is None:
  9359. # The interquartile range approximates the scale parameter gamma.
  9360. # The median approximates rho * gamma.
  9361. p25, p50, p75 = np.quantile(data - floc, [0.25, 0.5, 0.75])
  9362. scale_0 = p75 - p25
  9363. rho_0 = p50 / scale_0
  9364. if not args:
  9365. args = [rho_0]
  9366. if "scale" not in kwds:
  9367. kwds["scale"] = scale_0
  9368. else:
  9369. M_0 = np.median(data - floc)
  9370. rho_0 = M_0 / fscale
  9371. if not args:
  9372. args = [rho_0]
  9373. return super().fit(data, *args, **kwds)
  9374. rel_breitwigner = rel_breitwigner_gen(a=0.0, name="rel_breitwigner")
  9375. # Collect names of classes and objects in this module.
  9376. pairs = list(globals().copy().items())
  9377. _distn_names, _distn_gen_names = get_distribution_names(pairs, rv_continuous)
  9378. __all__ = _distn_names + _distn_gen_names + ['rv_histogram']