| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
27712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761
77717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762
27722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594259525962597259825992600260126022603260426052606260726082609261026112612261326142615261626172618261926202621262226232624262526262627262826292630263126322633263426352636263726382639264026412642264326442645264626472648264926502651265226532654265526562657265826592660266126622663266426652666266726682669267026712672267326742675267626772678267926802681268226832684268526862687268826892690269126922693269426952696269726982699270027012702270327042705270627072708270927102711271227132714271527162717271827192720272127222723272427252726272727282729273027312732273327342735273627372738273927402741274227432744274527462747274827492750275127522753275427552756275727582759276027612762276327642765276627672768276927702771277227732774277527762
77727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025302630273028302930303031303230333034303530363037303830393040304130423043304430453046304730483049305030513052305330543055305630573058305930603061306230633064306530663067306830693070307130723073307430753076307730783079308030813082308330843085308630873088308930903091309230933094309530963097309830993100310131023103310431053106310731083109311031113112311331143115311631173118311931203121312231233124312531263127312831293130313131323133313431353136313731383139314031413142314331443145314631473148314931503151315231533154315531563157315831593160316131623163316431653166316731683169317031713172317331743175317631773178317931803181318231833184318531863187318831893190319131923193319431953196319731983199320032013202320332043205320632073208320932103211321232133214321532163217321832193220322132223223322432253226322732283229323032313232323332343235323632373238323932403241324232433244324532463247324832493250325132523253325432553256325732583259326032613262326332643265326632673268326932703271327232733274327532763
27732783279328032813282328332843285328632873288328932903291329232933294329532963297329832993300330133023303330433053306330733083309331033113312331333143315331633173318331933203321332233233324332533263327332833293330333133323333333433353336333733383339334033413342334333443345334633473348334933503351335233533354335533563357335833593360336133623363336433653366336733683369337033713372337333743375337633773378337933803381338233833384338533863387338833893390339133923393339433953396339733983399340034013402340334043405340634073408340934103411341234133414341534163417341834193420342134223423342434253426342734283429343034313432343334343435343634373438343934403441344234433444344534463447344834493450345134523453345434553456345734583459346034613462346334643465346634673468346934703471347234733474347534763477347834793480348134823483348434853486348734883489349034913492349334943495349634973498349935003501350235033504350535063507350835093510351135123513351435153516351735183519352035213522352335243525352635273528352935303531353235333534353535363537353835393540354135423543354435453546354735483549355035513552355335543555355635573558355935603561356235633564356535663567356835693570357135723573357435753576357735783579358035813582358335843585358635873588358935903591359235933594359535963597359835993600360136023603360436053606360736083609361036113612361336143615361636173618361936203621362236233624362536263627362836293630363136323633363436353636363736383639364036413642364336443645364636473648364936503651365236533654365536563657365836593660366136623663366436653666366736683669367036713672367336743675367636773678367936803681368236833684368536863687368836893690369136923693369436953696369736983699370037013702370337043705370637073708370937103711371237133714371537163717371837193720372137223723372437253726372737283729373037313732373337343735373637373738373937403741374237433744374537463747374837493750375137523753375437553756375737583759376037613762376337643765376637673768376937703771377237733774377537763
77737783779378037813782378337843785378637873788378937903791379237933794379537963797379837993800380138023803380438053806380738083809381038113812381338143815381638173818381938203821382238233824382538263827382838293830383138323833383438353836383738383839384038413842384338443845384638473848384938503851385238533854385538563857385838593860386138623863386438653866386738683869387038713872387338743875387638773878387938803881388238833884388538863887388838893890389138923893389438953896389738983899390039013902390339043905390639073908390939103911391239133914391539163917391839193920392139223923392439253926392739283929393039313932393339343935393639373938393939403941394239433944394539463947394839493950395139523953395439553956395739583959396039613962396339643965396639673968396939703971397239733974397539763977397839793980398139823983398439853986398739883989399039913992399339943995399639973998399940004001400240034004400540064007400840094010401140124013401440154016401740184019402040214022402340244025402640274028402940304031403240334034403540364037403840394040404140424043404440454046404740484049405040514052405340544055405640574058405940604061406240634064406540664067406840694070407140724073407440754076407740784079408040814082408340844085408640874088408940904091409240934094409540964097409840994100410141024103410441054106410741084109411041114112411341144115411641174118411941204121412241234124412541264127412841294130413141324133413441354136413741384139414041414142414341444145414641474148414941504151415241534154415541564157415841594160416141624163416441654166416741684169417041714172417341744175417641774178417941804181418241834184418541864187418841894190419141924193419441954196419741984199420042014202420342044205420642074208420942104211421242134214421542164217421842194220422142224223422442254226422742284229423042314232423342344235423642374238423942404241424242434244424542464247424842494250425142524253425442554256425742584259426042614262426342644265426642674268426942704271427242734274427542764
27742784279428042814282428342844285428642874288428942904291429242934294429542964297429842994300430143024303430443054306430743084309431043114312431343144315431643174318431943204321432243234324432543264327432843294330433143324333433443354336433743384339434043414342434343444345434643474348434943504351435243534354435543564357435843594360436143624363436443654366436743684369437043714372437343744375437643774378437943804381438243834384438543864387438843894390439143924393439443954396439743984399440044014402440344044405440644074408440944104411441244134414441544164417441844194420442144224423442444254426442744284429443044314432443344344435443644374438443944404441444244434444444544464447444844494450445144524453445444554456445744584459446044614462446344644465446644674468446944704471447244734474447544764477447844794480448144824483448444854486448744884489449044914492449344944495449644974498449945004501450245034504450545064507450845094510451145124513451445154516451745184519452045214522452345244525452645274528452945304531453245334534453545364537453845394540454145424543454445454546454745484549455045514552455345544555455645574558455945604561456245634564456545664567456845694570457145724573457445754576457745784579458045814582458345844585458645874588458945904591459245934594459545964597459845994600460146024603460446054606460746084609461046114612461346144615461646174618461946204621462246234624462546264627462846294630463146324633463446354636463746384639464046414642464346444645464646474648464946504651465246534654465546564657465846594660466146624663466446654666466746684669467046714672467346744675467646774678467946804681468246834684468546864687468846894690469146924693469446954696469746984699470047014702470347044705470647074708470947104711471247134714471547164717471847194720472147224723472447254726472747284729473047314732473347344735473647374738473947404741474247434744474547464747474847494750475147524753475447554756475747584759476047614762476347644765476647674768476947704771477247734774477547764
77747784779478047814782478347844785478647874788478947904791479247934794479547964797479847994800480148024803480448054806480748084809481048114812481348144815481648174818481948204821482248234824482548264827482848294830483148324833483448354836483748384839484048414842484348444845484648474848484948504851485248534854485548564857485848594860486148624863486448654866486748684869487048714872487348744875487648774878487948804881488248834884488548864887488848894890489148924893489448954896489748984899490049014902490349044905490649074908490949104911491249134914491549164917491849194920492149224923492449254926492749284929493049314932493349344935493649374938493949404941494249434944494549464947494849494950495149524953495449554956495749584959496049614962496349644965496649674968496949704971497249734974497549764977497849794980498149824983498449854986498749884989499049914992499349944995499649974998499950005001500250035004500550065007500850095010501150125013501450155016501750185019502050215022502350245025502650275028502950305031503250335034503550365037503850395040504150425043504450455046504750485049505050515052505350545055505650575058505950605061506250635064506550665067506850695070507150725073507450755076507750785079508050815082508350845085508650875088508950905091509250935094509550965097509850995100510151025103510451055106510751085109511051115112511351145115511651175118511951205121512251235124512551265127512851295130513151325133513451355136513751385139514051415142514351445145514651475148514951505151515251535154515551565157515851595160516151625163516451655166516751685169517051715172517351745175517651775178517951805181518251835184518551865187518851895190519151925193519451955196519751985199520052015202520352045205520652075208520952105211521252135214521552165217521852195220522152225223522452255226522752285229523052315232523352345235523652375238523952405241524252435244524552465247524852495250525152525253525452555256525752585259526052615262526352645265526652675268526952705271527252735274527552765
27752785279528052815282528352845285528652875288528952905291529252935294529552965297529852995300530153025303530453055306530753085309531053115312531353145315531653175318531953205321532253235324532553265327532853295330533153325333533453355336533753385339534053415342534353445345534653475348534953505351535253535354535553565357535853595360536153625363536453655366536753685369537053715372537353745375537653775378537953805381538253835384538553865387538853895390539153925393539453955396539753985399540054015402540354045405540654075408540954105411541254135414541554165417541854195420542154225423542454255426542754285429543054315432543354345435543654375438543954405441544254435444544554465447544854495450545154525453545454555456545754585459546054615462546354645465546654675468546954705471547254735474547554765477547854795480548154825483548454855486548754885489549054915492549354945495549654975498549955005501550255035504550555065507550855095510551155125513551455155516551755185519552055215522552355245525552655275528552955305531553255335534553555365537553855395540554155425543554455455546554755485549555055515552555355545555555655575558555955605561556255635564556555665567556855695570557155725573557455755576557755785579558055815582558355845585558655875588558955905591559255935594559555965597559855995600560156025603560456055606560756085609561056115612561356145615561656175618561956205621562256235624562556265627562856295630563156325633563456355636563756385639564056415642564356445645564656475648564956505651565256535654565556565657565856595660566156625663566456655666566756685669567056715672567356745675567656775678567956805681568256835684568556865687568856895690569156925693569456955696569756985699570057015702570357045705570657075708570957105711571257135714571557165717571857195720572157225723572457255726572757285729573057315732573357345735573657375738573957405741574257435744574557465747574857495750575157525753575457555756575757585759576057615762576357645765576657675768576957705771577257735774577557765
77757785779578057815782578357845785578657875788578957905791579257935794579557965797579857995800580158025803580458055806580758085809581058115812581358145815581658175818581958205821582258235824582558265827582858295830583158325833583458355836583758385839584058415842584358445845584658475848584958505851585258535854585558565857585858595860586158625863586458655866586758685869587058715872587358745875587658775878587958805881588258835884588558865887588858895890589158925893589458955896589758985899590059015902590359045905590659075908590959105911591259135914591559165917591859195920592159225923592459255926592759285929593059315932593359345935593659375938593959405941594259435944594559465947594859495950595159525953595459555956595759585959596059615962596359645965596659675968596959705971597259735974597559765977597859795980598159825983598459855986598759885989599059915992599359945995599659975998599960006001600260036004600560066007600860096010601160126013601460156016601760186019602060216022602360246025602660276028602960306031603260336034603560366037603860396040604160426043604460456046604760486049605060516052605360546055605660576058605960606061606260636064606560666067606860696070607160726073607460756076607760786079608060816082608360846085608660876088608960906091609260936094609560966097609860996100610161026103610461056106610761086109611061116112611361146115611661176118611961206121612261236124612561266127612861296130613161326133613461356136613761386139614061416142614361446145614661476148614961506151615261536154615561566157615861596160616161626163616461656166616761686169617061716172617361746175617661776178617961806181618261836184618561866187618861896190619161926193619461956196619761986199620062016202620362046205620662076208620962106211621262136214621562166217621862196220622162226223622462256226622762286229623062316232623362346235623662376238623962406241624262436244624562466247624862496250625162526253625462556256625762586259626062616262626362646265626662676268626962706271627262736274627562766
27762786279628062816282628362846285628662876288628962906291629262936294629562966297629862996300630163026303630463056306630763086309631063116312631363146315631663176318631963206321632263236324632563266327632863296330633163326333633463356336633763386339634063416342634363446345634663476348634963506351635263536354635563566357635863596360636163626363636463656366636763686369637063716372637363746375637663776378637963806381638263836384638563866387638863896390639163926393639463956396639763986399640064016402640364046405640664076408640964106411641264136414641564166417641864196420642164226423642464256426642764286429643064316432643364346435643664376438643964406441644264436444644564466447644864496450645164526453645464556456645764586459646064616462646364646465646664676468646964706471647264736474647564766477647864796480648164826483648464856486648764886489649064916492649364946495649664976498649965006501650265036504650565066507650865096510651165126513651465156516651765186519652065216522652365246525652665276528652965306531653265336534653565366537653865396540654165426543654465456546654765486549655065516552655365546555655665576558655965606561656265636564656565666567656865696570657165726573657465756576657765786579658065816582658365846585658665876588658965906591659265936594659565966597659865996600660166026603660466056606660766086609661066116612661366146615661666176618661966206621662266236624662566266627662866296630663166326633663466356636663766386639664066416642664366446645664666476648664966506651665266536654665566566657665866596660666166626663666466656666666766686669667066716672667366746675667666776678667966806681668266836684668566866687668866896690669166926693669466956696669766986699670067016702670367046705670667076708670967106711671267136714671567166717671867196720672167226723672467256726672767286729673067316732673367346735673667376738673967406741674267436744674567466747674867496750675167526753675467556756675767586759676067616762676367646765676667676768676967706771677267736774677567766
77767786779678067816782678367846785678667876788678967906791679267936794679567966797679867996800680168026803680468056806680768086809681068116812681368146815681668176818681968206821682268236824682568266827682868296830683168326833683468356836683768386839684068416842684368446845684668476848684968506851685268536854685568566857685868596860686168626863686468656866686768686869687068716872687368746875687668776878687968806881688268836884688568866887688868896890689168926893689468956896689768986899690069016902690369046905690669076908690969106911691269136914691569166917691869196920692169226923692469256926692769286929693069316932693369346935693669376938693969406941694269436944694569466947694869496950695169526953695469556956695769586959696069616962696369646965696669676968696969706971697269736974697569766977697869796980698169826983698469856986698769886989699069916992699369946995699669976998699970007001700270037004700570067007700870097010701170127013701470157016701770187019702070217022702370247025702670277028702970307031703270337034703570367037703870397040704170427043704470457046704770487049705070517052705370547055705670577058705970607061706270637064706570667067706870697070707170727073707470757076707770787079708070817082708370847085708670877088708970907091709270937094709570967097709870997100710171027103710471057106710771087109711071117112711371147115711671177118711971207121712271237124712571267127712871297130713171327133713471357136713771387139714071417142714371447145714671477148714971507151715271537154715571567157715871597160716171627163716471657166716771687169717071717172717371747175717671777178717971807181718271837184718571867187718871897190719171927193719471957196719771987199720072017202720372047205720672077208720972107211721272137214721572167217721872197220722172227223722472257226722772287229723072317232723372347235723672377238723972407241724272437244724572467247724872497250725172527253725472557256725772587259726072617262726372647265726672677268726972707271727272737274727572767
27772787279728072817282728372847285728672877288728972907291729272937294729572967297729872997300730173027303730473057306730773087309731073117312731373147315731673177318731973207321732273237324732573267327732873297330733173327333733473357336733773387339734073417342734373447345734673477348734973507351735273537354735573567357735873597360736173627363736473657366736773687369737073717372737373747375737673777378737973807381738273837384738573867387738873897390739173927393739473957396739773987399740074017402740374047405740674077408740974107411741274137414741574167417741874197420742174227423742474257426742774287429743074317432743374347435743674377438743974407441744274437444744574467447744874497450745174527453745474557456745774587459746074617462746374647465746674677468746974707471747274737474747574767477747874797480748174827483748474857486748774887489749074917492749374947495749674977498749975007501750275037504750575067507750875097510751175127513751475157516751775187519752075217522752375247525752675277528752975307531753275337534753575367537753875397540754175427543754475457546754775487549755075517552755375547555755675577558755975607561756275637564756575667567756875697570757175727573757475757576757775787579758075817582758375847585758675877588758975907591759275937594759575967597759875997600760176027603760476057606760776087609761076117612761376147615761676177618761976207621762276237624762576267627762876297630763176327633763476357636763776387639764076417642764376447645764676477648764976507651765276537654765576567657765876597660766176627663766476657666766776687669767076717672767376747675767676777678767976807681768276837684768576867687768876897690769176927693769476957696769776987699770077017702770377047705770677077708770977107711771277137714771577167717771877197720772177227723772477257726772777287729773077317732773377347735773677377738773977407741774277437744774577467747774877497750775177527753775477557756775777587759776077617762776377647765776677677768776977707771777277737774777577767
77777787779778077817782778377847785778677877788778977907791779277937794779577967797779877997800780178027803780478057806780778087809781078117812781378147815781678177818781978207821782278237824782578267827782878297830783178327833783478357836783778387839784078417842784378447845784678477848784978507851785278537854785578567857785878597860786178627863786478657866786778687869787078717872787378747875787678777878787978807881788278837884788578867887788878897890789178927893789478957896789778987899790079017902790379047905790679077908790979107911791279137914791579167917791879197920792179227923792479257926792779287929793079317932793379347935793679377938793979407941794279437944794579467947794879497950795179527953795479557956795779587959796079617962796379647965796679677968796979707971797279737974797579767977797879797980798179827983798479857986798779887989799079917992799379947995799679977998799980008001800280038004800580068007800880098010801180128013801480158016801780188019802080218022802380248025802680278028802980308031803280338034803580368037803880398040804180428043804480458046804780488049805080518052805380548055805680578058805980608061806280638064806580668067806880698070807180728073807480758076807780788079808080818082808380848085808680878088808980908091809280938094809580968097809880998100810181028103810481058106810781088109811081118112811381148115811681178118811981208121812281238124812581268127812881298130813181328133813481358136813781388139814081418142814381448145814681478148814981508151815281538154815581568157815881598160816181628163816481658166816781688169817081718172817381748175817681778178817981808181818281838184818581868187818881898190819181928193819481958196819781988199820082018202820382048205820682078208820982108211821282138214821582168217821882198220822182228223822482258226822782288229823082318232823382348235823682378238823982408241824282438244824582468247824882498250825182528253825482558256825782588259826082618262826382648265826682678268826982708271827282738274827582768
27782788279828082818282828382848285828682878288828982908291829282938294829582968297829882998300830183028303830483058306830783088309831083118312831383148315831683178318831983208321832283238324832583268327832883298330833183328333833483358336833783388339834083418342834383448345834683478348834983508351835283538354835583568357835883598360836183628363836483658366836783688369837083718372837383748375837683778378837983808381838283838384838583868387838883898390839183928393839483958396839783988399840084018402840384048405840684078408840984108411841284138414841584168417841884198420842184228423842484258426842784288429843084318432843384348435843684378438843984408441844284438444844584468447844884498450845184528453845484558456845784588459846084618462846384648465846684678468846984708471847284738474847584768477847884798480848184828483848484858486848784888489849084918492849384948495849684978498849985008501850285038504850585068507850885098510851185128513851485158516851785188519852085218522852385248525852685278528852985308531853285338534853585368537853885398540854185428543854485458546854785488549855085518552855385548555855685578558855985608561856285638564856585668567856885698570857185728573857485758576857785788579858085818582858385848585858685878588858985908591859285938594859585968597859885998600860186028603860486058606860786088609861086118612861386148615861686178618861986208621862286238624862586268627862886298630863186328633863486358636863786388639864086418642864386448645864686478648864986508651865286538654865586568657865886598660866186628663866486658666866786688669867086718672867386748675867686778678867986808681868286838684868586868687868886898690869186928693869486958696869786988699870087018702870387048705870687078708870987108711871287138714871587168717871887198720872187228723872487258726872787288729873087318732873387348735873687378738873987408741874287438744874587468747874887498750875187528753875487558756875787588759876087618762876387648765876687678768876987708771877287738774877587768
77787788779878087818782878387848785878687878788878987908791879287938794879587968797879887998800880188028803880488058806880788088809881088118812881388148815881688178818881988208821882288238824882588268827882888298830883188328833883488358836883788388839884088418842884388448845884688478848884988508851885288538854885588568857885888598860886188628863886488658866886788688869887088718872887388748875887688778878887988808881888288838884888588868887888888898890889188928893889488958896889788988899890089018902890389048905890689078908890989108911891289138914891589168917891889198920892189228923892489258926892789288929893089318932893389348935893689378938893989408941894289438944894589468947894889498950895189528953895489558956895789588959896089618962896389648965896689678968896989708971897289738974897589768977897889798980898189828983898489858986898789888989899089918992899389948995899689978998899990009001900290039004900590069007900890099010901190129013901490159016901790189019902090219022902390249025902690279028902990309031903290339034903590369037903890399040904190429043904490459046904790489049905090519052905390549055905690579058905990609061906290639064906590669067906890699070907190729073907490759076907790789079908090819082908390849085908690879088908990909091909290939094909590969097909890999100910191029103910491059106910791089109911091119112911391149115911691179118911991209121912291239124912591269127912891299130913191329133913491359136913791389139914091419142914391449145914691479148914991509151915291539154915591569157915891599160916191629163916491659166916791689169917091719172917391749175917691779178917991809181918291839184918591869187918891899190919191929193919491959196919791989199920092019202920392049205920692079208920992109211921292139214921592169217921892199220922192229223922492259226922792289229923092319232923392349235923692379238923992409241924292439244924592469247924892499250925192529253925492559256925792589259926092619262926392649265926692679268926992709271927292739274927592769
27792789279928092819282928392849285928692879288928992909291929292939294929592969297929892999300930193029303930493059306930793089309931093119312931393149315931693179318931993209321932293239324932593269327932893299330933193329333933493359336933793389339934093419342934393449345934693479348934993509351935293539354935593569357935893599360936193629363936493659366936793689369937093719372937393749375937693779378937993809381938293839384938593869387938893899390939193929393939493959396939793989399940094019402940394049405940694079408940994109411941294139414941594169417941894199420942194229423942494259426942794289429943094319432943394349435943694379438943994409441944294439444944594469447944894499450945194529453945494559456945794589459946094619462946394649465946694679468946994709471947294739474947594769477947894799480948194829483948494859486948794889489949094919492949394949495949694979498949995009501950295039504950595069507950895099510951195129513951495159516951795189519952095219522952395249525952695279528952995309531953295339534953595369537953895399540954195429543954495459546954795489549955095519552955395549555955695579558955995609561956295639564956595669567956895699570957195729573957495759576957795789579958095819582958395849585958695879588958995909591959295939594959595969597959895999600960196029603960496059606960796089609961096119612961396149615961696179618961996209621962296239624962596269627962896299630963196329633963496359636963796389639964096419642964396449645964696479648964996509651965296539654965596569657965896599660966196629663966496659666966796689669967096719672967396749675967696779678967996809681968296839684968596869687968896899690969196929693969496959696969796989699970097019702970397049705970697079708970997109711971297139714971597169717971897199720972197229723972497259726972797289729973097319732973397349735973697379738973997409741974297439744974597469747974897499750975197529753975497559756975797589759976097619762976397649765976697679768976997709771977297739774977597769
77797789779978097819782978397849785978697879788978997909791979297939794979597969797979897999800980198029803980498059806980798089809981098119812981398149815981698179818981998209821982298239824982598269827982898299830983198329833983498359836983798389839984098419842984398449845984698479848984998509851985298539854985598569857985898599860986198629863986498659866986798689869987098719872987398749875987698779878987998809881988298839884988598869887988898899890989198929893989498959896989798989899990099019902990399049905990699079908990999109911991299139914991599169917991899199920992199229923992499259926992799289929993099319932993399349935993699379938993999409941994299439944994599469947994899499950995199529953995499559956995799589959996099619962996399649965996699679968996999709971997299739974997599769977997899799980998199829983998499859986998799889989999099919992999399949995999699979998999910000100011000210003100041000510006100071000810009100101001110012100131001410015100161001710018100191002010021100221002310024100251002610027100281002910030100311003210033100341003510036100371003810039100401004110042100431004410045100461004710048100491005010051100521005310054100551005610057100581005910060100611006210063100641006510066100671006810069100701007110072100731007410075100761007710078100791008010081100821008310084100851008610087100881008910090100911009210093100941009510096100971009810099101001010110102101031010410105101061010710108101091011010111101121011310114101151011610117101181011910120101211012210123101241012510126101271012810129101301013110132101331013410135101361013710138101391014010141101421014310144101451014610147101481014910150101511015210153101541015510156101571015810159101601016110162101631016410165101661016710168101691017010171101721017310174101751017610177101781017910180101811018210183101841018510186101871018810189101901019110192101931019410195101961019710198101991020010201102021020310204102051020610207102081020910210102111021210213102141021510216102171021810219102201022
11022210223102241022510226102271022810229102301023110232102331023410235102361023710238102391024010241102421024310244102451024610247102481024910250102511025210253102541025510256102571025810259102601026110262102631026410265102661026710268102691027010271102721027310274102751027610277102781027910280102811028210283102841028510286102871028810289102901029110292102931029410295102961029710298102991030010301103021030310304103051030610307103081030910310103111031210313103141031510316103171031810319103201032110322103231032410325103261032710328103291033010331103321033310334103351033610337103381033910340103411034210343103441034510346103471034810349103501035110352103531035410355103561035710358103591036010361103621036310364103651036610367103681036910370103711037210373103741037510376103771037810379103801038110382103831038410385103861038710388103891039010391103921039310394103951039610397103981039910400104011040210403104041040510406104071040810409104101041110412104131041410415104161041710418104191042010421104221042310424104251042610427104281042910430104311043210433104341043510436104371043810439104401044110442104431044410445104461044710448104491045010451104521045310454104551045610457104581045910460104611046210463104641046510466104671046810469104701047110472104731047410475104761047710478104791048010481104821048310484104851048610487104881048910490104911049210493104941049510496104971049810499105001050110502105031050410505105061050710508105091051010511105121051310514105151051610517105181051910520105211052210523105241052510526105271052810529105301053110532105331053410535105361053710538105391054010541105421054310544105451054610547105481054910550105511055210553105541055510556105571055810559105601056110562105631056410565105661056710568105691057010571105721057310574105751057610577105781057910580105811058210583105841058510586105871058810589105901059110592105931059410595105961059710598105991060010601106021060310604106051060610607106081060910610106111061210613106141061510616106171061810619106201062
11062210623106241062510626106271062810629106301063110632106331063410635106361063710638106391064010641106421064310644106451064610647106481064910650106511065210653106541065510656106571065810659106601066110662106631066410665106661066710668106691067010671106721067310674106751067610677106781067910680106811068210683106841068510686106871068810689106901069110692106931069410695106961069710698106991070010701107021070310704107051070610707107081070910710107111071210713107141071510716107171071810719107201072110722107231072410725107261072710728107291073010731107321073310734107351073610737107381073910740107411074210743107441074510746107471074810749107501075110752107531075410755107561075710758107591076010761107621076310764107651076610767107681076910770107711077210773107741077510776107771077810779107801078110782107831078410785107861078710788107891079010791107921079310794107951079610797107981079910800108011080210803108041080510806108071080810809108101081110812108131081410815108161081710818108191082010821108221082310824108251082610827108281082910830108311083210833108341083510836108371083810839108401084110842108431084410845108461084710848108491085010851108521085310854108551085610857108581085910860108611086210863108641086510866108671086810869108701087110872108731087410875108761087710878108791088010881108821088310884108851088610887108881088910890108911089210893108941089510896108971089810899109001090110902109031090410905109061090710908109091091010911109121091310914109151091610917109181091910920109211092210923109241092510926109271092810929109301093110932109331093410935109361093710938109391094010941109421094310944109451094610947109481094910950109511095210953109541095510956109571095810959109601096110962109631096410965109661096710968109691097010971109721097310974109751097610977109781097910980109811098210983109841098510986109871098810989109901099110992109931099410995109961099710998109991100011001110021100311004110051100611007110081100911010110111101211013110141101511016110171101811019110201102
1110221102311024110251102611027110281102911030110311103211033110341103511036 |
- # coding=utf-8
- # Copyright 2019-present, the HuggingFace Inc. team.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- from __future__ import annotations
- import inspect
- import json
- import re
- import struct
- import time
- import warnings
- from collections import defaultdict
- from concurrent.futures import Future, ThreadPoolExecutor
- from dataclasses import asdict, dataclass, field
- from datetime import datetime
- from functools import wraps
- from itertools import islice
- from pathlib import Path
- from textwrap import dedent
- from typing import (
- TYPE_CHECKING,
- Any,
- BinaryIO,
- Callable,
- Dict,
- Iterable,
- Iterator,
- List,
- Literal,
- Optional,
- Tuple,
- Type,
- TypeVar,
- Union,
- overload,
- )
- from urllib.parse import quote
- import requests
- from requests.exceptions import HTTPError
- from tqdm.auto import tqdm as base_tqdm
- from tqdm.contrib.concurrent import thread_map
- from . import constants
- from ._commit_api import (
- CommitOperation,
- CommitOperationAdd,
- CommitOperationCopy,
- CommitOperationDelete,
- _fetch_files_to_copy,
- _fetch_upload_modes,
- _prepare_commit_payload,
- _upload_files,
- _warn_on_overwriting_operations,
- )
- from ._inference_endpoints import InferenceEndpoint, InferenceEndpointType
- from ._jobs_api import JobInfo, JobSpec, ScheduledJobInfo, _create_job_spec
- from ._space_api import SpaceHardware, SpaceRuntime, SpaceStorage, SpaceVariable
- from ._upload_large_folder import upload_large_folder_internal
- from .community import (
- Discussion,
- DiscussionComment,
- DiscussionStatusChange,
- DiscussionTitleChange,
- DiscussionWithDetails,
- deserialize_event,
- )
- from .constants import (
- DEFAULT_ETAG_TIMEOUT, # noqa: F401 # kept for backward compatibility
- DEFAULT_REQUEST_TIMEOUT, # noqa: F401 # kept for backward compatibility
- DEFAULT_REVISION, # noqa: F401 # kept for backward compatibility
- DISCUSSION_STATUS, # noqa: F401 # kept for backward compatibility
- DISCUSSION_TYPES, # noqa: F401 # kept for backward compatibility
- ENDPOINT, # noqa: F401 # kept for backward compatibility
- INFERENCE_ENDPOINTS_ENDPOINT, # noqa: F401 # kept for backward compatibility
- REGEX_COMMIT_OID, # noqa: F401 # kept for backward compatibility
- REPO_TYPE_MODEL, # noqa: F401 # kept for backward compatibility
- REPO_TYPES, # noqa: F401 # kept for backward compatibility
- REPO_TYPES_MAPPING, # noqa: F401 # kept for backward compatibility
- REPO_TYPES_URL_PREFIXES, # noqa: F401 # kept for backward compatibility
- SAFETENSORS_INDEX_FILE, # noqa: F401 # kept for backward compatibility
- SAFETENSORS_MAX_HEADER_LENGTH, # noqa: F401 # kept for backward compatibility
- SAFETENSORS_SINGLE_FILE, # noqa: F401 # kept for backward compatibility
- SPACES_SDK_TYPES, # noqa: F401 # kept for backward compatibility
- WEBHOOK_DOMAIN_T, # noqa: F401 # kept for backward compatibility
- DiscussionStatusFilter, # noqa: F401 # kept for backward compatibility
- DiscussionTypeFilter, # noqa: F401 # kept for backward compatibility
- )
- from .errors import (
- BadRequestError,
- EntryNotFoundError,
- GatedRepoError,
- HfHubHTTPError,
- RepositoryNotFoundError,
- RevisionNotFoundError,
- )
- from .file_download import HfFileMetadata, get_hf_file_metadata, hf_hub_url
- from .repocard_data import DatasetCardData, ModelCardData, SpaceCardData
- from .utils import (
- DEFAULT_IGNORE_PATTERNS,
- HfFolder, # noqa: F401 # kept for backward compatibility
- LocalTokenNotFoundError,
- NotASafetensorsRepoError,
- SafetensorsFileMetadata,
- SafetensorsParsingError,
- SafetensorsRepoMetadata,
- TensorInfo,
- build_hf_headers,
- chunk_iterable,
- experimental,
- filter_repo_objects,
- fix_hf_endpoint_in_url,
- get_session,
- get_token,
- hf_raise_for_status,
- logging,
- paginate,
- parse_datetime,
- validate_hf_hub_args,
- )
- from .utils import tqdm as hf_tqdm
- from .utils._auth import _get_token_from_environment, _get_token_from_file, _get_token_from_google_colab
- from .utils._deprecation import _deprecate_arguments, _deprecate_method
- from .utils._typing import CallableT
- from .utils.endpoint_helpers import _is_emission_within_threshold
- if TYPE_CHECKING:
- from .inference._providers import PROVIDER_T
- R = TypeVar("R") # Return type
- CollectionItemType_T = Literal["model", "dataset", "space", "paper", "collection"] # NOTE(review): presumably the kinds of items a collection can hold - confirm against callers
- ExpandModelProperty_T = Literal[ # closed set of model property names; presumably the values accepted by an `expand` argument - TODO confirm
- "author",
- "baseModels",
- "cardData",
- "childrenModelCount",
- "config",
- "createdAt",
- "disabled",
- "downloads",
- "downloadsAllTime",
- "gated",
- "gguf",
- "inference",
- "inferenceProviderMapping",
- "lastModified",
- "library_name",
- "likes",
- "mask_token",
- "model-index",
- "pipeline_tag",
- "private",
- "resourceGroup",
- "safetensors",
- "sha",
- "siblings",
- "spaces",
- "tags",
- "transformersInfo",
- "trendingScore",
- "usedStorage",
- "widgetData",
- "xetEnabled",
- ]
- ExpandDatasetProperty_T = Literal[ # closed set of dataset property names; presumably the dataset counterpart of ExpandModelProperty_T - TODO confirm
- "author",
- "cardData",
- "citation",
- "createdAt",
- "description",
- "disabled",
- "downloads",
- "downloadsAllTime",
- "gated",
- "lastModified",
- "likes",
- "paperswithcode_id",
- "private",
- "resourceGroup",
- "sha",
- "siblings",
- "tags",
- "trendingScore",
- "usedStorage",
- "xetEnabled",
- ]
- ExpandSpaceProperty_T = Literal[ # closed set of space property names; presumably the space counterpart of ExpandModelProperty_T - TODO confirm
- "author",
- "cardData",
- "createdAt",
- "datasets",
- "disabled",
- "lastModified",
- "likes",
- "models",
- "private",
- "resourceGroup",
- "runtime",
- "sdk",
- "sha",
- "siblings",
- "subdomain",
- "tags",
- "trendingScore",
- "usedStorage",
- "xetEnabled",
- ]
- USERNAME_PLACEHOLDER = "hf_user" # NOTE(review): placeholder username string; where it is substituted is not visible in this chunk
- _REGEX_DISCUSSION_URL = re.compile(r".*/discussions/(\d+)$") # any prefix, then "/discussions/", then digits up to end of string; group 1 captures the digits
- _CREATE_COMMIT_NO_REPO_ERROR_MESSAGE = ( # hint text beginning with a newline; presumably appended to an error raised when committing to a missing repo - confirm
- "\nNote: Creating a commit assumes that the repo already exists on the"
- " Huggingface Hub. Please use `create_repo` if it's not the case."
- )
- _AUTH_CHECK_NO_REPO_ERROR_MESSAGE = ( # hint text beginning with a newline; presumably appended to an error raised by an access check - confirm
- "\nNote: The repository either does not exist or you do not have access rights."
- " Please check the repository ID and your access permissions."
- " If this is a private repository, ensure that your token is correct."
- )
- logger = logging.get_logger(__name__) # module-level logger; `logging` here is the helper imported from `.utils`, not the stdlib module
def repo_type_and_id_from_hf_id(hf_id: str, hub_url: Optional[str] = None) -> Tuple[Optional[str], Optional[str], str]:
    """
    Split a Hub repository reference into `(repo_type, namespace, repo_id)`.

    Args:
        hf_id (`str`):
            URL or identifier of a repository. Supported shapes:
            - https://huggingface.co/<repo_type>/<namespace>/<repo_id>
            - https://huggingface.co/<namespace>/<repo_id>
            - hf://<repo_type>/<namespace>/<repo_id>
            - hf://<namespace>/<repo_id>
            - <repo_type>/<namespace>/<repo_id>
            - <namespace>/<repo_id>
            - <repo_id>
        hub_url (`str`, *optional*):
            Base URL of the Hub. `constants.ENDPOINT` is used when `None`.

    Returns:
        A 3-tuple `(repo_type, namespace, repo_id)` where `repo_type` and `namespace`
        may be `None` and `repo_id` is always a `str`.

    Raises:
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If the reference is neither recognised as a Hub URL nor made of at most
            three "/"-separated segments.
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If the resolved `repo_type` is not one of `constants.REPO_TYPES`.
    """
    original_input = hf_id  # kept untouched for the final error message

    # Compare against the scheme-less hub host.
    endpoint = constants.ENDPOINT if hub_url is None else hub_url
    hub_url = re.sub(r"https?://", "", endpoint)

    # URL detection is done on the raw input, before any "hf://" prefix is stripped.
    looks_like_url = hub_url in hf_id and "@" not in hf_id

    hffs_prefix = "hf://"
    if hf_id.startswith(hffs_prefix):
        hf_id = hf_id[len(hffs_prefix) :]

    parts = hf_id.split("/")
    n_parts = len(parts)

    # Neither a Hub URL nor a short "a/b/c"-style identifier: nothing we can parse.
    if not looks_like_url and n_parts > 3:
        raise ValueError(f"Unable to retrieve user and repo ID from the passed HF ID: {hf_id}")

    namespace: Optional[str]
    if looks_like_url:
        # The two trailing segments are candidate namespace and repo name.
        namespace, repo_id = parts[-2:]
        if namespace == hub_url:
            # ".../<hub>/<repo_id>": the "namespace" slot is actually the host.
            namespace = None
        if n_parts > 2 and hub_url not in parts[-3]:
            repo_type = parts[-3]
        elif namespace in constants.REPO_TYPES_MAPPING:
            # Canonical dataset or model: the namespace slot holds the repo type.
            repo_type = constants.REPO_TYPES_MAPPING[namespace]
            namespace = None
        else:
            repo_type = None
    elif n_parts == 3:
        # <repo_type>/<user-or-org>/<repo_name>
        repo_type, namespace, repo_id = parts
    elif n_parts == 2:
        first, repo_id = parts
        if first in constants.REPO_TYPES_MAPPING:
            # e.g. 'datasets/<dataset_id>' for a canonical repo
            repo_type, namespace = constants.REPO_TYPES_MAPPING[first], None
        else:
            # <user-or-org>/<repo_name>
            repo_type, namespace = None, first
    else:
        # Bare <repo_name>
        repo_id = parts[0]
        namespace, repo_type = None, None

    # Normalise the repo type (e.g. "spaces" => "space"), and map the empty value to `None`.
    if repo_type in constants.REPO_TYPES_MAPPING:
        repo_type = constants.REPO_TYPES_MAPPING[repo_type]
    if repo_type == "":
        repo_type = None
    if repo_type not in constants.REPO_TYPES:
        raise ValueError(f"Unknown `repo_type`: '{repo_type}' ('{original_input}')")

    return repo_type, namespace, repo_id
@dataclass
class LastCommitInfo(dict):
    """Commit id, title and date, exposed both as attributes and as `dict` items.

    The `dict` base class is only there for backward compatibility: the items are a
    snapshot of the field values taken once, right after initialization.
    """

    oid: str
    title: str
    date: datetime

    def __post_init__(self):
        # Backward-compatibility hack: mirror every dataclass field as a dict entry.
        for key, value in asdict(self).items():
            self[key] = value
@dataclass
class BlobLfsInfo(dict):
    """LFS details of a blob (size, sha256, pointer size), usable as attributes or `dict` items.

    The `dict` base class is only there for backward compatibility: the items are a
    snapshot of the field values taken once, right after initialization.
    """

    size: int
    sha256: str
    pointer_size: int

    def __post_init__(self):
        # Backward-compatibility hack: mirror every dataclass field as a dict entry.
        for key, value in asdict(self).items():
            self[key] = value
@dataclass
class BlobSecurityInfo(dict):
    """Security-scan result of a blob, usable as attributes or `dict` items.

    The `dict` base class is only there for backward compatibility: the items are a
    snapshot of the field values taken once, right after initialization.
    """

    # Redundant with `status`; kept so that existing callers keep working.
    safe: bool
    status: str
    av_scan: Optional[Dict]
    pickle_import_scan: Optional[Dict]

    def __post_init__(self):
        # Backward-compatibility hack: mirror every dataclass field as a dict entry.
        for key, value in asdict(self).items():
            self[key] = value
@dataclass
class TransformersInfo(dict):
    """`auto_model` / `custom_class` / `pipeline_tag` / `processor` record, usable as attributes or `dict` items.

    The `dict` base class is only there for backward compatibility: the items are a
    snapshot of the field values taken once, right after initialization.
    """

    auto_model: str
    custom_class: Optional[str] = None
    # Possible `pipeline_tag` values are listed in
    # https://github.com/huggingface/huggingface.js/blob/3ee32554b8620644a6287e786b2a83bf5caf559c/packages/tasks/src/pipelines.ts#L72
    pipeline_tag: Optional[str] = None
    processor: Optional[str] = None

    def __post_init__(self):
        # Backward-compatibility hack: mirror every dataclass field as a dict entry.
        for key, value in asdict(self).items():
            self[key] = value
@dataclass
class SafeTensorsInfo(dict):
    """`parameters` mapping plus `total` count, usable as attributes or `dict` items.

    The `dict` base class is only there for backward compatibility: the items are a
    snapshot of the field values taken once, right after initialization.
    """

    parameters: Dict[str, int]
    total: int

    def __post_init__(self):
        # Backward-compatibility hack: mirror every dataclass field as a dict entry.
        for key, value in asdict(self).items():
            self[key] = value
@dataclass
class CommitInfo(str):
    """Data structure containing information about a newly created commit.

    Returned by any method that creates a commit on the Hub: [`create_commit`], [`upload_file`], [`upload_folder`],
    [`delete_file`], [`delete_folder`]. It inherits from `str` for backward compatibility but using methods specific
    to `str` is deprecated.

    Attributes:
        commit_url (`str`):
            Url where to find the commit.

        commit_message (`str`):
            The summary (first line) of the commit that has been created.

        commit_description (`str`):
            Description of the commit that has been created. Can be empty.

        oid (`str`):
            Commit hash id. Example: `"91c54ad1727ee830252e457677f467be0bfd8a57"`.

        pr_url (`str`, *optional*):
            Url to the PR that has been created, if any. Populated when `create_pr=True`
            is passed.

        pr_revision (`str`, *optional*):
            Revision of the PR that has been created, if any. Populated when
            `create_pr=True` is passed. Example: `"refs/pr/1"`.

        pr_num (`int`, *optional*):
            Number of the PR discussion that has been created, if any. Populated when
            `create_pr=True` is passed. Can be passed as `discussion_num` in
            [`get_discussion_details`]. Example: `1`.

        repo_url (`RepoUrl`):
            Repo URL of the commit containing info like repo_id, repo_type, etc.

        _url (`str`, *optional*):
            Legacy url for `str` compatibility. Can be the url to the uploaded file on the Hub (if returned by
            [`upload_file`]), to the uploaded folder on the Hub (if returned by [`upload_folder`]) or to the commit on
            the Hub (if returned by [`create_commit`]). Defaults to `commit_url`. It is deprecated to use this
            attribute. Please use `commit_url` instead.
    """

    commit_url: str
    commit_message: str
    commit_description: str
    oid: str
    pr_url: Optional[str] = None

    # Computed from `commit_url` in `__post_init__`
    repo_url: RepoUrl = field(init=False)

    # Computed from `pr_url` in `__post_init__`
    pr_revision: Optional[str] = field(init=False)
    # `__post_init__` stores an `int` here (or `None`), hence the `int` annotation.
    pr_num: Optional[int] = field(init=False)

    # legacy url for `str` compatibility (ex: url to uploaded file, url to uploaded folder, url to PR, etc.)
    _url: str = field(repr=False, default=None)  # type: ignore  # defaults to `commit_url`

    def __new__(cls, *args, commit_url: str, _url: Optional[str] = None, **kwargs):
        # `str` is immutable, so the string value must be chosen here rather than in `__init__`:
        # the legacy `_url` wins when provided (and truthy), otherwise `commit_url` is used.
        return str.__new__(cls, _url or commit_url)

    def __post_init__(self):
        """Populate pr-related fields after initialization.

        See https://docs.python.org/3.10/library/dataclasses.html#post-init-processing.
        """
        # Repo info: everything before "/commit/" in the commit URL is the repo URL.
        self.repo_url = RepoUrl(self.commit_url.split("/commit/")[0])

        # PR info
        if self.pr_url is not None:
            self.pr_revision = _parse_revision_from_pr_url(self.pr_url)
            # The PR number is the last "/"-separated segment of the revision (e.g. "refs/pr/1" -> 1).
            self.pr_num = int(self.pr_revision.split("/")[-1])
        else:
            self.pr_revision = None
            self.pr_num = None
@dataclass
class AccessRequest:
    """A single user access request.

    Attributes:
        username (`str`):
            Username of the requester.
        fullname (`str`):
            Full name of the requester.
        email (`Optional[str]`):
            Email of the requester. Can only be `None` in the /accepted list, when
            access was granted manually.
        timestamp (`datetime`):
            When the request was made.
        status (`Literal["pending", "accepted", "rejected"]`):
            Current state of the request: one of `"pending"`, `"accepted"` or `"rejected"`.
        fields (`Dict[str, Any]`, *optional*):
            Extra fields the user filled in the gate form.
    """

    username: str
    fullname: str
    email: Optional[str]
    timestamp: datetime
    status: Literal["pending", "accepted", "rejected"]

    # Extra gate-form answers provided by the user, if any.
    fields: Optional[Dict[str, Any]] = None
@dataclass
class WebhookWatchedItem:
    """One item watched by a webhook.

    Attributes:
        type (`Literal["dataset", "model", "org", "space", "user"]`):
            Kind of the watched item: one of `"dataset"`, `"model"`, `"org"`, `"space"` or `"user"`.
        name (`str`):
            Name of the watched item: a username, an organization name, or the name of a
            model, dataset or space.
    """

    type: Literal["dataset", "model", "org", "space", "user"]
    name: str
@dataclass
class WebhookInfo:
    """Description of a webhook.

    Exactly one of `url` and `job` is set; they are never both specified.

    Attributes:
        id (`str`):
            Identifier of the webhook.
        url (`str`, *optional*):
            Target URL of the webhook.
        job (`JobSpec`, *optional*):
            Specification of the Job the webhook triggers.
        watched (`List[WebhookWatchedItem]`):
            Items the webhook watches, see [`WebhookWatchedItem`].
        domains (`List[WEBHOOK_DOMAIN_T]`):
            Domains the webhook watches. Each can be one of `["repo", "discussions"]`.
        secret (`str`, *optional*):
            Secret associated with the webhook.
        disabled (`bool`):
            `True` when the webhook is disabled.
    """

    id: str
    url: Optional[str]
    job: Optional[JobSpec]
    watched: List[WebhookWatchedItem]
    domains: List[constants.WEBHOOK_DOMAIN_T]
    secret: Optional[str]
    disabled: bool
class RepoUrl(str):
    """`str` subclass representing the URL of a repo on the Hub.

    `HfApi.create_repo` returns a `RepoUrl`. The `str` base class is kept for backward
    compatibility. The URL is parsed when the object is created, which populates:
    - endpoint (`str`)
    - namespace (`Optional[str]`)
    - repo_name (`str`)
    - repo_id (`str`)
    - repo_type (`Literal["model", "dataset", "space"]`)
    - url (`str`)

    Args:
        url (`Any`):
            String value of the repo url.
        endpoint (`str`, *optional*):
            Endpoint of the Hub. Defaults to <https://huggingface.co>.

    Example:
    ```py
    >>> RepoUrl('https://huggingface.co/gpt2')
    RepoUrl('https://huggingface.co/gpt2', endpoint='https://huggingface.co', repo_type='model', repo_id='gpt2')

    >>> RepoUrl('https://hub-ci.huggingface.co/datasets/dummy_user/dummy_dataset', endpoint='https://hub-ci.huggingface.co')
    RepoUrl('https://hub-ci.huggingface.co/datasets/dummy_user/dummy_dataset', endpoint='https://hub-ci.huggingface.co', repo_type='dataset', repo_id='dummy_user/dummy_dataset')

    >>> RepoUrl('hf://datasets/my-user/my-dataset')
    RepoUrl('hf://datasets/my-user/my-dataset', endpoint='https://huggingface.co', repo_type='dataset', repo_id='my-user/my-dataset')

    >>> HfApi.create_repo("dummy_model")
    RepoUrl('https://huggingface.co/Wauplin/dummy_model', endpoint='https://huggingface.co', repo_type='model', repo_id='Wauplin/dummy_model')
    ```

    Raises:
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If URL cannot be parsed.
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If `repo_type` is unknown.
    """

    def __new__(cls, url: Any, endpoint: Optional[str] = None):
        # `str` is immutable: the (endpoint-fixed) value has to be set at creation time.
        fixed_url = fix_hf_endpoint_in_url(url, endpoint=endpoint)
        return super().__new__(cls, fixed_url)

    def __init__(self, url: Any, endpoint: Optional[str] = None) -> None:
        super().__init__()

        # Parse the string value of this object, not the raw `url` argument.
        self.endpoint = endpoint or constants.ENDPOINT
        parsed_type, parsed_namespace, parsed_name = repo_type_and_id_from_hf_id(self, hub_url=self.endpoint)

        # Expose the parsed pieces as attributes.
        self.namespace = parsed_namespace
        self.repo_name = parsed_name
        if parsed_namespace is None:
            self.repo_id = parsed_name
        else:
            self.repo_id = f"{parsed_namespace}/{parsed_name}"
        # A missing repo type means "model".
        self.repo_type = parsed_type or constants.REPO_TYPE_MODEL
        self.url = str(self)  # plain-`str` copy, just in case it's needed

    def __repr__(self) -> str:
        return f"RepoUrl('{self}', endpoint='{self.endpoint}', repo_type='{self.repo_type}', repo_id='{self.repo_id}')"
@dataclass
class RepoSibling:
    """
    Basic information about one file of a repo on the Hub.

    > [!TIP]
    > Only `rfilename` is guaranteed to be set. Listing repositories on the Hub (with [`list_models`],
    > [`list_datasets`] or [`list_spaces`]) returns file names only. File size, blob id and lfs details must be
    > requested specifically, one repo at a time (using [`model_info`], [`dataset_info`] or [`space_info`]), as
    > retrieving them adds more constraints on the backend server.

    Attributes:
        rfilename (str):
            File name, relative to the repo root.
        size (`int`, *optional*):
            File size in bytes. Defined when the `files_metadata` argument of [`repo_info`] is set to `True`,
            `None` otherwise.
        blob_id (`str`, *optional*):
            Git OID of the file. Defined when the `files_metadata` argument of [`repo_info`] is set to `True`,
            `None` otherwise.
        lfs (`BlobLfsInfo`, *optional*):
            LFS metadata of the file. Defined when the `files_metadata` argument of [`repo_info`] is set to `True`
            and the file is stored with Git LFS, `None` otherwise.
    """

    rfilename: str
    size: Optional[int] = None
    blob_id: Optional[str] = None
    lfs: Optional[BlobLfsInfo] = None
@dataclass
class RepoFile:
    """
    Information about one file on the Hub.

    Instances are built from keyword arguments using the payload key names
    (`path`, `size`, `oid`, `lfs`, `lastCommit`/`last_commit`, `securityFileStatus`).

    Attributes:
        path (str):
            File path, relative to the repo root.
        size (`int`):
            File size in bytes.
        blob_id (`str`):
            Git OID of the file.
        lfs (`BlobLfsInfo`, *optional*):
            LFS metadata of the file. `None` when no `lfs` entry was provided.
        last_commit (`LastCommitInfo`, *optional*):
            Metadata of the last commit that touched the file. Only defined if [`list_repo_tree`] and
            [`get_paths_info`] are called with `expand=True`.
        security (`BlobSecurityInfo`, *optional*):
            Security scan metadata of the file. Only defined if [`list_repo_tree`] and [`get_paths_info`]
            are called with `expand=True`.
    """

    path: str
    size: int
    blob_id: str
    lfs: Optional[BlobLfsInfo] = None
    last_commit: Optional[LastCommitInfo] = None
    security: Optional[BlobSecurityInfo] = None

    def __init__(self, **kwargs):
        # Mandatory entries: a missing key raises `KeyError`.
        self.path = kwargs.pop("path")
        self.size = kwargs.pop("size")
        self.blob_id = kwargs.pop("oid")

        # Optional LFS details.
        lfs_payload = kwargs.pop("lfs", None)
        if lfs_payload is None:
            self.lfs = None
        else:
            self.lfs = BlobLfsInfo(
                size=lfs_payload["size"],
                sha256=lfs_payload["oid"],
                pointer_size=lfs_payload["pointerSize"],
            )

        # Optional last-commit details: "lastCommit" is preferred, "last_commit" is the fallback.
        commit_payload = kwargs.pop("lastCommit", None)
        if not commit_payload:
            commit_payload = kwargs.pop("last_commit", None)
        if commit_payload is None:
            self.last_commit = None
        else:
            self.last_commit = LastCommitInfo(
                oid=commit_payload["id"],
                title=commit_payload["title"],
                date=parse_datetime(commit_payload["date"]),
            )

        # Optional security-scan details.
        scan_payload = kwargs.pop("securityFileStatus", None)
        if scan_payload is None:
            self.security = None
        else:
            scan_status = scan_payload["status"]
            self.security = BlobSecurityInfo(
                safe=scan_status == "safe",
                status=scan_status,
                av_scan=scan_payload["avScan"],
                pickle_import_scan=scan_payload["pickleImportScan"],
            )

        # Aliases kept for backwards compatibility.
        self.rfilename = self.path
        self.lastCommit = self.last_commit
@dataclass
class RepoFolder:
    """
    Describes a folder stored in a repo on the Hub.

    Attributes:
        path (str):
            Path of the folder, relative to the root of the repo.
        tree_id (`str`):
            Git OID of the folder.
        last_commit (`LastCommitInfo`, *optional*):
            Metadata of the last commit that touched the folder. Only defined if [`list_repo_tree`] and
            [`get_paths_info`] are called with `expand=True`.
    """

    path: str
    tree_id: str
    last_commit: Optional[LastCommitInfo] = None

    def __init__(self, **kwargs):
        # Mandatory keys: a missing one raises KeyError.
        self.path = kwargs.pop("path")
        self.tree_id = kwargs.pop("oid")

        # The last commit is accepted under both its camelCase and snake_case key.
        raw_commit = kwargs.pop("lastCommit", None) or kwargs.pop("last_commit", None)
        self.last_commit = (
            None
            if raw_commit is None
            else LastCommitInfo(
                oid=raw_commit["id"], title=raw_commit["title"], date=parse_datetime(raw_commit["date"])
            )
        )
@dataclass
class InferenceProviderMapping:
    """
    Mapping between a model on the Hugging Face Hub and its counterpart at an inference provider.

    Built from keyword arguments: `provider`, `hf_model_id`, `providerId`, `status` and `task` are mandatory
    (a missing one raises `KeyError`), while `adapter`, `adapterWeightsPath` and `type` default to `None`.
    Any other keyword argument is stored as-is as an instance attribute.
    """

    provider: "PROVIDER_T"  # Provider name
    hf_model_id: str  # ID of the model on the Hugging Face Hub
    provider_id: str  # ID of the model on the provider's side
    status: Literal["error", "live", "staging"]
    task: str

    adapter: Optional[str] = None
    adapter_weights_path: Optional[str] = None
    type: Optional[Literal["single-model", "tag-filter"]] = None

    def __init__(self, **kwargs):
        # (attribute name, payload key) pairs, consumed in declaration order.
        mandatory = (
            ("provider", "provider"),
            ("hf_model_id", "hf_model_id"),
            ("provider_id", "providerId"),
            ("status", "status"),
            ("task", "task"),
        )
        optional = (
            ("adapter", "adapter"),
            ("adapter_weights_path", "adapterWeightsPath"),
            ("type", "type"),
        )
        for attribute, key in mandatory:
            setattr(self, attribute, kwargs.pop(key))
        for attribute, key in optional:
            setattr(self, attribute, kwargs.pop(key, None))
        # Whatever is left over is kept as plain attributes.
        vars(self).update(kwargs)
@dataclass
class ModelInfo:
    """
    Contains information about a model on the Hub. This object is returned by [`model_info`] and [`list_models`].

    > [!TIP]
    > Most attributes of this class are optional. This is because the data returned by the Hub depends on the query made.
    > In general, the more specific the query, the more information is returned. On the contrary, when listing models
    > using [`list_models`] only a subset of the attributes are returned.

    Attributes:
        id (`str`):
            ID of model.
        author (`str`, *optional*):
            Author of the model.
        sha (`str`, *optional*):
            Repo SHA at this particular revision.
        created_at (`datetime`, *optional*):
            Date of creation of the repo on the Hub. Note that the lowest value is `2022-03-02T23:29:04.000Z`,
            corresponding to the date when we began to store creation dates.
        last_modified (`datetime`, *optional*):
            Date of last commit to the repo.
        private (`bool`):
            Is the repo private.
        disabled (`bool`, *optional*):
            Is the repo disabled.
        downloads (`int`):
            Number of downloads of the model over the last 30 days.
        downloads_all_time (`int`):
            Cumulated number of downloads of the model since its creation.
        gated (`Literal["auto", "manual", False]`, *optional*):
            Is the repo gated.
            If so, whether there is manual or automatic approval.
        gguf (`Dict`, *optional*):
            GGUF information of the model.
        inference (`Literal["warm"]`, *optional*):
            Status of the model on Inference Providers. Warm if the model is served by at least one provider.
        inference_provider_mapping (`List[InferenceProviderMapping]`, *optional*):
            A list of [`InferenceProviderMapping`] ordered after the user's provider order.
        likes (`int`):
            Number of likes of the model.
        library_name (`str`, *optional*):
            Library associated with the model.
        tags (`List[str]`):
            List of tags of the model. Compared to `card_data.tags`, contains extra tags computed by the Hub
            (e.g. supported libraries, model's arXiv).
        pipeline_tag (`str`, *optional*):
            Pipeline tag associated with the model.
        mask_token (`str`, *optional*):
            Mask token used by the model.
        widget_data (`Any`, *optional*):
            Widget data associated with the model.
        model_index (`Dict`, *optional*):
            Model index for evaluation.
        config (`Dict`, *optional*):
            Model configuration.
        transformers_info (`TransformersInfo`, *optional*):
            Transformers-specific info (auto class, processor, etc.) associated with the model.
        trending_score (`int`, *optional*):
            Trending score of the model.
        card_data (`ModelCardData`, *optional*):
            Model Card Metadata as a [`huggingface_hub.repocard_data.ModelCardData`] object.
        siblings (`List[RepoSibling]`):
            List of [`huggingface_hub.hf_api.RepoSibling`] objects that constitute the model.
        spaces (`List[str]`, *optional*):
            List of spaces using the model.
        safetensors (`SafeTensorsInfo`, *optional*):
            Model's safetensors information.
        security_repo_status (`Dict`, *optional*):
            Model's security scan status.
        xet_enabled (`bool`, *optional*):
            Value of the `xetEnabled` key of the payload, `None` if the key is absent.
    """

    id: str
    author: Optional[str]
    sha: Optional[str]
    created_at: Optional[datetime]
    last_modified: Optional[datetime]
    private: Optional[bool]
    disabled: Optional[bool]
    downloads: Optional[int]
    downloads_all_time: Optional[int]
    gated: Optional[Literal["auto", "manual", False]]
    gguf: Optional[Dict]
    inference: Optional[Literal["warm"]]
    inference_provider_mapping: Optional[List[InferenceProviderMapping]]
    likes: Optional[int]
    library_name: Optional[str]
    tags: Optional[List[str]]
    pipeline_tag: Optional[str]
    mask_token: Optional[str]
    card_data: Optional[ModelCardData]
    widget_data: Optional[Any]
    model_index: Optional[Dict]
    config: Optional[Dict]
    transformers_info: Optional[TransformersInfo]
    trending_score: Optional[int]
    siblings: Optional[List[RepoSibling]]
    spaces: Optional[List[str]]
    safetensors: Optional[SafeTensorsInfo]
    security_repo_status: Optional[Dict]
    xet_enabled: Optional[bool]

    def __init__(self, **kwargs):
        def pop_aliased(primary: str, fallback: str):
            # Pop BOTH spellings eagerly. With `pop(primary) or pop(fallback)` the fallback key stays in
            # `kwargs` whenever the primary value is truthy, and the final `__dict__.update(**kwargs)` would
            # then overwrite the parsed attribute with the raw fallback value.
            first = kwargs.pop(primary, None)
            second = kwargs.pop(fallback, None)
            return first or second

        self.id = kwargs.pop("id")  # only mandatory key
        self.author = kwargs.pop("author", None)
        self.sha = kwargs.pop("sha", None)
        last_modified = pop_aliased("lastModified", "last_modified")
        self.last_modified = parse_datetime(last_modified) if last_modified else None
        created_at = pop_aliased("createdAt", "created_at")
        self.created_at = parse_datetime(created_at) if created_at else None
        self.private = kwargs.pop("private", None)
        self.gated = kwargs.pop("gated", None)
        self.disabled = kwargs.pop("disabled", None)
        self.downloads = kwargs.pop("downloads", None)
        self.downloads_all_time = kwargs.pop("downloadsAllTime", None)
        self.likes = kwargs.pop("likes", None)
        self.library_name = kwargs.pop("library_name", None)
        self.gguf = kwargs.pop("gguf", None)

        self.inference = kwargs.pop("inference", None)

        # little hack to simplify Inference Providers logic and make it backward and forward compatible
        # right now, API returns a dict on model_info and a list on list_models. Let's harmonize to list.
        mapping = kwargs.pop("inferenceProviderMapping", None)
        if isinstance(mapping, list):
            self.inference_provider_mapping = [
                InferenceProviderMapping(**{**value, "hf_model_id": self.id}) for value in mapping
            ]
        elif isinstance(mapping, dict):
            # In the dict form the provider name is the key, so it is injected into each entry.
            self.inference_provider_mapping = [
                InferenceProviderMapping(**{**value, "hf_model_id": self.id, "provider": provider})
                for provider, value in mapping.items()
            ]
        elif mapping is None:
            self.inference_provider_mapping = None
        else:
            raise ValueError(
                f"Unexpected type for `inferenceProviderMapping`. Expecting `dict` or `list`. Got {mapping}."
            )

        self.tags = kwargs.pop("tags", None)
        self.pipeline_tag = kwargs.pop("pipeline_tag", None)
        self.mask_token = kwargs.pop("mask_token", None)
        self.trending_score = kwargs.pop("trendingScore", None)

        # A dict is converted to `ModelCardData`; any other value is stored unchanged.
        card_data = pop_aliased("cardData", "card_data")
        self.card_data = (
            ModelCardData(**card_data, ignore_metadata_errors=True) if isinstance(card_data, dict) else card_data
        )

        self.widget_data = kwargs.pop("widgetData", None)
        self.model_index = pop_aliased("model-index", "model_index")
        self.config = kwargs.pop("config", None)
        transformers_info = pop_aliased("transformersInfo", "transformers_info")
        self.transformers_info = TransformersInfo(**transformers_info) if transformers_info else None
        siblings = kwargs.pop("siblings", None)
        self.siblings = (
            [
                RepoSibling(
                    rfilename=sibling["rfilename"],
                    size=sibling.get("size"),
                    blob_id=sibling.get("blobId"),
                    lfs=(
                        BlobLfsInfo(
                            size=sibling["lfs"]["size"],
                            sha256=sibling["lfs"]["sha256"],
                            pointer_size=sibling["lfs"]["pointerSize"],
                        )
                        if sibling.get("lfs")
                        else None
                    ),
                )
                for sibling in siblings
            ]
            if siblings is not None
            else None
        )
        self.spaces = kwargs.pop("spaces", None)
        safetensors = kwargs.pop("safetensors", None)
        self.safetensors = (
            SafeTensorsInfo(
                parameters=safetensors["parameters"],
                total=safetensors["total"],
            )
            if safetensors
            else None
        )
        self.security_repo_status = kwargs.pop("securityRepoStatus", None)
        self.xet_enabled = kwargs.pop("xetEnabled", None)

        # backwards compatibility
        self.lastModified = self.last_modified
        self.cardData = self.card_data
        self.transformersInfo = self.transformers_info
        # Keys that were not consumed above are exposed as plain attributes.
        self.__dict__.update(**kwargs)
@dataclass
class DatasetInfo:
    """
    Contains information about a dataset on the Hub. This object is returned by [`dataset_info`] and [`list_datasets`].

    > [!TIP]
    > Most attributes of this class are optional. This is because the data returned by the Hub depends on the query made.
    > In general, the more specific the query, the more information is returned. On the contrary, when listing datasets
    > using [`list_datasets`] only a subset of the attributes are returned.

    Attributes:
        id (`str`):
            ID of dataset.
        author (`str`):
            Author of the dataset.
        sha (`str`):
            Repo SHA at this particular revision.
        created_at (`datetime`, *optional*):
            Date of creation of the repo on the Hub. Note that the lowest value is `2022-03-02T23:29:04.000Z`,
            corresponding to the date when we began to store creation dates.
        last_modified (`datetime`, *optional*):
            Date of last commit to the repo.
        private (`bool`):
            Is the repo private.
        disabled (`bool`, *optional*):
            Is the repo disabled.
        gated (`Literal["auto", "manual", False]`, *optional*):
            Is the repo gated.
            If so, whether there is manual or automatic approval.
        downloads (`int`):
            Number of downloads of the dataset over the last 30 days.
        downloads_all_time (`int`):
            Cumulated number of downloads of the dataset since its creation.
        likes (`int`):
            Number of likes of the dataset.
        tags (`List[str]`):
            List of tags of the dataset.
        card_data (`DatasetCardData`, *optional*):
            Dataset Card Metadata as a [`huggingface_hub.repocard_data.DatasetCardData`] object.
        siblings (`List[RepoSibling]`):
            List of [`huggingface_hub.hf_api.RepoSibling`] objects that constitute the dataset.
        paperswithcode_id (`str`, *optional*):
            Papers with code ID of the dataset.
        trending_score (`int`, *optional*):
            Trending score of the dataset.
        xet_enabled (`bool`, *optional*):
            Value of the `xetEnabled` key of the payload, `None` if the key is absent.
    """

    id: str
    author: Optional[str]
    sha: Optional[str]
    created_at: Optional[datetime]
    last_modified: Optional[datetime]
    private: Optional[bool]
    gated: Optional[Literal["auto", "manual", False]]
    disabled: Optional[bool]
    downloads: Optional[int]
    downloads_all_time: Optional[int]
    likes: Optional[int]
    paperswithcode_id: Optional[str]
    tags: Optional[List[str]]
    trending_score: Optional[int]
    card_data: Optional[DatasetCardData]
    siblings: Optional[List[RepoSibling]]
    xet_enabled: Optional[bool]

    def __init__(self, **kwargs):
        def pop_aliased(primary: str, fallback: str):
            # Pop BOTH spellings eagerly. With `pop(primary) or pop(fallback)` the fallback key stays in
            # `kwargs` whenever the primary value is truthy, and the final `__dict__.update(**kwargs)` would
            # then overwrite the parsed attribute with the raw fallback value.
            first = kwargs.pop(primary, None)
            second = kwargs.pop(fallback, None)
            return first or second

        self.id = kwargs.pop("id")  # only mandatory key
        self.author = kwargs.pop("author", None)
        self.sha = kwargs.pop("sha", None)
        created_at = pop_aliased("createdAt", "created_at")
        self.created_at = parse_datetime(created_at) if created_at else None
        last_modified = pop_aliased("lastModified", "last_modified")
        self.last_modified = parse_datetime(last_modified) if last_modified else None
        self.private = kwargs.pop("private", None)
        self.gated = kwargs.pop("gated", None)
        self.disabled = kwargs.pop("disabled", None)
        self.downloads = kwargs.pop("downloads", None)
        self.downloads_all_time = kwargs.pop("downloadsAllTime", None)
        self.likes = kwargs.pop("likes", None)
        self.paperswithcode_id = kwargs.pop("paperswithcode_id", None)
        self.tags = kwargs.pop("tags", None)
        self.trending_score = kwargs.pop("trendingScore", None)

        # A dict is converted to `DatasetCardData`; any other value is stored unchanged.
        card_data = pop_aliased("cardData", "card_data")
        self.card_data = (
            DatasetCardData(**card_data, ignore_metadata_errors=True) if isinstance(card_data, dict) else card_data
        )
        siblings = kwargs.pop("siblings", None)
        self.siblings = (
            [
                RepoSibling(
                    rfilename=sibling["rfilename"],
                    size=sibling.get("size"),
                    blob_id=sibling.get("blobId"),
                    lfs=(
                        BlobLfsInfo(
                            size=sibling["lfs"]["size"],
                            sha256=sibling["lfs"]["sha256"],
                            pointer_size=sibling["lfs"]["pointerSize"],
                        )
                        if sibling.get("lfs")
                        else None
                    ),
                )
                for sibling in siblings
            ]
            if siblings is not None
            else None
        )
        self.xet_enabled = kwargs.pop("xetEnabled", None)

        # backwards compatibility
        self.lastModified = self.last_modified
        self.cardData = self.card_data
        # Keys that were not consumed above are exposed as plain attributes.
        self.__dict__.update(**kwargs)
@dataclass
class SpaceInfo:
    """
    Contains information about a Space on the Hub. This object is returned by [`space_info`] and [`list_spaces`].

    > [!TIP]
    > Most attributes of this class are optional. This is because the data returned by the Hub depends on the query made.
    > In general, the more specific the query, the more information is returned. On the contrary, when listing spaces
    > using [`list_spaces`] only a subset of the attributes are returned.

    Attributes:
        id (`str`):
            ID of the Space.
        author (`str`, *optional*):
            Author of the Space.
        sha (`str`, *optional*):
            Repo SHA at this particular revision.
        created_at (`datetime`, *optional*):
            Date of creation of the repo on the Hub. Note that the lowest value is `2022-03-02T23:29:04.000Z`,
            corresponding to the date when we began to store creation dates.
        last_modified (`datetime`, *optional*):
            Date of last commit to the repo.
        private (`bool`):
            Is the repo private.
        gated (`Literal["auto", "manual", False]`, *optional*):
            Is the repo gated.
            If so, whether there is manual or automatic approval.
        disabled (`bool`, *optional*):
            Is the Space disabled.
        host (`str`, *optional*):
            Host URL of the Space.
        subdomain (`str`, *optional*):
            Subdomain of the Space.
        likes (`int`):
            Number of likes of the Space.
        tags (`List[str]`):
            List of tags of the Space.
        siblings (`List[RepoSibling]`):
            List of [`huggingface_hub.hf_api.RepoSibling`] objects that constitute the Space.
        card_data (`SpaceCardData`, *optional*):
            Space Card Metadata as a [`huggingface_hub.repocard_data.SpaceCardData`] object.
        runtime (`SpaceRuntime`, *optional*):
            Space runtime information as a [`huggingface_hub.hf_api.SpaceRuntime`] object.
        sdk (`str`, *optional*):
            SDK used by the Space.
        models (`List[str]`, *optional*):
            List of models used by the Space.
        datasets (`List[str]`, *optional*):
            List of datasets used by the Space.
        trending_score (`int`, *optional*):
            Trending score of the Space.
        xet_enabled (`bool`, *optional*):
            Value of the `xetEnabled` key of the payload, `None` if the key is absent.
    """

    id: str
    author: Optional[str]
    sha: Optional[str]
    created_at: Optional[datetime]
    last_modified: Optional[datetime]
    private: Optional[bool]
    gated: Optional[Literal["auto", "manual", False]]
    disabled: Optional[bool]
    host: Optional[str]
    subdomain: Optional[str]
    likes: Optional[int]
    sdk: Optional[str]
    tags: Optional[List[str]]
    siblings: Optional[List[RepoSibling]]
    trending_score: Optional[int]
    card_data: Optional[SpaceCardData]
    runtime: Optional[SpaceRuntime]
    models: Optional[List[str]]
    datasets: Optional[List[str]]
    xet_enabled: Optional[bool]

    def __init__(self, **kwargs):
        def pop_aliased(primary: str, fallback: str):
            # Pop BOTH spellings eagerly. With `pop(primary) or pop(fallback)` the fallback key stays in
            # `kwargs` whenever the primary value is truthy, and the final `__dict__.update(**kwargs)` would
            # then overwrite the parsed attribute with the raw fallback value.
            first = kwargs.pop(primary, None)
            second = kwargs.pop(fallback, None)
            return first or second

        self.id = kwargs.pop("id")  # only mandatory key
        self.author = kwargs.pop("author", None)
        self.sha = kwargs.pop("sha", None)
        created_at = pop_aliased("createdAt", "created_at")
        self.created_at = parse_datetime(created_at) if created_at else None
        last_modified = pop_aliased("lastModified", "last_modified")
        self.last_modified = parse_datetime(last_modified) if last_modified else None
        self.private = kwargs.pop("private", None)
        self.gated = kwargs.pop("gated", None)
        self.disabled = kwargs.pop("disabled", None)
        self.host = kwargs.pop("host", None)
        self.subdomain = kwargs.pop("subdomain", None)
        self.likes = kwargs.pop("likes", None)
        self.sdk = kwargs.pop("sdk", None)
        self.tags = kwargs.pop("tags", None)
        self.trending_score = kwargs.pop("trendingScore", None)

        # A dict is converted to `SpaceCardData`; any other value is stored unchanged.
        card_data = pop_aliased("cardData", "card_data")
        self.card_data = (
            SpaceCardData(**card_data, ignore_metadata_errors=True) if isinstance(card_data, dict) else card_data
        )
        siblings = kwargs.pop("siblings", None)
        self.siblings = (
            [
                RepoSibling(
                    rfilename=sibling["rfilename"],
                    size=sibling.get("size"),
                    blob_id=sibling.get("blobId"),
                    lfs=(
                        BlobLfsInfo(
                            size=sibling["lfs"]["size"],
                            sha256=sibling["lfs"]["sha256"],
                            pointer_size=sibling["lfs"]["pointerSize"],
                        )
                        if sibling.get("lfs")
                        else None
                    ),
                )
                for sibling in siblings
            ]
            if siblings is not None
            else None
        )
        runtime = kwargs.pop("runtime", None)
        self.runtime = SpaceRuntime(runtime) if runtime else None
        self.models = kwargs.pop("models", None)
        self.datasets = kwargs.pop("datasets", None)
        self.xet_enabled = kwargs.pop("xetEnabled", None)

        # backwards compatibility
        self.lastModified = self.last_modified
        self.cardData = self.card_data
        # Keys that were not consumed above are exposed as plain attributes.
        self.__dict__.update(**kwargs)
@dataclass
class CollectionItem:
    """
    Describes one entry of a Collection (model, dataset, Space, paper or collection).

    Attributes:
        item_object_id (`str`):
            Unique ID of the item in the collection.
        item_id (`str`):
            ID of the underlying object on the Hub. Can be either a repo_id, a paper id or a collection slug.
            e.g. `"jbilcke-hf/ai-comic-factory"`, `"2307.09288"`, `"celinah/cerebras-function-calling-682607169c35fbfa98b30b9a"`.
        item_type (`str`):
            Type of the underlying object. Can be one of `"model"`, `"dataset"`, `"space"`, `"paper"` or `"collection"`.
        position (`int`):
            Position of the item in the collection.
        note (`str`, *optional*):
            Note associated with the item, as plain text.
    """

    item_object_id: str  # id in database
    item_id: str  # repo_id or paper id
    item_type: str
    position: int
    note: Optional[str] = None

    def __init__(
        self,
        _id: str,
        id: str,
        type: CollectionItemType_T,
        position: int,
        note: Optional[Dict] = None,
        **kwargs,
    ) -> None:
        self.item_object_id = _id  # id in database
        # A `slug` keyword (passed for collection items) takes precedence over the plain id.
        collection_slug = kwargs.get("slug")
        self.item_id = id if collection_slug is None else collection_slug
        self.item_type = type
        self.position = position
        # The note comes in as a dict; only its "text" entry is kept.
        self.note = None if note is None else note["text"]
@dataclass
class Collection:
    """
    Describes a Collection on the Hub.

    Attributes:
        slug (`str`):
            Slug of the collection. E.g. `"TheBloke/recent-models-64f9a55bb3115b4f513ec026"`.
        title (`str`):
            Title of the collection. E.g. `"Recent models"`.
        owner (`str`):
            Owner of the collection. E.g. `"TheBloke"`.
        items (`List[CollectionItem]`):
            List of items in the collection.
        last_updated (`datetime`):
            Date of the last update of the collection.
        position (`int`):
            Position of the collection in the list of collections of the owner.
        private (`bool`):
            Whether the collection is private or not.
        theme (`str`):
            Theme of the collection. E.g. `"green"`.
        upvotes (`int`):
            Number of upvotes of the collection.
        description (`str`, *optional*):
            Description of the collection, as plain text.
        url (`str`):
            (property) URL of the collection on the Hub.
    """

    slug: str
    title: str
    owner: str
    items: List[CollectionItem]
    last_updated: datetime
    position: int
    private: bool
    theme: str
    upvotes: int
    description: Optional[str] = None

    def __init__(self, **kwargs) -> None:
        # Mandatory keys are consumed in declaration order; a missing one raises KeyError.
        self.slug = kwargs.pop("slug")
        self.title = kwargs.pop("title")
        self.owner = kwargs.pop("owner")

        raw_items = kwargs.pop("items")
        self.items = [CollectionItem(**raw_item) for raw_item in raw_items]

        raw_last_updated = kwargs.pop("lastUpdated")
        self.last_updated = parse_datetime(raw_last_updated)

        self.position = kwargs.pop("position")
        self.private = kwargs.pop("private")
        self.theme = kwargs.pop("theme")
        self.upvotes = kwargs.pop("upvotes")
        self.description = kwargs.pop("description", None)

        # Fall back to the default endpoint when none (or None) is given.
        custom_endpoint = kwargs.pop("endpoint", None)
        base_url = constants.ENDPOINT if custom_endpoint is None else custom_endpoint
        self._url = f"{base_url}/collections/{self.slug}"

    @property
    def url(self) -> str:
        """Returns the URL of the collection on the Hub."""
        return self._url
@dataclass
class GitRefInfo:
    """
    Describes a single git reference of a repo on the Hub.

    Attributes:
        name (`str`):
            Name of the reference (e.g. tag name or branch name).
        ref (`str`):
            Full git ref on the Hub (e.g. `"refs/heads/main"` or `"refs/tags/v1.0"`).
        target_commit (`str`):
            OID of the commit the ref points to (e.g. `"e7da7f221d5bf496a48136c0cd264e630fe9fcc8"`).
    """

    name: str
    ref: str
    target_commit: str
@dataclass
class GitRefs:
    """
    Groups every git reference of a repo on the Hub.

    Object is returned by [`list_repo_refs`].

    Attributes:
        branches (`List[GitRefInfo]`):
            A list of [`GitRefInfo`] describing the branches of the repo.
        converts (`List[GitRefInfo]`):
            A list of [`GitRefInfo`] describing the "convert" refs of the repo.
            Converts are refs used (internally) to push preprocessed data in Dataset repos.
        tags (`List[GitRefInfo]`):
            A list of [`GitRefInfo`] describing the tags of the repo.
        pull_requests (`List[GitRefInfo]`, *optional*):
            A list of [`GitRefInfo`] describing the pull requests of the repo.
            Only returned if `include_prs=True` is set.
    """

    branches: List[GitRefInfo]
    converts: List[GitRefInfo]
    tags: List[GitRefInfo]
    pull_requests: Optional[List[GitRefInfo]] = None
@dataclass
class GitCommitInfo:
    """
    Describes a single git commit of a repo on the Hub. Check out [`list_repo_commits`] for more details.

    Attributes:
        commit_id (`str`):
            OID of the commit (e.g. `"e7da7f221d5bf496a48136c0cd264e630fe9fcc8"`).
        authors (`List[str]`):
            List of authors of the commit.
        created_at (`datetime`):
            Datetime when the commit was created.
        title (`str`):
            Title of the commit. This is a free-text value entered by the authors.
        message (`str`):
            Description of the commit. This is a free-text value entered by the authors.
        formatted_title (`str`):
            Title of the commit formatted as HTML. Only returned if `formatted=True` is set.
        formatted_message (`str`):
            Description of the commit formatted as HTML. Only returned if `formatted=True` is set.
    """

    commit_id: str

    authors: List[str]
    created_at: datetime
    title: str
    message: str

    # No default value: both must be passed explicitly (possibly as `None`).
    formatted_title: Optional[str]
    formatted_message: Optional[str]
@dataclass
class UserLikes:
    """
    Describes the likes of a user on the Hub.

    Attributes:
        user (`str`):
            Name of the user for which the likes were fetched.
        total (`int`):
            Total number of likes.
        datasets (`List[str]`):
            Datasets liked by the user (as repo_ids).
        models (`List[str]`):
            Models liked by the user (as repo_ids).
        spaces (`List[str]`):
            Spaces liked by the user (as repo_ids).
    """

    # Metadata
    user: str
    total: int

    # User likes
    datasets: List[str]
    models: List[str]
    spaces: List[str]
@dataclass
class Organization:
    """
    Describes an organization on the Hub.

    Attributes:
        avatar_url (`str`):
            URL of the organization's avatar.
        name (`str`):
            Name of the organization on the Hub (unique).
        fullname (`str`):
            Organization's full name.
        details (`str`, *optional*):
            Organization's description.
        is_verified (`bool`, *optional*):
            Whether the organization is verified.
        is_following (`bool`, *optional*):
            Whether the authenticated user follows this organization.
        num_users (`int`, *optional*):
            Number of members in the organization.
        num_models (`int`, *optional*):
            Number of models owned by the organization.
        num_spaces (`int`, *optional*):
            Number of Spaces owned by the organization.
        num_datasets (`int`, *optional*):
            Number of datasets owned by the organization.
        num_followers (`int`, *optional*):
            Number of followers of the organization.
    """

    avatar_url: str
    name: str
    fullname: str
    details: Optional[str] = None
    is_verified: Optional[bool] = None
    is_following: Optional[bool] = None
    num_users: Optional[int] = None
    num_models: Optional[int] = None
    num_spaces: Optional[int] = None
    num_datasets: Optional[int] = None
    num_followers: Optional[int] = None

    def __init__(self, **kwargs) -> None:
        # (attribute name, payload key) pairs; text fields default to "" and the rest to None.
        text_fields = (
            ("avatar_url", "avatarUrl"),
            ("name", "name"),
            ("fullname", "fullname"),
        )
        nullable_fields = (
            ("details", "details"),
            ("is_verified", "isVerified"),
            ("is_following", "isFollowing"),
            ("num_users", "numUsers"),
            ("num_models", "numModels"),
            ("num_spaces", "numSpaces"),
            ("num_datasets", "numDatasets"),
            ("num_followers", "numFollowers"),
        )
        for attribute, key in text_fields:
            setattr(self, attribute, kwargs.pop(key, ""))
        for attribute, key in nullable_fields:
            setattr(self, attribute, kwargs.pop(key, None))

        # forward compatibility
        vars(self).update(kwargs)
@dataclass
class User:
    """
    Describes a user on the Hub.

    Attributes:
        username (`str`):
            Name of the user on the Hub (unique).
        fullname (`str`):
            User's full name.
        avatar_url (`str`):
            URL of the user's avatar.
        details (`str`, *optional*):
            User's details.
        is_following (`bool`, *optional*):
            Whether the authenticated user is following this user.
        is_pro (`bool`, *optional*):
            Whether the user is a pro user.
        num_models (`int`, *optional*):
            Number of models created by the user.
        num_datasets (`int`, *optional*):
            Number of datasets created by the user.
        num_spaces (`int`, *optional*):
            Number of spaces created by the user.
        num_discussions (`int`, *optional*):
            Number of discussions initiated by the user.
        num_papers (`int`, *optional*):
            Number of papers authored by the user.
        num_upvotes (`int`, *optional*):
            Number of upvotes received by the user.
        num_likes (`int`, *optional*):
            Number of likes given by the user.
        num_following (`int`, *optional*):
            Number of users this user is following.
        num_followers (`int`, *optional*):
            Number of users following this user.
        orgs (list of [`Organization`]):
            List of organizations the user is part of.

    The value of the `type` keyword argument, if any, is additionally stored as `user_type`.
    """

    # Metadata
    username: str
    fullname: str
    avatar_url: str
    details: Optional[str] = None
    is_following: Optional[bool] = None
    is_pro: Optional[bool] = None
    num_models: Optional[int] = None
    num_datasets: Optional[int] = None
    num_spaces: Optional[int] = None
    num_discussions: Optional[int] = None
    num_papers: Optional[int] = None
    num_upvotes: Optional[int] = None
    num_likes: Optional[int] = None
    num_following: Optional[int] = None
    num_followers: Optional[int] = None
    orgs: List[Organization] = field(default_factory=list)

    def __init__(self, **kwargs) -> None:
        # (attribute name, payload key) pairs; text fields default to "" and the rest to None.
        text_fields = (
            ("username", "user"),
            ("fullname", "fullname"),
            ("avatar_url", "avatarUrl"),
        )
        nullable_fields = (
            ("is_following", "isFollowing"),
            ("is_pro", "isPro"),
            ("details", "details"),
            ("num_models", "numModels"),
            ("num_datasets", "numDatasets"),
            ("num_spaces", "numSpaces"),
            ("num_discussions", "numDiscussions"),
            ("num_papers", "numPapers"),
            ("num_upvotes", "numUpvotes"),
            ("num_likes", "numLikes"),
            ("num_following", "numFollowing"),
            ("num_followers", "numFollowers"),
            ("user_type", "type"),
        )
        for attribute, key in text_fields:
            setattr(self, attribute, kwargs.pop(key, ""))
        for attribute, key in nullable_fields:
            setattr(self, attribute, kwargs.pop(key, None))

        # Each organization payload is turned into an `Organization` object.
        raw_orgs = kwargs.pop("orgs", [])
        self.orgs = [Organization(**raw_org) for raw_org in raw_orgs]

        # forward compatibility
        vars(self).update(kwargs)
@dataclass
class PaperInfo:
    """
    Contains information about a paper on the Hub.

    Values are read from the keyword arguments and, for some fields, from the nested `paper` dict when one is
    provided. Keyword arguments that are not consumed are stored as-is as instance attributes.

    Attributes:
        id (`str`):
            arXiv paper ID.
        authors (`List[str]`, *optional*):
            Names of paper authors
        published_at (`datetime`, *optional*):
            Date paper published.
        title (`str`, *optional*):
            Title of the paper.
        summary (`str`, *optional*):
            Summary of the paper.
        upvotes (`int`, *optional*):
            Number of upvotes for the paper on the Hub.
        discussion_id (`str`, *optional*):
            Discussion ID for the paper on the Hub.
        source (`str`, *optional*):
            Source of the paper.
        comments (`int`, *optional*):
            Number of comments for the paper on the Hub.
        submitted_at (`datetime`, *optional*):
            Date paper appeared in daily papers on the Hub.
        submitted_by (`User`, *optional*):
            Information about who submitted the daily paper.
    """

    id: str
    authors: Optional[List[str]]
    published_at: Optional[datetime]
    title: Optional[str]
    summary: Optional[str]
    upvotes: Optional[int]
    discussion_id: Optional[str]
    source: Optional[str]
    comments: Optional[int]
    submitted_at: Optional[datetime]
    submitted_by: Optional[User]

    def __init__(self, **kwargs) -> None:
        # Work on a shallow copy of the nested payload: its fields are consumed with `pop` below and the
        # caller's dict must not be emptied as a side effect. An explicit `paper=None` is treated as absent.
        paper = dict(kwargs.pop("paper", None) or {})
        self.id = kwargs.pop("id", None) or paper.pop("id", None)
        authors = paper.pop("authors", None) or kwargs.pop("authors", None)
        # `get` rather than `pop`: the author dicts belong to the caller and are left untouched.
        self.authors = [author.get("name") for author in authors] if authors else None
        published_at = paper.pop("publishedAt", None) or kwargs.pop("publishedAt", None)
        self.published_at = parse_datetime(published_at) if published_at else None
        self.title = kwargs.pop("title", None)
        self.source = kwargs.pop("source", None)
        self.summary = paper.pop("summary", None) or kwargs.pop("summary", None)
        # Explicit `None` check: `0` is a legitimate upvote count and must not be reported as missing.
        upvotes = paper.pop("upvotes", None)
        if upvotes is None:
            upvotes = kwargs.pop("upvotes", None)
        self.upvotes = upvotes
        self.discussion_id = paper.pop("discussionId", None) or kwargs.pop("discussionId", None)
        self.comments = kwargs.pop("numComments", 0)
        # A top-level "publishedAt" is still in `kwargs` here only when the nested `paper` supplied the
        # publication date above; in that case it is used as the submission date.
        submitted_at = kwargs.pop("publishedAt", None) or kwargs.pop("submittedOnDailyAt", None)
        self.submitted_at = parse_datetime(submitted_at) if submitted_at else None
        submitted_by = kwargs.pop("submittedBy", None) or kwargs.pop("submittedOnDailyBy", None)
        self.submitted_by = User(**submitted_by) if submitted_by else None

        # forward compatibility
        self.__dict__.update(**kwargs)
@dataclass
class LFSFileInfo:
    """
    Description of a single LFS object stored in a repo on the Hub.

    Instances are used when listing LFS files and when permanently deleting them from a repo to free-up space.
    See [`list_lfs_files`] and [`permanently_delete_lfs_files`] for more details.

    Git LFS files are tracked using SHA-256 object IDs, rather than file paths, to optimize performance.
    This approach is necessary because a single object can be referenced by multiple paths across different commits,
    making it impractical to search and resolve these connections. Check out [our documentation](https://huggingface.co/docs/hub/storage-limits#advanced-track-lfs-file-references)
    to learn how to know which filename(s) is(are) associated with each SHA.

    Attributes:
        file_oid (`str`):
            SHA-256 object ID of the file. This is the identifier to pass when permanently deleting the file.
        filename (`str`):
            Possible filename for the LFS object. See the note above for more information.
        oid (`str`):
            OID of the LFS object.
        pushed_at (`datetime`):
            Date the LFS object was pushed to the repo.
        ref (`str`, *optional*):
            Ref where the LFS object has been pushed (if any).
        size (`int`):
            Size of the LFS object.

    Example:
        ```py
        >>> from huggingface_hub import HfApi
        >>> api = HfApi()
        >>> lfs_files = api.list_lfs_files("username/my-cool-repo")

        # Filter files to delete based on a combination of `filename`, `pushed_at`, `ref` or `size`.
        # e.g. select only LFS files in the "checkpoints" folder
        >>> lfs_files_to_delete = (lfs_file for lfs_file in lfs_files if lfs_file.filename.startswith("checkpoints/"))

        # Permanently delete LFS files
        >>> api.permanently_delete_lfs_files("username/my-cool-repo", lfs_files_to_delete)
        ```
    """

    file_oid: str
    filename: str
    oid: str
    pushed_at: datetime
    ref: Optional[str]
    size: int

    def __init__(self, **kwargs) -> None:
        """Populate the instance from a raw payload; every key except `"ref"` is mandatory."""
        # Mandatory string fields, consumed in payload order (a missing key raises `KeyError`).
        for attribute, payload_key in (("file_oid", "fileOid"), ("filename", "filename"), ("oid", "oid")):
            setattr(self, attribute, kwargs.pop(payload_key))

        raw_pushed_at = kwargs.pop("pushedAt")
        self.pushed_at = parse_datetime(raw_pushed_at)
        self.ref = kwargs.pop("ref", None)
        self.size = kwargs.pop("size")

        # forward compatibility: keep any remaining key as a plain attribute
        vars(self).update(kwargs)
def future_compatible(fn: CallableT) -> CallableT:
    """Decorate a method of `HfApi` so that it honours `run_as_future=True`.

    When the decorated method is called with `run_as_future=True` (as a keyword or positionally), the call is
    delegated to `self.run_as_future(...)` and its return value is returned. Otherwise the method is called
    directly and its result is returned.
    """
    # Names of the parameters of `fn` in declaration order, without the leading "self".
    positional_names = list(inspect.signature(fn).parameters)[1:]

    @wraps(fn)
    def _inner(self, *args, **kwargs):
        if "run_as_future" in kwargs:
            # Passed as a keyword: remember the value and neutralise it for the actual call.
            in_background = kwargs["run_as_future"]
            kwargs["run_as_future"] = False  # avoid recursion error
        else:
            # Maybe passed positionally: find the value aligned with the "run_as_future" parameter.
            in_background = next(
                (value for name, value in zip(positional_names, args) if name == "run_as_future"),
                False,
            )

        if not in_background:
            # Regular, blocking call.
            return fn(self, *args, **kwargs)

        # Delegate to the client's background runner.
        return self.run_as_future(fn, self, *args, **kwargs)

    _inner.is_future_compatible = True  # type: ignore
    return _inner  # type: ignore
- class HfApi:
- """
- Client to interact with the Hugging Face Hub via HTTP.
- The client is initialized with some high-level settings used in all requests
- made to the Hub (HF endpoint, authentication, user agents...). Using the `HfApi`
- client is preferred but not mandatory as all of its public methods are exposed
- directly at the root of `huggingface_hub`.
- Args:
- endpoint (`str`, *optional*):
- Endpoint of the Hub. Defaults to <https://huggingface.co>.
- token (`bool` or `str`, *optional*):
- A valid user access token (string). Defaults to the locally saved
- token, which is the recommended method for authentication (see
- https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
- To disable authentication, pass `False`.
- library_name (`str`, *optional*):
- The name of the library that is making the HTTP request. Will be added to
- the user-agent header. Example: `"transformers"`.
- library_version (`str`, *optional*):
- The version of the library that is making the HTTP request. Will be added
- to the user-agent header. Example: `"4.24.0"`.
- user_agent (`str`, `dict`, *optional*):
- The user agent info in the form of a dictionary or a single string. It will
- be completed with information about the installed packages.
- headers (`dict`, *optional*):
- Additional headers to be sent with each request. Example: `{"X-My-Header": "value"}`.
- Headers passed here are taking precedence over the default headers.
- """
def __init__(
    self,
    endpoint: Optional[str] = None,
    token: Union[str, bool, None] = None,
    library_name: Optional[str] = None,
    library_version: Optional[str] = None,
    user_agent: Union[Dict, str, None] = None,
    headers: Optional[Dict[str, str]] = None,
) -> None:
    """Store the client-level settings; no request is made here."""
    # Fall back to the configured default endpoint when none is given explicitly.
    if endpoint is None:
        endpoint = constants.ENDPOINT
    self.endpoint = endpoint

    # The remaining settings are kept verbatim.
    self.token = token
    self.library_name = library_name
    self.library_version = library_version
    self.user_agent = user_agent
    self.headers = headers

    # Created lazily by `run_as_future`.
    self._thread_pool: Optional[ThreadPoolExecutor] = None
def run_as_future(self, fn: Callable[..., R], *args, **kwargs) -> Future[R]:
    """
    Run a method in the background and return a Future instance.

    The main goal is to run methods without blocking the main thread (e.g. to push data during a training).
    Background jobs are queued to preserve order but are not run in parallel. If you need to speed-up your scripts
    by parallelizing lots of calls to the API, you must setup and use your own [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor).

    Note: Most-used methods like [`upload_file`], [`upload_folder`] and [`create_commit`] have a `run_as_future: bool`
    argument to directly call them in the background. This is equivalent to calling `api.run_as_future(...)` on them
    but less verbose.

    Args:
        fn (`Callable`):
            The method to run in the background.
        *args, **kwargs:
            Arguments with which the method will be called.

    Return:
        `Future`: a [Future](https://docs.python.org/3/library/concurrent.futures.html#future-objects) instance to
        get the result of the task.

    Example:
        ```py
        >>> from huggingface_hub import HfApi
        >>> api = HfApi()
        >>> future = api.run_as_future(api.whoami) # instant
        >>> future.done()
        False
        >>> future.result() # wait until complete and return result
        (...)
        >>> future.done()
        True
        ```
    """
    if self._thread_pool is None:
        # Created on first use. A single worker means jobs run one after the other.
        self._thread_pool = ThreadPoolExecutor(max_workers=1)
    return self._thread_pool.submit(fn, *args, **kwargs)
@validate_hf_hub_args
def whoami(self, token: Union[bool, str, None] = None) -> Dict:
    """
    Query the Hub's "whoami" endpoint and return the decoded JSON response.

    Args:
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Raises:
        `HTTPError`: if the request fails. On a 401 response, the error message is completed with a hint
            about where the rejected token was read from, when that can be determined.
    """
    # Token actually sent: explicit argument first, then the client-level token, then `get_token()`.
    effective_token = token or self.token or get_token() or True
    response = get_session().get(
        f"{self.endpoint}/api/whoami-v2",
        headers=self._build_hf_headers(token=effective_token),
    )
    try:
        hf_raise_for_status(response)
    except HTTPError as e:
        # Only "unauthorized" errors are rewritten; anything else is propagated untouched.
        if e.response.status_code != 401:
            raise

        error_message = "Invalid user token."
        # Candidate token sources, checked in order; the first one matching the effective token
        # determines which hint is appended to the message.
        token_hints = (
            (
                _get_token_from_google_colab,
                " The token from Google Colab vault is invalid. Please update it from the UI.",
            ),
            (
                _get_token_from_environment,
                " The token from HF_TOKEN environment variable is invalid. "
                "Note that HF_TOKEN takes precedence over `hf auth login`.",
            ),
            (
                _get_token_from_file,
                " The token stored is invalid. Please run `hf auth login` to update it.",
            ),
        )
        for read_token, hint in token_hints:
            if effective_token == read_token():
                error_message += hint
                break
        raise HTTPError(error_message, request=e.request, response=e.response) from e
    return response.json()
@_deprecate_method(
    version="1.0",
    message=(
        "Permissions are more complex than when `get_token_permission` was first introduced. "
        "OAuth and fine-grain tokens allows for more detailed permissions. "
        "If you need to know the permissions associated with a token, please use `whoami` and check the `'auth'` key."
    ),
)
def get_token_permission(
    self, token: Union[bool, str, None] = None
) -> Literal["read", "write", "fineGrained", None]:
    """
    Return the role attached to `token`, or `None` if it cannot be determined.

    > [!WARNING]
    > This method is deprecated and will be removed in version 1.0. Permissions are more complex than when
    > `get_token_permission` was first introduced. OAuth and fine-grain tokens allows for more detailed permissions.
    > If you need to know the permissions associated with a token, please use `whoami` and check the `'auth'` key.

    For more details about tokens, please refer to https://huggingface.co/docs/hub/security-tokens#what-are-user-access-tokens.

    Args:
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `Literal["read", "write", "fineGrained", None]`: The role read from the `whoami` response under
        `["auth"]["accessToken"]["role"]`. Returns `None` if no token is found, if the request fails or if
        the role is not present in the response. This typically happens when the token is an OAuth token.
    """
    try:
        user_info = self.whoami(token=token)
        access_token_info = user_info["auth"]["accessToken"]
        return access_token_info["role"]
    except (LocalTokenNotFoundError, HTTPError, KeyError):
        # Missing token, failed request or role absent from the payload.
        return None
def get_model_tags(self) -> Dict:
    """
    Fetch the model tags grouped by type and return the decoded JSON response.
    """
    response = get_session().get(f"{self.endpoint}/api/models-tags-by-type")
    # Raise on any HTTP error before decoding the payload.
    hf_raise_for_status(response)
    return response.json()
def get_dataset_tags(self) -> Dict:
    """
    Fetch the dataset tags grouped by type and return the decoded JSON response.
    """
    response = get_session().get(f"{self.endpoint}/api/datasets-tags-by-type")
    # Raise on any HTTP error before decoding the payload.
    hf_raise_for_status(response)
    return response.json()
@_deprecate_arguments(
    version="1.0", deprecated_args=["language", "library", "task", "tags"], custom_message="Use `filter` instead."
)
@validate_hf_hub_args
def list_models(
    self,
    *,
    # Search-query parameter
    filter: Union[str, Iterable[str], None] = None,
    author: Optional[str] = None,
    apps: Optional[Union[str, List[str]]] = None,
    gated: Optional[bool] = None,
    inference: Optional[Literal["warm"]] = None,
    inference_provider: Optional[Union[Literal["all"], "PROVIDER_T", List["PROVIDER_T"]]] = None,
    model_name: Optional[str] = None,
    trained_dataset: Optional[Union[str, List[str]]] = None,
    search: Optional[str] = None,
    pipeline_tag: Optional[str] = None,
    emissions_thresholds: Optional[Tuple[float, float]] = None,
    # Sorting and pagination parameters
    sort: Union[Literal["last_modified"], str, None] = None,
    direction: Optional[Literal[-1]] = None,
    limit: Optional[int] = None,
    # Additional data to fetch
    expand: Optional[List[ExpandModelProperty_T]] = None,
    full: Optional[bool] = None,
    cardData: bool = False,
    fetch_config: bool = False,
    token: Union[bool, str, None] = None,
    # Deprecated arguments - use `filter` instead
    language: Optional[Union[str, List[str]]] = None,
    library: Optional[Union[str, List[str]]] = None,
    tags: Optional[Union[str, List[str]]] = None,
    task: Optional[Union[str, List[str]]] = None,
) -> Iterable[ModelInfo]:
    """
    Iterate over the models hosted on the Hugging Face Hub that match the given filters.

    Args:
        filter (`str` or `Iterable[str]`, *optional*):
            A string or list of strings to filter models on the Hub.
            Models can be filtered by library, language, task, tags, and more.
        author (`str`, *optional*):
            A string which identifies the author (user or organization) of the
            returned models.
        apps (`str` or `List`, *optional*):
            A string or list of strings to filter models on the Hub that
            support the specified apps. Example values include `"ollama"` or `["ollama", "vllm"]`.
        gated (`bool`, *optional*):
            A boolean to filter models on the Hub that are gated or not. By default, all models are returned.
            If `gated=True` is passed, only gated models are returned.
            If `gated=False` is passed, only non-gated models are returned.
        inference (`Literal["warm"]`, *optional*):
            If "warm", filter models on the Hub currently served by at least one provider.
        inference_provider (`Literal["all"]` or `str`, *optional*):
            A string to filter models on the Hub that are served by a specific provider.
            Pass `"all"` to get all models served by at least one provider.
        library (`str` or `List`, *optional*):
            Deprecated. Pass a library name in `filter` to filter models by library.
        language (`str` or `List`, *optional*):
            Deprecated. Pass a language in `filter` to filter models by language.
        model_name (`str`, *optional*):
            A string that contains complete or partial names for models on the
            Hub, such as "bert" or "bert-base-cased". It is sent as a search term, together with `search`.
        task (`str` or `List`, *optional*):
            Deprecated. Pass a task in `filter` to filter models by task.
        trained_dataset (`str` or `List`, *optional*):
            A string tag or a list of string tags of the trained dataset for a
            model on the Hub. The `"dataset:"` prefix is added to each value that does not already have it.
        tags (`str` or `List`, *optional*):
            Deprecated. Pass tags in `filter` to filter models by tags.
        search (`str`, *optional*):
            A string that will be contained in the returned model ids.
        pipeline_tag (`str`, *optional*):
            A string pipeline tag to filter models on the Hub by, such as `summarization`.
        emissions_thresholds (`Tuple`, *optional*):
            A tuple of two ints or floats representing a minimum and maximum
            carbon footprint to filter the resulting models with in grams. Requires `cardData=True`.
        sort (`Literal["last_modified"]` or `str`, *optional*):
            The key with which to sort the resulting models. Possible values are "last_modified", "trending_score",
            "created_at", "downloads" and "likes".
        direction (`Literal[-1]` or `int`, *optional*):
            Direction in which to sort. The value `-1` sorts by descending
            order while all other values sort by ascending order.
        limit (`int`, *optional*):
            The limit on the number of models fetched. Leaving this option
            to `None` fetches all models.
        expand (`List[ExpandModelProperty_T]`, *optional*):
            List properties to return in the response. When used, only the properties in the list will be returned.
            This parameter cannot be used if `full`, `cardData` or `fetch_config` are passed.
            Possible values are `"author"`, `"cardData"`, `"config"`, `"createdAt"`, `"disabled"`, `"downloads"`, `"downloadsAllTime"`, `"gated"`, `"gguf"`, `"inference"`, `"inferenceProviderMapping"`, `"lastModified"`, `"library_name"`, `"likes"`, `"mask_token"`, `"model-index"`, `"pipeline_tag"`, `"private"`, `"safetensors"`, `"sha"`, `"siblings"`, `"spaces"`, `"tags"`, `"transformersInfo"`, `"trendingScore"`, `"widgetData"`, `"resourceGroup"` and `"xetEnabled"`.
        full (`bool`, *optional*):
            Whether to fetch all model data, including the `last_modified`,
            the `sha`, the files and the `tags`.
        cardData (`bool`, *optional*):
            Whether to grab the metadata for the model as well. Can contain
            useful information such as carbon emissions, metrics, and
            datasets trained on.
        fetch_config (`bool`, *optional*):
            Whether to fetch the model configs as well. This is not included
            in `full` due to its size.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `Iterable[ModelInfo]`: an iterable of [`huggingface_hub.hf_api.ModelInfo`] objects.

    Raises:
        `ValueError`: if `expand` is combined with `full`, `cardData` or `fetch_config`, or if
            `emissions_thresholds` is passed without `cardData=True`.

    Example:

    ```python
    >>> from huggingface_hub import HfApi

    >>> api = HfApi()

    # List all models
    >>> api.list_models()

    # List text classification models
    >>> api.list_models(filter="text-classification")

    # List models from the KerasHub library
    >>> api.list_models(filter="keras-hub")

    # List models served by Cohere
    >>> api.list_models(inference_provider="cohere")

    # List models with "bert" in their name
    >>> api.list_models(search="bert")

    # List models with "bert" in their name and pushed by google
    >>> api.list_models(search="bert", author="google")
    ```
    """
    # Reject incompatible argument combinations.
    if expand and (full or cardData or fetch_config):
        raise ValueError("`expand` cannot be used if `full`, `cardData` or `fetch_config` are passed.")
    if emissions_thresholds is not None and not cardData:
        raise ValueError("`emissions_thresholds` were passed without setting `cardData=True`.")

    path = f"{self.endpoint}/api/models"
    headers = self._build_hf_headers(token=token)
    params: Dict[str, Any] = {}

    # Every tag-like argument ends up in a single "filter" query parameter.
    filter_list: List[str] = []
    for tag_source in (filter, library, task):
        if tag_source:
            filter_list.extend([tag_source] if isinstance(tag_source, str) else tag_source)
    if trained_dataset:
        dataset_names = [trained_dataset] if isinstance(trained_dataset, str) else trained_dataset
        # Dataset tags are sent with a "dataset:" prefix.
        filter_list.extend(
            name if name.startswith("dataset:") else f"dataset:{name}" for name in dataset_names
        )
    for tag_source in (language, tags):
        if tag_source:
            filter_list.extend([tag_source] if isinstance(tag_source, str) else tag_source)
    if filter_list:
        params["filter"] = filter_list

    # Remaining search-query parameters.
    if author:
        params["author"] = author
    if apps:
        params["apps"] = [apps] if isinstance(apps, str) else apps
    if gated is not None:
        params["gated"] = gated
    if inference is not None:
        params["inference"] = inference
    if inference_provider is not None:
        params["inference_provider"] = inference_provider
    if pipeline_tag:
        params["pipeline_tag"] = pipeline_tag

    # `model_name` and `search` are both sent as search terms.
    search_terms = [term for term in (model_name, search) if term]
    if search_terms:
        params["search"] = search_terms

    # Sorting and pagination: snake_case sort keys are translated to the server-side names.
    if sort is not None:
        if sort == "last_modified":
            sort_key = "lastModified"
        elif sort == "trending_score":
            sort_key = "trendingScore"
        elif sort == "created_at":
            sort_key = "createdAt"
        else:
            sort_key = sort
        params["sort"] = sort_key
    if direction is not None:
        params["direction"] = direction
    if limit is not None:
        params["limit"] = limit

    # Additional data to fetch.
    if full:
        params["full"] = True
    if fetch_config:
        params["config"] = True
    if cardData:
        params["cardData"] = True
    if expand:
        params["expand"] = expand

    # `paginate` yields raw items lazily.
    raw_models = paginate(path, params=params, headers=headers)
    if limit is not None:
        raw_models = islice(raw_models, limit)  # Do not iterate over all pages
    for raw_model in raw_models:
        raw_model.setdefault("siblings", None)
        model_info = ModelInfo(**raw_model)
        if emissions_thresholds is not None and not _is_emission_within_threshold(
            model_info, *emissions_thresholds
        ):
            continue
        yield model_info
@_deprecate_arguments(version="1.0", deprecated_args=["tags"], custom_message="Use `filter` instead.")
@validate_hf_hub_args
def list_datasets(
    self,
    *,
    # Search-query parameter
    filter: Union[str, Iterable[str], None] = None,
    author: Optional[str] = None,
    benchmark: Optional[Union[str, List[str]]] = None,
    dataset_name: Optional[str] = None,
    gated: Optional[bool] = None,
    language_creators: Optional[Union[str, List[str]]] = None,
    language: Optional[Union[str, List[str]]] = None,
    multilinguality: Optional[Union[str, List[str]]] = None,
    size_categories: Optional[Union[str, List[str]]] = None,
    task_categories: Optional[Union[str, List[str]]] = None,
    task_ids: Optional[Union[str, List[str]]] = None,
    search: Optional[str] = None,
    # Sorting and pagination parameters
    sort: Optional[Union[Literal["last_modified"], str]] = None,
    direction: Optional[Literal[-1]] = None,
    limit: Optional[int] = None,
    # Additional data to fetch
    expand: Optional[List[ExpandDatasetProperty_T]] = None,
    full: Optional[bool] = None,
    token: Union[bool, str, None] = None,
    # Deprecated arguments - use `filter` instead
    tags: Optional[Union[str, List[str]]] = None,
) -> Iterable[DatasetInfo]:
    """
    List datasets hosted on the Hugging Face Hub, given some filters.

    Args:
        filter (`str` or `Iterable[str]`, *optional*):
            A string or list of strings to filter datasets on the Hub.
        author (`str`, *optional*):
            A string which identifies the author of the returned datasets.
        benchmark (`str` or `List`, *optional*):
            A string or list of strings that can be used to identify datasets on
            the Hub by their official benchmark.
        dataset_name (`str`, *optional*):
            A string that can be used to identify datasets on the Hub by name, such as
            `SQAC` or `wikineural`. It is sent as a search term, together with `search`.
        gated (`bool`, *optional*):
            A boolean to filter datasets on the Hub that are gated or not. By default, all datasets are returned.
            If `gated=True` is passed, only gated datasets are returned.
            If `gated=False` is passed, only non-gated datasets are returned.
        language_creators (`str` or `List`, *optional*):
            A string or list of strings that can be used to identify datasets on
            the Hub with how the data was curated, such as `crowdsourced` or
            `machine_generated`.
        language (`str` or `List`, *optional*):
            A string or list of strings representing a two-character language to
            filter datasets by on the Hub.
        multilinguality (`str` or `List`, *optional*):
            A string or list of strings representing a filter for datasets that
            contain multiple languages.
        size_categories (`str` or `List`, *optional*):
            A string or list of strings that can be used to identify datasets on
            the Hub by the size of the dataset such as `100K<n<1M` or
            `1M<n<10M`.
        tags (`str` or `List`, *optional*):
            Deprecated. Pass tags in `filter` to filter datasets by tags.
        task_categories (`str` or `List`, *optional*):
            A string or list of strings that can be used to identify datasets on
            the Hub by the designed task, such as `audio_classification` or
            `named_entity_recognition`.
        task_ids (`str` or `List`, *optional*):
            A string or list of strings that can be used to identify datasets on
            the Hub by the specific task such as `speech_emotion_recognition` or
            `paraphrase`.
        search (`str`, *optional*):
            A string that will be contained in the returned datasets.
        sort (`Literal["last_modified"]` or `str`, *optional*):
            The key with which to sort the resulting datasets. Possible values are "last_modified", "trending_score",
            "created_at", "downloads" and "likes".
        direction (`Literal[-1]` or `int`, *optional*):
            Direction in which to sort. The value `-1` sorts by descending
            order while all other values sort by ascending order.
        limit (`int`, *optional*):
            The limit on the number of datasets fetched. Leaving this option
            to `None` fetches all datasets.
        expand (`List[ExpandDatasetProperty_T]`, *optional*):
            List properties to return in the response. When used, only the properties in the list will be returned.
            This parameter cannot be used if `full` is passed.
            Possible values are `"author"`, `"cardData"`, `"citation"`, `"createdAt"`, `"disabled"`, `"description"`, `"downloads"`, `"downloadsAllTime"`, `"gated"`, `"lastModified"`, `"likes"`, `"paperswithcode_id"`, `"private"`, `"siblings"`, `"sha"`, `"tags"`, `"trendingScore"`, `"usedStorage"`, `"resourceGroup"` and `"xetEnabled"`.
        full (`bool`, *optional*):
            Whether to fetch all dataset data, including the `last_modified`,
            the `card_data` and the files. Can contain useful information such as the
            PapersWithCode ID.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `Iterable[DatasetInfo]`: an iterable of [`huggingface_hub.hf_api.DatasetInfo`] objects.

    Raises:
        `ValueError`: if `expand` and `full` are both passed.

    Example usage with the `filter` argument:

    ```python
    >>> from huggingface_hub import HfApi

    >>> api = HfApi()

    # List all datasets
    >>> api.list_datasets()

    # List only the text classification datasets
    >>> api.list_datasets(filter="task_categories:text-classification")

    # List only the datasets in russian for language modeling
    >>> api.list_datasets(
    ...     filter=("language:ru", "task_ids:language-modeling")
    ... )

    # List FiftyOne datasets (identified by the tag "fiftyone" in dataset card)
    >>> api.list_datasets(filter="fiftyone")
    ```

    Example usage with the `search` argument:

    ```python
    >>> from huggingface_hub import HfApi

    >>> api = HfApi()

    # List all datasets with "text" in their name
    >>> api.list_datasets(search="text")

    # List all datasets with "text" in their name made by google
    >>> api.list_datasets(search="text", author="google")
    ```
    """
    if expand and full:
        raise ValueError("`expand` cannot be used if `full` is passed.")

    path = f"{self.endpoint}/api/datasets"
    headers = self._build_hf_headers(token=token)
    params: Dict[str, Any] = {}

    # Build `filter` list
    filter_list = []
    if filter is not None:
        if isinstance(filter, str):
            filter_list.append(filter)
        else:
            filter_list.extend(filter)
    for key, value in (
        ("benchmark", benchmark),
        ("language_creators", language_creators),
        ("language", language),
        ("multilinguality", multilinguality),
        ("size_categories", size_categories),
        ("task_categories", task_categories),
        ("task_ids", task_ids),
    ):
        if value:
            if isinstance(value, str):
                value = [value]
            for value_item in value:
                # Send every value as "<key>:<value>". A value that already carries the prefix is
                # forwarded unchanged instead of being dropped or replaced by a previous value.
                if not value_item.startswith(f"{key}:"):
                    value_item = f"{key}:{value_item}"
                filter_list.append(value_item)
    if tags is not None:
        filter_list.extend([tags] if isinstance(tags, str) else tags)
    if len(filter_list) > 0:
        params["filter"] = filter_list

    # Handle other query params
    if author:
        params["author"] = author
    if gated is not None:
        params["gated"] = gated
    # `dataset_name` and `search` are both sent as search terms.
    search_list = []
    if dataset_name:
        search_list.append(dataset_name)
    if search:
        search_list.append(search)
    if len(search_list) > 0:
        params["search"] = search_list
    if sort is not None:
        # Translate snake_case sort keys to the server-side names; other values are sent as-is.
        params["sort"] = (
            "lastModified"
            if sort == "last_modified"
            else "trendingScore"
            if sort == "trending_score"
            else "createdAt"
            if sort == "created_at"
            else sort
        )
    if direction is not None:
        params["direction"] = direction
    if limit is not None:
        params["limit"] = limit

    # Request additional data
    if expand:
        params["expand"] = expand
    if full:
        params["full"] = True

    items = paginate(path, params=params, headers=headers)
    if limit is not None:
        items = islice(items, limit)  # Do not iterate over all pages
    for item in items:
        if "siblings" not in item:
            item["siblings"] = None
        yield DatasetInfo(**item)
@validate_hf_hub_args
def list_spaces(
    self,
    *,
    # Search-query parameter
    filter: Union[str, Iterable[str], None] = None,
    author: Optional[str] = None,
    search: Optional[str] = None,
    datasets: Union[str, Iterable[str], None] = None,
    models: Union[str, Iterable[str], None] = None,
    linked: bool = False,
    # Sorting and pagination parameters
    sort: Union[Literal["last_modified"], str, None] = None,
    direction: Optional[Literal[-1]] = None,
    limit: Optional[int] = None,
    # Additional data to fetch
    expand: Optional[List[ExpandSpaceProperty_T]] = None,
    full: Optional[bool] = None,
    token: Union[bool, str, None] = None,
) -> Iterable[SpaceInfo]:
    """
    Iterate over the Spaces hosted on the Hugging Face Hub that match the given filters.

    Args:
        filter (`str` or `Iterable`, *optional*):
            A string tag or list of tags that can be used to identify Spaces on the Hub.
        author (`str`, *optional*):
            A string which identifies the author of the returned Spaces.
        search (`str`, *optional*):
            A string that will be contained in the returned Spaces.
        datasets (`str` or `Iterable`, *optional*):
            Whether to return Spaces that make use of a dataset.
            The name of a specific dataset can be passed as a string.
        models (`str` or `Iterable`, *optional*):
            Whether to return Spaces that make use of a model.
            The name of a specific model can be passed as a string.
        linked (`bool`, *optional*):
            Whether to return Spaces that make use of either a model or a dataset.
        sort (`Literal["last_modified"]` or `str`, *optional*):
            The key with which to sort the resulting Spaces. Possible values are "last_modified", "trending_score",
            "created_at" and "likes".
        direction (`Literal[-1]` or `int`, *optional*):
            Direction in which to sort. The value `-1` sorts by descending
            order while all other values sort by ascending order.
        limit (`int`, *optional*):
            The limit on the number of Spaces fetched. Leaving this option
            to `None` fetches all Spaces.
        expand (`List[ExpandSpaceProperty_T]`, *optional*):
            List properties to return in the response. When used, only the properties in the list will be returned.
            This parameter cannot be used if `full` is passed.
            Possible values are `"author"`, `"cardData"`, `"datasets"`, `"disabled"`, `"lastModified"`, `"createdAt"`, `"likes"`, `"models"`, `"private"`, `"runtime"`, `"sdk"`, `"siblings"`, `"sha"`, `"subdomain"`, `"tags"`, `"trendingScore"`, `"usedStorage"`, `"resourceGroup"` and `"xetEnabled"`.
        full (`bool`, *optional*):
            Whether to fetch all Spaces data, including the `last_modified`, `siblings`
            and `card_data` fields.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `Iterable[SpaceInfo]`: an iterable of [`huggingface_hub.hf_api.SpaceInfo`] objects.

    Raises:
        `ValueError`: if `expand` and `full` are both passed.
    """
    if expand and full:
        raise ValueError("`expand` cannot be used if `full` is passed.")

    path = f"{self.endpoint}/api/spaces"
    headers = self._build_hf_headers(token=token)
    params: Dict[str, Any] = {}

    # Search-query parameters forwarded verbatim when provided.
    for param_name, param_value in (("filter", filter), ("author", author), ("search", search)):
        if param_value is not None:
            params[param_name] = param_value

    # Sorting and pagination: snake_case sort keys are translated to the server-side names.
    if sort is not None:
        if sort == "last_modified":
            sort_key = "lastModified"
        elif sort == "trending_score":
            sort_key = "trendingScore"
        elif sort == "created_at":
            sort_key = "createdAt"
        else:
            sort_key = sort
        params["sort"] = sort_key
    if direction is not None:
        params["direction"] = direction
    if limit is not None:
        params["limit"] = limit

    # Filters on linked models / datasets.
    if linked:
        params["linked"] = True
    if datasets is not None:
        params["datasets"] = datasets
    if models is not None:
        params["models"] = models

    # Request additional data
    if expand:
        params["expand"] = expand
    if full:
        params["full"] = True

    raw_spaces = paginate(path, params=params, headers=headers)
    if limit is not None:
        raw_spaces = islice(raw_spaces, limit)  # Do not iterate over all pages
    for raw_space in raw_spaces:
        raw_space.setdefault("siblings", None)
        yield SpaceInfo(**raw_space)
@validate_hf_hub_args
def unlike(
    self,
    repo_id: str,
    *,
    token: Union[bool, str, None] = None,
    repo_type: Optional[str] = None,
) -> None:
    """
    Remove a like from a repo on the Hub (e.g. remove it from the favorite list).

    To prevent spam usage, it is not possible to `like` a repository from a script.

    See also [`list_liked_repos`].

    Args:
        repo_id (`str`):
            The repository to unlike. Example: `"user/my-cool-model"`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if unliking a dataset or space, `None` or
            `"model"` if unliking a model. Default is `None`.

    Raises:
        [`~utils.RepositoryNotFoundError`]:
            If repository is not found (error 404): wrong repo_id/repo_type, private
            but not authenticated or repo does not exist.

    Example:
    ```python
    >>> from huggingface_hub import list_liked_repos, unlike
    >>> "gpt2" in list_liked_repos().models # we assume you have already liked gpt2
    True
    >>> unlike("gpt2")
    >>> "gpt2" in list_liked_repos().models
    False
    ```
    """
    # A missing repo type means "model".
    effective_repo_type = constants.REPO_TYPE_MODEL if repo_type is None else repo_type

    session = get_session()
    like_url = f"{self.endpoint}/api/{effective_repo_type}s/{repo_id}/like"
    response = session.delete(url=like_url, headers=self._build_hf_headers(token=token))
    hf_raise_for_status(response)
@validate_hf_hub_args
def list_liked_repos(
    self,
    user: Optional[str] = None,
    *,
    token: Union[bool, str, None] = None,
) -> UserLikes:
    """
    List all public repos liked by a user on huggingface.co.

    The list is public, so a token is optional. When `user` is omitted, the
    currently logged in user is used instead.

    See also [`unlike`].

    Args:
        user (`str`, *optional*):
            Name of the user for which you want to fetch the likes.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        [`UserLikes`]: object containing the user name, the total number of likes and
        3 lists of repo ids (1 for models, 1 for datasets and 1 for Spaces).

    Raises:
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If `user` is not passed and the identity returned by `whoami` is not of
            type `"user"`.

    Example:
    ```python
    >>> from huggingface_hub import list_liked_repos

    >>> likes = list_liked_repos("julien-c")

    >>> likes.user
    "julien-c"

    >>> likes.models
    ["osanseviero/streamlit_1.15", "Xhaheen/ChatGPT_HF", ...]
    ```
    """
    # Without an explicit user, fall back to whoever owns the token.
    if user is None:
        identity = self.whoami(token=token)
        if identity["type"] != "user":
            raise ValueError(
                "Cannot list liked repos. You must provide a 'user' as input or be logged in as a user."
            )
        user = identity["name"]

    likes_url = f"{self.endpoint}/api/users/{user}/likes"
    auth_headers = self._build_hf_headers(token=token)
    all_likes = list(paginate(likes_url, params={}, headers=auth_headers))

    # Each received item looks like:
    # {
    #     'createdAt': '2021-09-09T21:53:27.000Z',
    #     'repo': {
    #         'name': 'PaddlePaddle/PaddleOCR',
    #         'type': 'space'
    #     }
    # }
    def _names_of(kind: str) -> List[str]:
        # One pass over the likes per repo type: simple to read, order of likes is kept.
        return [entry["repo"]["name"] for entry in all_likes if entry["repo"]["type"] == kind]

    return UserLikes(
        user=user,
        total=len(all_likes),
        models=_names_of("model"),
        datasets=_names_of("dataset"),
        spaces=_names_of("space"),
    )
@validate_hf_hub_args
def list_repo_likers(
    self,
    repo_id: str,
    *,
    repo_type: Optional[str] = None,
    token: Union[bool, str, None] = None,
) -> Iterable[User]:
    """
    List all users who liked a given repo on the Hugging Face Hub.

    See also [`list_liked_repos`].

    Args:
        repo_id (`str`):
            The repository whose likers are listed. Example: `"user/my-cool-model"`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.
        repo_type (`str`, *optional*):
            `"dataset"` or `"space"` when the repo is a dataset or a Space, `None` or
            `"model"` when it is a model. Default is `None`.

    Returns:
        `Iterable[User]`: an iterable of [`huggingface_hub.hf_api.User`] objects.
    """
    # A missing repo type means "model".
    effective_type = constants.REPO_TYPE_MODEL if repo_type is None else repo_type
    likers_url = f"{self.endpoint}/api/{effective_type}s/{repo_id}/likers"
    auth_headers = self._build_hf_headers(token=token)

    # Results are paginated; users are yielded lazily as pages come in.
    for entry in paginate(likers_url, params={}, headers=auth_headers):
        yield User(
            username=entry["user"],
            fullname=entry["fullname"],
            avatar_url=entry["avatarUrl"],
        )
@validate_hf_hub_args
def model_info(
    self,
    repo_id: str,
    *,
    revision: Optional[str] = None,
    timeout: Optional[float] = None,
    securityStatus: Optional[bool] = None,
    files_metadata: bool = False,
    expand: Optional[List[ExpandModelProperty_T]] = None,
    token: Union[bool, str, None] = None,
) -> ModelInfo:
    """
    Get info on one specific model on huggingface.co.

    The model can be private if you pass an acceptable token or are logged in.

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        revision (`str`, *optional*):
            The revision of the model repository from which to get the
            information.
        timeout (`float`, *optional*):
            Timeout forwarded to the HTTP request made to the Hub.
        securityStatus (`bool`, *optional*):
            Whether to retrieve the security status from the model
            repository as well. The security status will be returned in the `security_repo_status` field.
        files_metadata (`bool`, *optional*):
            Whether or not to retrieve metadata for files in the repository
            (size, LFS metadata, etc). Defaults to `False`.
        expand (`List[ExpandModelProperty_T]`, *optional*):
            List properties to return in the response. When used, only the properties in the list will be returned.
            This parameter cannot be used if `securityStatus` or `files_metadata` are passed.
            Possible values are `"author"`, `"baseModels"`, `"cardData"`, `"childrenModelCount"`, `"config"`, `"createdAt"`, `"disabled"`, `"downloads"`, `"downloadsAllTime"`, `"gated"`, `"gguf"`, `"inference"`, `"inferenceProviderMapping"`, `"lastModified"`, `"library_name"`, `"likes"`, `"mask_token"`, `"model-index"`, `"pipeline_tag"`, `"private"`, `"safetensors"`, `"sha"`, `"siblings"`, `"spaces"`, `"tags"`, `"transformersInfo"`, `"trendingScore"`, `"widgetData"`, `"usedStorage"`, `"resourceGroup"` and `"xetEnabled"`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        [`huggingface_hub.hf_api.ModelInfo`]: The model repository information.

    > [!TIP]
    > Raises the following errors:
    >
    >     - [`~utils.RepositoryNotFoundError`]
    >       If the repository to download from cannot be found. This may be because it doesn't exist,
    >       or because it is set to `private` and you do not have access.
    >     - [`~utils.RevisionNotFoundError`]
    >       If the revision to download from cannot be found.
    """
    # `expand` is mutually exclusive with the two "extra data" switches.
    if expand:
        if securityStatus or files_metadata:
            raise ValueError("`expand` cannot be used if `securityStatus` or `files_metadata` are set.")

    headers = self._build_hf_headers(token=token)

    url = f"{self.endpoint}/api/models/{repo_id}"
    if revision is not None:
        # The revision is fully percent-encoded (slashes included) before being put in the path.
        url = f"{url}/revision/{quote(revision, safe='')}"

    # Only the options that were actually requested end up in the query string.
    requested = (
        ("securityStatus", securityStatus, True),
        ("blobs", files_metadata, True),
        ("expand", expand, expand),
    )
    query: Dict = {key: value for key, wanted, value in requested if wanted}

    response = get_session().get(url, headers=headers, timeout=timeout, params=query)
    hf_raise_for_status(response)
    return ModelInfo(**response.json())
@validate_hf_hub_args
def dataset_info(
    self,
    repo_id: str,
    *,
    revision: Optional[str] = None,
    timeout: Optional[float] = None,
    files_metadata: bool = False,
    expand: Optional[List[ExpandDatasetProperty_T]] = None,
    token: Union[bool, str, None] = None,
) -> DatasetInfo:
    """
    Get info on one specific dataset on huggingface.co.

    The dataset can be private if you pass an acceptable token.

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        revision (`str`, *optional*):
            The revision of the dataset repository from which to get the
            information.
        timeout (`float`, *optional*):
            Timeout forwarded to the HTTP request made to the Hub.
        files_metadata (`bool`, *optional*):
            Whether or not to retrieve metadata for files in the repository
            (size, LFS metadata, etc). Defaults to `False`.
        expand (`List[ExpandDatasetProperty_T]`, *optional*):
            List properties to return in the response. When used, only the properties in the list will be returned.
            This parameter cannot be used if `files_metadata` is passed.
            Possible values are `"author"`, `"cardData"`, `"citation"`, `"createdAt"`, `"disabled"`, `"description"`, `"downloads"`, `"downloadsAllTime"`, `"gated"`, `"lastModified"`, `"likes"`, `"paperswithcode_id"`, `"private"`, `"siblings"`, `"sha"`, `"tags"`, `"trendingScore"`, `"usedStorage"`, `"resourceGroup"` and `"xetEnabled"`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        [`hf_api.DatasetInfo`]: The dataset repository information.

    > [!TIP]
    > Raises the following errors:
    >
    >     - [`~utils.RepositoryNotFoundError`]
    >       If the repository to download from cannot be found. This may be because it doesn't exist,
    >       or because it is set to `private` and you do not have access.
    >     - [`~utils.RevisionNotFoundError`]
    >       If the revision to download from cannot be found.
    """
    # `expand` and `files_metadata` cannot be combined.
    if expand:
        if files_metadata:
            raise ValueError("`expand` cannot be used if `files_metadata` is set.")

    headers = self._build_hf_headers(token=token)

    url = f"{self.endpoint}/api/datasets/{repo_id}"
    if revision is not None:
        # The revision is fully percent-encoded (slashes included) before being put in the path.
        url = f"{url}/revision/{quote(revision, safe='')}"

    # Only the options that were actually requested end up in the query string.
    requested = (
        ("blobs", files_metadata, True),
        ("expand", expand, expand),
    )
    query: Dict = {key: value for key, wanted, value in requested if wanted}

    response = get_session().get(url, headers=headers, timeout=timeout, params=query)
    hf_raise_for_status(response)
    return DatasetInfo(**response.json())
@validate_hf_hub_args
def space_info(
    self,
    repo_id: str,
    *,
    revision: Optional[str] = None,
    timeout: Optional[float] = None,
    files_metadata: bool = False,
    expand: Optional[List[ExpandSpaceProperty_T]] = None,
    token: Union[bool, str, None] = None,
) -> SpaceInfo:
    """
    Get info on one specific Space on huggingface.co.

    The Space can be private if you pass an acceptable token.

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        revision (`str`, *optional*):
            The revision of the space repository from which to get the
            information.
        timeout (`float`, *optional*):
            Timeout forwarded to the HTTP request made to the Hub.
        files_metadata (`bool`, *optional*):
            Whether or not to retrieve metadata for files in the repository
            (size, LFS metadata, etc). Defaults to `False`.
        expand (`List[ExpandSpaceProperty_T]`, *optional*):
            List properties to return in the response. When used, only the properties in the list will be returned.
            This parameter cannot be used if `files_metadata` is passed.
            Possible values are `"author"`, `"cardData"`, `"createdAt"`, `"datasets"`, `"disabled"`, `"lastModified"`, `"likes"`, `"models"`, `"private"`, `"runtime"`, `"sdk"`, `"siblings"`, `"sha"`, `"subdomain"`, `"tags"`, `"trendingScore"`, `"usedStorage"`, `"resourceGroup"` and `"xetEnabled"`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        [`~hf_api.SpaceInfo`]: The space repository information.

    > [!TIP]
    > Raises the following errors:
    >
    >     - [`~utils.RepositoryNotFoundError`]
    >       If the repository to download from cannot be found. This may be because it doesn't exist,
    >       or because it is set to `private` and you do not have access.
    >     - [`~utils.RevisionNotFoundError`]
    >       If the revision to download from cannot be found.
    """
    # `expand` and `files_metadata` cannot be combined.
    if expand:
        if files_metadata:
            raise ValueError("`expand` cannot be used if `files_metadata` is set.")

    headers = self._build_hf_headers(token=token)

    url = f"{self.endpoint}/api/spaces/{repo_id}"
    if revision is not None:
        # The revision is fully percent-encoded (slashes included) before being put in the path.
        url = f"{url}/revision/{quote(revision, safe='')}"

    # Only the options that were actually requested end up in the query string.
    requested = (
        ("blobs", files_metadata, True),
        ("expand", expand, expand),
    )
    query: Dict = {key: value for key, wanted, value in requested if wanted}

    response = get_session().get(url, headers=headers, timeout=timeout, params=query)
    hf_raise_for_status(response)
    return SpaceInfo(**response.json())
@validate_hf_hub_args
def repo_info(
    self,
    repo_id: str,
    *,
    revision: Optional[str] = None,
    repo_type: Optional[str] = None,
    timeout: Optional[float] = None,
    files_metadata: bool = False,
    expand: Optional[Union[ExpandModelProperty_T, ExpandDatasetProperty_T, ExpandSpaceProperty_T]] = None,
    token: Union[bool, str, None] = None,
) -> Union[ModelInfo, DatasetInfo, SpaceInfo]:
    """
    Get the info object for a given repo of a given type.

    This is a thin dispatcher: depending on `repo_type`, the call is forwarded to
    [`model_info`], [`dataset_info`] or [`space_info`].

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        revision (`str`, *optional*):
            The revision of the repository from which to get the
            information.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if getting repository info from a dataset or a space,
            `None` or `"model"` if getting repository info from a model. Default is `None`.
        timeout (`float`, *optional*):
            Timeout forwarded to the HTTP request made to the Hub.
        expand (`ExpandModelProperty_T` or `ExpandDatasetProperty_T` or `ExpandSpaceProperty_T`, *optional*):
            List properties to return in the response. When used, only the properties in the list will be returned.
            This parameter cannot be used if `files_metadata` is passed.
            For an exhaustive list of available properties, check out [`model_info`], [`dataset_info`] or [`space_info`].
        files_metadata (`bool`, *optional*):
            Whether or not to retrieve metadata for files in the repository
            (size, LFS metadata, etc). Defaults to `False`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `Union[SpaceInfo, DatasetInfo, ModelInfo]`: The repository information, as a
        [`huggingface_hub.hf_api.DatasetInfo`], [`huggingface_hub.hf_api.ModelInfo`]
        or [`huggingface_hub.hf_api.SpaceInfo`] object.

    > [!TIP]
    > Raises the following errors:
    >
    >     - [`~utils.RepositoryNotFoundError`]
    >       If the repository to download from cannot be found. This may be because it doesn't exist,
    >       or because it is set to `private` and you do not have access.
    >     - [`~utils.RevisionNotFoundError`]
    >       If the revision to download from cannot be found.
    """
    # Pick the type-specific fetcher; anything other than None/"model"/"dataset"/"space" is rejected.
    if repo_type == "dataset":
        fetch_info = self.dataset_info
    elif repo_type == "space":
        fetch_info = self.space_info  # type: ignore
    elif repo_type is None or repo_type == "model":
        fetch_info = self.model_info  # type: ignore
    else:
        raise ValueError("Unsupported repo type.")

    forwarded = dict(
        revision=revision,
        token=token,
        timeout=timeout,
        expand=expand,
        files_metadata=files_metadata,
    )
    return fetch_info(repo_id, **forwarded)  # type: ignore[arg-type]
@validate_hf_hub_args
def repo_exists(
    self,
    repo_id: str,
    *,
    repo_type: Optional[str] = None,
    token: Union[str, bool, None] = None,
) -> bool:
    """
    Check whether a repository exists on the Hugging Face Hub.

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if getting repository info from a dataset or a space,
            `None` or `"model"` if getting repository info from a model. Default is `None`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        True if the repository exists, False otherwise. A gated repository that the
        caller cannot access is still reported as existing.

    Examples:
        ```py
        >>> from huggingface_hub import repo_exists
        >>> repo_exists("google/gemma-7b")
        True
        >>> repo_exists("google/not-a-repo")
        False
        ```
    """
    try:
        self.repo_info(repo_id=repo_id, repo_type=repo_type, token=token)
    except GatedRepoError:
        # Access is denied, but that in itself proves the repo is there.
        return True
    except RepositoryNotFoundError:
        return False
    else:
        return True
@validate_hf_hub_args
def revision_exists(
    self,
    repo_id: str,
    revision: str,
    *,
    repo_type: Optional[str] = None,
    token: Union[str, bool, None] = None,
) -> bool:
    """
    Check whether a specific revision exists on a repo on the Hugging Face Hub.

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        revision (`str`):
            The revision of the repository to check.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if getting repository info from a dataset or a space,
            `None` or `"model"` if getting repository info from a model. Default is `None`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        True if both the repository and the revision exist, False otherwise.

    Examples:
        ```py
        >>> from huggingface_hub import revision_exists
        >>> revision_exists("google/gemma-7b", "float16")
        True
        >>> revision_exists("google/gemma-7b", "not-a-revision")
        False
        ```
    """
    try:
        self.repo_info(repo_id=repo_id, revision=revision, repo_type=repo_type, token=token)
    except (RevisionNotFoundError, RepositoryNotFoundError):
        # Either the revision or the whole repo is missing: same answer for the caller.
        return False
    else:
        return True
@validate_hf_hub_args
def file_exists(
    self,
    repo_id: str,
    filename: str,
    *,
    repo_type: Optional[str] = None,
    revision: Optional[str] = None,
    token: Union[str, bool, None] = None,
) -> bool:
    """
    Check whether a file exists in a repository on the Hugging Face Hub.

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        filename (`str`):
            The name of the file to check, for example:
            `"config.json"`
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if getting repository info from a dataset or a space,
            `None` or `"model"` if getting repository info from a model. Default is `None`.
        revision (`str`, *optional*):
            The revision of the repository from which to get the information. Defaults to `"main"` branch.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        True if the file exists, False otherwise (missing repo, missing revision or
        missing file).

    Raises:
        `GatedRepoError`:
            Re-raised as-is when the repository is gated, instead of being reported
            as "file not found".

    Examples:
        ```py
        >>> from huggingface_hub import file_exists
        >>> file_exists("bigcode/starcoder", "config.json")
        True
        >>> file_exists("bigcode/starcoder", "not-a-file")
        False
        >>> file_exists("bigcode/not-a-repo", "config.json")
        False
        ```
    """
    file_url = hf_hub_url(
        repo_id=repo_id, repo_type=repo_type, revision=revision, filename=filename, endpoint=self.endpoint
    )

    # When no token is passed explicitly, use the one stored on this client instance.
    effective_token = self.token if token is None else token

    try:
        get_hf_file_metadata(file_url, token=effective_token)
    except GatedRepoError:
        # Listed first on purpose: a gated repo must surface as an error, not as `False`.
        raise
    except (RepositoryNotFoundError, EntryNotFoundError, RevisionNotFoundError):
        return False
    else:
        return True
@validate_hf_hub_args
def list_repo_files(
    self,
    repo_id: str,
    *,
    revision: Optional[str] = None,
    repo_type: Optional[str] = None,
    token: Union[str, bool, None] = None,
) -> List[str]:
    """
    Get the list of files in a given repo.

    The repo tree is walked recursively through [`list_repo_tree`] and only file
    entries are kept (folders are dropped).

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated by a `/`.
        revision (`str`, *optional*):
            The revision of the repository from which to get the information.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if listing files from a dataset or a Space, `None` or `"model"` if
            listing from a model. Default is `None`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `List[str]`: the list of files in a given repository.
    """
    tree_entries = self.list_repo_tree(
        repo_id=repo_id, recursive=True, revision=revision, repo_type=repo_type, token=token
    )

    filenames: List[str] = []
    for entry in tree_entries:
        # The tree mixes files and folders; only files carry a filename of interest.
        if isinstance(entry, RepoFile):
            filenames.append(entry.rfilename)
    return filenames
@validate_hf_hub_args
def list_repo_tree(
    self,
    repo_id: str,
    path_in_repo: Optional[str] = None,
    *,
    recursive: bool = False,
    expand: bool = False,
    revision: Optional[str] = None,
    repo_type: Optional[str] = None,
    token: Union[str, bool, None] = None,
) -> Iterable[Union[RepoFile, RepoFolder]]:
    """
    List a repo tree's files and folders and get information about them.

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated by a `/`.
        path_in_repo (`str`, *optional*):
            Relative path of the tree (folder) in the repo, for example:
            `"checkpoints/1fec34a/results"`. Will default to the root tree (folder) of the repository.
        recursive (`bool`, *optional*, defaults to `False`):
            Whether to list tree's files and folders recursively.
        expand (`bool`, *optional*, defaults to `False`):
            Whether to fetch more information about the tree's files and folders (e.g. last commit and files' security scan results). This
            operation is more expensive for the server so only 50 results are returned per page (instead of 1000).
            As pagination is implemented in `huggingface_hub`, this is transparent for you except for the time it
            takes to get the results.
        revision (`str`, *optional*):
            The revision of the repository from which to get the tree. Defaults to `"main"` branch.
        repo_type (`str`, *optional*):
            The type of the repository from which to get the tree (`"model"`, `"dataset"` or `"space"`).
            Defaults to `"model"`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `Iterable[Union[RepoFile, RepoFolder]]`:
            The information about the tree's files and folders, as an iterable of [`RepoFile`] and [`RepoFolder`] objects. The order of the files and folders is
            not guaranteed.

    Raises:
        [`~utils.RepositoryNotFoundError`]:
            If repository is not found (error 404): wrong repo_id/repo_type, private but not authenticated or repo
            does not exist.
        [`~utils.RevisionNotFoundError`]:
            If revision is not found (error 404) on the repo.
        [`~utils.EntryNotFoundError`]:
            If the tree (folder) does not exist (error 404) on the repo.

    Examples:

        Get information about a repo's tree.
        ```py
        >>> from huggingface_hub import list_repo_tree
        >>> repo_tree = list_repo_tree("lysandre/arxiv-nlp")
        >>> repo_tree
        <generator object HfApi.list_repo_tree at 0x7fa4088e1ac0>
        >>> list(repo_tree)
        [
            RepoFile(path='.gitattributes', size=391, blob_id='ae8c63daedbd4206d7d40126955d4e6ab1c80f8f', lfs=None, last_commit=None, security=None),
            RepoFile(path='README.md', size=391, blob_id='43bd404b159de6fba7c2f4d3264347668d43af25', lfs=None, last_commit=None, security=None),
            RepoFile(path='config.json', size=554, blob_id='2f9618c3a19b9a61add74f70bfb121335aeef666', lfs=None, last_commit=None, security=None),
            RepoFile(
                path='flax_model.msgpack', size=497764107, blob_id='8095a62ccb4d806da7666fcda07467e2d150218e',
                lfs={'size': 497764107, 'sha256': 'd88b0d6a6ff9c3f8151f9d3228f57092aaea997f09af009eefd7373a77b5abb9', 'pointer_size': 134}, last_commit=None, security=None
            ),
            RepoFile(path='merges.txt', size=456318, blob_id='226b0752cac7789c48f0cb3ec53eda48b7be36cc', lfs=None, last_commit=None, security=None),
            RepoFile(
                path='pytorch_model.bin', size=548123560, blob_id='64eaa9c526867e404b68f2c5d66fd78e27026523',
                lfs={'size': 548123560, 'sha256': '9be78edb5b928eba33aa88f431551348f7466ba9f5ef3daf1d552398722a5436', 'pointer_size': 134}, last_commit=None, security=None
            ),
            RepoFile(path='vocab.json', size=898669, blob_id='b00361fece0387ca34b4b8b8539ed830d644dbeb', lfs=None, last_commit=None, security=None)
        ]
        ```

        Get even more information about a repo's tree (last commit and files' security scan results)
        ```py
        >>> from huggingface_hub import list_repo_tree
        >>> repo_tree = list_repo_tree("prompthero/openjourney-v4", expand=True)
        >>> list(repo_tree)
        [
            RepoFolder(
                path='feature_extractor',
                tree_id='aa536c4ea18073388b5b0bc791057a7296a00398',
                last_commit={
                    'oid': '47b62b20b20e06b9de610e840282b7e6c3d51190',
                    'title': 'Upload diffusers weights (#48)',
                    'date': datetime.datetime(2023, 3, 21, 9, 5, 27, tzinfo=datetime.timezone.utc)
                }
            ),
            RepoFolder(
                path='safety_checker',
                tree_id='65aef9d787e5557373fdf714d6c34d4fcdd70440',
                last_commit={
                    'oid': '47b62b20b20e06b9de610e840282b7e6c3d51190',
                    'title': 'Upload diffusers weights (#48)',
                    'date': datetime.datetime(2023, 3, 21, 9, 5, 27, tzinfo=datetime.timezone.utc)
                }
            ),
            RepoFile(
                path='model_index.json',
                size=582,
                blob_id='d3d7c1e8c3e78eeb1640b8e2041ee256e24c9ee1',
                lfs=None,
                last_commit={
                    'oid': 'b195ed2d503f3eb29637050a886d77bd81d35f0e',
                    'title': 'Fix deprecation warning by changing `CLIPFeatureExtractor` to `CLIPImageProcessor`. (#54)',
                    'date': datetime.datetime(2023, 5, 15, 21, 41, 59, tzinfo=datetime.timezone.utc)
                },
                security={
                    'safe': True,
                    'av_scan': {'virusFound': False, 'virusNames': None},
                    'pickle_import_scan': None
                }
            )
            ...
        ]
        ```
    """
    effective_type = repo_type or constants.REPO_TYPE_MODEL

    # A user-supplied revision is fully percent-encoded; the default revision is used verbatim.
    if revision is None:
        ref = constants.DEFAULT_REVISION
    else:
        ref = quote(revision, safe="")

    headers = self._build_hf_headers(token=token)

    # An empty/None sub-path means the root tree: nothing is appended to the URL.
    if path_in_repo:
        subtree = "/" + quote(path_in_repo, safe="")
    else:
        subtree = ""

    tree_url = f"{self.endpoint}/api/{effective_type}s/{repo_id}/tree/{ref}{subtree}"
    query = {"recursive": recursive, "expand": expand}

    for entry in paginate(path=tree_url, headers=headers, params=query):
        # Every entry whose type is not "file" is treated as a folder.
        if entry["type"] == "file":
            yield RepoFile(**entry)
        else:
            yield RepoFolder(**entry)
@validate_hf_hub_args
def list_repo_refs(
    self,
    repo_id: str,
    *,
    repo_type: Optional[str] = None,
    include_pull_requests: bool = False,
    token: Union[str, bool, None] = None,
) -> GitRefs:
    """
    Get the list of refs of a given repo (both tags and branches).

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if listing refs from a dataset or a Space,
            `None` or `"model"` if listing from a model. Default is `None`.
        include_pull_requests (`bool`, *optional*):
            Whether to include refs from pull requests in the list. Defaults to `False`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        [`GitRefs`]: object containing all information about branches and tags for a
        repo on the Hub. Its `pull_requests` field is `None` unless
        `include_pull_requests` is set.

    Example:
    ```py
    >>> from huggingface_hub import HfApi
    >>> api = HfApi()
    >>> api.list_repo_refs("gpt2")
    GitRefs(branches=[GitRefInfo(name='main', ref='refs/heads/main', target_commit='e7da7f221d5bf496a48136c0cd264e630fe9fcc8')], converts=[], tags=[])

    >>> api.list_repo_refs("bigcode/the-stack", repo_type='dataset')
    GitRefs(
        branches=[
            GitRefInfo(name='main', ref='refs/heads/main', target_commit='18edc1591d9ce72aa82f56c4431b3c969b210ae3'),
            GitRefInfo(name='v1.1.a1', ref='refs/heads/v1.1.a1', target_commit='f9826b862d1567f3822d3d25649b0d6d22ace714')
        ],
        converts=[],
        tags=[
            GitRefInfo(name='v1.0', ref='refs/tags/v1.0', target_commit='c37a8cd1e382064d8aced5e05543c5f7753834da')
        ]
    )
    ```
    """
    effective_type = repo_type or constants.REPO_TYPE_MODEL

    session = get_session()
    refs_url = f"{self.endpoint}/api/{effective_type}s/{repo_id}/refs"
    headers = self._build_hf_headers(token=token)
    # Pull request refs are only returned by the server when explicitly asked for.
    query = {"include_prs": 1} if include_pull_requests else {}

    response = session.get(refs_url, headers=headers, params=query)
    hf_raise_for_status(response)
    payload = response.json()

    def _refs_under(key: str) -> List[GitRefInfo]:
        # Convert every raw ref listed under `key` in the payload into a `GitRefInfo`.
        return [
            GitRefInfo(name=raw["name"], ref=raw["ref"], target_commit=raw["targetCommit"])
            for raw in payload[key]
        ]

    return GitRefs(
        branches=_refs_under("branches"),
        converts=_refs_under("converts"),
        tags=_refs_under("tags"),
        # The "pullRequests" key is only read when pull requests were requested.
        pull_requests=_refs_under("pullRequests") if include_pull_requests else None,
    )
@validate_hf_hub_args
def list_repo_commits(
    self,
    repo_id: str,
    *,
    repo_type: Optional[str] = None,
    token: Union[bool, str, None] = None,
    revision: Optional[str] = None,
    formatted: bool = False,
) -> List[GitCommitInfo]:
    """
    Get the list of commits of a given revision for a repo on the Hub.

    Commits are sorted by date (last commit first).

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated by a `/`.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if listing commits from a dataset or a Space, `None` or `"model"` if
            listing from a model. Default is `None`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.
        revision (`str`, *optional*):
            The git revision to list commits from. Defaults to the head of the `"main"` branch.
        formatted (`bool`):
            Whether to return the HTML-formatted title and description of the commits. Defaults to False.

    Example:
    ```py
    >>> from huggingface_hub import HfApi
    >>> api = HfApi()

    # Commits are sorted by date (last commit first)
    >>> initial_commit = api.list_repo_commits("gpt2")[-1]

    # Initial commit is always a system commit containing the `.gitattributes` file.
    >>> initial_commit
    GitCommitInfo(
        commit_id='9b865efde13a30c13e0a33e536cf3e4a5a9d71d8',
        authors=['system'],
        created_at=datetime.datetime(2019, 2, 18, 10, 36, 15, tzinfo=datetime.timezone.utc),
        title='initial commit',
        message='',
        formatted_title=None,
        formatted_message=None
    )

    # Create an empty branch by deriving from initial commit
    >>> api.create_branch("gpt2", "new_empty_branch", revision=initial_commit.commit_id)
    ```

    Returns:
        List[[`GitCommitInfo`]]: list of objects containing information about the commits for a repo on the Hub.

    Raises:
        [`~utils.RepositoryNotFoundError`]:
            If repository is not found (error 404): wrong repo_id/repo_type, private but not authenticated or repo
            does not exist.
        [`~utils.RevisionNotFoundError`]:
            If revision is not found (error 404) on the repo.
    """
    effective_type = repo_type or constants.REPO_TYPE_MODEL

    # A user-supplied revision is fully percent-encoded; the default revision is used verbatim.
    if revision is None:
        ref = constants.DEFAULT_REVISION
    else:
        ref = quote(revision, safe="")

    raw_commits = paginate(
        f"{self.endpoint}/api/{effective_type}s/{repo_id}/commits/{ref}",
        headers=self._build_hf_headers(token=token),
        # The formatted (HTML) title/message are only requested on demand.
        params={"expand[]": "formatted"} if formatted else {},
    )

    # Walk through every page and convert each raw commit as it arrives.
    commits: List[GitCommitInfo] = []
    for raw in raw_commits:
        commits.append(
            GitCommitInfo(
                commit_id=raw["id"],
                authors=[author["user"] for author in raw["authors"]],
                created_at=parse_datetime(raw["date"]),
                title=raw["title"],
                message=raw["message"],
                # Absent "formatted" section -> both formatted fields are None.
                formatted_title=raw.get("formatted", {}).get("title"),
                formatted_message=raw.get("formatted", {}).get("message"),
            )
        )
    return commits
@validate_hf_hub_args
def get_paths_info(
    self,
    repo_id: str,
    paths: Union[List[str], str],
    *,
    expand: bool = False,
    revision: Optional[str] = None,
    repo_type: Optional[str] = None,
    token: Union[str, bool, None] = None,
) -> List[Union[RepoFile, RepoFolder]]:
    """
    Get information about a repo's paths.

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated by a `/`.
        paths (`Union[List[str], str]`):
            The path or list of paths to get information about. A single string is wrapped in a list before
            being sent. If a path does not exist, it is ignored without raising an exception.
        expand (`bool`, *optional*, defaults to `False`):
            Whether to fetch more information about the paths (e.g. last commit and files' security scan
            results). This operation is more expensive for the server.
        revision (`str`, *optional*):
            The revision of the repository from which to get the information. Defaults to `"main"` branch.
        repo_type (`str`, *optional*):
            The type of the repository from which to get the information (`"model"`, `"dataset"` or `"space"`).
            Defaults to `"model"`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `List[Union[RepoFile, RepoFolder]]`:
            The information about the paths, as a list of [`RepoFile`] and [`RepoFolder`] objects.

    Raises:
        [`~utils.RepositoryNotFoundError`]:
            If repository is not found (error 404): wrong repo_id/repo_type, private but not authenticated or repo
            does not exist.
        [`~utils.RevisionNotFoundError`]:
            If revision is not found (error 404) on the repo.

    Example:
    ```py
    >>> from huggingface_hub import get_paths_info
    >>> paths_info = get_paths_info("allenai/c4", ["README.md", "en"], repo_type="dataset")
    >>> paths_info
    [
        RepoFile(path='README.md', size=2379, blob_id='f84cb4c97182890fc1dbdeaf1a6a468fd27b4fff', lfs=None, last_commit=None, security=None),
        RepoFolder(path='en', tree_id='dc943c4c40f53d02b31ced1defa7e5f438d5862e', last_commit=None)
    ]
    ```
    """
    if not repo_type:
        repo_type = constants.REPO_TYPE_MODEL
    if revision is None:
        revision = constants.DEFAULT_REVISION
    else:
        # Keep the revision as a single URL path segment, even if it contains "/".
        revision = quote(revision, safe="")
    headers = self._build_hf_headers(token=token)

    # Normalize a single path into a one-element list.
    requested_paths = paths if isinstance(paths, list) else [paths]
    endpoint_url = f"{self.endpoint}/api/{repo_type}s/{repo_id}/paths-info/{revision}"
    response = get_session().post(
        endpoint_url,
        data={"paths": requested_paths, "expand": expand},
        headers=headers,
    )
    hf_raise_for_status(response)

    # Each returned entry carries a "type" field: "file" entries become RepoFile,
    # anything else becomes RepoFolder.
    results: List[Union[RepoFile, RepoFolder]] = []
    for entry in response.json():
        if entry["type"] == "file":
            results.append(RepoFile(**entry))
        else:
            results.append(RepoFolder(**entry))
    return results
@validate_hf_hub_args
def super_squash_history(
    self,
    repo_id: str,
    *,
    branch: Optional[str] = None,
    commit_message: Optional[str] = None,
    repo_type: Optional[str] = None,
    token: Union[str, bool, None] = None,
) -> None:
    """Squash commit history on a branch for a repo on the Hub.

    Squashing the repo history is useful when you know you'll make hundreds of commits and you don't want to
    clutter the history. Squashing commits can only be performed from the head of a branch.

    > [!WARNING]
    > Once squashed, the commit history cannot be retrieved. This is a non-revertible operation.

    > [!WARNING]
    > Once the history of a branch has been squashed, it is not possible to merge it back into another branch since
    > their history will have diverged.

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated by a `/`.
        branch (`str`, *optional*):
            The branch to squash. Defaults to the head of the `"main"` branch.
        commit_message (`str`, *optional*):
            The commit message to use for the squashed commit. When missing or empty, a default message
            mentioning the branch name is used.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if squashing the history of a dataset or a Space, `None` or `"model"`
            if squashing the history of a model. Default is `None`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Raises:
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If `repo_type` is not one of the values in `constants.REPO_TYPES`.
        [`~utils.RepositoryNotFoundError`]:
            If repository is not found (error 404): wrong repo_id/repo_type, private but not authenticated or repo
            does not exist.
        [`~utils.RevisionNotFoundError`]:
            If the branch to squash cannot be found.
        [`~utils.BadRequestError`]:
            If invalid reference for a branch. You cannot squash history on tags.

    Example:
    ```py
    >>> from huggingface_hub import HfApi
    >>> api = HfApi()

    # Create repo
    >>> repo_id = api.create_repo("test-squash").repo_id

    # Make a lot of commits.
    >>> api.upload_file(repo_id=repo_id, path_in_repo="file.txt", path_or_fileobj=b"content")
    >>> api.upload_file(repo_id=repo_id, path_in_repo="lfs.bin", path_or_fileobj=b"content")
    >>> api.upload_file(repo_id=repo_id, path_in_repo="file.txt", path_or_fileobj=b"another_content")

    # Squash history
    >>> api.super_squash_history(repo_id=repo_id)
    ```
    """
    # Resolve defaults, then validate the repo type.
    repo_type = constants.REPO_TYPE_MODEL if repo_type is None else repo_type
    if repo_type not in constants.REPO_TYPES:
        raise ValueError("Invalid repo type")
    branch = constants.DEFAULT_REVISION if branch is None else branch

    # Prepare request. The branch is fully percent-encoded so that names containing "/"
    # remain a single URL path segment.
    encoded_branch = quote(branch, safe="")
    url = f"{self.endpoint}/api/{repo_type}s/{repo_id}/super-squash/{encoded_branch}"
    headers = self._build_hf_headers(token=token)
    if not commit_message:
        commit_message = f"Super-squash branch '{branch}' using huggingface_hub"

    # Super-squash
    squash_response = get_session().post(url=url, headers=headers, json={"message": commit_message})
    hf_raise_for_status(squash_response)
@validate_hf_hub_args
def list_lfs_files(
    self,
    repo_id: str,
    *,
    repo_type: Optional[str] = None,
    token: Union[bool, str, None] = None,
) -> Iterable[LFSFileInfo]:
    """
    List all LFS files in a repo on the Hub.

    This is primarily useful to count how much storage a repo is using and to eventually clean up large files
    with [`permanently_delete_lfs_files`]. Note that this would be a permanent action that will affect all commits
    referencing the deleted files and that cannot be undone.

    Args:
        repo_id (`str`):
            The repository for which you are listing LFS files.
        repo_type (`str`, *optional*):
            Type of repository. Set to `"dataset"` or `"space"` if listing from a dataset or space, `None` or
            `"model"` if listing from a model. Default is `None`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `Iterable[LFSFileInfo]`: An iterator of [`LFSFileInfo`] objects, produced lazily as result pages
        are consumed.

    Example:
    ```py
    >>> from huggingface_hub import HfApi
    >>> api = HfApi()
    >>> lfs_files = api.list_lfs_files("username/my-cool-repo")

    # Filter files to delete based on a combination of `filename`, `pushed_at`, `ref` or `size`.
    # e.g. select only LFS files in the "checkpoints" folder
    >>> lfs_files_to_delete = (lfs_file for lfs_file in lfs_files if lfs_file.filename.startswith("checkpoints/"))

    # Permanently delete LFS files
    >>> api.permanently_delete_lfs_files("username/my-cool-repo", lfs_files_to_delete)
    ```
    """
    # Prepare request
    resolved_type = constants.REPO_TYPE_MODEL if repo_type is None else repo_type
    listing_url = f"{self.endpoint}/api/{resolved_type}s/{repo_id}/lfs-files"
    request_headers = self._build_hf_headers(token=token)

    # Paginate over LFS items, wrapping each raw item as it is received.
    yield from (LFSFileInfo(**raw_item) for raw_item in paginate(listing_url, params={}, headers=request_headers))
@validate_hf_hub_args
def permanently_delete_lfs_files(
    self,
    repo_id: str,
    lfs_files: Iterable[LFSFileInfo],
    *,
    rewrite_history: bool = True,
    repo_type: Optional[str] = None,
    token: Union[bool, str, None] = None,
) -> None:
    """
    Permanently delete LFS files from a repo on the Hub.

    > [!WARNING]
    > This is a permanent action that will affect all commits referencing the deleted files and might corrupt your
    > repository. This is a non-revertible operation. Use it only if you know what you are doing.

    Args:
        repo_id (`str`):
            The repository from which LFS files are deleted.
        lfs_files (`Iterable[LFSFileInfo]`):
            An iterable of [`LFSFileInfo`] items to permanently delete from the repo. Use [`list_lfs_files`] to list
            all LFS files from a repo.
        rewrite_history (`bool`, *optional*, default to `True`):
            Whether to rewrite repository history to remove file pointers referencing the deleted LFS files (recommended).
        repo_type (`str`, *optional*):
            Type of repository. Set to `"dataset"` or `"space"` if deleting from a dataset or space, `None` or
            `"model"` if deleting from a model. Default is `None`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Example:
    ```py
    >>> from huggingface_hub import HfApi
    >>> api = HfApi()
    >>> lfs_files = api.list_lfs_files("username/my-cool-repo")

    # Filter files to delete based on a combination of `filename`, `pushed_at`, `ref` or `size`.
    # e.g. select only LFS files in the "checkpoints" folder
    >>> lfs_files_to_delete = (lfs_file for lfs_file in lfs_files if lfs_file.filename.startswith("checkpoints/"))

    # Permanently delete LFS files
    >>> api.permanently_delete_lfs_files("username/my-cool-repo", lfs_files_to_delete)
    ```
    """
    # Prepare request
    resolved_type = constants.REPO_TYPE_MODEL if repo_type is None else repo_type
    batch_url = f"{self.endpoint}/api/{resolved_type}s/{repo_id}/lfs-files/batch"
    request_headers = self._build_hf_headers(token=token)

    # Delete LFS items by batches of 1000: one POST request per batch.
    for lfs_batch in chunk_iterable(lfs_files, 1000):
        oids = [lfs_file.file_oid for lfs_file in lfs_batch]
        if not oids:
            # An empty batch stops the whole operation.
            return
        deletion_response = get_session().post(
            batch_url,
            headers=request_headers,
            json={"deletions": {"sha": oids, "rewriteHistory": rewrite_history}},
        )
        hf_raise_for_status(deletion_response)
@validate_hf_hub_args
def create_repo(
    self,
    repo_id: str,
    *,
    token: Union[str, bool, None] = None,
    private: Optional[bool] = None,
    repo_type: Optional[str] = None,
    exist_ok: bool = False,
    resource_group_id: Optional[str] = None,
    space_sdk: Optional[str] = None,
    space_hardware: Optional[SpaceHardware] = None,
    space_storage: Optional[SpaceStorage] = None,
    space_sleep_time: Optional[int] = None,
    space_secrets: Optional[List[Dict[str, str]]] = None,
    space_variables: Optional[List[Dict[str, str]]] = None,
) -> RepoUrl:
    """Create an empty repo on the HuggingFace Hub.

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.
        private (`bool`, *optional*):
            Whether to make the repo private. If `None` (default), the repo will be public unless the
            organization's default is private. This value is ignored if the repo already exists.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if creating a dataset or
            space, `None` or `"model"` if creating a model. Default is
            `None`.
        exist_ok (`bool`, *optional*, defaults to `False`):
            If `True`, do not raise an error if repo already exists.
        resource_group_id (`str`, *optional*):
            Resource group in which to create the repo. Resource groups are only available for Enterprise Hub
            organizations and allow to define which members of the organization can access the resource. The ID
            of a resource group can be found in the URL of the resource's page on the Hub
            (e.g. `"66670e5163145ca562cb1988"`).
            To learn more about resource groups, see https://huggingface.co/docs/hub/en/security-resource-groups.
        space_sdk (`str`, *optional*):
            Choice of SDK to use if repo_type is "space". Can be "streamlit", "gradio", "docker", or "static".
        space_hardware (`SpaceHardware` or `str`, *optional*):
            Choice of Hardware if repo_type is "space". See [`SpaceHardware`] for a complete list.
        space_storage (`SpaceStorage` or `str`, *optional*):
            Choice of persistent storage tier. Example: `"small"`. See [`SpaceStorage`] for a complete list.
        space_sleep_time (`int`, *optional*):
            Number of seconds of inactivity to wait before a Space is put to sleep. Set to `-1` if you don't want
            your Space to sleep (default behavior for upgraded hardware). For free hardware, you can't configure
            the sleep time (value is fixed to 48 hours of inactivity).
            See https://huggingface.co/docs/hub/spaces-gpus#sleep-time for more details.
        space_secrets (`List[Dict[str, str]]`, *optional*):
            A list of secret keys to set in your Space. Each item is in the form
            `{"key": ..., "value": ..., "description": ...}` where description is optional.
            For more details, see https://huggingface.co/docs/hub/spaces-overview#managing-secrets.
        space_variables (`List[Dict[str, str]]`, *optional*):
            A list of public environment variables to set in your Space. Each item is in the form
            `{"key": ..., "value": ..., "description": ...}` where description is optional.
            For more details, see https://huggingface.co/docs/hub/spaces-overview#managing-secrets-and-environment-variables.

    Returns:
        [`RepoUrl`]: URL to the newly created repo. Value is a subclass of `str` containing
        attributes like `endpoint`, `repo_type` and `repo_id`.

    Raises:
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If `repo_type` is not one of the values in `constants.REPO_TYPES`, or if `repo_type` is `"space"`
            and `space_sdk` is missing or not one of `constants.SPACES_SDK_TYPES`.
    """
    # Split "namespace/name"; a bare name means no explicit organization.
    if "/" in repo_id:
        organization, name = repo_id.split("/")
    else:
        organization, name = None, repo_id

    path = f"{self.endpoint}/api/repos/create"

    if repo_type not in constants.REPO_TYPES:
        raise ValueError("Invalid repo type")

    # Keys are inserted in a fixed order: name, organization, private, type, sdk, space options, ...
    payload: Dict[str, Any] = {"name": name, "organization": organization}
    if private is not None:
        payload["private"] = private
    if repo_type is not None:
        payload["type"] = repo_type

    # (function argument name, JSON key, provided value) for each Space-only option.
    space_options = (
        ("space_hardware", "hardware", space_hardware),
        ("space_storage", "storageTier", space_storage),
        ("space_sleep_time", "sleepTimeSeconds", space_sleep_time),
        ("space_secrets", "secrets", space_secrets),
        ("space_variables", "variables", space_variables),
    )

    if repo_type == "space":
        if space_sdk is None:
            raise ValueError(
                "No space_sdk provided. `create_repo` expects space_sdk to be one"
                f" of {constants.SPACES_SDK_TYPES} when repo_type is 'space'`"
            )
        if space_sdk not in constants.SPACES_SDK_TYPES:
            raise ValueError(f"Invalid space_sdk. Please choose one of {constants.SPACES_SDK_TYPES}.")
        payload["sdk"] = space_sdk
        # Only forward the Space options that were explicitly provided.
        for _, json_key, option_value in space_options:
            if option_value is not None:
                payload[json_key] = option_value
    else:
        # Space-only arguments are ignored (with a warning) for other repo types.
        if space_sdk is not None:
            warnings.warn("Ignoring provided space_sdk because repo_type is not 'space'.")
        provided_space_args = [arg_name for arg_name, _, option_value in space_options if option_value is not None]
        if provided_space_args:
            warnings.warn(f"Ignoring provided {', '.join(provided_space_args)} because repo_type is not 'space'.")

    lfs_multipart_threshold = getattr(self, "_lfsmultipartthresh", None)
    if lfs_multipart_threshold:
        # Testing purposes only.
        # See https://github.com/huggingface/huggingface_hub/pull/733/files#r820604472
        payload["lfsmultipartthresh"] = lfs_multipart_threshold

    if resource_group_id is not None:
        payload["resourceGroupId"] = resource_group_id

    headers = self._build_hf_headers(token=token)

    # Since https://github.com/huggingface/moon-landing/pull/7272 (private repo), it is not possible to
    # concurrently create repos on the Hub for a same user. This is rarely an issue, except when running
    # tests. To avoid any inconvenience, the creation is retried for this specific error.
    # NOTE: This could have been fixed directly in the tests but adding it here should fix CIs for all
    #       dependent libraries.
    # NOTE: If a fix is implemented server-side, this retry mechanism can be removed.
    transient_conflict = "Cannot create repo: another conflicting operation is in progress"
    while True:
        r = get_session().post(path, headers=headers, json=payload)
        if not (r.status_code == 409 and transient_conflict in r.text):
            break
        logger.debug("Create repo failed due to a concurrency issue. Retrying...")

    try:
        hf_raise_for_status(r)
    except HTTPError as err:
        if not exist_ok:
            raise
        status_code = err.response.status_code
        if status_code == 403:
            # No write permission on the namespace but repo might already exist
            try:
                self.repo_info(repo_id=repo_id, repo_type=repo_type, token=token)
                if repo_type is None or repo_type == constants.REPO_TYPE_MODEL:
                    return RepoUrl(f"{self.endpoint}/{repo_id}")
                return RepoUrl(f"{self.endpoint}/{repo_type}/{repo_id}")
            except HfHubHTTPError:
                # The repo cannot be read either: surface the original creation error.
                raise err
        elif status_code != 409:
            raise
        # status 409 with `exist_ok=True`: repo already exists, fall through to the normal return.

    created = r.json()
    return RepoUrl(created["url"], endpoint=self.endpoint)
@validate_hf_hub_args
def delete_repo(
    self,
    repo_id: str,
    *,
    token: Union[str, bool, None] = None,
    repo_type: Optional[str] = None,
    missing_ok: bool = False,
) -> None:
    """
    Delete a repo from the HuggingFace Hub. CAUTION: this is irreversible.

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if deleting a dataset or
            space, `None` or `"model"` if deleting a model.
        missing_ok (`bool`, *optional*, defaults to `False`):
            If `True`, do not raise an error if repo does not exist.

    Raises:
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If `repo_type` is not one of the values in `constants.REPO_TYPES`.
        [`~utils.RepositoryNotFoundError`]
            If the repository to delete from cannot be found and `missing_ok` is set to False (default).
    """
    # Split "namespace/name"; a bare name means no explicit organization.
    if "/" in repo_id:
        organization, name = repo_id.split("/")
    else:
        organization, name = None, repo_id

    path = f"{self.endpoint}/api/repos/delete"

    if repo_type not in constants.REPO_TYPES:
        raise ValueError("Invalid repo type")

    payload = {"name": name, "organization": organization}
    if repo_type is not None:
        payload["type"] = repo_type

    headers = self._build_hf_headers(token=token)
    r = get_session().delete(path, headers=headers, json=payload)
    try:
        hf_raise_for_status(r)
    except RepositoryNotFoundError:
        # A missing repo is only tolerated when the caller opted in with `missing_ok=True`.
        if missing_ok:
            return
        raise
@_deprecate_method(version="0.32", message="Please use `update_repo_settings` instead.")
@validate_hf_hub_args
def update_repo_visibility(
    self,
    repo_id: str,
    private: bool = False,
    *,
    token: Union[str, bool, None] = None,
    repo_type: Optional[str] = None,
) -> Dict[str, bool]:
    """Update the visibility setting of a repository.

    Deprecated. Use `update_repo_settings` instead.

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated by a `/`.
        private (`bool`, *optional*, defaults to `False`):
            Whether the repository should be private.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if updating a dataset or
            space, `None` or `"model"` if updating a model. Default is
            `None`.

    Returns:
        The HTTP response in json.

    > [!TIP]
    > Raises the following errors:
    >
    >     - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
    >       If `repo_type` is not one of the values in `constants.REPO_TYPES`.
    >     - [`~utils.RepositoryNotFoundError`]
    >       If the repository cannot be found. This may be because it doesn't exist,
    >       or because it is set to `private` and you do not have access.
    """
    # Validate first (`None` is checked against the allowed values as-is), then apply the default.
    if repo_type not in constants.REPO_TYPES:
        raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}")
    resolved_type = constants.REPO_TYPE_MODEL if repo_type is None else repo_type  # default repo type

    settings_url = f"{self.endpoint}/api/{resolved_type}s/{repo_id}/settings"
    request_headers = self._build_hf_headers(token=token)
    r = get_session().put(url=settings_url, headers=request_headers, json={"private": private})
    hf_raise_for_status(r)
    return r.json()
@validate_hf_hub_args
def update_repo_settings(
    self,
    repo_id: str,
    *,
    gated: Optional[Literal["auto", "manual", False]] = None,
    private: Optional[bool] = None,
    token: Union[str, bool, None] = None,
    repo_type: Optional[str] = None,
    xet_enabled: Optional[bool] = None,
) -> None:
    """
    Update the settings of a repository, including gated access and visibility.

    To give more control over how repos are used, the Hub allows repo authors to enable
    access requests for their repos, and also to set the visibility of the repo to private.

    Only the settings that are explicitly provided (i.e. not `None`) are sent to the Hub.

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated by a /.
        gated (`Literal["auto", "manual", False]`, *optional*):
            The gated status for the repository. If set to `None` (default), the `gated` setting of the
            repository won't be updated.
            * "auto": The repository is gated, and access requests are automatically approved or denied based
              on predefined criteria.
            * "manual": The repository is gated, and access requests require manual approval.
            * False : The repository is not gated, and anyone can access it.
        private (`bool`, *optional*):
            Whether the repository should be private.
        token (`Union[str, bool, None]`, *optional*):
            A valid user access token (string). Defaults to the locally saved token,
            which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass False.
        repo_type (`str`, *optional*):
            The type of the repository to update settings from (`"model"`, `"dataset"` or `"space"`).
            Defaults to `"model"`.
        xet_enabled (`bool`, *optional*):
            Whether the repository should be enabled for Xet Storage.

    Raises:
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If gated is not one of "auto", "manual", or False.
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If repo_type is not one of the values in constants.REPO_TYPES.
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If no setting at all is provided.
        [`~utils.HfHubHTTPError`]:
            If the request to the Hugging Face Hub API fails.
        [`~utils.RepositoryNotFoundError`]
            If the repository cannot be found. This may be because it doesn't exist,
            or because it is set to `private` and you do not have access.
    """
    # Validate first (`None` is checked against the allowed values as-is), then apply the default.
    if repo_type not in constants.REPO_TYPES:
        raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}")
    resolved_type = constants.REPO_TYPE_MODEL if repo_type is None else repo_type  # default repo type

    if gated is not None and gated not in ("auto", "manual", False):
        raise ValueError(f"Invalid gated status, must be one of 'auto', 'manual', or False. Got '{gated}'.")

    # Prepare the JSON payload for the PUT request, keeping only the provided settings.
    requested_settings = (
        ("gated", gated),
        ("private", private),
        ("xetEnabled", xet_enabled),
    )
    payload: Dict = {key: value for key, value in requested_settings if value is not None}
    if not payload:
        raise ValueError("At least one setting must be updated.")

    # Build headers and send the update.
    request_headers = self._build_hf_headers(token=token)
    settings_url = f"{self.endpoint}/api/{resolved_type}s/{repo_id}/settings"
    r = get_session().put(url=settings_url, headers=request_headers, json=payload)
    hf_raise_for_status(r)
def move_repo(
    self,
    from_id: str,
    to_id: str,
    *,
    repo_type: Optional[str] = None,
    token: Union[str, bool, None] = None,
):
    """
    Move a repository from namespace1/repo_name1 to namespace2/repo_name2.

    Note there are certain limitations. For more information about moving
    repositories, please see
    https://hf.co/docs/hub/repositories-settings#renaming-or-transferring-a-repo.

    Args:
        from_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`. Original repository identifier.
        to_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`. Final repository identifier.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if moving a dataset or
            space, `None` or `"model"` if moving a model. Default is
            `None`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    > [!TIP]
    > Raises the following errors:
    >
    >     - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
    >       If `from_id` or `to_id` does not contain exactly one `/`.
    >     - [`~utils.RepositoryNotFoundError`]
    >       If the repository to move cannot be found. This may be because it doesn't exist,
    >       or because it is set to `private` and you do not have access.
    """
    # Both identifiers must be of the form "namespace/repo_name" (exactly one "/").
    # `from_id` is checked first, so it is the one reported when both are invalid.
    for candidate_id in (from_id, to_id):
        if candidate_id.count("/") != 1:
            raise ValueError(
                f"Invalid repo_id: {candidate_id}. It should have a namespace (:namespace:/:repo_name:)"
            )

    resolved_type = constants.REPO_TYPE_MODEL if repo_type is None else repo_type  # Hub won't accept `None`.

    move_payload = {"fromRepo": from_id, "toRepo": to_id, "type": resolved_type}
    move_url = f"{self.endpoint}/api/repos/move"
    request_headers = self._build_hf_headers(token=token)
    r = get_session().post(move_url, headers=request_headers, json=move_payload)
    try:
        hf_raise_for_status(r)
    except HfHubHTTPError as e:
        # Enrich the server error with a pointer to the relevant documentation before re-raising.
        documentation_hint = (
            "\nFor additional documentation please see"
            " https://hf.co/docs/hub/repositories-settings#renaming-or-transferring-a-repo."
        )
        e.append_to_message(documentation_hint)
        raise
- @overload
- def create_commit( # type: ignore
- self,
- repo_id: str,
- operations: Iterable[CommitOperation],
- *,
- commit_message: str,
- commit_description: Optional[str] = None,
- token: Union[str, bool, None] = None,
- repo_type: Optional[str] = None,
- revision: Optional[str] = None,
- create_pr: Optional[bool] = None,
- num_threads: int = 5,
- parent_commit: Optional[str] = None,
- run_as_future: Literal[False] = ...,
- ) -> CommitInfo: ...
- @overload
- def create_commit(
- self,
- repo_id: str,
- operations: Iterable[CommitOperation],
- *,
- commit_message: str,
- commit_description: Optional[str] = None,
- token: Union[str, bool, None] = None,
- repo_type: Optional[str] = None,
- revision: Optional[str] = None,
- create_pr: Optional[bool] = None,
- num_threads: int = 5,
- parent_commit: Optional[str] = None,
- run_as_future: Literal[True] = ...,
- ) -> Future[CommitInfo]: ...
- @validate_hf_hub_args
- @future_compatible
- def create_commit(
- self,
- repo_id: str,
- operations: Iterable[CommitOperation],
- *,
- commit_message: str,
- commit_description: Optional[str] = None,
- token: Union[str, bool, None] = None,
- repo_type: Optional[str] = None,
- revision: Optional[str] = None,
- create_pr: Optional[bool] = None,
- num_threads: int = 5,
- parent_commit: Optional[str] = None,
- run_as_future: bool = False,
- ) -> Union[CommitInfo, Future[CommitInfo]]:
- """
- Creates a commit in the given repo, deleting & uploading files as needed.
- > [!WARNING]
- > The input list of `CommitOperation` will be mutated during the commit process. Do not reuse the same objects
- > for multiple commits.
- > [!WARNING]
- > `create_commit` assumes that the repo already exists on the Hub. If you get a
- > Client error 404, please make sure you are authenticated and that `repo_id` and
- > `repo_type` are set correctly. If repo does not exist, create it first using
- > [`~hf_api.create_repo`].
- > [!WARNING]
- > `create_commit` is limited to 25k LFS files and a 1GB payload for regular files.
- Args:
- repo_id (`str`):
- The repository in which the commit will be created, for example:
- `"username/custom_transformers"`
- operations (`Iterable` of [`~hf_api.CommitOperation`]):
- An iterable of operations to include in the commit, either:
- - [`~hf_api.CommitOperationAdd`] to upload a file
- - [`~hf_api.CommitOperationDelete`] to delete a file
- - [`~hf_api.CommitOperationCopy`] to copy a file
- Operation objects will be mutated to include information relative to the upload. Do not reuse the
- same objects for multiple commits.
- commit_message (`str`):
- The summary (first line) of the commit that will be created.
- commit_description (`str`, *optional*):
- The description of the commit that will be created
- token (`bool` or `str`, *optional*):
- A valid user access token (string). Defaults to the locally saved
- token, which is the recommended method for authentication (see
- https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
- To disable authentication, pass `False`.
- repo_type (`str`, *optional*):
- Set to `"dataset"` or `"space"` if uploading to a dataset or
- space, `None` or `"model"` if uploading to a model. Default is
- `None`.
- revision (`str`, *optional*):
- The git revision to commit from. Defaults to the head of the `"main"` branch.
- create_pr (`boolean`, *optional*):
- Whether or not to create a Pull Request with that commit. Defaults to `False`.
- If `revision` is not set, PR is opened against the `"main"` branch. If
- `revision` is set and is a branch, PR is opened against this branch. If
- `revision` is set and is not a branch name (example: a commit oid), an
- `RevisionNotFoundError` is returned by the server.
- num_threads (`int`, *optional*):
- Number of concurrent threads for uploading files. Defaults to 5.
- Setting it to 2 means at most 2 files will be uploaded concurrently.
- parent_commit (`str`, *optional*):
- The OID / SHA of the parent commit, as a hexadecimal string.
- Shorthands (7 first characters) are also supported. If specified and `create_pr` is `False`,
- the commit will fail if `revision` does not point to `parent_commit`. If specified and `create_pr`
- is `True`, the pull request will be created from `parent_commit`. Specifying `parent_commit`
- ensures the repo has not changed before committing the changes, and can be especially useful
- if the repo is updated / committed to concurrently.
- run_as_future (`bool`, *optional*):
- Whether or not to run this method in the background. Background jobs are run sequentially without
- blocking the main thread. Passing `run_as_future=True` will return a [Future](https://docs.python.org/3/library/concurrent.futures.html#future-objects)
- object. Defaults to `False`.
- Returns:
- [`CommitInfo`] or `Future`:
- Instance of [`CommitInfo`] containing information about the newly created commit (commit hash, commit
- url, pr url, commit message,...). If `run_as_future=True` is passed, returns a Future object which will
- contain the result when executed.
- Raises:
- [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
- If commit message is empty.
- [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
- If parent commit is not a valid commit OID.
- [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
- If a README.md file with an invalid metadata section is committed. In this case, the commit will fail
- early, before trying to upload any file.
- [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
- If `create_pr` is `True` and revision is neither `None` nor `"main"`.
- [`~utils.RepositoryNotFoundError`]:
- If repository is not found (error 404): wrong repo_id/repo_type, private
- but not authenticated or repo does not exist.
- """
- if parent_commit is not None and not constants.REGEX_COMMIT_OID.fullmatch(parent_commit):
- raise ValueError(
- f"`parent_commit` is not a valid commit OID. It must match the following regex: {constants.REGEX_COMMIT_OID}"
- )
- if commit_message is None or len(commit_message) == 0:
- raise ValueError("`commit_message` can't be empty, please pass a value.")
- commit_description = commit_description if commit_description is not None else ""
- repo_type = repo_type if repo_type is not None else constants.REPO_TYPE_MODEL
- if repo_type not in constants.REPO_TYPES:
- raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}")
- unquoted_revision = revision or constants.DEFAULT_REVISION
- revision = quote(unquoted_revision, safe="")
- create_pr = create_pr if create_pr is not None else False
- headers = self._build_hf_headers(token=token)
- operations = list(operations)
- additions = [op for op in operations if isinstance(op, CommitOperationAdd)]
- copies = [op for op in operations if isinstance(op, CommitOperationCopy)]
- nb_additions = len(additions)
- nb_copies = len(copies)
- nb_deletions = len(operations) - nb_additions - nb_copies
- for addition in additions:
- if addition._is_committed:
- raise ValueError(
- f"CommitOperationAdd {addition} has already being committed and cannot be reused. Please create a"
- " new CommitOperationAdd object if you want to create a new commit."
- )
- if repo_type != "dataset":
- for addition in additions:
- if addition.path_in_repo.endswith((".arrow", ".parquet")):
- warnings.warn(
- f"It seems that you are about to commit a data file ({addition.path_in_repo}) to a {repo_type}"
- " repository. You are sure this is intended? If you are trying to upload a dataset, please"
- " set `repo_type='dataset'` or `--repo-type=dataset` in a CLI."
- )
- logger.debug(
- f"About to commit to the hub: {len(additions)} addition(s), {len(copies)} copie(s) and"
- f" {nb_deletions} deletion(s)."
- )
- # If updating a README.md file, make sure the metadata format is valid
- # It's better to fail early than to fail after all the files have been uploaded.
- for addition in additions:
- if addition.path_in_repo == "README.md":
- with addition.as_file() as file:
- content = file.read().decode()
- self._validate_yaml(content, repo_type=repo_type, token=token)
- # Skip other additions after `README.md` has been processed
- break
- # If updating twice the same file or update then delete a file in a single commit
- _warn_on_overwriting_operations(operations)
- self.preupload_lfs_files(
- repo_id=repo_id,
- additions=additions,
- token=token,
- repo_type=repo_type,
- revision=unquoted_revision, # first-class methods take unquoted revision
- create_pr=create_pr,
- num_threads=num_threads,
- free_memory=False, # do not remove `CommitOperationAdd.path_or_fileobj` on LFS files for "normal" users
- )
- files_to_copy = _fetch_files_to_copy(
- copies=copies,
- repo_type=repo_type,
- repo_id=repo_id,
- headers=headers,
- revision=unquoted_revision,
- endpoint=self.endpoint,
- )
- # Remove no-op operations (files that have not changed)
- operations_without_no_op = []
- for operation in operations:
- if (
- isinstance(operation, CommitOperationAdd)
- and operation._remote_oid is not None
- and operation._remote_oid == operation._local_oid
- ):
- # File already exists on the Hub and has not changed: we can skip it.
- logger.debug(f"Skipping upload for '{operation.path_in_repo}' as the file has not changed.")
- continue
- if (
- isinstance(operation, CommitOperationCopy)
- and operation._dest_oid is not None
- and operation._dest_oid == operation._src_oid
- ):
- # Source and destination files are identical - skip
- logger.debug(
- f"Skipping copy for '{operation.src_path_in_repo}' -> '{operation.path_in_repo}' as the content of the source file is the same as the destination file."
- )
- continue
- operations_without_no_op.append(operation)
- if len(operations) != len(operations_without_no_op):
- logger.info(
- f"Removing {len(operations) - len(operations_without_no_op)} file(s) from commit that have not changed."
- )
- # Return early if empty commit
- if len(operations_without_no_op) == 0:
- logger.warning("No files have been modified since last commit. Skipping to prevent empty commit.")
- # Get latest commit info
- try:
- info = self.repo_info(repo_id=repo_id, repo_type=repo_type, revision=unquoted_revision, token=token)
- except RepositoryNotFoundError as e:
- e.append_to_message(_CREATE_COMMIT_NO_REPO_ERROR_MESSAGE)
- raise
- # Return commit info based on latest commit
- url_prefix = self.endpoint
- if repo_type is not None and repo_type != constants.REPO_TYPE_MODEL:
- url_prefix = f"{url_prefix}/{repo_type}s"
- return CommitInfo(
- commit_url=f"{url_prefix}/{repo_id}/commit/{info.sha}",
- commit_message=commit_message,
- commit_description=commit_description,
- oid=info.sha, # type: ignore[arg-type]
- )
- commit_payload = _prepare_commit_payload(
- operations=operations,
- files_to_copy=files_to_copy,
- commit_message=commit_message,
- commit_description=commit_description,
- parent_commit=parent_commit,
- )
- commit_url = f"{self.endpoint}/api/{repo_type}s/{repo_id}/commit/{revision}"
- def _payload_as_ndjson() -> Iterable[bytes]:
- for item in commit_payload:
- yield json.dumps(item).encode()
- yield b"\n"
- headers = {
- # See https://github.com/huggingface/huggingface_hub/issues/1085#issuecomment-1265208073
- "Content-Type": "application/x-ndjson",
- **headers,
- }
- data = b"".join(_payload_as_ndjson())
- params = {"create_pr": "1"} if create_pr else None
- try:
- commit_resp = get_session().post(url=commit_url, headers=headers, data=data, params=params)
- hf_raise_for_status(commit_resp, endpoint_name="commit")
- except RepositoryNotFoundError as e:
- e.append_to_message(_CREATE_COMMIT_NO_REPO_ERROR_MESSAGE)
- raise
- except EntryNotFoundError as e:
- if nb_deletions > 0 and "A file with this name doesn't exist" in str(e):
- e.append_to_message(
- "\nMake sure to differentiate file and folder paths in delete"
- " operations with a trailing '/' or using `is_folder=True/False`."
- )
- raise
- # Mark additions as committed (cannot be reused in another commit)
- for addition in additions:
- addition._is_committed = True
- commit_data = commit_resp.json()
- return CommitInfo(
- commit_url=commit_data["commitUrl"],
- commit_message=commit_message,
- commit_description=commit_description,
- oid=commit_data["commitOid"],
- pr_url=commit_data["pullRequestUrl"] if create_pr else None,
- )
def preupload_lfs_files(
    self,
    repo_id: str,
    additions: Iterable[CommitOperationAdd],
    *,
    token: Union[str, bool, None] = None,
    repo_type: Optional[str] = None,
    revision: Optional[str] = None,
    create_pr: Optional[bool] = None,
    num_threads: int = 5,
    free_memory: bool = True,
    gitignore_content: Optional[str] = None,
):
    """Pre-upload LFS files ahead of a future commit.

    Handy when files are generated on-the-fly and you would rather push each one as soon as it is ready
    instead of holding all of them in memory until the final commit.

    > [!WARNING]
    > This is a power-user method. A regular commit does not require calling it explicitly:
    > use [`create_commit`] directly instead.

    > [!WARNING]
    > The commit operations are mutated by this call. Unless `free_memory=False` is passed, the
    > `path_or_fileobj` attached to every uploaded operation is replaced by an empty `bytes` object once the
    > upload is done, in order to save memory. Do not reuse these objects for anything other than passing them
    > to [`create_commit`].

    Args:
        repo_id (`str`):
            The repository in which you will commit the files, for example: `"username/custom_transformers"`.
        additions (`Iterable` of [`CommitOperationAdd`]):
            The files to upload. Warning: these objects are mutated to record information relative to the
            upload. Do not reuse the same objects for multiple commits.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.
        repo_type (`str`, *optional*):
            The type of repository to upload to (e.g. `"model"` -default-, `"dataset"` or `"space"`).
        revision (`str`, *optional*):
            The git revision to commit from. Defaults to the head of the `"main"` branch.
        create_pr (`boolean`, *optional*):
            Whether or not you plan to create a Pull Request with that commit. Defaults to `False`.
        num_threads (`int`, *optional*):
            Number of concurrent threads for uploading files. Defaults to 5.
            Setting it to 2 means at most 2 files will be uploaded concurrently.
        free_memory (`bool`, *optional*):
            If `True` (default), `path_or_fileobj` of each uploaded operation is replaced by `b""` after the
            upload. Pass `False` to keep the attached content on the operation objects.
        gitignore_content (`str`, *optional*):
            The content of the `.gitignore` file to know which files should be ignored. The order of priority
            is to first check if `gitignore_content` is passed, then check if the `.gitignore` file is present
            in the list of files to commit and finally default to the `.gitignore` file already hosted on the Hub
            (if any).

    Raises:
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If `repo_type` is not a valid repo type.
        [`~utils.RepositoryNotFoundError`]:
            Re-raised (with an extended message) if the repository is not found while fetching upload modes.

    Example:
    ```py
    >>> from huggingface_hub import CommitOperationAdd, preupload_lfs_files, create_commit, create_repo

    >>> repo_id = create_repo("test_preupload").repo_id

    # Generate and preupload LFS files one by one
    >>> operations = [] # List of all `CommitOperationAdd` objects that will be generated
    >>> for i in range(5):
    ...     content = ... # generate binary content
    ...     addition = CommitOperationAdd(path_in_repo=f"shard_{i}_of_5.bin", path_or_fileobj=content)
    ...     preupload_lfs_files(repo_id, additions=[addition]) # upload + free memory
    ...     operations.append(addition)

    # Create commit
    >>> create_commit(repo_id, operations=operations, commit_message="Commit all shards")
    ```
    """
    # Normalize optional arguments.
    if repo_type is None:
        repo_type = constants.REPO_TYPE_MODEL
    if repo_type not in constants.REPO_TYPES:
        raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}")
    if revision is None:
        revision = constants.DEFAULT_REVISION
    else:
        revision = quote(revision, safe="")
    if create_pr is None:
        create_pr = False
    headers = self._build_hf_headers(token=token)

    # Materialize the iterable once: it is traversed several times below.
    additions = list(additions)

    # When no explicit gitignore content is given, use the first `.gitignore` being committed (if any).
    if gitignore_content is None:
        gitignore_op = next((op for op in additions if op.path_in_repo == ".gitignore"), None)
        if gitignore_op is not None:
            with gitignore_op.as_file() as stream:
                gitignore_content = stream.read().decode()

    # Only consider operations that have not been uploaded yet.
    pending = [op for op in additions if not op._is_uploaded]

    # The upload mode may already be known for some operations (e.g. with upload_large_folder):
    # fetch it only for the remaining ones.
    missing_mode = [op for op in pending if op._upload_mode is None]
    if missing_mode:
        try:
            _fetch_upload_modes(
                additions=missing_mode,
                repo_type=repo_type,
                repo_id=repo_id,
                headers=headers,
                revision=revision,
                endpoint=self.endpoint,
                create_pr=create_pr or False,
                gitignore_content=gitignore_content,
            )
        except RepositoryNotFoundError as e:
            e.append_to_message(_CREATE_COMMIT_NO_REPO_ERROR_MESSAGE)
            raise

    # Keep LFS files only (regular files are not pre-uploaded)...
    lfs_candidates = [op for op in pending if op._upload_mode == "lfs"]

    # ...and drop the ones flagged as ignored by the gitignore file.
    lfs_to_upload = []
    for op in lfs_candidates:
        if op._should_ignore:
            logger.debug(f"Skipping upload for LFS file '{op.path_in_repo}' (ignored by gitignore file).")
            continue
        lfs_to_upload.append(op)

    nb_ignored = len(lfs_candidates) - len(lfs_to_upload)
    if nb_ignored != 0:
        logger.info(f"Skipped upload for {nb_ignored} LFS file(s) (ignored by gitignore file).")

    # Nothing left to upload: log it explicitly and stop here.
    if not lfs_to_upload:
        logger.debug("No LFS files to upload.")
        return

    _upload_files(  # type: ignore [arg-type]
        additions=lfs_to_upload,
        repo_type=repo_type,
        repo_id=repo_id,
        headers=headers,
        endpoint=self.endpoint,
        # If `create_pr`, we don't want to check user permission on the revision as users with read permission
        # should still be able to create PRs even if they don't have write permission on the target branch of
        # the PR (i.e. `revision`).
        revision=None if create_pr else revision,
        num_threads=num_threads,
        create_pr=create_pr,
    )

    # Flag operations as uploaded and, if requested, release the attached content.
    for op in lfs_to_upload:
        op._is_uploaded = True
        if free_memory:
            op.path_or_fileobj = b""
@overload
def upload_file(  # type: ignore
    self,
    *,
    path_or_fileobj: Union[str, Path, bytes, BinaryIO],
    path_in_repo: str,
    repo_id: str,
    token: Union[str, bool, None] = None,
    repo_type: Optional[str] = None,
    revision: Optional[str] = None,
    commit_message: Optional[str] = None,
    commit_description: Optional[str] = None,
    create_pr: Optional[bool] = None,
    parent_commit: Optional[str] = None,
    run_as_future: Literal[False] = ...,
) -> CommitInfo: ...

@overload
def upload_file(
    self,
    *,
    path_or_fileobj: Union[str, Path, bytes, BinaryIO],
    path_in_repo: str,
    repo_id: str,
    token: Union[str, bool, None] = None,
    repo_type: Optional[str] = None,
    revision: Optional[str] = None,
    commit_message: Optional[str] = None,
    commit_description: Optional[str] = None,
    create_pr: Optional[bool] = None,
    parent_commit: Optional[str] = None,
    run_as_future: Literal[True] = ...,
) -> Future[CommitInfo]: ...

@validate_hf_hub_args
@future_compatible
def upload_file(
    self,
    *,
    path_or_fileobj: Union[str, Path, bytes, BinaryIO],
    path_in_repo: str,
    repo_id: str,
    token: Union[str, bool, None] = None,
    repo_type: Optional[str] = None,
    revision: Optional[str] = None,
    commit_message: Optional[str] = None,
    commit_description: Optional[str] = None,
    create_pr: Optional[bool] = None,
    parent_commit: Optional[str] = None,
    run_as_future: bool = False,
) -> Union[CommitInfo, Future[CommitInfo]]:
    """
    Upload a local file (up to 50 GB) to the given repo. The upload is done
    through a HTTP post request, and doesn't require git or git-lfs to be
    installed.

    The file is committed through `create_commit` as a single add operation.

    Args:
        path_or_fileobj (`str`, `Path`, `bytes`, or `IO`):
            Path to a file on the local machine or binary data stream /
            fileobj / buffer.
        path_in_repo (`str`):
            Relative filepath in the repo, for example:
            `"checkpoints/1fec34a/weights.bin"`
        repo_id (`str`):
            The repository to which the file will be uploaded, for example:
            `"username/custom_transformers"`
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if uploading to a dataset or
            space, `None` or `"model"` if uploading to a model. Default is
            `None`.
        revision (`str`, *optional*):
            The git revision to commit from. Defaults to the head of the `"main"` branch.
        commit_message (`str`, *optional*):
            The summary / title / first line of the generated commit. Defaults to
            `f"Upload {path_in_repo} with huggingface_hub"` when `None`.
        commit_description (`str` *optional*):
            The description of the generated commit
        create_pr (`boolean`, *optional*):
            Whether or not to create a Pull Request with that commit. Defaults to `False`.
            If `revision` is not set, PR is opened against the `"main"` branch. If
            `revision` is set and is a branch, PR is opened against this branch. If
            `revision` is set and is not a branch name (example: a commit oid), an
            `RevisionNotFoundError` is returned by the server.
        parent_commit (`str`, *optional*):
            The OID / SHA of the parent commit, as a hexadecimal string. Shorthands (7 first characters) are also supported.
            If specified and `create_pr` is `False`, the commit will fail if `revision` does not point to `parent_commit`.
            If specified and `create_pr` is `True`, the pull request will be created from `parent_commit`.
            Specifying `parent_commit` ensures the repo has not changed before committing the changes, and can be
            especially useful if the repo is updated / committed to concurrently.
        run_as_future (`bool`, *optional*):
            Whether or not to run this method in the background. Background jobs are run sequentially without
            blocking the main thread. Passing `run_as_future=True` will return a [Future](https://docs.python.org/3/library/concurrent.futures.html#future-objects)
            object. Defaults to `False`.

    Returns:
        [`CommitInfo`] or `Future`:
            Instance of [`CommitInfo`] containing information about the newly created commit (commit hash, commit
            url, pr url, commit message,...). If `run_as_future=True` is passed, returns a Future object which will
            contain the result when executed.

    > [!TIP]
    > Raises the following errors:
    >
    >     - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
    >       if the HuggingFace API returned an error
    >     - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
    >       if some parameter value is invalid
    >     - [`~utils.RepositoryNotFoundError`]
    >       If the repository to download from cannot be found. This may be because it doesn't exist,
    >       or because it is set to `private` and you do not have access.
    >     - [`~utils.RevisionNotFoundError`]
    >       If the revision to download from cannot be found.

    > [!WARNING]
    > `upload_file` assumes that the repo already exists on the Hub. If you get a
    > Client error 404, please make sure you are authenticated and that `repo_id` and
    > `repo_type` are set correctly. If repo does not exist, create it first using
    > [`~hf_api.create_repo`].

    Example:

    ```python
    >>> from huggingface_hub import upload_file

    >>> with open("./local/filepath", "rb") as fobj:
    ...     upload_file(
    ...         path_or_fileobj=fobj,
    ...         path_in_repo="remote/file/path.h5",
    ...         repo_id="username/my-dataset",
    ...         repo_type="dataset",
    ...         token="my_token",
    ...     )
    "https://huggingface.co/datasets/username/my-dataset/blob/main/remote/file/path.h5"

    >>> upload_file(
    ...     path_or_fileobj=".\\\\local\\\\file\\\\path",
    ...     path_in_repo="remote/file/path.h5",
    ...     repo_id="username/my-model",
    ...     token="my_token",
    ... )
    "https://huggingface.co/username/my-model/blob/main/remote/file/path.h5"

    >>> upload_file(
    ...     path_or_fileobj=".\\\\local\\\\file\\\\path",
    ...     path_in_repo="remote/file/path.h5",
    ...     repo_id="username/my-model",
    ...     token="my_token",
    ...     create_pr=True,
    ... )
    "https://huggingface.co/username/my-model/blob/refs%2Fpr%2F1/remote/file/path.h5"
    ```
    """
    if repo_type not in constants.REPO_TYPES:
        raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}")

    if commit_message is None:
        commit_message = f"Upload {path_in_repo} with huggingface_hub"

    # A single-file upload is a commit made of exactly one "add" operation.
    commit_info = self.create_commit(
        repo_id=repo_id,
        repo_type=repo_type,
        operations=[
            CommitOperationAdd(
                path_or_fileobj=path_or_fileobj,
                path_in_repo=path_in_repo,
            )
        ],
        commit_message=commit_message,
        commit_description=commit_description,
        token=token,
        revision=revision,
        create_pr=create_pr,
        parent_commit=parent_commit,
    )

    # Build the legacy "blob" URL of the uploaded file.
    # Revision shown in the URL: the PR ref if a PR was opened, else the requested revision, else the default.
    if commit_info.pr_url is not None:
        url_revision = quote(_parse_revision_from_pr_url(commit_info.pr_url), safe="")
    elif revision is not None:
        url_revision = revision
    else:
        url_revision = constants.DEFAULT_REVISION

    # Some repo types are served under a URL prefix.
    url_repo_id = repo_id
    if repo_type in constants.REPO_TYPES_URL_PREFIXES:
        url_repo_id = constants.REPO_TYPES_URL_PREFIXES[repo_type] + repo_id

    return CommitInfo(
        commit_url=commit_info.commit_url,
        commit_message=commit_info.commit_message,
        commit_description=commit_info.commit_description,
        oid=commit_info.oid,
        pr_url=commit_info.pr_url,
        # Similar to `hf_hub_url` but it's "blob" instead of "resolve"
        # TODO: remove this in v1.0
        _url=f"{self.endpoint}/{url_repo_id}/blob/{url_revision}/{path_in_repo}",
    )
@overload
def upload_folder(  # type: ignore
    self,
    *,
    repo_id: str,
    folder_path: Union[str, Path],
    path_in_repo: Optional[str] = None,
    commit_message: Optional[str] = None,
    commit_description: Optional[str] = None,
    token: Union[str, bool, None] = None,
    repo_type: Optional[str] = None,
    revision: Optional[str] = None,
    create_pr: Optional[bool] = None,
    parent_commit: Optional[str] = None,
    allow_patterns: Optional[Union[List[str], str]] = None,
    ignore_patterns: Optional[Union[List[str], str]] = None,
    delete_patterns: Optional[Union[List[str], str]] = None,
    run_as_future: Literal[False] = ...,
) -> CommitInfo: ...

@overload
def upload_folder(  # type: ignore
    self,
    *,
    repo_id: str,
    folder_path: Union[str, Path],
    path_in_repo: Optional[str] = None,
    commit_message: Optional[str] = None,
    commit_description: Optional[str] = None,
    token: Union[str, bool, None] = None,
    repo_type: Optional[str] = None,
    revision: Optional[str] = None,
    create_pr: Optional[bool] = None,
    parent_commit: Optional[str] = None,
    allow_patterns: Optional[Union[List[str], str]] = None,
    ignore_patterns: Optional[Union[List[str], str]] = None,
    delete_patterns: Optional[Union[List[str], str]] = None,
    run_as_future: Literal[True] = ...,
) -> Future[CommitInfo]: ...

@validate_hf_hub_args
@future_compatible
def upload_folder(
    self,
    *,
    repo_id: str,
    folder_path: Union[str, Path],
    path_in_repo: Optional[str] = None,
    commit_message: Optional[str] = None,
    commit_description: Optional[str] = None,
    token: Union[str, bool, None] = None,
    repo_type: Optional[str] = None,
    revision: Optional[str] = None,
    create_pr: Optional[bool] = None,
    parent_commit: Optional[str] = None,
    allow_patterns: Optional[Union[List[str], str]] = None,
    ignore_patterns: Optional[Union[List[str], str]] = None,
    delete_patterns: Optional[Union[List[str], str]] = None,
    run_as_future: bool = False,
) -> Union[CommitInfo, Future[CommitInfo]]:
    """
    Upload a local folder to the given repo. The upload is done through a HTTP requests, and doesn't require git or
    git-lfs to be installed.

    The structure of the folder will be preserved. Files with the same name already present in the repository will
    be overwritten. Others will be left untouched.

    Use the `allow_patterns` and `ignore_patterns` arguments to specify which files to upload. These parameters
    accept either a single pattern or a list of patterns. Patterns are Standard Wildcards (globbing patterns) as
    documented [here](https://tldp.org/LDP/GNU-Linux-Tools-Summary/html/x11655.htm). If both `allow_patterns` and
    `ignore_patterns` are provided, both constraints apply. By default, all files from the folder are uploaded.

    Use the `delete_patterns` argument to specify remote files you want to delete. Input type is the same as for
    `allow_patterns` (see above). If `path_in_repo` is also provided, the patterns are matched against paths
    relative to this folder. For example, `upload_folder(..., path_in_repo="experiment", delete_patterns="logs/*")`
    will delete any remote file under `./experiment/logs/`. Note that the `.gitattributes` file will not be deleted
    even if it matches the patterns.

    Any `.git/` folder present in any subdirectory will be ignored. However, please be aware that the `.gitignore`
    file is not taken into account.

    Uses `HfApi.create_commit` under the hood.

    Args:
        repo_id (`str`):
            The repository to which the file will be uploaded, for example:
            `"username/custom_transformers"`
        folder_path (`str` or `Path`):
            Path to the folder to upload on the local file system
        path_in_repo (`str`, *optional*):
            Relative path of the directory in the repo, for example:
            `"checkpoints/1fec34a/results"`. Will default to the root folder of the repository.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if uploading to a dataset or
            space, `None` or `"model"` if uploading to a model. Default is
            `None`.
        revision (`str`, *optional*):
            The git revision to commit from. Defaults to the head of the `"main"` branch.
        commit_message (`str`, *optional*):
            The summary / title / first line of the generated commit. Defaults to:
            `"Upload folder using huggingface_hub"` (also used if an empty string is passed).
        commit_description (`str` *optional*):
            The description of the generated commit
        create_pr (`boolean`, *optional*):
            Whether or not to create a Pull Request with that commit. Defaults to `False`. If `revision` is not
            set, PR is opened against the `"main"` branch. If `revision` is set and is a branch, PR is opened
            against this branch. If `revision` is set and is not a branch name (example: a commit oid), an
            `RevisionNotFoundError` is returned by the server.
        parent_commit (`str`, *optional*):
            The OID / SHA of the parent commit, as a hexadecimal string. Shorthands (7 first characters) are also supported.
            If specified and `create_pr` is `False`, the commit will fail if `revision` does not point to `parent_commit`.
            If specified and `create_pr` is `True`, the pull request will be created from `parent_commit`.
            Specifying `parent_commit` ensures the repo has not changed before committing the changes, and can be
            especially useful if the repo is updated / committed to concurrently.
        allow_patterns (`List[str]` or `str`, *optional*):
            If provided, only files matching at least one pattern are uploaded.
        ignore_patterns (`List[str]` or `str`, *optional*):
            If provided, files matching any of the patterns are not uploaded. The object passed by the caller
            is never modified.
        delete_patterns (`List[str]` or `str`, *optional*):
            If provided, remote files matching any of the patterns will be deleted from the repo while committing
            new files. This is useful if you don't know which files have already been uploaded.
            Note: to avoid discrepancies the `.gitattributes` file is not deleted even if it matches the pattern.
        run_as_future (`bool`, *optional*):
            Whether or not to run this method in the background. Background jobs are run sequentially without
            blocking the main thread. Passing `run_as_future=True` will return a [Future](https://docs.python.org/3/library/concurrent.futures.html#future-objects)
            object. Defaults to `False`.

    Returns:
        [`CommitInfo`] or `Future`:
            Instance of [`CommitInfo`] containing information about the newly created commit (commit hash, commit
            url, pr url, commit message,...). If `run_as_future=True` is passed, returns a Future object which will
            contain the result when executed.

    > [!TIP]
    > Raises the following errors:
    >
    >     - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
    >     if the HuggingFace API returned an error
    >     - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
    >     if some parameter value is invalid

    > [!WARNING]
    > `upload_folder` assumes that the repo already exists on the Hub. If you get a Client error 404, please make
    > sure you are authenticated and that `repo_id` and `repo_type` are set correctly. If repo does not exist, create
    > it first using [`~hf_api.create_repo`].

    > [!TIP]
    > When dealing with a large folder (thousands of files or hundreds of GB), we recommend using [`~hf_api.upload_large_folder`] instead.

    Example:

    ```python
    # Upload checkpoints folder except the log files
    >>> upload_folder(
    ...     folder_path="local/checkpoints",
    ...     path_in_repo="remote/experiment/checkpoints",
    ...     repo_id="username/my-dataset",
    ...     repo_type="dataset",
    ...     token="my_token",
    ...     ignore_patterns="**/logs/*.txt",
    ... )
    "https://huggingface.co/datasets/username/my-dataset/tree/main/remote/experiment/checkpoints"

    # Upload checkpoints folder including logs while deleting existing logs from the repo
    # Useful if you don't know exactly which log files have already been pushed
    >>> upload_folder(
    ...     folder_path="local/checkpoints",
    ...     path_in_repo="remote/experiment/checkpoints",
    ...     repo_id="username/my-dataset",
    ...     repo_type="dataset",
    ...     token="my_token",
    ...     delete_patterns="**/logs/*.txt",
    ... )
    "https://huggingface.co/datasets/username/my-dataset/tree/main/remote/experiment/checkpoints"

    # Upload checkpoints folder while creating a PR
    >>> upload_folder(
    ...     folder_path="local/checkpoints",
    ...     path_in_repo="remote/experiment/checkpoints",
    ...     repo_id="username/my-dataset",
    ...     repo_type="dataset",
    ...     token="my_token",
    ...     create_pr=True,
    ... )
    "https://huggingface.co/datasets/username/my-dataset/tree/refs%2Fpr%2F1/remote/experiment/checkpoints"
    ```
    """
    if repo_type not in constants.REPO_TYPES:
        raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}")

    # By default, upload folder to the root directory in repo.
    if path_in_repo is None:
        path_in_repo = ""

    # Do not upload .git folder.
    # Build a *new* list instead of using `+=`: in-place extension would silently grow the list object
    # owned by the caller every time this method is called with the same `ignore_patterns`.
    if ignore_patterns is None:
        ignore_patterns = []
    elif isinstance(ignore_patterns, str):
        ignore_patterns = [ignore_patterns]
    ignore_patterns = [*ignore_patterns, *DEFAULT_IGNORE_PATTERNS]

    # When a PR is requested, remote files to delete are listed from the default revision
    # rather than from `revision`.
    delete_operations = self._prepare_folder_deletions(
        repo_id=repo_id,
        repo_type=repo_type,
        revision=constants.DEFAULT_REVISION if create_pr else revision,
        token=token,
        path_in_repo=path_in_repo,
        delete_patterns=delete_patterns,
    )
    add_operations = self._prepare_upload_folder_additions(
        folder_path,
        path_in_repo,
        allow_patterns=allow_patterns,
        ignore_patterns=ignore_patterns,
        token=token,
        repo_type=repo_type,
    )

    # Optimize operations: if some files will be overwritten, we don't need to delete them first
    if len(add_operations) > 0:
        added_paths = set(op.path_in_repo for op in add_operations)
        delete_operations = [
            delete_op for delete_op in delete_operations if delete_op.path_in_repo not in added_paths
        ]
    commit_operations = delete_operations + add_operations

    commit_message = commit_message or "Upload folder using huggingface_hub"

    commit_info = self.create_commit(
        repo_type=repo_type,
        repo_id=repo_id,
        operations=commit_operations,
        commit_message=commit_message,
        commit_description=commit_description,
        token=token,
        revision=revision,
        create_pr=create_pr,
        parent_commit=parent_commit,
    )

    # Create url to uploaded folder (for legacy return value)
    if create_pr and commit_info.pr_url is not None:
        # Point the URL at the PR ref rather than at the target branch.
        revision = quote(_parse_revision_from_pr_url(commit_info.pr_url), safe="")
    if repo_type in constants.REPO_TYPES_URL_PREFIXES:
        repo_id = constants.REPO_TYPES_URL_PREFIXES[repo_type] + repo_id
    revision = revision if revision is not None else constants.DEFAULT_REVISION

    return CommitInfo(
        commit_url=commit_info.commit_url,
        commit_message=commit_info.commit_message,
        commit_description=commit_info.commit_description,
        oid=commit_info.oid,
        pr_url=commit_info.pr_url,
        # Similar to `hf_hub_url` but it's "tree" instead of "resolve"
        # TODO: remove this in v1.0
        _url=f"{self.endpoint}/{repo_id}/tree/{revision}/{path_in_repo}",
    )
@validate_hf_hub_args
def delete_file(
    self,
    path_in_repo: str,
    repo_id: str,
    *,
    token: Union[str, bool, None] = None,
    repo_type: Optional[str] = None,
    revision: Optional[str] = None,
    commit_message: Optional[str] = None,
    commit_description: Optional[str] = None,
    create_pr: Optional[bool] = None,
    parent_commit: Optional[str] = None,
) -> CommitInfo:
    """
    Delete a single file from a repo on the Hub.

    Builds one [`CommitOperationDelete`] for `path_in_repo` and sends it through [`create_commit`].

    Args:
        path_in_repo (`str`):
            Path of the file relative to the repo root, for example:
            `"checkpoints/1fec34a/weights.bin"`
        repo_id (`str`):
            The repository from which the file will be deleted, for example:
            `"username/custom_transformers"`
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if the file is in a dataset or
            space, `None` or `"model"` if in a model. Default is `None`.
        revision (`str`, *optional*):
            The git revision to commit from. Defaults to the head of the `"main"` branch.
        commit_message (`str`, *optional*):
            The summary / title / first line of the generated commit. Defaults to
            `f"Delete {path_in_repo} with huggingface_hub"`.
        commit_description (`str`, *optional*):
            The description of the generated commit.
        create_pr (`boolean`, *optional*):
            Whether or not to create a Pull Request with that commit. Defaults to `False`.
            If `revision` is not set, PR is opened against the `"main"` branch. If
            `revision` is set and is a branch, PR is opened against this branch. If
            `revision` is set and is not a branch name (example: a commit oid), an
            `RevisionNotFoundError` is returned by the server.
        parent_commit (`str`, *optional*):
            The OID / SHA of the parent commit, as a hexadecimal string. Shorthands (7 first characters)
            are also supported. If specified and `create_pr` is `False`, the commit will fail if
            `revision` does not point to `parent_commit`. If specified and `create_pr` is `True`, the
            pull request will be created from `parent_commit`. Specifying `parent_commit` ensures the
            repo has not changed before committing the changes, and can be especially useful if the
            repo is updated / committed to concurrently.

    Returns:
        [`CommitInfo`]: the value returned by [`create_commit`] for the deletion commit.

    > [!TIP]
    > Raises the following errors:
    >
    > - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
    >   if the HuggingFace API returned an error
    > - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
    >   if some parameter value is invalid
    > - [`~utils.RepositoryNotFoundError`]
    >   If the repository cannot be found. This may be because it doesn't exist,
    >   or because it is set to `private` and you do not have access.
    > - [`~utils.RevisionNotFoundError`]
    >   If the revision cannot be found.
    > - [`~utils.EntryNotFoundError`]
    >   If the file to delete cannot be found.
    """
    # Only synthesize a message when the caller gave none; an empty string is kept as-is.
    if commit_message is None:
        commit_message = f"Delete {path_in_repo} with huggingface_hub"

    file_deletion = CommitOperationDelete(path_in_repo=path_in_repo)
    return self.create_commit(
        repo_id=repo_id,
        repo_type=repo_type,
        operations=[file_deletion],
        commit_message=commit_message,
        commit_description=commit_description,
        token=token,
        revision=revision,
        create_pr=create_pr,
        parent_commit=parent_commit,
    )
@validate_hf_hub_args
def delete_files(
    self,
    repo_id: str,
    delete_patterns: List[str],
    *,
    token: Union[bool, str, None] = None,
    repo_type: Optional[str] = None,
    revision: Optional[str] = None,
    commit_message: Optional[str] = None,
    commit_description: Optional[str] = None,
    create_pr: Optional[bool] = None,
    parent_commit: Optional[str] = None,
) -> CommitInfo:
    """
    Delete files from a repository on the Hub.

    If a folder path is provided, the entire folder is deleted as well as
    all files it contained.

    The delete operations are computed by `_prepare_folder_deletions` from `delete_patterns`
    (relative to the repo root) and then committed in a single call to [`create_commit`].

    Args:
        repo_id (`str`):
            The repository from which the files will be deleted, for example:
            `"username/custom_transformers"`
        delete_patterns (`List[str]`):
            List of files or folders to delete. Each string can either be
            a file path, a folder path or a Unix shell-style wildcard.
            E.g. `["file.txt", "folder/", "data/*.parquet"]`
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.
        repo_type (`str`, *optional*):
            Type of the repo to delete files from. Can be `"model"`,
            `"dataset"` or `"space"`. Defaults to `"model"`.
        revision (`str`, *optional*):
            The git revision to commit from. Defaults to the head of the `"main"` branch.
        commit_message (`str`, *optional*):
            The summary (first line) of the generated commit. Defaults to
            `f"Delete files {' '.join(delete_patterns)} with huggingface_hub"`.
        commit_description (`str`, *optional*):
            The description of the generated commit.
        create_pr (`boolean`, *optional*):
            Whether or not to create a Pull Request with that commit. Defaults to `False`.
            If `revision` is not set, PR is opened against the `"main"` branch. If
            `revision` is set and is a branch, PR is opened against this branch. If
            `revision` is set and is not a branch name (example: a commit oid), an
            `RevisionNotFoundError` is returned by the server.
        parent_commit (`str`, *optional*):
            The OID / SHA of the parent commit, as a hexadecimal string. Shorthands (7 first characters)
            are also supported. If specified and `create_pr` is `False`, the commit will fail if
            `revision` does not point to `parent_commit`. If specified and `create_pr` is `True`, the
            pull request will be created from `parent_commit`. Specifying `parent_commit` ensures the
            repo has not changed before committing the changes, and can be especially useful if the
            repo is updated / committed to concurrently.

    Returns:
        [`CommitInfo`]: the value returned by [`create_commit`] for the deletion commit.
    """
    # Pass `token` along, mirroring the `_prepare_folder_deletions` call made by `upload_folder`.
    # Without it, an explicitly provided token was used for the commit but not for resolving
    # the patterns.
    operations = self._prepare_folder_deletions(
        repo_id=repo_id,
        repo_type=repo_type,
        revision=revision,
        token=token,
        path_in_repo="",  # patterns are interpreted from the repo root
        delete_patterns=delete_patterns,
    )

    if commit_message is None:
        commit_message = f"Delete files {' '.join(delete_patterns)} with huggingface_hub"

    return self.create_commit(
        repo_id=repo_id,
        repo_type=repo_type,
        token=token,
        operations=operations,
        revision=revision,
        commit_message=commit_message,
        commit_description=commit_description,
        create_pr=create_pr,
        parent_commit=parent_commit,
    )
@validate_hf_hub_args
def delete_folder(
    self,
    path_in_repo: str,
    repo_id: str,
    *,
    token: Union[bool, str, None] = None,
    repo_type: Optional[str] = None,
    revision: Optional[str] = None,
    commit_message: Optional[str] = None,
    commit_description: Optional[str] = None,
    create_pr: Optional[bool] = None,
    parent_commit: Optional[str] = None,
) -> CommitInfo:
    """
    Delete a folder from a repo on the Hub.

    Thin wrapper around [`create_commit`]: a single [`CommitOperationDelete`] flagged with
    `is_folder=True` is committed.

    Args:
        path_in_repo (`str`):
            Relative folder path in the repo, for example: `"checkpoints/1fec34a"`.
        repo_id (`str`):
            The repository from which the folder will be deleted, for example:
            `"username/custom_transformers"`
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if the folder is in a dataset or
            space, `None` or `"model"` if in a model. Default is `None`.
        revision (`str`, *optional*):
            The git revision to commit from. Defaults to the head of the `"main"` branch.
        commit_message (`str`, *optional*):
            The summary / title / first line of the generated commit. Defaults to
            `f"Delete folder {path_in_repo} with huggingface_hub"`.
        commit_description (`str`, *optional*):
            The description of the generated commit.
        create_pr (`boolean`, *optional*):
            Whether or not to create a Pull Request with that commit. Defaults to `False`.
            If `revision` is not set, PR is opened against the `"main"` branch. If
            `revision` is set and is a branch, PR is opened against this branch. If
            `revision` is set and is not a branch name (example: a commit oid), an
            `RevisionNotFoundError` is returned by the server.
        parent_commit (`str`, *optional*):
            The OID / SHA of the parent commit, as a hexadecimal string. Shorthands (7 first characters)
            are also supported. If specified and `create_pr` is `False`, the commit will fail if
            `revision` does not point to `parent_commit`. If specified and `create_pr` is `True`, the
            pull request will be created from `parent_commit`. Specifying `parent_commit` ensures the
            repo has not changed before committing the changes, and can be especially useful if the
            repo is updated / committed to concurrently.

    Returns:
        [`CommitInfo`]: the value returned by [`create_commit`] for the deletion commit.
    """
    folder_deletion = CommitOperationDelete(path_in_repo=path_in_repo, is_folder=True)

    # Only synthesize a message when the caller gave none; an empty string is kept as-is.
    if commit_message is None:
        commit_message = f"Delete folder {path_in_repo} with huggingface_hub"

    return self.create_commit(
        repo_id=repo_id,
        repo_type=repo_type,
        operations=[folder_deletion],
        commit_message=commit_message,
        commit_description=commit_description,
        token=token,
        revision=revision,
        create_pr=create_pr,
        parent_commit=parent_commit,
    )
def upload_large_folder(
    self,
    repo_id: str,
    folder_path: Union[str, Path],
    *,
    repo_type: str,  # Repo type is required!
    revision: Optional[str] = None,
    private: Optional[bool] = None,
    allow_patterns: Optional[Union[List[str], str]] = None,
    ignore_patterns: Optional[Union[List[str], str]] = None,
    num_workers: Optional[int] = None,
    print_report: bool = True,
    print_report_every: int = 60,
) -> None:
    """Upload a large folder to the Hub in the most resilient way possible.

    Several workers are started to upload files in an optimized way. Before a file can be committed to a
    repo it must be hashed and, if it is an LFS file, pre-uploaded. Workers perform these tasks for each file
    in the folder. At each step, metadata about the upload process is saved in the folder under
    `.cache/.huggingface/` so that the process can be resumed if interrupted. The whole process might result
    in several commits.

    Args:
        repo_id (`str`):
            The repository to which the file will be uploaded.
            E.g. `"HuggingFaceTB/smollm-corpus"`.
        folder_path (`str` or `Path`):
            Path to the folder to upload on the local file system.
        repo_type (`str`):
            Type of the repository. Must be one of `"model"`, `"dataset"` or `"space"`.
            Unlike in all other `HfApi` methods, `repo_type` is explicitly required here. This is to avoid
            any mistake when uploading a large folder to the Hub, and therefore prevent from having to
            re-upload everything.
        revision (`str`, `optional`):
            The branch to commit to. If not provided, the `main` branch will be used.
        private (`bool`, `optional`):
            Whether the repository should be private.
            If `None` (default), the repo will be public unless the organization's default is private.
        allow_patterns (`List[str]` or `str`, *optional*):
            If provided, only files matching at least one pattern are uploaded.
        ignore_patterns (`List[str]` or `str`, *optional*):
            If provided, files matching any of the patterns are not uploaded.
        num_workers (`int`, *optional*):
            Number of workers to start. Defaults to `os.cpu_count() - 2` (minimum 2).
            A higher number of workers may speed up the process if your machine allows it. However, on
            machines with a slower connection, it is recommended to keep the number of workers low to
            ensure better resumability. Indeed, partially uploaded files will have to be completely
            re-uploaded if the process is interrupted.
        print_report (`bool`, *optional*):
            Whether to print a report of the upload progress. Defaults to True.
            Report is printed to `sys.stdout` every X seconds (60 by default) and overwrites the previous
            report.
        print_report_every (`int`, *optional*):
            Frequency at which the report is printed. Defaults to 60 seconds.

    > [!TIP]
    > A few things to keep in mind:
    >     - Repository limits still apply: https://huggingface.co/docs/hub/repositories-recommendations
    >     - Do not start several processes in parallel.
    >     - You can interrupt and resume the process at any time.
    >     - Do not upload the same folder to several repositories. If you need to do so, you must delete the local `.cache/.huggingface/` folder first.

    > [!WARNING]
    > While being much more robust to upload large folders, `upload_large_folder` is more limited than [`upload_folder`] feature-wise. In practice:
    >     - you cannot set a custom `path_in_repo`. If you want to upload to a subfolder, you need to set the proper structure locally.
    >     - you cannot set a custom `commit_message` and `commit_description` since multiple commits are created.
    >     - you cannot delete from the repo while uploading. Please make a separate commit first.
    >     - you cannot create a PR directly. Please create a PR first (from the UI or using [`create_pull_request`]) and then commit to it by passing `revision`.

    **Technical details:**

    The `upload_large_folder` process is as follows:
        1. (Check parameters and setup.)
        2. Create repo if missing.
        3. List local files to upload.
        4. Run validation checks and display warnings if repository limits might be exceeded:
            - Warns if the total number of files exceeds 100k (recommended limit).
            - Warns if any folder contains more than 10k files (recommended limit).
            - Warns about files larger than 20GB (recommended) or 50GB (hard limit).
        5. Start workers. Workers can perform the following tasks:
            - Hash a file.
            - Get upload mode (regular or LFS) for a list of files.
            - Pre-upload an LFS file.
            - Commit a bunch of files.
        Once a worker finishes a task, it will move on to the next task based on the priority list (see
        below) until all files are uploaded and committed.
        6. While workers are up, regularly print a report to sys.stdout.

    Order of priority:
        1. Commit if more than 5 minutes since last commit attempt (and at least 1 file).
        2. Commit if at least 150 files are ready to commit.
        3. Get upload mode if at least 10 files have been hashed.
        4. Pre-upload LFS file if at least 1 file and no worker is pre-uploading.
        5. Hash file if at least 1 file and no worker is hashing.
        6. Get upload mode if at least 1 file and no worker is getting upload mode.
        7. Pre-upload LFS file if at least 1 file (exception: if hf_transfer is enabled, only 1 worker can preupload LFS at a time).
        8. Hash file if at least 1 file to hash.
        9. Get upload mode if at least 1 file to get upload mode.
        10. Commit if at least 1 file to commit and at least 1 min since last commit attempt.
        11. Commit if at least 1 file to commit and all other queues are empty.

    Special rules:
        - If `hf_transfer` is enabled, only 1 LFS uploader at a time. Otherwise the CPU would be bloated by `hf_transfer`.
        - Only one worker can commit at a time.
        - If no tasks are available, the worker waits for 10 seconds before checking again.
    """
    # All the work happens in `upload_large_folder_internal`; this method only forwards the
    # client instance (as first positional argument) and the user-supplied options.
    upload_options = dict(
        repo_id=repo_id,
        folder_path=folder_path,
        repo_type=repo_type,
        revision=revision,
        private=private,
        allow_patterns=allow_patterns,
        ignore_patterns=ignore_patterns,
        num_workers=num_workers,
        print_report=print_report,
        print_report_every=print_report_every,
    )
    return upload_large_folder_internal(self, **upload_options)
@validate_hf_hub_args
def get_hf_file_metadata(
    self,
    *,
    url: str,
    token: Union[bool, str, None] = None,
    proxies: Optional[Dict] = None,
    timeout: Optional[float] = constants.DEFAULT_REQUEST_TIMEOUT,
) -> HfFileMetadata:
    """Fetch metadata of a file versioned on the Hub for a given url.

    The request is delegated to the function of the same name that takes the client settings
    explicitly; this method fills them in from the instance (`endpoint`, `library_name`,
    `library_version`, `user_agent`).

    Args:
        url (`str`):
            File url, for example returned by [`hf_hub_url`].
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.
        proxies (`dict`, *optional*):
            Dictionary mapping protocol to the URL of the proxy passed to `requests.request`.
        timeout (`float`, *optional*, defaults to 10):
            How many seconds to wait for the server to send metadata before giving up.

    Returns:
        A [`HfFileMetadata`] object containing metadata such as location, etag, size and commit_hash.
    """
    # `token=False` explicitly disables authentication, so only `None` falls back to the
    # instance-level token (a plain `token or self.token` would be wrong).
    resolved_token = self.token if token is None else token

    client_settings = dict(
        library_name=self.library_name,
        library_version=self.library_version,
        user_agent=self.user_agent,
        endpoint=self.endpoint,
    )
    return get_hf_file_metadata(
        url=url,
        token=resolved_token,
        proxies=proxies,
        timeout=timeout,
        **client_settings,
    )
@validate_hf_hub_args
def hf_hub_download(
    self,
    repo_id: str,
    filename: str,
    *,
    subfolder: Optional[str] = None,
    repo_type: Optional[str] = None,
    revision: Optional[str] = None,
    cache_dir: Union[str, Path, None] = None,
    local_dir: Union[str, Path, None] = None,
    force_download: bool = False,
    proxies: Optional[Dict] = None,
    etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
    token: Union[bool, str, None] = None,
    local_files_only: bool = False,
    # Deprecated args
    resume_download: Optional[bool] = None,
    force_filename: Optional[str] = None,
    local_dir_use_symlinks: Union[bool, Literal["auto"]] = "auto",
) -> str:
    """Download a given file if it's not already present in the local cache.

    The new cache file layout looks like this:
    - The cache directory contains one subfolder per repo_id (namespaced by repo type)
    - inside each repo folder:
        - refs is a list of the latest known revision => commit_hash pairs
        - blobs contains the actual file blobs (identified by their git-sha or sha256, depending on
          whether they're LFS files or not)
        - snapshots contains one subfolder per commit, each "commit" contains the subset of the files
          that have been resolved at that particular commit. Each filename is a symlink to the blob
          at that particular commit.

    ```
    [  96]  .
    └── [ 160]  models--julien-c--EsperBERTo-small
        ├── [ 160]  blobs
        │   ├── [321M]  403450e234d65943a7dcf7e05a771ce3c92faa84dd07db4ac20f592037a1e4bd
        │   ├── [ 398]  7cb18dc9bafbfcf74629a4b760af1b160957a83e
        │   └── [1.4K]  d7edf6bd2a681fb0175f7735299831ee1b22b812
        ├── [  96]  refs
        │   └── [  40]  main
        └── [ 128]  snapshots
            ├── [ 128]  2439f60ef33a0d46d85da5001d52aeda5b00ce9f
            │   ├── [  52]  README.md -> ../../blobs/d7edf6bd2a681fb0175f7735299831ee1b22b812
            │   └── [  76]  pytorch_model.bin -> ../../blobs/403450e234d65943a7dcf7e05a771ce3c92faa84dd07db4ac20f592037a1e4bd
            └── [ 128]  bbc77c8132af1cc5cf678da3f1ddf2de43606d48
                ├── [  52]  README.md -> ../../blobs/7cb18dc9bafbfcf74629a4b760af1b160957a83e
                └── [  76]  pytorch_model.bin -> ../../blobs/403450e234d65943a7dcf7e05a771ce3c92faa84dd07db4ac20f592037a1e4bd
    ```

    If `local_dir` is provided, the file structure from the repo will be replicated in this location. When
    using this option, the `cache_dir` will not be used and a `.cache/huggingface/` folder will be created at
    the root of `local_dir` to store some metadata related to the downloaded files. While this mechanism is
    not as robust as the main cache-system, it's optimized for regularly pulling the latest version of a
    repository.

    Args:
        repo_id (`str`):
            A user or an organization name and a repo name separated by a `/`.
        filename (`str`):
            The name of the file in the repo.
        subfolder (`str`, *optional*):
            An optional value corresponding to a folder inside the repository.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if downloading from a dataset or space,
            `None` or `"model"` if downloading from a model. Default is `None`.
        revision (`str`, *optional*):
            An optional Git revision id which can be a branch name, a tag, or a
            commit hash.
        cache_dir (`str`, `Path`, *optional*):
            Path to the folder where cached files are stored.
        local_dir (`str` or `Path`, *optional*):
            If provided, the downloaded file will be placed under this directory.
        force_download (`bool`, *optional*, defaults to `False`):
            Whether the file should be downloaded even if it already exists in
            the local cache.
        proxies (`dict`, *optional*):
            Dictionary mapping protocol to the URL of the proxy passed to
            `requests.request`.
        etag_timeout (`float`, *optional*, defaults to `10`):
            When fetching ETag, how many seconds to wait for the server to send
            data before giving up which is passed to `requests.request`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.
        local_files_only (`bool`, *optional*, defaults to `False`):
            If `True`, avoid downloading the file and return the path to the
            local cached file if it exists.

    The remaining keyword arguments (`resume_download`, `force_filename`, `local_dir_use_symlinks`) are
    deprecated; they are forwarded unchanged to the underlying download function.

    Returns:
        `str`: Local path of file or if networking is off, last version of file cached on disk.

    Raises:
        [`~utils.RepositoryNotFoundError`]
            If the repository to download from cannot be found. This may be because it doesn't exist,
            or because it is set to `private` and you do not have access.
        [`~utils.RevisionNotFoundError`]
            If the revision to download from cannot be found.
        [`~utils.EntryNotFoundError`]
            If the file to download cannot be found.
        [`~utils.LocalEntryNotFoundError`]
            If network is disabled or unavailable and file is not found in cache.
        [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError)
            If `token=True` but the token cannot be found.
        [`OSError`](https://docs.python.org/3/library/exceptions.html#OSError)
            If ETag cannot be determined.
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If some parameter value is invalid.
    """
    # Imported at call time; inside this body the name refers to the function from
    # `.file_download`, not to this method.
    from .file_download import hf_hub_download

    # `token=False` explicitly disables authentication, so only `None` falls back to the
    # instance-level token (a plain `token or self.token` would be wrong).
    resolved_token = self.token if token is None else token

    # Settings taken from the client instance rather than from the caller.
    client_settings = dict(
        endpoint=self.endpoint,
        library_name=self.library_name,
        library_version=self.library_version,
        user_agent=self.user_agent,
        headers=self.headers,
    )
    return hf_hub_download(
        repo_id=repo_id,
        filename=filename,
        subfolder=subfolder,
        repo_type=repo_type,
        revision=revision,
        cache_dir=cache_dir,
        local_dir=local_dir,
        local_dir_use_symlinks=local_dir_use_symlinks,
        force_download=force_download,
        force_filename=force_filename,
        proxies=proxies,
        etag_timeout=etag_timeout,
        resume_download=resume_download,
        token=resolved_token,
        local_files_only=local_files_only,
        **client_settings,
    )
@validate_hf_hub_args
def snapshot_download(
    self,
    repo_id: str,
    *,
    repo_type: Optional[str] = None,
    revision: Optional[str] = None,
    cache_dir: Union[str, Path, None] = None,
    local_dir: Union[str, Path, None] = None,
    proxies: Optional[Dict] = None,
    etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
    force_download: bool = False,
    token: Union[bool, str, None] = None,
    local_files_only: bool = False,
    allow_patterns: Optional[Union[List[str], str]] = None,
    ignore_patterns: Optional[Union[List[str], str]] = None,
    max_workers: int = 8,
    tqdm_class: Optional[Type[base_tqdm]] = None,
    # Deprecated args
    local_dir_use_symlinks: Union[bool, Literal["auto"]] = "auto",
    resume_download: Optional[bool] = None,
) -> str:
    """Download repo files.

    Download a whole snapshot of a repo's files at the specified revision. This is useful when you want all
    files from a repo, because you don't know which ones you will need a priori. All files are nested inside
    a folder in order to keep their actual filename relative to that folder. You can also filter which files
    to download using `allow_patterns` and `ignore_patterns`.

    If `local_dir` is provided, the file structure from the repo will be replicated in this location. When
    using this option, the `cache_dir` will not be used and a `.cache/huggingface/` folder will be created at
    the root of `local_dir` to store some metadata related to the downloaded files. While this mechanism is
    not as robust as the main cache-system, it's optimized for regularly pulling the latest version of a
    repository.

    An alternative would be to clone the repo but this requires git and git-lfs to be installed and properly
    configured. It is also not possible to filter which files to download when cloning a repository using git.

    Args:
        repo_id (`str`):
            A user or an organization name and a repo name separated by a `/`.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if downloading from a dataset or space,
            `None` or `"model"` if downloading from a model. Default is `None`.
        revision (`str`, *optional*):
            An optional Git revision id which can be a branch name, a tag, or a
            commit hash.
        cache_dir (`str`, `Path`, *optional*):
            Path to the folder where cached files are stored.
        local_dir (`str` or `Path`, *optional*):
            If provided, the downloaded files will be placed under this directory.
        proxies (`dict`, *optional*):
            Dictionary mapping protocol to the URL of the proxy passed to
            `requests.request`.
        etag_timeout (`float`, *optional*, defaults to `10`):
            When fetching ETag, how many seconds to wait for the server to send
            data before giving up which is passed to `requests.request`.
        force_download (`bool`, *optional*, defaults to `False`):
            Whether the file should be downloaded even if it already exists in the local cache.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.
        local_files_only (`bool`, *optional*, defaults to `False`):
            If `True`, avoid downloading the file and return the path to the
            local cached file if it exists.
        allow_patterns (`List[str]` or `str`, *optional*):
            If provided, only files matching at least one pattern are downloaded.
        ignore_patterns (`List[str]` or `str`, *optional*):
            If provided, files matching any of the patterns are not downloaded.
        max_workers (`int`, *optional*):
            Number of concurrent threads to download files (1 thread = 1 file download).
            Defaults to 8.
        tqdm_class (`tqdm`, *optional*):
            If provided, overwrites the default behavior for the progress bar. Passed
            argument must inherit from `tqdm.auto.tqdm` or at least mimic its behavior.
            Note that the `tqdm_class` is not passed to each individual download.
            Defaults to the custom HF progress bar that can be disabled by setting
            `HF_HUB_DISABLE_PROGRESS_BARS` environment variable.

    The remaining keyword arguments (`local_dir_use_symlinks`, `resume_download`) are deprecated; they are
    forwarded unchanged to the underlying download function.

    Returns:
        `str`: folder path of the repo snapshot.

    Raises:
        [`~utils.RepositoryNotFoundError`]
            If the repository to download from cannot be found. This may be because it doesn't exist,
            or because it is set to `private` and you do not have access.
        [`~utils.RevisionNotFoundError`]
            If the revision to download from cannot be found.
        [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError)
            If `token=True` and the token cannot be found.
        [`OSError`](https://docs.python.org/3/library/exceptions.html#OSError)
            If ETag cannot be determined.
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If some parameter value is invalid.
    """
    # Imported at call time; inside this body the name refers to the function from
    # `._snapshot_download`, not to this method.
    from ._snapshot_download import snapshot_download

    # `token=False` explicitly disables authentication, so only `None` falls back to the
    # instance-level token (a plain `token or self.token` would be wrong).
    resolved_token = self.token if token is None else token

    # Settings taken from the client instance rather than from the caller.
    client_settings = dict(
        endpoint=self.endpoint,
        library_name=self.library_name,
        library_version=self.library_version,
        user_agent=self.user_agent,
    )
    return snapshot_download(
        repo_id=repo_id,
        repo_type=repo_type,
        revision=revision,
        cache_dir=cache_dir,
        local_dir=local_dir,
        local_dir_use_symlinks=local_dir_use_symlinks,
        proxies=proxies,
        etag_timeout=etag_timeout,
        resume_download=resume_download,
        force_download=force_download,
        token=resolved_token,
        local_files_only=local_files_only,
        allow_patterns=allow_patterns,
        ignore_patterns=ignore_patterns,
        max_workers=max_workers,
        tqdm_class=tqdm_class,
        **client_settings,
    )
- def get_safetensors_metadata(
- self,
- repo_id: str,
- *,
- repo_type: Optional[str] = None,
- revision: Optional[str] = None,
- token: Union[bool, str, None] = None,
- ) -> SafetensorsRepoMetadata:
- """
- Parse metadata for a safetensors repo on the Hub.
- We first check if the repo has a single safetensors file or a sharded safetensors repo. If it's a single
- safetensors file, we parse the metadata from this file. If it's a sharded safetensors repo, we parse the
- metadata from the index file and then parse the metadata from each shard.
- To parse metadata from a single safetensors file, use [`parse_safetensors_file_metadata`].
- For more details regarding the safetensors format, check out https://huggingface.co/docs/safetensors/index#format.
- Args:
- repo_id (`str`):
- A user or an organization name and a repo name separated by a `/`.
- repo_type (`str`, *optional*):
- Set to `"dataset"` or `"space"` if the file is in a dataset or space, `None` or `"model"` if in a
- model. Default is `None`.
- revision (`str`, *optional*):
- The git revision to fetch the file from. Can be a branch name, a tag, or a commit hash. Defaults to the
- head of the `"main"` branch.
- token (`bool` or `str`, *optional*):
- A valid user access token (string). Defaults to the locally saved
- token, which is the recommended method for authentication (see
- https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
- To disable authentication, pass `False`.
- Returns:
- [`SafetensorsRepoMetadata`]: information related to safetensors repo.
- Raises:
- [`NotASafetensorsRepoError`]
- If the repo is not a safetensors repo i.e. doesn't have either a
- `model.safetensors` or a `model.safetensors.index.json` file.
- [`SafetensorsParsingError`]
- If a safetensors file header couldn't be parsed correctly.
- Example:
- ```py
- # Parse repo with single weights file
- >>> metadata = get_safetensors_metadata("bigscience/bloomz-560m")
- >>> metadata
- SafetensorsRepoMetadata(
- metadata=None,
- sharded=False,
- weight_map={'h.0.input_layernorm.bias': 'model.safetensors', ...},
- files_metadata={'model.safetensors': SafetensorsFileMetadata(...)}
- )
- >>> metadata.files_metadata["model.safetensors"].metadata
- {'format': 'pt'}
- # Parse repo with sharded model
- >>> metadata = get_safetensors_metadata("bigscience/bloom")
- Parse safetensors files: 100%|██████████████████████████████████████████| 72/72 [00:12<00:00, 5.78it/s]
- >>> metadata
- SafetensorsRepoMetadata(metadata={'total_size': 352494542848}, sharded=True, weight_map={...}, files_metadata={...})
- >>> len(metadata.files_metadata)
- 72 # All safetensors files have been fetched
- # Parse repo with sharded model
- >>> get_safetensors_metadata("runwayml/stable-diffusion-v1-5")
- NotASafetensorsRepoError: 'runwayml/stable-diffusion-v1-5' is not a safetensors repo. Couldn't find 'model.safetensors.index.json' or 'model.safetensors' files.
- ```
- """
- if self.file_exists( # Single safetensors file => non-sharded model
- repo_id=repo_id,
- filename=constants.SAFETENSORS_SINGLE_FILE,
- repo_type=repo_type,
- revision=revision,
- token=token,
- ):
- file_metadata = self.parse_safetensors_file_metadata(
- repo_id=repo_id,
- filename=constants.SAFETENSORS_SINGLE_FILE,
- repo_type=repo_type,
- revision=revision,
- token=token,
- )
- return SafetensorsRepoMetadata(
- metadata=None,
- sharded=False,
- weight_map={
- tensor_name: constants.SAFETENSORS_SINGLE_FILE for tensor_name in file_metadata.tensors.keys()
- },
- files_metadata={constants.SAFETENSORS_SINGLE_FILE: file_metadata},
- )
- elif self.file_exists( # Multiple safetensors files => sharded with index
- repo_id=repo_id,
- filename=constants.SAFETENSORS_INDEX_FILE,
- repo_type=repo_type,
- revision=revision,
- token=token,
- ):
- # Fetch index
- index_file = self.hf_hub_download(
- repo_id=repo_id,
- filename=constants.SAFETENSORS_INDEX_FILE,
- repo_type=repo_type,
- revision=revision,
- token=token,
- )
- with open(index_file) as f:
- index = json.load(f)
- weight_map = index.get("weight_map", {})
- # Fetch metadata per shard
- files_metadata = {}
- def _parse(filename: str) -> None:
- files_metadata[filename] = self.parse_safetensors_file_metadata(
- repo_id=repo_id, filename=filename, repo_type=repo_type, revision=revision, token=token
- )
- thread_map(
- _parse,
- set(weight_map.values()),
- desc="Parse safetensors files",
- tqdm_class=hf_tqdm,
- )
- return SafetensorsRepoMetadata(
- metadata=index.get("metadata", None),
- sharded=True,
- weight_map=weight_map,
- files_metadata=files_metadata,
- )
- else:
- # Not a safetensors repo
- raise NotASafetensorsRepoError(
- f"'{repo_id}' is not a safetensors repo. Couldn't find '{constants.SAFETENSORS_INDEX_FILE}' or '{constants.SAFETENSORS_SINGLE_FILE}' files."
- )
def parse_safetensors_file_metadata(
    self,
    repo_id: str,
    filename: str,
    *,
    repo_type: Optional[str] = None,
    revision: Optional[str] = None,
    token: Union[bool, str, None] = None,
) -> SafetensorsFileMetadata:
    """
    Parse metadata from a safetensors file on the Hub.

    To parse metadata from all safetensors files in a repo at once, use [`get_safetensors_metadata`].

    For more details regarding the safetensors format, check out https://huggingface.co/docs/safetensors/index#format.

    Args:
        repo_id (`str`):
            A user or an organization name and a repo name separated by a `/`.
        filename (`str`):
            The name of the file in the repo.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if the file is in a dataset or space, `None` or `"model"` if in a
            model. Default is `None`.
        revision (`str`, *optional*):
            The git revision to fetch the file from. Can be a branch name, a tag, or a commit hash. Defaults to the
            head of the `"main"` branch.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        [`SafetensorsFileMetadata`]: information related to a safetensors file.

    Raises:
        [`SafetensorsParsingError`]:
            If the safetensors file header couldn't be parsed correctly (file too short, header too big,
            header not valid JSON, or header entries not in the expected format).

    Any HTTP error detected by `hf_raise_for_status` on the underlying GET requests is propagated as is.
    """
    url = hf_hub_url(
        repo_id=repo_id, filename=filename, repo_type=repo_type, revision=revision, endpoint=self.endpoint
    )
    _headers = self._build_hf_headers(token=token)

    # 1. Fetch first 100kb
    # Empirically, 97% of safetensors files have a metadata size < 100kb (over the top 1000 models on the Hub).
    # We assume fetching 100kb is faster than making 2 GET requests. Therefore we always fetch the first 100kb to
    # avoid the 2nd GET in most cases.
    # See https://github.com/huggingface/huggingface_hub/pull/1855#discussion_r1404286419.
    first_chunk_last_byte = 100000  # HTTP byte ranges are inclusive => 100001 bytes are requested
    response = get_session().get(url, headers={**_headers, "range": f"bytes=0-{first_chunk_last_byte}"})
    hf_raise_for_status(response)
    first_chunk = response.content

    # 2. Parse metadata size (first 8 bytes, little-endian unsigned 64-bit integer)
    if len(first_chunk) < 8:
        # Without this guard `struct.unpack` would raise a bare `struct.error`.
        raise SafetensorsParsingError(
            f"Failed to parse safetensors header for '{filename}' (repo '{repo_id}', revision "
            f"'{revision or constants.DEFAULT_REVISION}'): file is too small to contain a safetensors header "
            f"(expected at least 8 bytes, got {len(first_chunk)})."
        )
    metadata_size = struct.unpack("<Q", first_chunk[:8])[0]
    if metadata_size > constants.SAFETENSORS_MAX_HEADER_LENGTH:
        raise SafetensorsParsingError(
            f"Failed to parse safetensors header for '{filename}' (repo '{repo_id}', revision "
            f"'{revision or constants.DEFAULT_REVISION}'): safetensors header is too big. Maximum supported size is "
            f"{constants.SAFETENSORS_MAX_HEADER_LENGTH} bytes (got {metadata_size})."
        )

    # 3.a. Get metadata from payload
    # The JSON header occupies bytes [8, 8 + metadata_size). It is only fully contained in the first chunk
    # when its last byte lies within the requested range, i.e. the 8-byte size prefix must be accounted for.
    if 8 + metadata_size <= first_chunk_last_byte + 1:
        metadata_as_bytes = first_chunk[8 : 8 + metadata_size]
    else:  # 3.b. Request full metadata
        response = get_session().get(url, headers={**_headers, "range": f"bytes=8-{metadata_size + 7}"})
        hf_raise_for_status(response)
        metadata_as_bytes = response.content

    # 4. Parse json header
    try:
        metadata_as_dict = json.loads(metadata_as_bytes.decode(errors="ignore"))
    except json.JSONDecodeError as e:
        raise SafetensorsParsingError(
            f"Failed to parse safetensors header for '{filename}' (repo '{repo_id}', revision "
            f"'{revision or constants.DEFAULT_REVISION}'): header is not json-encoded string. Please make sure this is a "
            "correctly formatted safetensors file."
        ) from e

    # 5. Build the structured metadata. Every key except "__metadata__" describes one tensor.
    try:
        return SafetensorsFileMetadata(
            metadata=metadata_as_dict.get("__metadata__", {}),
            tensors={
                key: TensorInfo(
                    dtype=tensor["dtype"],
                    shape=tensor["shape"],
                    data_offsets=tuple(tensor["data_offsets"]),  # type: ignore
                )
                for key, tensor in metadata_as_dict.items()
                if key != "__metadata__"
            },
        )
    except (KeyError, IndexError, TypeError, AttributeError) as e:
        # TypeError / AttributeError cover valid JSON that is not a mapping of mappings
        # (e.g. a top-level list, or a tensor entry that is not a dict).
        raise SafetensorsParsingError(
            f"Failed to parse safetensors header for '{filename}' (repo '{repo_id}', revision "
            f"'{revision or constants.DEFAULT_REVISION}'): header format not recognized. Please make sure this is a correctly"
            " formatted safetensors file."
        ) from e
@validate_hf_hub_args
def create_branch(
    self,
    repo_id: str,
    *,
    branch: str,
    revision: Optional[str] = None,
    token: Union[bool, str, None] = None,
    repo_type: Optional[str] = None,
    exist_ok: bool = False,
) -> None:
    """
    Create a new branch for a repo on the Hub, starting from the specified revision (defaults to `main`).
    To find a revision suiting your needs, you can use [`list_repo_refs`] or [`list_repo_commits`].

    Args:
        repo_id (`str`):
            The repository in which the branch will be created.
            Example: `"user/my-cool-model"`.
        branch (`str`):
            The name of the branch to create.
        revision (`str`, *optional*):
            The git revision to create the branch from. It can be a branch name or
            the OID/SHA of a commit, as a hexadecimal string. Defaults to the head
            of the `"main"` branch.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if creating a branch on a dataset or
            space, `None` or `"model"` if creating a branch on a model. Default is `None`.
        exist_ok (`bool`, *optional*, defaults to `False`):
            If `True`, do not raise an error if branch already exists.

    Raises:
        [`~utils.RepositoryNotFoundError`]:
            If repository is not found (error 404): wrong repo_id/repo_type, private
            but not authenticated or repo does not exist.
        [`~utils.BadRequestError`]:
            If invalid reference for a branch. Ex: `refs/pr/5` or 'refs/foo/bar'.
        [`~utils.HfHubHTTPError`]:
            If the branch already exists on the repo (error 409) and `exist_ok` is
            set to `False`.
    """
    if repo_type is None:
        repo_type = constants.REPO_TYPE_MODEL

    # Only the URL gets the percent-encoded name. The caller-provided name is kept untouched so that it
    # can be compared with the names listed by `list_repo_refs` in the 403 fallback below (those are
    # expected to be plain, non URL-encoded names).
    quoted_branch = quote(branch, safe="")

    # Prepare request
    branch_url = f"{self.endpoint}/api/{repo_type}s/{repo_id}/branch/{quoted_branch}"
    headers = self._build_hf_headers(token=token)
    payload = {}
    if revision is not None:
        payload["startingPoint"] = revision

    # Create branch
    response = get_session().post(url=branch_url, headers=headers, json=payload)
    try:
        hf_raise_for_status(response)
    except HfHubHTTPError as e:
        if exist_ok and e.response.status_code == 409:
            return
        elif exist_ok and e.response.status_code == 403:
            # No write permission on the namespace but branch might already exist
            try:
                refs = self.list_repo_refs(repo_id=repo_id, repo_type=repo_type, token=token)
                for branch_ref in refs.branches:
                    if branch_ref.name == branch:
                        return  # Branch already exists => do not raise
            except HfHubHTTPError:
                pass  # We raise the original error if the branch does not exist
        raise
@validate_hf_hub_args
def delete_branch(
    self,
    repo_id: str,
    *,
    branch: str,
    token: Union[bool, str, None] = None,
    repo_type: Optional[str] = None,
) -> None:
    """
    Remove a branch from a repo on the Hub.

    Args:
        repo_id (`str`):
            The repository from which the branch is removed.
            Example: `"user/my-cool-model"`.
        branch (`str`):
            Name of the branch to remove. It is URL-encoded before being sent.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if the branch belongs to a dataset or
            space, `None` or `"model"` if it belongs to a model. Default is `None`.

    Raises:
        [`~utils.RepositoryNotFoundError`]:
            If repository is not found (error 404): wrong repo_id/repo_type, private
            but not authenticated or repo does not exist.
        [`~utils.HfHubHTTPError`]:
            If trying to delete a protected branch. Ex: `main` cannot be deleted.
        [`~utils.HfHubHTTPError`]:
            If trying to delete a branch that does not exist.
    """
    resolved_repo_type = constants.REPO_TYPE_MODEL if repo_type is None else repo_type
    encoded_branch = quote(branch, safe="")

    # Build the request pieces first, then issue a single DELETE call.
    endpoint_url = f"{self.endpoint}/api/{resolved_repo_type}s/{repo_id}/branch/{encoded_branch}"
    auth_headers = self._build_hf_headers(token=token)

    deletion_response = get_session().delete(url=endpoint_url, headers=auth_headers)
    hf_raise_for_status(deletion_response)
@validate_hf_hub_args
def create_tag(
    self,
    repo_id: str,
    *,
    tag: str,
    tag_message: Optional[str] = None,
    revision: Optional[str] = None,
    token: Union[bool, str, None] = None,
    repo_type: Optional[str] = None,
    exist_ok: bool = False,
) -> None:
    """
    Attach a tag to a commit of a repo on the Hub.

    Args:
        repo_id (`str`):
            The repository in which a commit will be tagged.
            Example: `"user/my-cool-model"`.
        tag (`str`):
            The name of the tag to create.
        tag_message (`str`, *optional*):
            The description of the tag to create.
        revision (`str`, *optional*):
            The git revision to tag. It can be a branch name or the OID/SHA of a
            commit, as a hexadecimal string. Shorthands (7 first characters) are
            also supported. Defaults to the head of the `"main"` branch.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if tagging a dataset or
            space, `None` or `"model"` if tagging a model. Default is
            `None`.
        exist_ok (`bool`, *optional*, defaults to `False`):
            If `True`, do not raise an error if tag already exists.

    Raises:
        [`~utils.RepositoryNotFoundError`]:
            If repository is not found (error 404): wrong repo_id/repo_type, private
            but not authenticated or repo does not exist.
        [`~utils.RevisionNotFoundError`]:
            If revision is not found (error 404) on the repo.
        [`~utils.HfHubHTTPError`]:
            If the tag already exists on the repo (error 409) and `exist_ok` is
            set to `False`.
    """
    if repo_type is None:
        repo_type = constants.REPO_TYPE_MODEL

    # The revision is part of the URL path: encode it, or fall back to the default revision.
    if revision is None:
        target_revision = constants.DEFAULT_REVISION
    else:
        target_revision = quote(revision, safe="")

    # Assemble the request
    tag_url = f"{self.endpoint}/api/{repo_type}s/{repo_id}/tag/{target_revision}"
    auth_headers = self._build_hf_headers(token=token)
    body = {"tag": tag}
    if tag_message is not None:
        body["message"] = tag_message

    # Send it; a 409 is swallowed only when the caller opted in with `exist_ok`.
    response = get_session().post(url=tag_url, headers=auth_headers, json=body)
    try:
        hf_raise_for_status(response)
    except HfHubHTTPError as err:
        if err.response.status_code == 409 and exist_ok:
            return
        raise
@validate_hf_hub_args
def delete_tag(
    self,
    repo_id: str,
    *,
    tag: str,
    token: Union[bool, str, None] = None,
    repo_type: Optional[str] = None,
) -> None:
    """
    Remove a tag from a repo on the Hub.

    Args:
        repo_id (`str`):
            The repository from which the tag is removed.
            Example: `"user/my-cool-model"`.
        tag (`str`):
            Name of the tag to remove. It is URL-encoded before being sent.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if the tag belongs to a dataset or space,
            `None` or `"model"` if it belongs to a model. Default is `None`.

    Raises:
        [`~utils.RepositoryNotFoundError`]:
            If repository is not found (error 404): wrong repo_id/repo_type, private
            but not authenticated or repo does not exist.
        [`~utils.RevisionNotFoundError`]:
            If tag is not found.
    """
    resolved_repo_type = constants.REPO_TYPE_MODEL if repo_type is None else repo_type
    encoded_tag = quote(tag, safe="")

    # Build the request pieces first, then issue a single DELETE call.
    endpoint_url = f"{self.endpoint}/api/{resolved_repo_type}s/{repo_id}/tag/{encoded_tag}"
    auth_headers = self._build_hf_headers(token=token)

    deletion_response = get_session().delete(url=endpoint_url, headers=auth_headers)
    hf_raise_for_status(deletion_response)
@validate_hf_hub_args
def get_full_repo_name(
    self,
    model_id: str,
    *,
    organization: Optional[str] = None,
    token: Union[bool, str, None] = None,
) -> str:
    """
    Returns the repository name for a given model ID and optional
    organization.

    Args:
        model_id (`str`):
            The name of the model. If it already contains a namespace
            (`"namespace/name"`) and no `organization` is passed, it is
            returned unchanged.
        organization (`str`, *optional*):
            If passed, the repository name will be in the organization
            namespace instead of the user namespace.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `str`: The repository name in the user's namespace
        ({username}/{model_id}) if no organization is passed, and under the
        organization namespace ({organization}/{model_id}) otherwise.
    """
    if organization is not None:
        return f"{organization}/{model_id}"

    if "/" in model_id:
        # Already namespaced: prepending the namespace again would produce
        # "user/user/model", which is not a valid repo id.
        return model_id

    # The user's name is only looked up when it is actually needed.
    username = self.whoami(token=token)["name"]  # type: ignore
    return f"{username}/{model_id}"
@validate_hf_hub_args
def get_repo_discussions(
    self,
    repo_id: str,
    *,
    author: Optional[str] = None,
    discussion_type: Optional[constants.DiscussionTypeFilter] = None,
    discussion_status: Optional[constants.DiscussionStatusFilter] = None,
    repo_type: Optional[str] = None,
    token: Union[bool, str, None] = None,
) -> Iterator[Discussion]:
    """
    Lazily iterate over the Discussions and Pull Requests of a repo.

    Pages are requested one at a time as the iterator is consumed. Because this is a
    generator, argument validation also only runs once iteration starts.

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        author (`str`, *optional*):
            Pass a value to filter by discussion author. `None` means no filter.
            Default is `None`.
        discussion_type (`str`, *optional*):
            Set to `"pull_request"` to fetch only pull requests, `"discussion"`
            to fetch only discussions. Set to `"all"` or `None` to fetch both.
            Default is `None`.
        discussion_status (`str`, *optional*):
            Set to `"open"` (respectively `"closed"`) to fetch only open
            (respectively closed) discussions. Set to `"all"` or `None`
            to fetch both.
            Default is `None`.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if fetching from a dataset or
            space, `None` or `"model"` if fetching from a model. Default is
            `None`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `Iterator[Discussion]`: An iterator of [`Discussion`] objects.

    Raises:
        `ValueError`: if `repo_type`, `discussion_type` or `discussion_status` is not an accepted value.

    Example:
        Collecting all discussions of a repo in a list:

        ```python
        >>> from huggingface_hub import get_repo_discussions
        >>> discussions_list = list(get_repo_discussions(repo_id="bert-base-uncased"))
        ```

        Iterating over discussions of a repo:

        ```python
        >>> from huggingface_hub import get_repo_discussions
        >>> for discussion in get_repo_discussions(repo_id="bert-base-uncased"):
        ...     print(discussion.num, discussion.title)
        ```
    """
    # --- Argument validation ---
    if repo_type not in constants.REPO_TYPES:
        raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}")
    if repo_type is None:
        repo_type = constants.REPO_TYPE_MODEL
    if discussion_type is not None and discussion_type not in constants.DISCUSSION_TYPES:
        raise ValueError(f"Invalid discussion_type, must be one of {constants.DISCUSSION_TYPES}")
    if discussion_status is not None and discussion_status not in constants.DISCUSSION_STATUS:
        raise ValueError(f"Invalid discussion_status, must be one of {constants.DISCUSSION_STATUS}")

    # --- Request setup ---
    headers = self._build_hf_headers(token=token)
    path = f"{self.endpoint}/api/{repo_type}s/{repo_id}/discussions"

    query: Dict[str, Union[str, int]] = {}
    if discussion_type is not None:
        query["type"] = discussion_type
    if discussion_status is not None:
        query["status"] = discussion_status
    if author is not None:
        query["author"] = author

    # --- Pagination loop ---
    current_page = 0
    while True:
        query["p"] = current_page
        resp = get_session().get(path, headers=headers, params=query)
        hf_raise_for_status(resp)
        payload = resp.json()

        total_count = payload["count"]
        page_start = payload["start"]
        page_items = payload["discussions"]
        # More pages remain as long as the items seen so far do not reach the reported total.
        more_pages = (page_start + len(page_items)) < total_count

        for item in page_items:
            yield Discussion(
                title=item["title"],
                num=item["num"],
                author=item.get("author", {}).get("name", "deleted"),
                created_at=parse_datetime(item["createdAt"]),
                status=item["status"],
                repo_id=item["repo"]["name"],
                repo_type=item["repo"]["type"],
                is_pull_request=item["isPullRequest"],
                endpoint=self.endpoint,
            )

        if not more_pages:
            break
        current_page += 1
@validate_hf_hub_args
def get_discussion_details(
    self,
    repo_id: str,
    discussion_num: int,
    *,
    repo_type: Optional[str] = None,
    token: Union[bool, str, None] = None,
) -> DiscussionWithDetails:
    """Retrieve the full details of a single Discussion or Pull Request from the Hub.

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        discussion_num (`int`):
            The number of the Discussion or Pull Request. Must be a strictly positive integer.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if the discussion belongs to a dataset or
            space, `None` or `"model"` if it belongs to a model. Default is
            `None`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns: [`DiscussionWithDetails`]

    > [!TIP]
    > Raises the following errors:
    >
    >     - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
    >       if the HuggingFace API returned an error
    >     - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
    >       if some parameter value is invalid
    >     - [`~utils.RepositoryNotFoundError`]
    >       If the repository to download from cannot be found. This may be because it doesn't exist,
    >       or because it is set to `private` and you do not have access.
    """
    # Validate inputs before any network access.
    if not isinstance(discussion_num, int) or discussion_num <= 0:
        raise ValueError("Invalid discussion_num, must be a positive integer")
    if repo_type not in constants.REPO_TYPES:
        raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}")
    if repo_type is None:
        repo_type = constants.REPO_TYPE_MODEL

    path = f"{self.endpoint}/api/{repo_type}s/{repo_id}/discussions/{discussion_num}"
    headers = self._build_hf_headers(token=token)
    resp = get_session().get(path, params={"diff": "1"}, headers=headers)
    hf_raise_for_status(resp)
    details = resp.json()

    # Pull-request-only fields are read solely when the payload is flagged as a Pull Request.
    is_pull_request = details["isPullRequest"]
    if is_pull_request:
        target_branch = details["changes"]["base"]
        conflicting_files = details["filesWithConflicts"]
        merge_commit_oid = details["changes"].get("mergeCommitId", None)
    else:
        target_branch = None
        conflicting_files = None
        merge_commit_oid = None

    return DiscussionWithDetails(
        title=details["title"],
        num=details["num"],
        author=details.get("author", {}).get("name", "deleted"),
        created_at=parse_datetime(details["createdAt"]),
        status=details["status"],
        repo_id=details["repo"]["name"],
        repo_type=details["repo"]["type"],
        is_pull_request=details["isPullRequest"],
        events=[deserialize_event(raw_event) for raw_event in details["events"]],
        conflicting_files=conflicting_files,
        target_branch=target_branch,
        merge_commit_oid=merge_commit_oid,
        diff=details.get("diff"),
        endpoint=self.endpoint,
    )
@validate_hf_hub_args
def create_discussion(
    self,
    repo_id: str,
    title: str,
    *,
    token: Union[bool, str, None] = None,
    description: Optional[str] = None,
    repo_type: Optional[str] = None,
    pull_request: bool = False,
) -> DiscussionWithDetails:
    """Open a new Discussion or Pull Request on a repo.

    Pull Requests created programmatically will be in `"draft"` status.

    Creating a Pull Request with changes can also be done at once with [`HfApi.create_commit`].

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        title (`str`):
            The title of the discussion. It can be up to 200 characters long,
            and must be at least 3 characters long. Leading and trailing whitespaces
            will be stripped.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.
        description (`str`, *optional*):
            An optional description. Surrounding whitespace is stripped; when missing or
            empty, a default text mentioning the `huggingface_hub` Python library is used
            (starting with "Pull Request" or "Discussion" depending on `pull_request`).
        pull_request (`bool`, *optional*):
            Whether to create a Pull Request or discussion. If `True`, creates a Pull Request.
            If `False`, creates a discussion. Defaults to `False`.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if opening it on a dataset or
            space, `None` or `"model"` if opening it on a model. Default is
            `None`.

    Returns: [`DiscussionWithDetails`]

    > [!TIP]
    > Raises the following errors:
    >
    >     - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
    >       if the HuggingFace API returned an error
    >     - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
    >       if some parameter value is invalid
    >     - [`~utils.RepositoryNotFoundError`]
    >       If the repository to download from cannot be found. This may be because it doesn't exist,
    >       or because it is set to `private` and you do not have access."""
    if repo_type not in constants.REPO_TYPES:
        raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}")
    if repo_type is None:
        repo_type = constants.REPO_TYPE_MODEL

    # Normalize the description; fall back to the default text when nothing usable remains.
    if description is not None:
        description = description.strip()
    if not description:
        opened_kind = "Pull Request" if pull_request else "Discussion"
        description = (
            f"{opened_kind} opened with the"
            " [huggingface_hub Python"
            " library](https://huggingface.co/docs/huggingface_hub)"
        )

    headers = self._build_hf_headers(token=token)
    creation_response = get_session().post(
        f"{self.endpoint}/api/{repo_type}s/{repo_id}/discussions",
        json={
            "title": title.strip(),
            "description": description,
            "pullRequest": pull_request,
        },
        headers=headers,
    )
    hf_raise_for_status(creation_response)

    # The creation endpoint returns the new number; fetch the full details from it.
    created_num = creation_response.json()["num"]
    return self.get_discussion_details(
        repo_id=repo_id,
        repo_type=repo_type,
        discussion_num=created_num,
        token=token,
    )
@validate_hf_hub_args
def create_pull_request(
    self,
    repo_id: str,
    title: str,
    *,
    token: Union[bool, str, None] = None,
    description: Optional[str] = None,
    repo_type: Optional[str] = None,
) -> DiscussionWithDetails:
    """Open a new Pull Request on a repo. Pull Requests created programmatically will be in `"draft"` status.

    Creating a Pull Request with changes can also be done at once with [`HfApi.create_commit`].

    This is a wrapper around [`HfApi.create_discussion`] called with `pull_request=True`.

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        title (`str`):
            The title of the Pull Request. It can be up to 200 characters long,
            and must be at least 3 characters long. Leading and trailing whitespaces
            will be stripped.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.
        description (`str`, *optional*):
            An optional description for the Pull Request. When missing or empty, the
            default text chosen by [`HfApi.create_discussion`] is used.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if opening it on a dataset or
            space, `None` or `"model"` if opening it on a model. Default is
            `None`.

    Returns: [`DiscussionWithDetails`]

    > [!TIP]
    > Raises the following errors:
    >
    >     - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
    >       if the HuggingFace API returned an error
    >     - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
    >       if some parameter value is invalid
    >     - [`~utils.RepositoryNotFoundError`]
    >       If the repository to download from cannot be found. This may be because it doesn't exist,
    >       or because it is set to `private` and you do not have access."""
    # Pure delegation: only the `pull_request` flag is fixed here.
    pull_request_details = self.create_discussion(
        repo_id=repo_id,
        title=title,
        token=token,
        description=description,
        repo_type=repo_type,
        pull_request=True,
    )
    return pull_request_details
def _post_discussion_changes(
    self,
    *,
    repo_id: str,
    discussion_num: int,
    resource: str,
    body: Optional[dict] = None,
    token: Union[bool, str, None] = None,
    repo_type: Optional[str] = None,
) -> requests.Response:
    """Internal utility to POST changes to a Discussion or Pull Request.

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated by a `/`.
        discussion_num (`int`):
            The number of the Discussion or Pull Request. Must be a strictly positive integer.
        resource (`str`):
            Last path segment of the endpoint to call, e.g. `"comment"` or `"title"`.
        body (`dict`, *optional*):
            JSON payload sent with the request.
        token (`bool` or `str`, *optional*):
            Token forwarded to `_build_hf_headers` to build the request headers.
        repo_type (`str`, *optional*):
            `"dataset"`, `"space"`, or `None`/`"model"`. Default is `None`.

    Returns:
        `requests.Response`: the response, after `hf_raise_for_status` has accepted it.

    Raises:
        `ValueError`: if `discussion_num` is not a positive integer or `repo_type` is not an accepted value.
    """
    if not isinstance(discussion_num, int) or discussion_num <= 0:
        raise ValueError("Invalid discussion_num, must be a positive integer")
    if repo_type not in constants.REPO_TYPES:
        raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}")
    if repo_type is None:
        repo_type = constants.REPO_TYPE_MODEL

    # Prefix the repo id with its pluralized type, e.g. "models/user/repo".
    repo_id = f"{repo_type}s/{repo_id}"
    path = f"{self.endpoint}/api/{repo_id}/discussions/{discussion_num}/{resource}"

    headers = self._build_hf_headers(token=token)
    # Go through the shared session like every other request in this class, instead of a bare
    # `requests.post`, so that session-level configuration also applies to discussion updates.
    resp = get_session().post(path, headers=headers, json=body)
    hf_raise_for_status(resp)
    return resp
@validate_hf_hub_args
def comment_discussion(
    self,
    repo_id: str,
    discussion_num: int,
    comment: str,
    *,
    token: Union[bool, str, None] = None,
    repo_type: Optional[str] = None,
) -> DiscussionComment:
    """Post a new comment on the given Discussion or Pull Request.

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        discussion_num (`int`):
            The number of the Discussion or Pull Request. Must be a strictly positive integer.
        comment (`str`):
            The content of the comment to create. Comments support markdown formatting.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if the discussion belongs to a dataset or
            space, `None` or `"model"` if it belongs to a model. Default is
            `None`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        [`DiscussionComment`]: the newly created comment

    Examples:
        ```python

        >>> comment = \"\"\"
        ... Hello @otheruser!
        ...
        ... # This is a title
        ...
        ... **This is bold**, *this is italic* and ~this is strikethrough~
        ... And [this](http://url) is a link
        ... \"\"\"

        >>> HfApi().comment_discussion(
        ...     repo_id="username/repo_name",
        ...     discussion_num=34,
        ...     comment=comment
        ... )
        # DiscussionComment(id='deadbeef0000000', type='comment', ...)

        ```

    > [!TIP]
    > Raises the following errors:
    >
    >     - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
    >       if the HuggingFace API returned an error
    >     - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
    >       if some parameter value is invalid
    >     - [`~utils.RepositoryNotFoundError`]
    >       If the repository to download from cannot be found. This may be because it doesn't exist,
    >       or because it is set to `private` and you do not have access.
    """
    response = self._post_discussion_changes(
        repo_id=repo_id,
        repo_type=repo_type,
        discussion_num=discussion_num,
        token=token,
        resource="comment",
        body={"comment": comment},
    )
    # The created comment is returned under the "newMessage" key of the JSON response.
    new_message = response.json()["newMessage"]
    return deserialize_event(new_message)  # type: ignore
@validate_hf_hub_args
def rename_discussion(
    self,
    repo_id: str,
    discussion_num: int,
    new_title: str,
    *,
    token: Union[bool, str, None] = None,
    repo_type: Optional[str] = None,
) -> DiscussionTitleChange:
    """Change the title of a Discussion or Pull Request.

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        discussion_num (`int`):
            The number of the Discussion or Pull Request. Must be a strictly positive integer.
        new_title (`str`):
            The new title for the discussion
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if the discussion belongs to a dataset or
            space, `None` or `"model"` if it belongs to a model. Default is
            `None`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        [`DiscussionTitleChange`]: the title change event

    Examples:
        ```python
        >>> new_title = "New title, fixing a typo"
        >>> HfApi().rename_discussion(
        ...     repo_id="username/repo_name",
        ...     discussion_num=34,
        ...     new_title=new_title
        ... )
        # DiscussionTitleChange(id='deadbeef0000000', type='title-change', ...)

        ```

    > [!TIP]
    > Raises the following errors:
    >
    >     - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
    >       if the HuggingFace API returned an error
    >     - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
    >       if some parameter value is invalid
    >     - [`~utils.RepositoryNotFoundError`]
    >       If the repository to download from cannot be found. This may be because it doesn't exist,
    >       or because it is set to `private` and you do not have access.
    """
    response = self._post_discussion_changes(
        repo_id=repo_id,
        repo_type=repo_type,
        discussion_num=discussion_num,
        token=token,
        resource="title",
        body={"title": new_title},
    )
    # The title-change event is returned under the "newTitle" key of the JSON response.
    title_event = response.json()["newTitle"]
    return deserialize_event(title_event)  # type: ignore
@validate_hf_hub_args
def change_discussion_status(
    self,
    repo_id: str,
    discussion_num: int,
    new_status: Literal["open", "closed"],
    *,
    token: Union[bool, str, None] = None,
    comment: Optional[str] = None,
    repo_type: Optional[str] = None,
) -> DiscussionStatusChange:
    """Closes or re-opens a Discussion or Pull Request.

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        discussion_num (`int`):
            The number of the Discussion or Pull Request. Must be a strictly positive integer.
        new_status (`str`):
            The new status for the discussion, either `"open"` or `"closed"`.
        comment (`str`, *optional*):
            An optional comment to post with the status change. Leading and trailing
            whitespace is stripped; an empty or whitespace-only comment is not sent.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if targeting a dataset or
            space, `None` or `"model"` if targeting a model. Default is
            `None`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        [`DiscussionStatusChange`]: the status change event

    Examples:
        ```python
        >>> HfApi().change_discussion_status(
        ...     repo_id="username/repo_name",
        ...     discussion_num=34,
        ...     new_status="closed",
        ...     comment="Fixed in the latest revision.",
        ... )
        # DiscussionStatusChange(id='deadbeef0000000', type='status-change', ...)
        ```

    > [!TIP]
    > Raises the following errors:
    >
    >     - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
    >       if the HuggingFace API returned an error
    >     - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
    >       if some parameter value is invalid (including a `new_status` other than
    >       `"open"` or `"closed"`)
    >     - [`~utils.RepositoryNotFoundError`]
    >       If the repository cannot be found. This may be because it doesn't exist,
    >       or because it is set to `private` and you do not have access.
    """
    # Reject unknown statuses locally, before any request is sent.
    if new_status not in ["open", "closed"]:
        raise ValueError("Invalid status, valid statuses are: 'open' and 'closed'")

    body: Dict[str, str] = {"status": new_status}
    # Strip once; only attach the comment when something is left after stripping.
    stripped_comment = comment.strip() if comment else ""
    if stripped_comment:
        body["comment"] = stripped_comment

    resp = self._post_discussion_changes(
        repo_id=repo_id,
        repo_type=repo_type,
        discussion_num=discussion_num,
        token=token,
        resource="status",
        body=body,
    )
    # The status change event is read from the "newStatus" field of the response.
    return deserialize_event(resp.json()["newStatus"])  # type: ignore
@validate_hf_hub_args
def merge_pull_request(
    self,
    repo_id: str,
    discussion_num: int,
    *,
    token: Union[bool, str, None] = None,
    comment: Optional[str] = None,
    repo_type: Optional[str] = None,
) -> None:
    """Merges a Pull Request.

    Args:
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        discussion_num (`int`):
            The number of the Discussion or Pull Request. Must be a strictly positive integer.
        comment (`str`, *optional*):
            An optional comment to post with the merge. Leading and trailing whitespace
            is stripped; an empty or whitespace-only comment results in no request body.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if targeting a dataset or
            space, `None` or `"model"` if targeting a model. Default is
            `None`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `None`. The server response is not parsed or returned by this method.

    > [!TIP]
    > Raises the following errors:
    >
    >     - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
    >       if the HuggingFace API returned an error
    >     - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
    >       if some parameter value is invalid
    >     - [`~utils.RepositoryNotFoundError`]
    >       If the repository cannot be found. This may be because it doesn't exist,
    >       or because it is set to `private` and you do not have access.
    """
    # Strip once; send a body only when a non-empty comment remains.
    stripped_comment = comment.strip() if comment else ""
    self._post_discussion_changes(
        repo_id=repo_id,
        repo_type=repo_type,
        discussion_num=discussion_num,
        token=token,
        resource="merge",
        body={"comment": stripped_comment} if stripped_comment else None,
    )
@validate_hf_hub_args
def edit_discussion_comment(
    self,
    repo_id: str,
    discussion_num: int,
    comment_id: str,
    new_content: str,
    *,
    token: Union[bool, str, None] = None,
    repo_type: Optional[str] = None,
) -> DiscussionComment:
    """Replace the content of an existing comment on a Discussion / Pull Request.

    Args:
        repo_id (`str`):
            Namespace (user or organization) and repo name, separated by a `/`.
        discussion_num (`int`):
            Number of the Discussion or Pull Request. Must be a strictly positive integer.
        comment_id (`str`):
            ID of the comment to edit. It is lower-cased before being placed in the request path.
        new_content (`str`):
            New content of the comment. Comments support markdown formatting.
        repo_type (`str`, *optional*):
            `"dataset"` or `"space"` when targeting a dataset or a Space; `None` or `"model"`
            when targeting a model. Defaults to `None`.
        token (`bool` or `str`, *optional*):
            User access token. When omitted, the locally saved token is used, which is the
            recommended way to authenticate (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            Pass `False` to disable authentication.

    Returns:
        [`DiscussionComment`]: the edited comment

    > [!TIP]
    > Raises the following errors:
    >
    >     - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
    >       if the HuggingFace API returned an error
    >     - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
    >       if some parameter value is invalid
    >     - [`~utils.RepositoryNotFoundError`]
    >       If the repository cannot be found. This may be because it doesn't exist,
    >       or because it is set to `private` and you do not have access.
    """
    edit_resource = f"comment/{comment_id.lower()}/edit"
    edit_payload = {"content": new_content}
    response = self._post_discussion_changes(
        repo_id=repo_id,
        repo_type=repo_type,
        discussion_num=discussion_num,
        token=token,
        resource=edit_resource,
        body=edit_payload,
    )
    # The edited comment is read from the "updatedComment" field of the response.
    raw_comment = response.json()["updatedComment"]
    return deserialize_event(raw_comment)  # type: ignore
@validate_hf_hub_args
def hide_discussion_comment(
    self,
    repo_id: str,
    discussion_num: int,
    comment_id: str,
    *,
    token: Union[bool, str, None] = None,
    repo_type: Optional[str] = None,
) -> DiscussionComment:
    """Hide a comment on a Discussion / Pull Request.

    > [!WARNING]
    > Hidden comments' content cannot be retrieved anymore. Hiding a comment is irreversible.

    Args:
        repo_id (`str`):
            Namespace (user or organization) and repo name, separated by a `/`.
        discussion_num (`int`):
            Number of the Discussion or Pull Request. Must be a strictly positive integer.
        comment_id (`str`):
            ID of the comment to hide. It is lower-cased before being placed in the request path.
        repo_type (`str`, *optional*):
            `"dataset"` or `"space"` when targeting a dataset or a Space; `None` or `"model"`
            when targeting a model. Defaults to `None`.
        token (`bool` or `str`, *optional*):
            User access token. When omitted, the locally saved token is used, which is the
            recommended way to authenticate (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            Pass `False` to disable authentication.

    Returns:
        [`DiscussionComment`]: the hidden comment

    > [!TIP]
    > Raises the following errors:
    >
    >     - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
    >       if the HuggingFace API returned an error
    >     - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
    >       if some parameter value is invalid
    >     - [`~utils.RepositoryNotFoundError`]
    >       If the repository cannot be found. This may be because it doesn't exist,
    >       or because it is set to `private` and you do not have access.
    """
    # A UserWarning is always emitted before the request is sent.
    irreversible_notice = "Hidden comments' content cannot be retrieved anymore. Hiding a comment is irreversible."
    warnings.warn(irreversible_notice, UserWarning)

    hide_resource = f"comment/{comment_id.lower()}/hide"
    response = self._post_discussion_changes(
        repo_id=repo_id,
        repo_type=repo_type,
        discussion_num=discussion_num,
        token=token,
        resource=hide_resource,
    )
    raw_comment = response.json()["updatedComment"]
    return deserialize_event(raw_comment)  # type: ignore
@validate_hf_hub_args
def add_space_secret(
    self,
    repo_id: str,
    key: str,
    value: str,
    *,
    description: Optional[str] = None,
    token: Union[bool, str, None] = None,
) -> None:
    """Add a secret to a Space, or update it.

    Secrets make it possible to provide secret keys or tokens to a Space without hardcoding them.
    For more details, see https://huggingface.co/docs/hub/spaces-overview#managing-secrets.

    Args:
        repo_id (`str`):
            ID of the repo to update. Example: `"bigcode/in-the-stack"`.
        key (`str`):
            Secret key. Example: `"GITHUB_API_KEY"`.
        value (`str`):
            Secret value. Example: `"your_github_api_key"`.
        description (`str`, *optional*):
            Secret description. Example: `"Github API key to access the Github API"`.
            Only sent when not `None`.
        token (`bool` or `str`, *optional*):
            User access token. When omitted, the locally saved token is used, which is the
            recommended way to authenticate (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            Pass `False` to disable authentication.
    """
    secret_payload = {"key": key, "value": value}
    if description is not None:
        secret_payload.update(description=description)

    session = get_session()
    response = session.post(
        f"{self.endpoint}/api/spaces/{repo_id}/secrets",
        headers=self._build_hf_headers(token=token),
        json=secret_payload,
    )
    hf_raise_for_status(response)
@validate_hf_hub_args
def delete_space_secret(self, repo_id: str, key: str, *, token: Union[bool, str, None] = None) -> None:
    """Remove a secret from a Space.

    Secrets make it possible to provide secret keys or tokens to a Space without hardcoding them.
    For more details, see https://huggingface.co/docs/hub/spaces-overview#managing-secrets.

    Args:
        repo_id (`str`):
            ID of the repo to update. Example: `"bigcode/in-the-stack"`.
        key (`str`):
            Secret key. Example: `"GITHUB_API_KEY"`.
        token (`bool` or `str`, *optional*):
            User access token. When omitted, the locally saved token is used, which is the
            recommended way to authenticate (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            Pass `False` to disable authentication.
    """
    session = get_session()
    # The key to delete travels in the JSON body of the DELETE request.
    response = session.delete(
        f"{self.endpoint}/api/spaces/{repo_id}/secrets",
        headers=self._build_hf_headers(token=token),
        json={"key": key},
    )
    hf_raise_for_status(response)
@validate_hf_hub_args
def get_space_variables(self, repo_id: str, *, token: Union[bool, str, None] = None) -> Dict[str, SpaceVariable]:
    """Fetch every variable defined on a Space.

    Variables make it possible to set environment variables on a Space without hardcoding them.
    For more details, see https://huggingface.co/docs/hub/spaces-overview#managing-secrets-and-environment-variables

    Args:
        repo_id (`str`):
            ID of the repo to query. Example: `"bigcode/in-the-stack"`.
        token (`bool` or `str`, *optional*):
            User access token. When omitted, the locally saved token is used, which is the
            recommended way to authenticate (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            Pass `False` to disable authentication.

    Returns:
        `Dict[str, SpaceVariable]`: one [`SpaceVariable`] per entry of the JSON response,
        indexed by the entry's key.
    """
    session = get_session()
    response = session.get(
        f"{self.endpoint}/api/spaces/{repo_id}/variables",
        headers=self._build_hf_headers(token=token),
    )
    hf_raise_for_status(response)

    variables: Dict[str, SpaceVariable] = {}
    for name, raw_variable in response.json().items():
        variables[name] = SpaceVariable(name, raw_variable)
    return variables
@validate_hf_hub_args
def add_space_variable(
    self,
    repo_id: str,
    key: str,
    value: str,
    *,
    description: Optional[str] = None,
    token: Union[bool, str, None] = None,
) -> Dict[str, SpaceVariable]:
    """Add a variable to a Space, or update it.

    Variables make it possible to set environment variables on a Space without hardcoding them.
    For more details, see https://huggingface.co/docs/hub/spaces-overview#managing-secrets-and-environment-variables

    Args:
        repo_id (`str`):
            ID of the repo to update. Example: `"bigcode/in-the-stack"`.
        key (`str`):
            Variable key. Example: `"MODEL_REPO_ID"`.
        value (`str`):
            Variable value. Example: `"the_model_repo_id"`.
        description (`str`, *optional*):
            Description of the variable. Example: `"Model Repo ID of the implemented model"`.
            Only sent when not `None`.
        token (`bool` or `str`, *optional*):
            User access token. When omitted, the locally saved token is used, which is the
            recommended way to authenticate (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            Pass `False` to disable authentication.

    Returns:
        `Dict[str, SpaceVariable]`: one [`SpaceVariable`] per entry of the JSON response,
        indexed by the entry's key.
    """
    variable_payload = {"key": key, "value": value}
    if description is not None:
        variable_payload.update(description=description)

    session = get_session()
    response = session.post(
        f"{self.endpoint}/api/spaces/{repo_id}/variables",
        headers=self._build_hf_headers(token=token),
        json=variable_payload,
    )
    hf_raise_for_status(response)

    variables: Dict[str, SpaceVariable] = {}
    for name, raw_variable in response.json().items():
        variables[name] = SpaceVariable(name, raw_variable)
    return variables
@validate_hf_hub_args
def delete_space_variable(
    self, repo_id: str, key: str, *, token: Union[bool, str, None] = None
) -> Dict[str, SpaceVariable]:
    """Remove a variable from a Space.

    Variables make it possible to set environment variables on a Space without hardcoding them.
    For more details, see https://huggingface.co/docs/hub/spaces-overview#managing-secrets-and-environment-variables

    Args:
        repo_id (`str`):
            ID of the repo to update. Example: `"bigcode/in-the-stack"`.
        key (`str`):
            Variable key. Example: `"MODEL_REPO_ID"`.
        token (`bool` or `str`, *optional*):
            User access token. When omitted, the locally saved token is used, which is the
            recommended way to authenticate (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            Pass `False` to disable authentication.

    Returns:
        `Dict[str, SpaceVariable]`: one [`SpaceVariable`] per entry of the JSON response,
        indexed by the entry's key.
    """
    session = get_session()
    # The key to delete travels in the JSON body of the DELETE request.
    response = session.delete(
        f"{self.endpoint}/api/spaces/{repo_id}/variables",
        headers=self._build_hf_headers(token=token),
        json={"key": key},
    )
    hf_raise_for_status(response)

    variables: Dict[str, SpaceVariable] = {}
    for name, raw_variable in response.json().items():
        variables[name] = SpaceVariable(name, raw_variable)
    return variables
@validate_hf_hub_args
def get_space_runtime(self, repo_id: str, *, token: Union[bool, str, None] = None) -> SpaceRuntime:
    """Retrieve runtime information about a Space.

    Args:
        repo_id (`str`):
            ID of the Space to query. Example: `"bigcode/in-the-stack"`.
        token (`bool` or `str`, *optional*):
            User access token. When omitted, the locally saved token is used, which is the
            recommended way to authenticate (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            Pass `False` to disable authentication.

    Returns:
        [`SpaceRuntime`]: Runtime information about a Space including Space stage and hardware.
    """
    session = get_session()
    runtime_url = f"{self.endpoint}/api/spaces/{repo_id}/runtime"
    response = session.get(runtime_url, headers=self._build_hf_headers(token=token))
    hf_raise_for_status(response)
    runtime_data = response.json()
    return SpaceRuntime(runtime_data)
@validate_hf_hub_args
def request_space_hardware(
    self,
    repo_id: str,
    hardware: SpaceHardware,
    *,
    token: Union[bool, str, None] = None,
    sleep_time: Optional[int] = None,
) -> SpaceRuntime:
    """Ask for a Space to be run on different hardware.

    Args:
        repo_id (`str`):
            ID of the repo to update. Example: `"bigcode/in-the-stack"`.
        hardware (`str` or [`SpaceHardware`]):
            Hardware on which to run the Space. Example: `"t4-medium"`.
        token (`bool` or `str`, *optional*):
            User access token. When omitted, the locally saved token is used, which is the
            recommended way to authenticate (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            Pass `False` to disable authentication.
        sleep_time (`int`, *optional*):
            Number of seconds of inactivity to wait before a Space is put to sleep. Set to `-1` if you don't want
            your Space to sleep (default behavior for upgraded hardware). For free hardware, you can't configure
            the sleep time (value is fixed to 48 hours of inactivity).
            See https://huggingface.co/docs/hub/spaces-gpus#sleep-time for more details.

    Returns:
        [`SpaceRuntime`]: Runtime information about a Space including Space stage and hardware.

    > [!TIP]
    > It is also possible to request hardware directly when creating the Space repo! See [`create_repo`] for details.
    """
    wants_custom_sleep = sleep_time is not None

    # Warn (but still send the request) when a sleep time is combined with 'cpu-basic'.
    if wants_custom_sleep and hardware == SpaceHardware.CPU_BASIC:
        cpu_basic_notice = (
            "If your Space runs on the default 'cpu-basic' hardware, it will go to sleep if inactive for more"
            " than 48 hours. This value is not configurable. If you don't want your Space to deactivate or if"
            " you want to set a custom sleep time, you need to upgrade to a paid Hardware."
        )
        warnings.warn(cpu_basic_notice, UserWarning)

    request_body: Dict[str, Any] = {"flavor": hardware}
    if wants_custom_sleep:
        request_body["sleepTimeSeconds"] = sleep_time

    session = get_session()
    response = session.post(
        f"{self.endpoint}/api/spaces/{repo_id}/hardware",
        headers=self._build_hf_headers(token=token),
        json=request_body,
    )
    hf_raise_for_status(response)
    return SpaceRuntime(response.json())
@validate_hf_hub_args
def set_space_sleep_time(
    self, repo_id: str, sleep_time: int, *, token: Union[bool, str, None] = None
) -> SpaceRuntime:
    """Configure a custom sleep time for a Space running on upgraded hardware.

    Your Space will go to sleep after X seconds of inactivity. You are not billed when your Space is in "sleep"
    mode. If a new visitor lands on your Space, it will "wake it up". Only upgraded hardware can have a
    configurable sleep time. To know more about the sleep stage, please refer to
    https://huggingface.co/docs/hub/spaces-gpus#sleep-time.

    Args:
        repo_id (`str`):
            ID of the repo to update. Example: `"bigcode/in-the-stack"`.
        sleep_time (`int`):
            Number of seconds of inactivity to wait before a Space is put to sleep. Set to `-1` if you don't want
            your Space to pause (default behavior for upgraded hardware). For free hardware, you can't configure
            the sleep time (value is fixed to 48 hours of inactivity).
            See https://huggingface.co/docs/hub/spaces-gpus#sleep-time for more details.
        token (`bool` or `str`, *optional*):
            User access token. When omitted, the locally saved token is used, which is the
            recommended way to authenticate (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            Pass `False` to disable authentication.

    Returns:
        [`SpaceRuntime`]: Runtime information about a Space including Space stage and hardware.

    > [!TIP]
    > It is also possible to set a custom sleep time when requesting hardware with [`request_space_hardware`].
    """
    session = get_session()
    response = session.post(
        f"{self.endpoint}/api/spaces/{repo_id}/sleeptime",
        headers=self._build_hf_headers(token=token),
        json={"seconds": sleep_time},
    )
    hf_raise_for_status(response)
    runtime = SpaceRuntime(response.json())

    # The requested hardware takes precedence over the current one when deciding whether to warn.
    effective_hardware = runtime.requested_hardware or runtime.hardware
    if effective_hardware == SpaceHardware.CPU_BASIC:
        cpu_basic_notice = (
            "If your Space runs on the default 'cpu-basic' hardware, it will go to sleep if inactive for more"
            " than 48 hours. This value is not configurable. If you don't want your Space to deactivate or if"
            " you want to set a custom sleep time, you need to upgrade to a paid Hardware."
        )
        warnings.warn(cpu_basic_notice, UserWarning)
    return runtime
@validate_hf_hub_args
def pause_space(self, repo_id: str, *, token: Union[bool, str, None] = None) -> SpaceRuntime:
    """Put your Space on pause.

    A paused Space stops executing until manually restarted by its owner. This is different from the sleeping
    state in which free Spaces go after 48h of inactivity. Paused time is not billed to your account, no matter the
    hardware you've selected. To restart your Space, use [`restart_space`] or go to your Space settings page.

    For more details, please visit [the docs](https://huggingface.co/docs/hub/spaces-gpus#pause).

    Args:
        repo_id (`str`):
            ID of the Space to pause. Example: `"Salesforce/BLIP2"`.
        token (`bool` or `str`, *optional*):
            User access token. When omitted, the locally saved token is used, which is the
            recommended way to authenticate (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            Pass `False` to disable authentication.

    Returns:
        [`SpaceRuntime`]: Runtime information about your Space including `stage=PAUSED` and requested hardware.

    Raises:
        [`~utils.RepositoryNotFoundError`]:
            If your Space is not found (error 404). Most probably wrong repo_id or your space is private but you
            are not authenticated.
        [`~utils.HfHubHTTPError`]:
            403 Forbidden: only the owner of a Space can pause it. If you want to manage a Space that you don't
            own, either ask the owner by opening a Discussion or duplicate the Space.
        [`~utils.BadRequestError`]:
            If your Space is a static Space. Static Spaces are always running and never billed. If you want to hide
            a static Space, you can set it to private.
    """
    session = get_session()
    pause_url = f"{self.endpoint}/api/spaces/{repo_id}/pause"
    response = session.post(pause_url, headers=self._build_hf_headers(token=token))
    hf_raise_for_status(response)
    runtime_data = response.json()
    return SpaceRuntime(runtime_data)
@validate_hf_hub_args
def restart_space(
    self, repo_id: str, *, token: Union[bool, str, None] = None, factory_reboot: bool = False
) -> SpaceRuntime:
    """Restart your Space.

    This is the only way to programmatically restart a Space if you've put it on Pause (see [`pause_space`]). You
    must be the owner of the Space to restart it. If you are using an upgraded hardware, your account will be
    billed as soon as the Space is restarted. You can trigger a restart no matter the current state of a Space.

    For more details, please visit [the docs](https://huggingface.co/docs/hub/spaces-gpus#pause).

    Args:
        repo_id (`str`):
            ID of the Space to restart. Example: `"Salesforce/BLIP2"`.
        token (`bool` or `str`, *optional*):
            User access token. When omitted, the locally saved token is used, which is the
            recommended way to authenticate (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            Pass `False` to disable authentication.
        factory_reboot (`bool`, *optional*, defaults to `False`):
            If `True`, the Space will be rebuilt from scratch without caching any requirements.

    Returns:
        [`SpaceRuntime`]: Runtime information about your Space.

    Raises:
        [`~utils.RepositoryNotFoundError`]:
            If your Space is not found (error 404). Most probably wrong repo_id or your space is private but you
            are not authenticated.
        [`~utils.HfHubHTTPError`]:
            403 Forbidden: only the owner of a Space can restart it. If you want to restart a Space that you don't
            own, either ask the owner by opening a Discussion or duplicate the Space.
        [`~utils.BadRequestError`]:
            If your Space is a static Space. Static Spaces are always running and never billed. If you want to hide
            a static Space, you can set it to private.
    """
    # A factory reboot is requested through the `factory=true` query parameter.
    query = {"factory": "true"} if factory_reboot else {}

    session = get_session()
    restart_url = f"{self.endpoint}/api/spaces/{repo_id}/restart"
    response = session.post(restart_url, headers=self._build_hf_headers(token=token), params=query)
    hf_raise_for_status(response)
    return SpaceRuntime(response.json())
@validate_hf_hub_args
def duplicate_space(
    self,
    from_id: str,
    to_id: Optional[str] = None,
    *,
    private: Optional[bool] = None,
    token: Union[bool, str, None] = None,
    exist_ok: bool = False,
    hardware: Optional[SpaceHardware] = None,
    storage: Optional[SpaceStorage] = None,
    sleep_time: Optional[int] = None,
    secrets: Optional[List[Dict[str, str]]] = None,
    variables: Optional[List[Dict[str, str]]] = None,
) -> RepoUrl:
    """Create a copy of an existing Space.

    Programmatically duplicate a Space. The new Space will be created in your account and will be in the same state
    as the original Space (running or paused). You can duplicate a Space no matter the current state of a Space.

    Args:
        from_id (`str`):
            ID of the Space to duplicate. Example: `"pharma/CLIP-Interrogator"`.
        to_id (`str`, *optional*):
            ID of the new Space. Example: `"dog/CLIP-Interrogator"`. If not provided, the new Space will have the same
            name as the original Space, but in your account. If provided without a namespace, the namespace
            defaults to the `"name"` returned by `whoami`.
        private (`bool`, *optional*):
            Whether the new Space should be private or not. Defaults to the same privacy as the original Space.
        token (`bool` or `str`, *optional*):
            User access token. When omitted, the locally saved token is used, which is the
            recommended way to authenticate (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            Pass `False` to disable authentication.
        exist_ok (`bool`, *optional*, defaults to `False`):
            If `True`, do not raise an error if repo already exists (HTTP 409).
        hardware (`SpaceHardware` or `str`, *optional*):
            Choice of Hardware. Example: `"t4-medium"`. See [`SpaceHardware`] for a complete list.
        storage (`SpaceStorage` or `str`, *optional*):
            Choice of persistent storage tier. Example: `"small"`. See [`SpaceStorage`] for a complete list.
        sleep_time (`int`, *optional*):
            Number of seconds of inactivity to wait before a Space is put to sleep. Set to `-1` if you don't want
            your Space to sleep (default behavior for upgraded hardware). For free hardware, you can't configure
            the sleep time (value is fixed to 48 hours of inactivity).
            See https://huggingface.co/docs/hub/spaces-gpus#sleep-time for more details.
        secrets (`List[Dict[str, str]]`, *optional*):
            A list of secret keys to set in your Space. Each item is in the form
            `{"key": ..., "value": ..., "description": ...}` where description is optional.
            For more details, see https://huggingface.co/docs/hub/spaces-overview#managing-secrets.
        variables (`List[Dict[str, str]]`, *optional*):
            A list of public environment variables to set in your Space. Each item is in the form
            `{"key": ..., "value": ..., "description": ...}` where description is optional.
            For more details, see https://huggingface.co/docs/hub/spaces-overview#managing-secrets-and-environment-variables.

    Returns:
        [`RepoUrl`]: URL to the newly created repo. Value is a subclass of `str` containing
        attributes like `endpoint`, `repo_type` and `repo_id`.

    Raises:
        [`~utils.RepositoryNotFoundError`]:
            If one of `from_id` or `to_id` cannot be found. This may be because it doesn't exist,
            or because it is set to `private` and you do not have access.
        [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError):
            If the HuggingFace API returned an error

    Example:
        ```python
        >>> from huggingface_hub import duplicate_space

        # Duplicate a Space to your account
        >>> duplicate_space("multimodalart/dreambooth-training")
        RepoUrl('https://huggingface.co/spaces/nateraw/dreambooth-training',...)

        # Can set custom destination id and visibility flag.
        >>> duplicate_space("multimodalart/dreambooth-training", to_id="my-dreambooth", private=True)
        RepoUrl('https://huggingface.co/spaces/nateraw/my-dreambooth',...)
        ```
    """
    # Work out the destination namespace and repo name.
    if to_id is None:
        # No destination given: current user's namespace, source Space's repo name.
        to_namespace = self.whoami(token)["name"]
        to_repo_name = RepoUrl(from_id).repo_name
    else:
        destination = RepoUrl(to_id)
        if destination.namespace is not None:
            to_namespace = destination.namespace
        else:
            # Destination without a namespace: fall back to the current user's name.
            to_namespace = self.whoami(token)["name"]
        to_repo_name = destination.repo_name

    # repository must be a valid repo_id (namespace/repo_name).
    payload: Dict[str, Any] = {"repository": f"{to_namespace}/{to_repo_name}"}

    # Optional settings are forwarded only when the caller set them.
    optional_settings = {
        "private": private,
        "hardware": hardware,
        "storageTier": storage,
        "sleepTimeSeconds": sleep_time,
        "secrets": secrets,
        "variables": variables,
    }
    for field_name, field_value in optional_settings.items():
        if field_value is not None:
            payload[field_name] = field_value

    if sleep_time is not None and hardware == SpaceHardware.CPU_BASIC:
        cpu_basic_notice = (
            "If your Space runs on the default 'cpu-basic' hardware, it will go to sleep if inactive for more"
            " than 48 hours. This value is not configurable. If you don't want your Space to deactivate or if"
            " you want to set a custom sleep time, you need to upgrade to a paid Hardware."
        )
        warnings.warn(cpu_basic_notice, UserWarning)

    session = get_session()
    response = session.post(
        f"{self.endpoint}/api/spaces/{from_id}/duplicate",
        headers=self._build_hf_headers(token=token),
        json=payload,
    )

    try:
        hf_raise_for_status(response)
    except HTTPError as err:
        # A 409 is tolerated only when the caller opted in with `exist_ok=True`.
        conflict_tolerated = exist_ok and err.response.status_code == 409
        if not conflict_tolerated:
            raise

    return RepoUrl(response.json()["url"], endpoint=self.endpoint)
@validate_hf_hub_args
def request_space_storage(
    self,
    repo_id: str,
    storage: SpaceStorage,
    *,
    token: Union[bool, str, None] = None,
) -> SpaceRuntime:
    """Ask for persistent storage to be attached to a Space.

    Args:
        repo_id (`str`):
            ID of the Space to update. Example: `"open-llm-leaderboard/open_llm_leaderboard"`.
        storage (`str` or [`SpaceStorage`]):
            Storage tier. Either 'small', 'medium', or 'large'.
        token (`bool` or `str`, *optional*):
            User access token. When omitted, the locally saved token is used, which is the
            recommended way to authenticate (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            Pass `False` to disable authentication.

    Returns:
        [`SpaceRuntime`]: Runtime information about a Space including Space stage and hardware.

    > [!TIP]
    > It is not possible to decrease persistent storage after it's granted. To do so, you must delete it
    > via [`delete_space_storage`].
    """
    tier_request: Dict[str, SpaceStorage] = {"tier": storage}
    session = get_session()
    response = session.post(
        f"{self.endpoint}/api/spaces/{repo_id}/storage",
        headers=self._build_hf_headers(token=token),
        json=tier_request,
    )
    hf_raise_for_status(response)
    runtime_data = response.json()
    return SpaceRuntime(runtime_data)
@validate_hf_hub_args
def delete_space_storage(
    self,
    repo_id: str,
    *,
    token: Union[bool, str, None] = None,
) -> SpaceRuntime:
    """Remove the persistent storage of a Space.

    Args:
        repo_id (`str`):
            ID of the Space to update. Example: `"open-llm-leaderboard/open_llm_leaderboard"`.
        token (`bool` or `str`, *optional*):
            User access token. When omitted, the locally saved token is used, which is the
            recommended way to authenticate (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            Pass `False` to disable authentication.

    Returns:
        [`SpaceRuntime`]: Runtime information about a Space including Space stage and hardware.

    Raises:
        [`BadRequestError`]
            If space has no persistent storage.
    """
    session = get_session()
    storage_url = f"{self.endpoint}/api/spaces/{repo_id}/storage"
    response = session.delete(storage_url, headers=self._build_hf_headers(token=token))
    hf_raise_for_status(response)
    runtime_data = response.json()
    return SpaceRuntime(runtime_data)
- #######################
- # Inference Endpoints #
- #######################
def list_inference_endpoints(
    self, namespace: Optional[str] = None, *, token: Union[bool, str, None] = None
) -> List[InferenceEndpoint]:
    """Lists all inference endpoints for the given namespace.

    Args:
        namespace (`str`, *optional*):
            The namespace to list endpoints for. Defaults to the current user. Set to `"*"` to list all endpoints
            from all namespaces (i.e. personal namespace and all orgs the user belongs to).
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        List[`InferenceEndpoint`]: A list of all inference endpoints for the given namespace.
        With `namespace="*"`, personal endpoints come first, followed by the endpoints of each
        organization in the order reported by `whoami`. Organizations whose listing fails with
        an HTTP error are skipped.

    Example:
        ```python
        >>> from huggingface_hub import HfApi
        >>> api = HfApi()
        >>> api.list_inference_endpoints()
        [InferenceEndpoint(name='my-endpoint', ...), ...]
        ```
    """
    # Special case: list all endpoints for all namespaces the user has access to
    if namespace == "*":
        user = self.whoami(token=token)

        # List personal endpoints first. Go through `self` and forward `token` so the
        # request uses this instance's configuration and the caller's credentials.
        endpoints: List[InferenceEndpoint] = self.list_inference_endpoints(
            namespace=self._get_namespace(token=token), token=token
        )

        # Then list endpoints for all orgs the user belongs to. Listing is best-effort:
        # an org that cannot be listed is skipped rather than failing the whole call.
        for org in user.get("orgs", []):
            try:
                endpoints += self.list_inference_endpoints(namespace=org["name"], token=token)
            except HfHubHTTPError as error:
                if error.response.status_code == 401:  # Either no billing or user don't have access)
                    logger.debug("Cannot list Inference Endpoints for org '%s': %s", org["name"], error)
                else:
                    # Unexpected failures are still skipped, but leave a visible trace.
                    logger.warning(
                        "Skipping Inference Endpoints for org '%s' after unexpected error: %s", org["name"], error
                    )

        return endpoints

    # Normal case: list endpoints for a specific namespace
    namespace = namespace or self._get_namespace(token=token)

    response = get_session().get(
        f"{constants.INFERENCE_ENDPOINTS_ENDPOINT}/endpoint/{namespace}",
        headers=self._build_hf_headers(token=token),
    )
    hf_raise_for_status(response)

    # Each entry of the response's "items" list becomes one InferenceEndpoint.
    return [
        InferenceEndpoint.from_raw(endpoint, namespace=namespace, token=token)
        for endpoint in response.json()["items"]
    ]
def create_inference_endpoint(
    self,
    name: str,
    *,
    repository: str,
    framework: str,
    accelerator: str,
    instance_size: str,
    instance_type: str,
    region: str,
    vendor: str,
    account_id: Optional[str] = None,
    min_replica: int = 1,
    max_replica: int = 1,
    scale_to_zero_timeout: Optional[int] = None,
    revision: Optional[str] = None,
    task: Optional[str] = None,
    custom_image: Optional[Dict] = None,
    env: Optional[Dict[str, str]] = None,
    secrets: Optional[Dict[str, str]] = None,
    type: InferenceEndpointType = InferenceEndpointType.PROTECTED,
    domain: Optional[str] = None,
    path: Optional[str] = None,
    cache_http_responses: Optional[bool] = None,
    tags: Optional[List[str]] = None,
    namespace: Optional[str] = None,
    token: Union[bool, str, None] = None,
) -> InferenceEndpoint:
    """Create a new Inference Endpoint.

    Args:
        name (`str`):
            The unique name for the new Inference Endpoint.
        repository (`str`):
            The name of the model repository associated with the Inference Endpoint (e.g. `"gpt2"`).
        framework (`str`):
            The machine learning framework used for the model (e.g. `"custom"`).
        accelerator (`str`):
            The hardware accelerator to be used for inference (e.g. `"cpu"`).
        instance_size (`str`):
            The size or type of the instance to be used for hosting the model (e.g. `"x4"`).
        instance_type (`str`):
            The cloud instance type where the Inference Endpoint will be deployed (e.g. `"intel-icl"`).
        region (`str`):
            The cloud region in which the Inference Endpoint will be created (e.g. `"us-east-1"`).
        vendor (`str`):
            The cloud provider or vendor where the Inference Endpoint will be hosted (e.g. `"aws"`).
        account_id (`str`, *optional*):
            The account ID used to link a VPC to a private Inference Endpoint (if applicable).
        min_replica (`int`, *optional*):
            The minimum number of replicas (instances) to keep running for the Inference Endpoint. To enable
            scaling to zero, set this value to 0 and adjust `scale_to_zero_timeout` accordingly. Defaults to 1.
        max_replica (`int`, *optional*):
            The maximum number of replicas (instances) to scale to for the Inference Endpoint. Defaults to 1.
        scale_to_zero_timeout (`int`, *optional*):
            The duration in minutes before an inactive endpoint is scaled to zero, or no scaling to zero if
            set to None and `min_replica` is not 0. Defaults to None.
        revision (`str`, *optional*):
            The specific model revision to deploy on the Inference Endpoint (e.g. `"6c0e6080953db56375760c0471a8c5f2929baf11"`).
        task (`str`, *optional*):
            The task on which to deploy the model (e.g. `"text-classification"`).
        custom_image (`Dict`, *optional*):
            A custom Docker image to use for the Inference Endpoint. This is useful if you want to deploy an
            Inference Endpoint running on the `text-generation-inference` (TGI) framework (see examples).
            If the first key of the dict is one of `constants.INFERENCE_ENDPOINT_IMAGE_KEYS`, the dict is sent
            as-is; otherwise it is wrapped as `{"custom": custom_image}`.
        env (`Dict[str, str]`, *optional*):
            Non-secret environment variables to inject in the container environment.
        secrets (`Dict[str, str]`, *optional*):
            Secret values to inject in the container environment.
        type ([`InferenceEndpointType`], *optional*):
            The type of the Inference Endpoint, which can be `"protected"` (default), `"public"` or `"private"`.
        domain (`str`, *optional*):
            The custom domain for the Inference Endpoint deployment. If set, the Inference Endpoint will be
            available at this domain (e.g. `"my-new-domain.cool-website.woof"`).
        path (`str`, *optional*):
            The custom path to the deployed model, should start with a `/` (e.g. `"/models/google-bert/bert-base-uncased"`).
        cache_http_responses (`bool`, *optional*):
            Whether to cache HTTP responses from the Inference Endpoint. Only sent to the server when not `None`.
        tags (`List[str]`, *optional*):
            A list of tags to associate with the Inference Endpoint.
        namespace (`str`, *optional*):
            The namespace where the Inference Endpoint will be created. Defaults to the current user's namespace.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        [`InferenceEndpoint`]: information about the newly created Inference Endpoint.

    Example:
    ```python
    >>> from huggingface_hub import HfApi
    >>> api = HfApi()
    >>> endpoint = api.create_inference_endpoint(
    ...     "my-endpoint-name",
    ...     repository="gpt2",
    ...     framework="pytorch",
    ...     task="text-generation",
    ...     accelerator="cpu",
    ...     vendor="aws",
    ...     region="us-east-1",
    ...     type="protected",
    ...     instance_size="x2",
    ...     instance_type="intel-icl",
    ... )
    >>> endpoint
    InferenceEndpoint(name='my-endpoint-name', status="pending",...)

    # Run inference on the endpoint
    >>> endpoint.client.text_generation(...)
    "..."
    ```

    ```python
    # Start an Inference Endpoint running Zephyr-7b-beta on TGI
    >>> from huggingface_hub import HfApi
    >>> api = HfApi()
    >>> endpoint = api.create_inference_endpoint(
    ...     "aws-zephyr-7b-beta-0486",
    ...     repository="HuggingFaceH4/zephyr-7b-beta",
    ...     framework="pytorch",
    ...     task="text-generation",
    ...     accelerator="gpu",
    ...     vendor="aws",
    ...     region="us-east-1",
    ...     type="protected",
    ...     instance_size="x1",
    ...     instance_type="nvidia-a10g",
    ...     env={
    ...         "MAX_BATCH_PREFILL_TOKENS": "2048",
    ...         "MAX_INPUT_LENGTH": "1024",
    ...         "MAX_TOTAL_TOKENS": "1512",
    ...         "MODEL_ID": "/repository"
    ...     },
    ...     custom_image={
    ...         "health_route": "/health",
    ...         "url": "ghcr.io/huggingface/text-generation-inference:1.1.0",
    ...     },
    ...     secrets={"MY_SECRET_KEY": "secret_value"},
    ...     tags=["dev", "text-generation"],
    ... )
    ```

    ```python
    # Start an Inference Endpoint running ProsusAI/finbert while scaling to zero in 15 minutes
    >>> from huggingface_hub import HfApi
    >>> api = HfApi()
    >>> endpoint = api.create_inference_endpoint(
    ...     "finbert-classifier",
    ...     repository="ProsusAI/finbert",
    ...     framework="pytorch",
    ...     task="text-classification",
    ...     min_replica=0,
    ...     scale_to_zero_timeout=15,
    ...     accelerator="cpu",
    ...     vendor="aws",
    ...     region="us-east-1",
    ...     type="protected",
    ...     instance_size="x2",
    ...     instance_type="intel-icl",
    ... )
    >>> endpoint.wait(timeout=300)

    # Run inference on the endpoint
    >>> endpoint.client.text_classification(...)
    TextClassificationOutputElement(label='positive', score=0.8983615040779114)
    ```
    """
    namespace = namespace or self._get_namespace(token=token)

    if custom_image is not None:
        # A dict whose first key is a known image kind is already in the API format; anything else is
        # treated as the content of a "custom" image. The `None` default keeps an empty dict from
        # leaking a bare `StopIteration` to the caller.
        first_key = next(iter(custom_image), None)
        image = (
            custom_image if first_key in constants.INFERENCE_ENDPOINT_IMAGE_KEYS else {"custom": custom_image}
        )
    else:
        image = {"huggingface": {}}

    payload: Dict = {
        "accountId": account_id,
        "compute": {
            "accelerator": accelerator,
            "instanceSize": instance_size,
            "instanceType": instance_type,
            "scaling": {
                "maxReplica": max_replica,
                "minReplica": min_replica,
                "scaleToZeroTimeout": scale_to_zero_timeout,
            },
        },
        "model": {
            "framework": framework,
            "repository": repository,
            "revision": revision,
            "task": task,
            "image": image,
        },
        "name": name,
        "provider": {
            "region": region,
            "vendor": vendor,
        },
        "type": type,
    }

    # Optional sections are only added when the caller supplied them.
    if env:
        payload["model"]["env"] = env
    if secrets:
        payload["model"]["secrets"] = secrets
    if domain is not None or path is not None:
        payload["route"] = {}
        if domain is not None:
            payload["route"]["domain"] = domain
        if path is not None:
            payload["route"]["path"] = path
    if cache_http_responses is not None:
        payload["cacheHttpResponses"] = cache_http_responses
    if tags is not None:
        payload["tags"] = tags

    response = get_session().post(
        f"{constants.INFERENCE_ENDPOINTS_ENDPOINT}/endpoint/{namespace}",
        headers=self._build_hf_headers(token=token),
        json=payload,
    )
    hf_raise_for_status(response)

    return InferenceEndpoint.from_raw(response.json(), namespace=namespace, token=token)
@experimental
@validate_hf_hub_args
def create_inference_endpoint_from_catalog(
    self,
    repo_id: str,
    *,
    name: Optional[str] = None,
    token: Union[bool, str, None] = None,
    namespace: Optional[str] = None,
) -> InferenceEndpoint:
    """Create a new Inference Endpoint from a model in the Hugging Face Inference Catalog.

    The goal of the Inference Catalog is to provide a curated list of models that are optimized for inference
    and for which default configurations have been tested. See https://endpoints.huggingface.co/catalog for a list
    of available models in the catalog.

    Args:
        repo_id (`str`):
            The ID of the model in the catalog to deploy as an Inference Endpoint.
        name (`str`, *optional*):
            The unique name for the new Inference Endpoint. If not provided, a random name will be generated.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
        namespace (`str`, *optional*):
            The namespace where the Inference Endpoint will be created. Defaults to the current user's namespace.

    Returns:
        [`InferenceEndpoint`]: information about the new Inference Endpoint.

    > [!WARNING]
    > `create_inference_endpoint_from_catalog` is experimental. Its API is subject to change in the future. Please provide feedback
    > if you have any suggestions or requests.
    """
    # A falsy `token` falls back to the instance token, then to the locally saved one.
    token = token or self.token or get_token()
    # Resolve the namespace once so the request and the returned object agree on it.
    namespace = namespace or self._get_namespace(token=token)

    payload: Dict = {
        "namespace": namespace,
        "repoId": repo_id,
    }
    if name is not None:
        payload["endpointName"] = name

    response = get_session().post(
        f"{constants.INFERENCE_CATALOG_ENDPOINT}/deploy",
        headers=self._build_hf_headers(token=token),
        json=payload,
    )
    hf_raise_for_status(response)
    data = response.json()["endpoint"]
    # Attach the namespace the endpoint was deployed to, as the sibling methods do; the endpoint's
    # own name (`data["name"]`) is not a namespace.
    return InferenceEndpoint.from_raw(data, namespace=namespace, token=token)
@experimental
@validate_hf_hub_args
def list_inference_catalog(self, *, token: Union[bool, str, None] = None) -> List[str]:
    """Return the IDs of the models offered by the Hugging Face Inference Catalog.

    The Inference Catalog is a curated list of models that are optimized for inference and for which
    default configurations have been tested. The available models can be browsed at
    https://endpoints.huggingface.co/catalog.

    A model from the catalog can be deployed with [`create_inference_endpoint_from_catalog`].

    Args:
        token (`bool` or `str`, *optional*):
            User access token. When omitted, the locally saved token is used, which is the recommended
            way to authenticate (see https://huggingface.co/docs/huggingface_hub/quick-start#authentication).

    Returns:
        List[`str`]: A list of model IDs available in the catalog.

    > [!WARNING]
    > `list_inference_catalog` is experimental. Its API is subject to change in the future. Please provide feedback
    > if you have any suggestions or requests.
    """
    session = get_session()
    catalog_url = f"{constants.INFERENCE_CATALOG_ENDPOINT}/repo-list"
    catalog_response = session.get(catalog_url, headers=self._build_hf_headers(token=token))
    hf_raise_for_status(catalog_response)
    # The model IDs are stored under the "models" key of the JSON body.
    catalog = catalog_response.json()
    return catalog["models"]
def get_inference_endpoint(
    self, name: str, *, namespace: Optional[str] = None, token: Union[bool, str, None] = None
) -> InferenceEndpoint:
    """Fetch the current description of an Inference Endpoint.

    Args:
        name (`str`):
            Name of the Inference Endpoint to look up.
        namespace (`str`, *optional*):
            Namespace the Inference Endpoint lives in. Defaults to the current user.
        token (`bool` or `str`, *optional*):
            User access token. When omitted, the locally saved token is used, which is the recommended
            way to authenticate (see https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            Pass `False` to disable authentication.

    Returns:
        [`InferenceEndpoint`]: information about the requested Inference Endpoint.

    Example:
    ```python
    >>> from huggingface_hub import HfApi
    >>> api = HfApi()
    >>> endpoint = api.get_inference_endpoint("my-text-to-image")
    >>> endpoint
    InferenceEndpoint(name='my-text-to-image', ...)

    # Get status
    >>> endpoint.status
    'running'
    >>> endpoint.url
    'https://my-text-to-image.region.vendor.endpoints.huggingface.cloud'

    # Run inference
    >>> endpoint.client.text_to_image(...)
    ```
    """
    if not namespace:
        # No namespace given: use the one of the authenticated user.
        namespace = self._get_namespace(token=token)

    session = get_session()
    endpoint_url = f"{constants.INFERENCE_ENDPOINTS_ENDPOINT}/endpoint/{namespace}/{name}"
    response = session.get(endpoint_url, headers=self._build_hf_headers(token=token))
    hf_raise_for_status(response)

    raw_endpoint = response.json()
    return InferenceEndpoint.from_raw(raw_endpoint, namespace=namespace, token=token)
def update_inference_endpoint(
    self,
    name: str,
    *,
    # Compute update
    accelerator: Optional[str] = None,
    instance_size: Optional[str] = None,
    instance_type: Optional[str] = None,
    min_replica: Optional[int] = None,
    max_replica: Optional[int] = None,
    scale_to_zero_timeout: Optional[int] = None,
    # Model update
    repository: Optional[str] = None,
    framework: Optional[str] = None,
    revision: Optional[str] = None,
    task: Optional[str] = None,
    custom_image: Optional[Dict] = None,
    env: Optional[Dict[str, str]] = None,
    secrets: Optional[Dict[str, str]] = None,
    # Route update
    domain: Optional[str] = None,
    path: Optional[str] = None,
    # Other
    cache_http_responses: Optional[bool] = None,
    tags: Optional[List[str]] = None,
    namespace: Optional[str] = None,
    token: Union[bool, str, None] = None,
) -> InferenceEndpoint:
    """Update an Inference Endpoint.

    The compute configuration, the deployed model, the route, or any combination of them can be updated
    in a single call. Every argument is optional, but at least one must be provided. Only the arguments
    that are not `None` are sent to the server.

    For convenience, you can also update an Inference Endpoint using [`InferenceEndpoint.update`].

    Args:
        name (`str`):
            The name of the Inference Endpoint to update.

        accelerator (`str`, *optional*):
            The hardware accelerator to be used for inference (e.g. `"cpu"`).
        instance_size (`str`, *optional*):
            The size or type of the instance to be used for hosting the model (e.g. `"x4"`).
        instance_type (`str`, *optional*):
            The cloud instance type where the Inference Endpoint will be deployed (e.g. `"intel-icl"`).
        min_replica (`int`, *optional*):
            The minimum number of replicas (instances) to keep running for the Inference Endpoint.
        max_replica (`int`, *optional*):
            The maximum number of replicas (instances) to scale to for the Inference Endpoint.
        scale_to_zero_timeout (`int`, *optional*):
            The duration in minutes before an inactive endpoint is scaled to zero.

        repository (`str`, *optional*):
            The name of the model repository associated with the Inference Endpoint (e.g. `"gpt2"`).
        framework (`str`, *optional*):
            The machine learning framework used for the model (e.g. `"custom"`).
        revision (`str`, *optional*):
            The specific model revision to deploy on the Inference Endpoint (e.g. `"6c0e6080953db56375760c0471a8c5f2929baf11"`).
        task (`str`, *optional*):
            The task on which to deploy the model (e.g. `"text-classification"`).
        custom_image (`Dict`, *optional*):
            A custom Docker image to use for the Inference Endpoint. This is useful if you want to deploy an
            Inference Endpoint running on the `text-generation-inference` (TGI) framework. The dict is sent
            to the server wrapped as `{"custom": custom_image}`.
        env (`Dict[str, str]`, *optional*):
            Non-secret environment variables to inject in the container environment.
        secrets (`Dict[str, str]`, *optional*):
            Secret values to inject in the container environment.

        domain (`str`, *optional*):
            The custom domain for the Inference Endpoint deployment. If set, the Inference Endpoint will be
            available at this domain (e.g. `"my-new-domain.cool-website.woof"`).
        path (`str`, *optional*):
            The custom path to the deployed model, should start with a `/` (e.g. `"/models/google-bert/bert-base-uncased"`).

        cache_http_responses (`bool`, *optional*):
            Whether to cache HTTP responses from the Inference Endpoint.
        tags (`List[str]`, *optional*):
            A list of tags to associate with the Inference Endpoint.

        namespace (`str`, *optional*):
            The namespace where the Inference Endpoint will be updated. Defaults to the current user's namespace.
        token (`bool` or `str`, *optional*):
            User access token. When omitted, the locally saved token is used, which is the recommended
            way to authenticate (see https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            Pass `False` to disable authentication.

    Returns:
        [`InferenceEndpoint`]: information about the updated Inference Endpoint.
    """
    if not namespace:
        namespace = self._get_namespace(token=token)

    # Nested sections are created lazily: a section only exists in the payload once one of its fields
    # has been assigned, so it must never be indexed for a value that is `None`.
    body: Dict = defaultdict(lambda: defaultdict(dict))

    compute_fields = (
        ("accelerator", accelerator),
        ("instanceSize", instance_size),
        ("instanceType", instance_type),
    )
    for key, value in compute_fields:
        if value is not None:
            body["compute"][key] = value

    scaling_fields = (
        ("maxReplica", max_replica),
        ("minReplica", min_replica),
        ("scaleToZeroTimeout", scale_to_zero_timeout),
    )
    for key, value in scaling_fields:
        if value is not None:
            body["compute"]["scaling"][key] = value

    model_fields = (
        ("repository", repository),
        ("framework", framework),
        ("revision", revision),
        ("task", task),
        ("image", None if custom_image is None else {"custom": custom_image}),
        ("env", env),
        ("secrets", secrets),
    )
    for key, value in model_fields:
        if value is not None:
            body["model"][key] = value

    for key, value in (("domain", domain), ("path", path)):
        if value is not None:
            body["route"][key] = value

    for key, value in (("cacheHttpResponses", cache_http_responses), ("tags", tags)):
        if value is not None:
            body[key] = value

    session = get_session()
    response = session.put(
        f"{constants.INFERENCE_ENDPOINTS_ENDPOINT}/endpoint/{namespace}/{name}",
        headers=self._build_hf_headers(token=token),
        json=body,
    )
    hf_raise_for_status(response)

    return InferenceEndpoint.from_raw(response.json(), namespace=namespace, token=token)
def delete_inference_endpoint(
    self, name: str, *, namespace: Optional[str] = None, token: Union[bool, str, None] = None
) -> None:
    """Permanently delete an Inference Endpoint.

    Deletion cannot be undone. To simply stop being charged for an Inference Endpoint, prefer pausing it
    with [`pause_inference_endpoint`] or scaling it to zero with [`scale_to_zero_inference_endpoint`].

    For convenience, you can also delete an Inference Endpoint using [`InferenceEndpoint.delete`].

    Args:
        name (`str`):
            Name of the Inference Endpoint to delete.
        namespace (`str`, *optional*):
            Namespace the Inference Endpoint lives in. Defaults to the current user.
        token (`bool` or `str`, *optional*):
            User access token. When omitted, the locally saved token is used, which is the recommended
            way to authenticate (see https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            Pass `False` to disable authentication.
    """
    if not namespace:
        namespace = self._get_namespace(token=token)

    session = get_session()
    endpoint_url = f"{constants.INFERENCE_ENDPOINTS_ENDPOINT}/endpoint/{namespace}/{name}"
    deletion = session.delete(endpoint_url, headers=self._build_hf_headers(token=token))
    # Nothing to return: only surface HTTP errors.
    hf_raise_for_status(deletion)
def pause_inference_endpoint(
    self, name: str, *, namespace: Optional[str] = None, token: Union[bool, str, None] = None
) -> InferenceEndpoint:
    """Pause an Inference Endpoint.

    A paused Inference Endpoint is not charged and can be resumed at any time with
    [`resume_inference_endpoint`]. Pausing differs from scaling to zero with
    [`scale_to_zero_inference_endpoint`]: an endpoint scaled to zero is automatically restarted when a
    request is made to it.

    For convenience, you can also pause an Inference Endpoint using [`InferenceEndpoint.pause`].

    Args:
        name (`str`):
            Name of the Inference Endpoint to pause.
        namespace (`str`, *optional*):
            Namespace the Inference Endpoint lives in. Defaults to the current user.
        token (`bool` or `str`, *optional*):
            User access token. When omitted, the locally saved token is used, which is the recommended
            way to authenticate (see https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            Pass `False` to disable authentication.

    Returns:
        [`InferenceEndpoint`]: information about the paused Inference Endpoint.
    """
    if not namespace:
        namespace = self._get_namespace(token=token)

    session = get_session()
    pause_url = f"{constants.INFERENCE_ENDPOINTS_ENDPOINT}/endpoint/{namespace}/{name}/pause"
    response = session.post(pause_url, headers=self._build_hf_headers(token=token))
    hf_raise_for_status(response)

    raw_endpoint = response.json()
    return InferenceEndpoint.from_raw(raw_endpoint, namespace=namespace, token=token)
def resume_inference_endpoint(
    self,
    name: str,
    *,
    namespace: Optional[str] = None,
    running_ok: bool = True,
    token: Union[bool, str, None] = None,
) -> InferenceEndpoint:
    """Resume an Inference Endpoint.

    For convenience, you can also resume an Inference Endpoint using [`InferenceEndpoint.resume`].

    Args:
        name (`str`):
            Name of the Inference Endpoint to resume.
        namespace (`str`, *optional*):
            Namespace the Inference Endpoint lives in. Defaults to the current user.
        running_ok (`bool`, *optional*):
            When `True` (the default), an Inference Endpoint that is already running does not cause an
            error: its current state is fetched and returned instead.
        token (`bool` or `str`, *optional*):
            User access token. When omitted, the locally saved token is used, which is the recommended
            way to authenticate (see https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            Pass `False` to disable authentication.

    Returns:
        [`InferenceEndpoint`]: information about the resumed Inference Endpoint.
    """
    if not namespace:
        namespace = self._get_namespace(token=token)

    session = get_session()
    resume_url = f"{constants.INFERENCE_ENDPOINTS_ENDPOINT}/endpoint/{namespace}/{name}/resume"
    response = session.post(resume_url, headers=self._build_hf_headers(token=token))

    try:
        hf_raise_for_status(response)
    except HfHubHTTPError as error:
        # Only a 400 response mentioning "already running" is tolerated, and only if the caller allows it.
        if not running_ok:
            raise
        failure = error.response
        if failure.status_code != 400 or "already running" not in failure.text:
            raise
        # Already running (and that is acceptable): return the endpoint's current state.
        return self.get_inference_endpoint(name, namespace=namespace, token=token)
    else:
        return InferenceEndpoint.from_raw(response.json(), namespace=namespace, token=token)
def scale_to_zero_inference_endpoint(
    self, name: str, *, namespace: Optional[str] = None, token: Union[bool, str, None] = None
) -> InferenceEndpoint:
    """Scale Inference Endpoint to zero.

    An Inference Endpoint scaled to zero is not charged. It will be resumed on the next request made to
    it, with a cold start delay. This differs from pausing the Inference Endpoint with
    [`pause_inference_endpoint`], which requires a manual resume with [`resume_inference_endpoint`].

    For convenience, you can also scale an Inference Endpoint to zero using [`InferenceEndpoint.scale_to_zero`].

    Args:
        name (`str`):
            Name of the Inference Endpoint to scale to zero.
        namespace (`str`, *optional*):
            Namespace the Inference Endpoint lives in. Defaults to the current user.
        token (`bool` or `str`, *optional*):
            User access token. When omitted, the locally saved token is used, which is the recommended
            way to authenticate (see https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            Pass `False` to disable authentication.

    Returns:
        [`InferenceEndpoint`]: information about the scaled-to-zero Inference Endpoint.
    """
    if not namespace:
        namespace = self._get_namespace(token=token)

    session = get_session()
    scale_url = f"{constants.INFERENCE_ENDPOINTS_ENDPOINT}/endpoint/{namespace}/{name}/scale-to-zero"
    response = session.post(scale_url, headers=self._build_hf_headers(token=token))
    hf_raise_for_status(response)

    raw_endpoint = response.json()
    return InferenceEndpoint.from_raw(raw_endpoint, namespace=namespace, token=token)
def _get_namespace(self, token: Union[bool, str, None] = None) -> str:
    """Return the name of the authenticated account, provided that account is a user.

    Raises:
        `ValueError`: if `whoami` reports an account whose type is not `"user"`.
    """
    account = self.whoami(token=token)
    # Guard clause: only a "user" account can serve as the default namespace.
    if account["type"] != "user":
        raise ValueError(
            "Cannot determine default namespace. You must provide a 'namespace' as input or be logged in as a"
            " user."
        )
    return account["name"]
- ########################
- # Collection Endpoints #
- ########################
@validate_hf_hub_args
def list_collections(
    self,
    *,
    owner: Union[List[str], str, None] = None,
    item: Union[List[str], str, None] = None,
    sort: Optional[Literal["lastModified", "trending", "upvotes"]] = None,
    limit: Optional[int] = None,
    token: Union[bool, str, None] = None,
) -> Iterable[Collection]:
    """Iterate over the collections of the Hugging Face Hub that match the given filters.

    > [!WARNING]
    > When listing collections, the item list per collection is truncated to 4 items maximum. To retrieve all items
    > from a collection, you must use [`get_collection`].

    Args:
        owner (`List[str]` or `str`, *optional*):
            Filter by owner's username.
        item (`List[str]` or `str`, *optional*):
            Filter collections containing particular items. Example: `"models/teknium/OpenHermes-2.5-Mistral-7B"`, `"datasets/squad"` or `"papers/2311.12983"`.
        sort (`Literal["lastModified", "trending", "upvotes"]`, *optional*):
            Sort collections by last modified, trending or upvotes.
        limit (`int`, *optional*):
            Maximum number of collections to be returned.
        token (`bool` or `str`, *optional*):
            User access token. When omitted, the locally saved token is used, which is the recommended
            way to authenticate (see https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            Pass `False` to disable authentication.

    Returns:
        `Iterable[Collection]`: an iterable of [`Collection`] objects.
    """
    collections_url = f"{self.endpoint}/api/collections"
    request_headers = self._build_hf_headers(token=token)

    # Only the filters that were actually provided become query parameters.
    candidates = (("owner", owner), ("item", item), ("sort", sort), ("limit", limit))
    query: Dict = {key: value for key, value in candidates if value is not None}

    results = paginate(collections_url, headers=request_headers, params=query)
    # Truncate the stream so that pages beyond `limit` are never requested.
    selected = results if limit is None else islice(results, limit)

    # `position` is the rank of the collection in the returned stream.
    for rank, raw_collection in enumerate(selected):
        yield Collection(position=rank, **raw_collection)
def get_collection(self, collection_slug: str, *, token: Union[bool, str, None] = None) -> Collection:
    """Retrieve a Collection from the Hub by its slug.

    Args:
        collection_slug (`str`):
            Slug of the collection on the Hub. Example: `"TheBloke/recent-models-64f9a55bb3115b4f513ec026"`.
        token (`bool` or `str`, *optional*):
            User access token. When omitted, the locally saved token is used, which is the recommended
            way to authenticate (see https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            Pass `False` to disable authentication.

    Returns: [`Collection`]

    Example:

    ```py
    >>> from huggingface_hub import get_collection
    >>> collection = get_collection("TheBloke/recent-models-64f9a55bb3115b4f513ec026")
    >>> collection.title
    'Recent models'
    >>> len(collection.items)
    37
    >>> collection.items[0]
    CollectionItem(
        item_object_id='651446103cd773a050bf64c2',
        item_id='TheBloke/U-Amethyst-20B-AWQ',
        item_type='model',
        position=88,
        note=None
    )
    ```
    """
    session = get_session()
    response = session.get(
        f"{self.endpoint}/api/collections/{collection_slug}",
        headers=self._build_hf_headers(token=token),
    )
    hf_raise_for_status(response)

    # Pass this client's endpoint along with the server data (it overrides any "endpoint" key in it).
    fields = {**response.json()}
    fields["endpoint"] = self.endpoint
    return Collection(**fields)
def create_collection(
    self,
    title: str,
    *,
    namespace: Optional[str] = None,
    description: Optional[str] = None,
    private: bool = False,
    exists_ok: bool = False,
    token: Union[bool, str, None] = None,
) -> Collection:
    """Create a new Collection on the Hub.

    Args:
        title (`str`):
            Title of the collection to create. Example: `"Recent models"`.
        namespace (`str`, *optional*):
            Namespace of the collection to create (username or org). When omitted, the name returned by
            `whoami` for the authenticated account is used.
        description (`str`, *optional*):
            Description of the collection to create.
        private (`bool`, *optional*):
            Whether the collection should be private or not. Defaults to `False` (i.e. public collection).
        exists_ok (`bool`, *optional*):
            If `True`, do not raise an error if collection already exists; the existing collection is
            fetched and returned instead.
        token (`bool` or `str`, *optional*):
            User access token. When omitted, the locally saved token is used, which is the recommended
            way to authenticate (see https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            Pass `False` to disable authentication.

    Returns: [`Collection`]

    Example:

    ```py
    >>> from huggingface_hub import create_collection
    >>> collection = create_collection(
    ...     title="ICCV 2023",
    ...     description="Portfolio of models, papers and demos I presented at ICCV 2023",
    ... )
    >>> collection.slug
    "username/iccv-2023-64f9a55bb3115b4f513ec026"
    ```
    """
    if namespace is None:
        namespace = self.whoami(token)["name"]

    # `description` is only sent when the caller provided one.
    request_body = {
        "title": title,
        "namespace": namespace,
        "private": private,
        **({} if description is None else {"description": description}),
    }

    session = get_session()
    response = session.post(
        f"{self.endpoint}/api/collections",
        headers=self._build_hf_headers(token=token),
        json=request_body,
    )

    try:
        hf_raise_for_status(response)
    except HTTPError as err:
        # Anything other than a tolerated "already exists" conflict (409) is re-raised untouched.
        if not exists_ok or err.response.status_code != 409:
            raise
        # The conflict response carries the slug of the existing collection.
        existing_slug = response.json()["slug"]
        return self.get_collection(existing_slug, token=token)

    created = {**response.json()}
    created["endpoint"] = self.endpoint
    return Collection(**created)
def update_collection_metadata(
    self,
    collection_slug: str,
    *,
    title: Optional[str] = None,
    description: Optional[str] = None,
    position: Optional[int] = None,
    private: Optional[bool] = None,
    theme: Optional[str] = None,
    token: Union[bool, str, None] = None,
) -> Collection:
    """Update metadata of a collection on the Hub.

    All arguments are optional. Only provided (non-`None`) metadata is sent to the API.

    Args:
        collection_slug (`str`):
            Slug of the collection to update. Example: `"TheBloke/recent-models-64f9a55bb3115b4f513ec026"`.
        title (`str`, *optional*):
            Title of the collection to update.
        description (`str`, *optional*):
            Description of the collection to update.
        position (`int`, *optional*):
            New position of the collection in the list of collections of the user.
        private (`bool`, *optional*):
            Whether the collection should be private or not.
        theme (`str`, *optional*):
            Theme of the collection on the Hub.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns: [`Collection`]

    Example:

    ```py
    >>> from huggingface_hub import update_collection_metadata
    >>> collection = update_collection_metadata(
    ...     collection_slug="username/iccv-2023-64f9a55bb3115b4f513ec026",
    ...     title="ICCV Oct. 2023",
    ...     description="Portfolio of models, datasets, papers and demos I presented at ICCV Oct. 2023",
    ...     private=False,
    ...     theme="pink",
    ... )
    >>> collection.slug
    "username/iccv-oct-2023-64f9a55bb3115b4f513ec026"
    # ^collection slug got updated but not the trailing ID
    ```
    """
    candidate_fields = {
        "position": position,
        "private": private,
        "theme": theme,
        "title": title,
        "description": description,
    }
    # Only send not-none values to the API
    changes = {name: new_value for name, new_value in candidate_fields.items() if new_value is not None}

    response = get_session().patch(
        f"{self.endpoint}/api/collections/{collection_slug}",
        headers=self._build_hf_headers(token=token),
        json=changes,
    )
    hf_raise_for_status(response)

    # The updated collection is nested under the "data" key of the response.
    updated = response.json()["data"]
    return Collection(**{**updated, "endpoint": self.endpoint})
def delete_collection(
    self, collection_slug: str, *, missing_ok: bool = False, token: Union[bool, str, None] = None
) -> None:
    """Delete a collection on the Hub.

    Args:
        collection_slug (`str`):
            Slug of the collection to delete. Example: `"TheBloke/recent-models-64f9a55bb3115b4f513ec026"`.
        missing_ok (`bool`, *optional*):
            If `True`, do not raise an error if the collection doesn't exist (HTTP 404).
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Example:

    ```py
    >>> from huggingface_hub import delete_collection
    >>> delete_collection("username/useless-collection-64f9a55bb3115b4f513ec026", missing_ok=True)
    ```

    > [!WARNING]
    > This is a non-revertible action. A deleted collection cannot be restored.
    """
    response = get_session().delete(
        f"{self.endpoint}/api/collections/{collection_slug}",
        headers=self._build_hf_headers(token=token),
    )
    try:
        hf_raise_for_status(response)
    except HTTPError as err:
        # A 404 is swallowed only when the caller opted in with `missing_ok=True`.
        if not (missing_ok and err.response.status_code == 404):
            raise
def add_collection_item(
    self,
    collection_slug: str,
    item_id: str,
    item_type: CollectionItemType_T,
    *,
    note: Optional[str] = None,
    exists_ok: bool = False,
    token: Union[bool, str, None] = None,
) -> Collection:
    """Add an item to a collection on the Hub.

    Args:
        collection_slug (`str`):
            Slug of the collection to update. Example: `"TheBloke/recent-models-64f9a55bb3115b4f513ec026"`.
        item_id (`str`):
            ID of the item to add to the collection. It can be the ID of a repo on the Hub (e.g. `"facebook/bart-large-mnli"`)
            or a paper id (e.g. `"2307.09288"`).
        item_type (`str`):
            Type of the item to add. Can be one of `"model"`, `"dataset"`, `"space"` or `"paper"`.
        note (`str`, *optional*):
            A note to attach to the item in the collection. The maximum size for a note is 500 characters.
        exists_ok (`bool`, *optional*):
            If `True`, do not raise an error if item already exists (HTTP 409); the current
            collection is fetched and returned instead.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns: [`Collection`]

    Raises:
        [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError):
            HTTP 403 if you only have read-only access to the repo. This can be the case if you don't have `write`
            or `admin` role in the organization the repo belongs to or if you passed a `read` token.
        [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError):
            HTTP 404 if the item you try to add to the collection does not exist on the Hub.
        [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError):
            HTTP 409 if the item you try to add to the collection is already in the collection (and exists_ok=False)

    Example:

    ```py
    >>> from huggingface_hub import add_collection_item
    >>> collection = add_collection_item(
    ...     collection_slug="davanstrien/climate-64f99dc2a5067f6b65531bab",
    ...     item_id="pierre-loic/climate-news-articles",
    ...     item_type="dataset",
    ... )
    >>> collection.items[-1].item_id
    "pierre-loic/climate-news-articles"
    # ^item got added to the collection on last position

    # Add item with a note
    >>> add_collection_item(
    ...     collection_slug="davanstrien/climate-64f99dc2a5067f6b65531bab",
    ...     item_id="datasets/climate_fever",
    ...     item_type="dataset",
    ...     note="This dataset adopts the FEVER methodology that consists of 1,535 real-world claims regarding climate-change collected on the internet.",
    ... )
    (...)
    ```
    """
    body: Dict[str, Any] = {"item": {"id": item_id, "type": item_type}}
    if note is not None:
        body["note"] = note

    response = get_session().post(
        f"{self.endpoint}/api/collections/{collection_slug}/items",
        headers=self._build_hf_headers(token=token),
        json=body,
    )

    try:
        hf_raise_for_status(response)
    except HTTPError as err:
        if not (exists_ok and err.response.status_code == 409):
            raise
        # Item already exists and `exists_ok=True`: return the collection as it currently is.
        return self.get_collection(collection_slug, token=token)

    collection_data = {**response.json(), "endpoint": self.endpoint}
    return Collection(**collection_data)
def update_collection_item(
    self,
    collection_slug: str,
    item_object_id: str,
    *,
    note: Optional[str] = None,
    position: Optional[int] = None,
    token: Union[bool, str, None] = None,
) -> None:
    """Update an item in a collection.

    Only the values that are not `None` are sent to the API.

    Args:
        collection_slug (`str`):
            Slug of the collection to update. Example: `"TheBloke/recent-models-64f9a55bb3115b4f513ec026"`.
        item_object_id (`str`):
            ID of the item in the collection. This is not the id of the item on the Hub (repo_id or paper id).
            It must be retrieved from a [`CollectionItem`] object. Example: `collection.items[0].item_object_id`.
        note (`str`, *optional*):
            A note to attach to the item in the collection. The maximum size for a note is 500 characters.
        position (`int`, *optional*):
            New position of the item in the collection.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Example:

    ```py
    >>> from huggingface_hub import get_collection, update_collection_item

    # Get collection first
    >>> collection = get_collection("TheBloke/recent-models-64f9a55bb3115b4f513ec026")

    # Update item based on its ID (add note + update position)
    >>> update_collection_item(
    ...     collection_slug="TheBloke/recent-models-64f9a55bb3115b4f513ec026",
    ...     item_object_id=collection.items[-1].item_object_id,
    ...     note="Newly updated model!",
    ...     position=0,
    ... )
    ```
    """
    candidate_fields = {"position": position, "note": note}
    # Only send not-none values to the API
    changes = {name: new_value for name, new_value in candidate_fields.items() if new_value is not None}

    response = get_session().patch(
        f"{self.endpoint}/api/collections/{collection_slug}/items/{item_object_id}",
        headers=self._build_hf_headers(token=token),
        json=changes,
    )
    hf_raise_for_status(response)
def delete_collection_item(
    self,
    collection_slug: str,
    item_object_id: str,
    *,
    missing_ok: bool = False,
    token: Union[bool, str, None] = None,
) -> None:
    """Delete an item from a collection.

    Args:
        collection_slug (`str`):
            Slug of the collection to update. Example: `"TheBloke/recent-models-64f9a55bb3115b4f513ec026"`.
        item_object_id (`str`):
            ID of the item in the collection. This is not the id of the item on the Hub (repo_id or paper id).
            It must be retrieved from a [`CollectionItem`] object. Example: `collection.items[0].item_object_id`.
        missing_ok (`bool`, *optional*):
            If `True`, do not raise an error if the item doesn't exist (HTTP 404).
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Example:

    ```py
    >>> from huggingface_hub import get_collection, delete_collection_item

    # Get collection first
    >>> collection = get_collection("TheBloke/recent-models-64f9a55bb3115b4f513ec026")

    # Delete item based on its ID
    >>> delete_collection_item(
    ...     collection_slug="TheBloke/recent-models-64f9a55bb3115b4f513ec026",
    ...     item_object_id=collection.items[-1].item_object_id,
    ... )
    ```
    """
    response = get_session().delete(
        f"{self.endpoint}/api/collections/{collection_slug}/items/{item_object_id}",
        headers=self._build_hf_headers(token=token),
    )
    try:
        hf_raise_for_status(response)
    except HTTPError as err:
        # Item already deleted: tolerated only when `missing_ok=True`.
        if not (missing_ok and err.response.status_code == 404):
            raise
- ##########################
- # Manage access requests #
- ##########################
@validate_hf_hub_args
def list_pending_access_requests(
    self, repo_id: str, *, repo_type: Optional[str] = None, token: Union[bool, str, None] = None
) -> List[AccessRequest]:
    """
    Get pending access requests for a given gated repo.

    A pending request means the user has requested access to the repo but the request has not been processed yet.
    If the approval mode is automatic, this list should be empty. Pending requests can be accepted or rejected
    using [`accept_access_request`] and [`reject_access_request`].

    For more info about gated repos, see https://huggingface.co/docs/hub/models-gated.

    Args:
        repo_id (`str`):
            The id of the repo to get access requests for.
        repo_type (`str`, *optional*):
            The type of the repo to get access requests for. Must be one of `model`, `dataset` or `space`.
            Defaults to `model`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `List[AccessRequest]`: A list of [`AccessRequest`] objects. Each item contains a `username`, `email`,
        `status` and `timestamp` attribute. If the gated repo has a custom form, the `fields` attribute will
        be populated with user's answers.

    Raises:
        [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError):
            HTTP 400 if the repo is not gated.
        [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError):
            HTTP 403 if you only have read-only access to the repo. This can be the case if you don't have `write`
            or `admin` role in the organization the repo belongs to or if you passed a `read` token.

    Example:

    ```py
    >>> from huggingface_hub import list_pending_access_requests, accept_access_request

    # List pending requests
    >>> requests = list_pending_access_requests("meta-llama/Llama-2-7b")
    >>> len(requests)
    411
    >>> requests
    [
        AccessRequest(
            username='clem',
            fullname='Clem 🤗',
            email='***',
            timestamp=datetime.datetime(2023, 11, 23, 18, 4, 53, 828000, tzinfo=datetime.timezone.utc),
            status='pending',
            fields=None,
        ),
        ...
    ]

    # Accept Clem's request
    >>> accept_access_request("meta-llama/Llama-2-7b", "clem")
    ```
    """
    # Thin wrapper: the shared helper does the request, filtered on the "pending" status.
    return self._list_access_requests(repo_id=repo_id, status="pending", repo_type=repo_type, token=token)
@validate_hf_hub_args
def list_accepted_access_requests(
    self, repo_id: str, *, repo_type: Optional[str] = None, token: Union[bool, str, None] = None
) -> List[AccessRequest]:
    """
    Get accepted access requests for a given gated repo.

    An accepted request means the user has requested access to the repo and the request has been accepted. The user
    can download any file of the repo. If the approval mode is automatic, this list should contain by default all
    requests. Accepted requests can be cancelled or rejected at any time using [`cancel_access_request`] and
    [`reject_access_request`]. A cancelled request will go back to the pending list while a rejected request will
    go to the rejected list. In both cases, the user will lose access to the repo.

    For more info about gated repos, see https://huggingface.co/docs/hub/models-gated.

    Args:
        repo_id (`str`):
            The id of the repo to get access requests for.
        repo_type (`str`, *optional*):
            The type of the repo to get access requests for. Must be one of `model`, `dataset` or `space`.
            Defaults to `model`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `List[AccessRequest]`: A list of [`AccessRequest`] objects. Each item contains a `username`, `email`,
        `status` and `timestamp` attribute. If the gated repo has a custom form, the `fields` attribute will
        be populated with user's answers.

    Raises:
        [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError):
            HTTP 400 if the repo is not gated.
        [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError):
            HTTP 403 if you only have read-only access to the repo. This can be the case if you don't have `write`
            or `admin` role in the organization the repo belongs to or if you passed a `read` token.

    Example:

    ```py
    >>> from huggingface_hub import list_accepted_access_requests

    >>> requests = list_accepted_access_requests("meta-llama/Llama-2-7b")
    >>> len(requests)
    411
    >>> requests
    [
        AccessRequest(
            username='clem',
            fullname='Clem 🤗',
            email='***',
            timestamp=datetime.datetime(2023, 11, 23, 18, 4, 53, 828000, tzinfo=datetime.timezone.utc),
            status='accepted',
            fields=None,
        ),
        ...
    ]
    ```
    """
    # Thin wrapper: the shared helper does the request, filtered on the "accepted" status.
    return self._list_access_requests(repo_id=repo_id, status="accepted", repo_type=repo_type, token=token)
@validate_hf_hub_args
def list_rejected_access_requests(
    self, repo_id: str, *, repo_type: Optional[str] = None, token: Union[bool, str, None] = None
) -> List[AccessRequest]:
    """
    Get rejected access requests for a given gated repo.

    A rejected request means the user has requested access to the repo and the request has been explicitly rejected
    by a repo owner (either you or another user from your organization). The user cannot download any file of the
    repo. Rejected requests can be accepted or cancelled at any time using [`accept_access_request`] and
    [`cancel_access_request`]. A cancelled request will go back to the pending list while an accepted request will
    go to the accepted list.

    For more info about gated repos, see https://huggingface.co/docs/hub/models-gated.

    Args:
        repo_id (`str`):
            The id of the repo to get access requests for.
        repo_type (`str`, *optional*):
            The type of the repo to get access requests for. Must be one of `model`, `dataset` or `space`.
            Defaults to `model`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `List[AccessRequest]`: A list of [`AccessRequest`] objects. Each item contains a `username`, `email`,
        `status` and `timestamp` attribute. If the gated repo has a custom form, the `fields` attribute will
        be populated with user's answers.

    Raises:
        [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError):
            HTTP 400 if the repo is not gated.
        [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError):
            HTTP 403 if you only have read-only access to the repo. This can be the case if you don't have `write`
            or `admin` role in the organization the repo belongs to or if you passed a `read` token.

    Example:

    ```py
    >>> from huggingface_hub import list_rejected_access_requests

    >>> requests = list_rejected_access_requests("meta-llama/Llama-2-7b")
    >>> len(requests)
    411
    >>> requests
    [
        AccessRequest(
            username='clem',
            fullname='Clem 🤗',
            email='***',
            timestamp=datetime.datetime(2023, 11, 23, 18, 4, 53, 828000, tzinfo=datetime.timezone.utc),
            status='rejected',
            fields=None,
        ),
        ...
    ]
    ```
    """
    # Thin wrapper: the shared helper does the request, filtered on the "rejected" status.
    return self._list_access_requests(repo_id=repo_id, status="rejected", repo_type=repo_type, token=token)
def _list_access_requests(
    self,
    repo_id: str,
    status: Literal["accepted", "rejected", "pending"],
    repo_type: Optional[str] = None,
    token: Union[bool, str, None] = None,
) -> List[AccessRequest]:
    """List the access requests of a gated repo that are in a given status.

    Shared implementation behind `list_pending_access_requests`, `list_accepted_access_requests`
    and `list_rejected_access_requests`.

    Args:
        repo_id (`str`):
            The id of the repo to get access requests for.
        status (`str`):
            Status to filter on. One of `"accepted"`, `"rejected"` or `"pending"`; used as the last
            path segment of the request URL.
        repo_type (`str`, *optional*):
            The type of the repo. Must be a member of `constants.REPO_TYPES`. `None` falls back to
            `constants.REPO_TYPE_MODEL`.
        token (`bool` or `str`, *optional*):
            Token forwarded to `_build_hf_headers` to authenticate the request.

    Returns:
        `List[AccessRequest]`: One [`AccessRequest`] per entry of the JSON response. `email` and `fields`
        are `None` when absent from the payload.

    Raises:
        `ValueError`: If `repo_type` is not in `constants.REPO_TYPES`.
        [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError):
            Whatever `hf_raise_for_status` raises for an unsuccessful response.
    """
    # NOTE(review): the check runs before the `None` fallback, which presumably relies on
    # `None` being a member of `constants.REPO_TYPES` -- confirm against `constants`.
    if repo_type not in constants.REPO_TYPES:
        raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}")
    if repo_type is None:
        repo_type = constants.REPO_TYPE_MODEL

    # Use the endpoint configured on this client (as the collection methods do) rather than the
    # global `constants.ENDPOINT`, so that a client pointing to a custom endpoint is honoured.
    response = get_session().get(
        f"{self.endpoint}/api/{repo_type}s/{repo_id}/user-access-request/{status}",
        headers=self._build_hf_headers(token=token),
    )
    hf_raise_for_status(response)

    return [
        AccessRequest(
            username=request["user"]["user"],
            fullname=request["user"]["fullname"],
            email=request["user"].get("email"),
            status=request["status"],
            timestamp=parse_datetime(request["timestamp"]),
            fields=request.get("fields"),  # only if custom fields in form
        )
        for request in response.json()
    ]
@validate_hf_hub_args
def cancel_access_request(
    self, repo_id: str, user: str, *, repo_type: Optional[str] = None, token: Union[bool, str, None] = None
) -> None:
    """
    Cancel an access request from a user for a given gated repo.

    A cancelled request will go back to the pending list and the user will lose access to the repo.

    For more info about gated repos, see https://huggingface.co/docs/hub/models-gated.

    Args:
        repo_id (`str`):
            The id of the repo to cancel access request for.
        user (`str`):
            The username of the user whose access request should be cancelled.
        repo_type (`str`, *optional*):
            The type of the repo to cancel access request for. Must be one of `model`, `dataset` or `space`.
            Defaults to `model`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Raises:
        [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError):
            HTTP 400 if the repo is not gated.
        [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError):
            HTTP 403 if you only have read-only access to the repo. This can be the case if you don't have `write`
            or `admin` role in the organization the repo belongs to or if you passed a `read` token.
        [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError):
            HTTP 404 if the user does not exist on the Hub.
        [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError):
            HTTP 404 if the user access request cannot be found.
        [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError):
            HTTP 404 if the user access request is already in the pending list.
    """
    # Cancelling is implemented as moving the request back to the "pending" status.
    self._handle_access_request(repo_id=repo_id, user=user, status="pending", repo_type=repo_type, token=token)
@validate_hf_hub_args
def accept_access_request(
    self, repo_id: str, user: str, *, repo_type: Optional[str] = None, token: Union[bool, str, None] = None
) -> None:
    """
    Accept an access request from a user for a given gated repo.

    Once the request is accepted, the user will be able to download any file of the repo and access the community
    tab. If the approval mode is automatic, you don't have to accept requests manually. An accepted request can be
    cancelled or rejected at any time using [`cancel_access_request`] and [`reject_access_request`].

    For more info about gated repos, see https://huggingface.co/docs/hub/models-gated.

    Args:
        repo_id (`str`):
            The id of the repo to accept access request for.
        user (`str`):
            The username of the user whose access request should be accepted.
        repo_type (`str`, *optional*):
            The type of the repo to accept access request for. Must be one of `model`, `dataset` or `space`.
            Defaults to `model`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Raises:
        [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError):
            HTTP 400 if the repo is not gated.
        [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError):
            HTTP 403 if you only have read-only access to the repo. This can be the case if you don't have `write`
            or `admin` role in the organization the repo belongs to or if you passed a `read` token.
        [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError):
            HTTP 404 if the user does not exist on the Hub.
        [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError):
            HTTP 404 if the user access request cannot be found.
        [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError):
            HTTP 404 if the user access request is already in the accepted list.
    """
    # Accepting is implemented as moving the request to the "accepted" status.
    self._handle_access_request(repo_id=repo_id, user=user, status="accepted", repo_type=repo_type, token=token)
@validate_hf_hub_args
def reject_access_request(
    self,
    repo_id: str,
    user: str,
    *,
    repo_type: Optional[str] = None,
    rejection_reason: Optional[str] = None,
    token: Union[bool, str, None] = None,
) -> None:
    """
    Reject an access request from a user for a given gated repo.

    A rejected request will go to the rejected list. The user cannot download any file of the repo. Rejected
    requests can be accepted or cancelled at any time using [`accept_access_request`] and [`cancel_access_request`].
    A cancelled request will go back to the pending list while an accepted request will go to the accepted list.

    For more info about gated repos, see https://huggingface.co/docs/hub/models-gated.

    Args:
        repo_id (`str`):
            The id of the repo to reject access request for.
        user (`str`):
            The username of the user whose access request should be rejected.
        repo_type (`str`, *optional*):
            The type of the repo to reject access request for. Must be one of `model`, `dataset` or `space`.
            Defaults to `model`.
        rejection_reason (`str`, *optional*):
            Optional rejection reason that will be visible to the user (max 200 characters).
            Defaults to `None`, in which case no reason is sent.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Raises:
        [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError):
            HTTP 400 if the repo is not gated.
        [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError):
            HTTP 403 if you only have read-only access to the repo. This can be the case if you don't have `write`
            or `admin` role in the organization the repo belongs to or if you passed a `read` token.
        [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError):
            HTTP 404 if the user does not exist on the Hub.
        [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError):
            HTTP 404 if the user access request cannot be found.
        [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError):
            HTTP 404 if the user access request is already in the rejected list.
    """
    # `rejection_reason` is documented as optional, so it now defaults to `None`; callers that
    # already pass it explicitly are unaffected.
    self._handle_access_request(
        repo_id, user, "rejected", repo_type=repo_type, rejection_reason=rejection_reason, token=token
    )
@validate_hf_hub_args
def _handle_access_request(
    self,
    repo_id: str,
    user: str,
    status: Literal["accepted", "rejected", "pending"],
    repo_type: Optional[str] = None,
    rejection_reason: Optional[str] = None,
    token: Union[bool, str, None] = None,
) -> None:
    """Move a user's access request for a gated repo to a new status.

    Shared implementation behind `cancel_access_request`, `accept_access_request` and
    `reject_access_request`.

    Args:
        repo_id (`str`):
            The id of the gated repo.
        user (`str`):
            The username whose access request is handled.
        status (`str`):
            Target status: `"accepted"`, `"rejected"` or `"pending"`.
        repo_type (`str`, *optional*):
            The type of the repo. Must be a member of `constants.REPO_TYPES`. `None` falls back to
            `constants.REPO_TYPE_MODEL`.
        rejection_reason (`str`, *optional*):
            Reason sent as `rejectionReason` in the payload. Only allowed when `status` is `"rejected"`.
        token (`bool` or `str`, *optional*):
            Token forwarded to `_build_hf_headers` to authenticate the request.

    Raises:
        `ValueError`: If `repo_type` is not in `constants.REPO_TYPES`, or if `rejection_reason` is
            passed with a status other than `"rejected"`.
        [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError):
            Whatever `hf_raise_for_status` raises for an unsuccessful response.
    """
    # NOTE(review): the check runs before the `None` fallback, which presumably relies on
    # `None` being a member of `constants.REPO_TYPES` -- confirm against `constants`.
    if repo_type not in constants.REPO_TYPES:
        raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}")
    if repo_type is None:
        repo_type = constants.REPO_TYPE_MODEL

    payload = {"user": user, "status": status}

    if rejection_reason is not None:
        if status != "rejected":
            raise ValueError("`rejection_reason` can only be passed when rejecting an access request.")
        payload["rejectionReason"] = rejection_reason

    # Use the endpoint configured on this client (as the collection methods do) rather than the
    # global `constants.ENDPOINT`, so that a client pointing to a custom endpoint is honoured.
    response = get_session().post(
        f"{self.endpoint}/api/{repo_type}s/{repo_id}/user-access-request/handle",
        headers=self._build_hf_headers(token=token),
        json=payload,
    )
    hf_raise_for_status(response)
@validate_hf_hub_args
def grant_access(
    self, repo_id: str, user: str, *, repo_type: Optional[str] = None, token: Union[bool, str, None] = None
) -> None:
    """
    Grant access to a user for a given gated repo.

    Granting access doesn't require the user to send an access request by themselves. The user is automatically
    added to the accepted list, meaning they can download the files. You can revoke the granted access at any time
    using [`cancel_access_request`] or [`reject_access_request`].

    For more info about gated repos, see https://huggingface.co/docs/hub/models-gated.

    Args:
        repo_id (`str`):
            The id of the repo to grant access to.
        user (`str`):
            The username of the user to grant access.
        repo_type (`str`, *optional*):
            The type of the repo to grant access to. Must be one of `model`, `dataset` or `space`.
            Defaults to `model`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Raises:
        `ValueError`: If `repo_type` is not in `constants.REPO_TYPES`.
        [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError):
            HTTP 400 if the repo is not gated.
        [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError):
            HTTP 400 if the user already has access to the repo.
        [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError):
            HTTP 403 if you only have read-only access to the repo. This can be the case if you don't have `write`
            or `admin` role in the organization the repo belongs to or if you passed a `read` token.
        [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError):
            HTTP 404 if the user does not exist on the Hub.
    """
    # NOTE(review): the check runs before the `None` fallback, which presumably relies on
    # `None` being a member of `constants.REPO_TYPES` -- confirm against `constants`.
    if repo_type not in constants.REPO_TYPES:
        raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}")
    if repo_type is None:
        repo_type = constants.REPO_TYPE_MODEL

    # Use the endpoint configured on this client (as the collection methods do) rather than the
    # global `constants.ENDPOINT`, so that a client pointing to a custom endpoint is honoured.
    response = get_session().post(
        f"{self.endpoint}/api/{repo_type}s/{repo_id}/user-access-request/grant",
        headers=self._build_hf_headers(token=token),
        json={"user": user},
    )
    hf_raise_for_status(response)
    # NOTE(review): the signature is annotated `-> None` but the decoded JSON body is returned.
    # The return is kept as-is so that existing callers relying on it are not broken.
    return response.json()
- ###################
- # Manage webhooks #
- ###################
@validate_hf_hub_args
def get_webhook(self, webhook_id: str, *, token: Union[bool, str, None] = None) -> WebhookInfo:
    """Get a webhook by its id.

    Args:
        webhook_id (`str`):
            The unique identifier of the webhook to get.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved token, which is the recommended
            method for authentication (see https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        [`WebhookInfo`]:
            Info about the webhook. `url`, `job` and `secret` are `None` when absent from the response.

    Example:
        ```python
        >>> from huggingface_hub import get_webhook
        >>> webhook = get_webhook("654bbbc16f2ec14d77f109cc")
        >>> print(webhook)
        WebhookInfo(
            id="654bbbc16f2ec14d77f109cc",
            job=None,
            watched=[WebhookWatchedItem(type="user", name="julien-c"), WebhookWatchedItem(type="org", name="HuggingFaceH4")],
            url="https://webhook.site/a2176e82-5720-43ee-9e06-f91cb4c91548",
            secret="my-secret",
            domains=["repo", "discussion"],
            disabled=False,
        )
        ```
    """
    # Use the endpoint configured on this client (as the collection methods do) rather than the
    # global `constants.ENDPOINT`, so that a client pointing to a custom endpoint is honoured.
    response = get_session().get(
        f"{self.endpoint}/api/settings/webhooks/{webhook_id}",
        headers=self._build_hf_headers(token=token),
    )
    hf_raise_for_status(response)

    # The webhook description is nested under the "webhook" key of the response.
    webhook_data = response.json()["webhook"]

    watched_items = [WebhookWatchedItem(type=item["type"], name=item["name"]) for item in webhook_data["watched"]]

    webhook = WebhookInfo(
        id=webhook_data["id"],
        url=webhook_data.get("url"),
        # A missing or falsy "job" entry means the webhook does not trigger a job.
        job=JobSpec(**webhook_data["job"]) if webhook_data.get("job") else None,
        watched=watched_items,
        domains=webhook_data["domains"],
        secret=webhook_data.get("secret"),
        disabled=webhook_data["disabled"],
    )

    return webhook
@validate_hf_hub_args
def list_webhooks(self, *, token: Union[bool, str, None] = None) -> List[WebhookInfo]:
    """List all configured webhooks.

    Args:
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved token, which is the recommended
            method for authentication (see https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `List[WebhookInfo]`:
            List of webhook info objects, one per entry of the response. `url`, `job` and `secret` are
            `None` when absent from an entry.

    Example:
        ```python
        >>> from huggingface_hub import list_webhooks
        >>> webhooks = list_webhooks()
        >>> len(webhooks)
        2
        >>> webhooks[0]
        WebhookInfo(
            id="654bbbc16f2ec14d77f109cc",
            job=None,
            watched=[WebhookWatchedItem(type="user", name="julien-c"), WebhookWatchedItem(type="org", name="HuggingFaceH4")],
            url="https://webhook.site/a2176e82-5720-43ee-9e06-f91cb4c91548",
            secret="my-secret",
            domains=["repo", "discussion"],
            disabled=False,
        )
        ```
    """
    # Use the endpoint configured on this client (as the collection methods do) rather than the
    # global `constants.ENDPOINT`, so that a client pointing to a custom endpoint is honoured.
    response = get_session().get(
        f"{self.endpoint}/api/settings/webhooks",
        headers=self._build_hf_headers(token=token),
    )
    hf_raise_for_status(response)

    # Unlike the single-webhook route, the response body is directly the list of webhooks.
    webhooks_data = response.json()

    return [
        WebhookInfo(
            id=webhook["id"],
            url=webhook.get("url"),
            # A missing or falsy "job" entry means the webhook does not trigger a job.
            job=JobSpec(**webhook["job"]) if webhook.get("job") else None,
            watched=[WebhookWatchedItem(type=item["type"], name=item["name"]) for item in webhook["watched"]],
            domains=webhook["domains"],
            secret=webhook.get("secret"),
            disabled=webhook["disabled"],
        )
        for webhook in webhooks_data
    ]
@validate_hf_hub_args
def create_webhook(
    self,
    *,
    url: Optional[str] = None,
    job_id: Optional[str] = None,
    watched: List[Union[Dict, WebhookWatchedItem]],
    domains: Optional[List[constants.WEBHOOK_DOMAIN_T]] = None,
    secret: Optional[str] = None,
    token: Union[bool, str, None] = None,
) -> WebhookInfo:
    """Create a new webhook.

    The webhook can either send a payload to a URL, or trigger a Job to run on Hugging Face infrastructure.
    This function should be called with one of `url` or `job_id`, but not both.

    Args:
        url (`str`):
            URL to send the payload to.
        job_id (`str`):
            ID of the source Job to trigger with the webhook payload in the environment variable WEBHOOK_PAYLOAD.
            Additional environment variables are available for convenience: WEBHOOK_REPO_ID, WEBHOOK_REPO_TYPE and WEBHOOK_SECRET.
        watched (`List[WebhookWatchedItem]`):
            List of [`WebhookWatchedItem`] to be watched by the webhook. It can be users, orgs, models, datasets or spaces.
            Watched items can also be provided as plain dictionaries.
        domains (`List[Literal["repo", "discussion"]]`, optional):
            List of domains to watch. It can be "repo", "discussion" or both.
        secret (`str`, optional):
            A secret to sign the payload with.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved token, which is the recommended
            method for authentication (see https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        [`WebhookInfo`]:
            Info about the newly created webhook.

    Raises:
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError):
            If both `url` and `job_id` are set, or if neither is set.

    Example:

        Create a webhook that sends a payload to a URL
        ```python
        >>> from huggingface_hub import create_webhook
        >>> payload = create_webhook(
        ...     watched=[{"type": "user", "name": "julien-c"}, {"type": "org", "name": "HuggingFaceH4"}],
        ...     url="https://webhook.site/a2176e82-5720-43ee-9e06-f91cb4c91548",
        ...     domains=["repo", "discussion"],
        ...     secret="my-secret",
        ... )
        >>> print(payload)
        WebhookInfo(
            id="654bbbc16f2ec14d77f109cc",
            url="https://webhook.site/a2176e82-5720-43ee-9e06-f91cb4c91548",
            job=None,
            watched=[WebhookWatchedItem(type="user", name="julien-c"), WebhookWatchedItem(type="org", name="HuggingFaceH4")],
            domains=["repo", "discussion"],
            secret="my-secret",
            disabled=False,
        )
        ```

        Run a Job and then create a webhook that triggers this Job
        ```python
        >>> from huggingface_hub import create_webhook, run_job
        >>> job = run_job(
        ...     image="ubuntu",
        ...     command=["bash", "-c", r"echo An event occurred in $WEBHOOK_REPO_ID: $WEBHOOK_PAYLOAD"],
        ... )
        >>> payload = create_webhook(
        ...     watched=[{"type": "user", "name": "julien-c"}, {"type": "org", "name": "HuggingFaceH4"}],
        ...     job_id=job.id,
        ...     domains=["repo", "discussion"],
        ...     secret="my-secret",
        ... )
        >>> print(payload)
        WebhookInfo(
            id="654bbbc16f2ec14d77f109cc",
            url=None,
            job=JobSpec(
                docker_image='ubuntu',
                space_id=None,
                command=['bash', '-c', 'echo An event occurred in $WEBHOOK_REPO_ID: $WEBHOOK_PAYLOAD'],
                arguments=[],
                environment={},
                secrets=[],
                flavor='cpu-basic',
                timeout=None,
                tags=None,
                arch=None
            ),
            watched=[WebhookWatchedItem(type="user", name="julien-c"), WebhookWatchedItem(type="org", name="HuggingFaceH4")],
            domains=["repo", "discussion"],
            secret="my-secret",
            disabled=False,
        )
        ```
    """
    # Dataclass items are serialized to plain dicts; dict items are forwarded untouched.
    watched_dicts = [asdict(item) if isinstance(item, WebhookWatchedItem) else item for item in watched]

    post_webhooks_json = {"watched": watched_dicts, "domains": domains, "secret": secret}
    # Exactly one target must be provided: a URL to call or a source Job to trigger.
    if url is not None and job_id is not None:
        raise ValueError("Set `url` or `job_id` but not both.")
    elif url is not None:
        post_webhooks_json["url"] = url
    elif job_id is not None:
        post_webhooks_json["jobSourceId"] = job_id
    else:
        raise ValueError("Missing argument for webhook: `url` or `job_id`.")

    # Use the endpoint configured on this client rather than the global default, so the
    # request goes to the same server as the other `self.endpoint`-based methods.
    response = get_session().post(
        f"{self.endpoint}/api/settings/webhooks",
        json=post_webhooks_json,
        headers=self._build_hf_headers(token=token),
    )
    hf_raise_for_status(response)
    webhook_data = response.json()["webhook"]

    watched_items = [WebhookWatchedItem(type=item["type"], name=item["name"]) for item in webhook_data["watched"]]

    webhook = WebhookInfo(
        id=webhook_data["id"],
        url=webhook_data.get("url"),
        job=JobSpec(**webhook_data["job"]) if webhook_data.get("job") else None,
        watched=watched_items,
        domains=webhook_data["domains"],
        secret=webhook_data.get("secret"),
        disabled=webhook_data["disabled"],
    )

    return webhook
@validate_hf_hub_args
def update_webhook(
    self,
    webhook_id: str,
    *,
    url: Optional[str] = None,
    watched: Optional[List[Union[Dict, WebhookWatchedItem]]] = None,
    domains: Optional[List[constants.WEBHOOK_DOMAIN_T]] = None,
    secret: Optional[str] = None,
    token: Union[bool, str, None] = None,
) -> WebhookInfo:
    """Update an existing webhook.

    Args:
        webhook_id (`str`):
            The unique identifier of the webhook to be updated.
        url (`str`, optional):
            The URL to which the payload will be sent.
        watched (`List[WebhookWatchedItem]`, optional):
            List of items to watch. It can be users, orgs, models, datasets, or spaces.
            Refer to [`WebhookWatchedItem`] for more details. Watched items can also be provided as plain dictionaries.
            When omitted, an empty list is sent to the server.
        domains (`List[Literal["repo", "discussion"]]`, optional):
            The domains to watch. This can include "repo", "discussion", or both.
        secret (`str`, optional):
            A secret to sign the payload with, providing an additional layer of security.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved token, which is the recommended
            method for authentication (see https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        [`WebhookInfo`]:
            Info about the updated webhook.

    Example:
        ```python
        >>> from huggingface_hub import update_webhook
        >>> updated_payload = update_webhook(
        ...     webhook_id="654bbbc16f2ec14d77f109cc",
        ...     url="https://new.webhook.site/a2176e82-5720-43ee-9e06-f91cb4c91548",
        ...     watched=[{"type": "user", "name": "julien-c"}, {"type": "org", "name": "HuggingFaceH4"}],
        ...     domains=["repo"],
        ...     secret="my-secret",
        ... )
        >>> print(updated_payload)
        WebhookInfo(
            id="654bbbc16f2ec14d77f109cc",
            job=None,
            url="https://new.webhook.site/a2176e82-5720-43ee-9e06-f91cb4c91548",
            watched=[WebhookWatchedItem(type="user", name="julien-c"), WebhookWatchedItem(type="org", name="HuggingFaceH4")],
            domains=["repo"],
            secret="my-secret",
            disabled=False,
        )
        ```
    """
    if watched is None:
        watched = []
    # Dataclass items are serialized to plain dicts; dict items are forwarded untouched.
    watched_dicts = [asdict(item) if isinstance(item, WebhookWatchedItem) else item for item in watched]

    # Use the endpoint configured on this client rather than the global default, so the
    # request goes to the same server as the other `self.endpoint`-based methods.
    response = get_session().post(
        f"{self.endpoint}/api/settings/webhooks/{webhook_id}",
        json={"watched": watched_dicts, "url": url, "domains": domains, "secret": secret},
        headers=self._build_hf_headers(token=token),
    )
    hf_raise_for_status(response)
    webhook_data = response.json()["webhook"]

    watched_items = [WebhookWatchedItem(type=item["type"], name=item["name"]) for item in webhook_data["watched"]]

    webhook = WebhookInfo(
        id=webhook_data["id"],
        url=webhook_data.get("url"),
        job=JobSpec(**webhook_data["job"]) if webhook_data.get("job") else None,
        watched=watched_items,
        domains=webhook_data["domains"],
        secret=webhook_data.get("secret"),
        disabled=webhook_data["disabled"],
    )

    return webhook
@validate_hf_hub_args
def enable_webhook(self, webhook_id: str, *, token: Union[bool, str, None] = None) -> WebhookInfo:
    """Enable a webhook (makes it "active").

    Args:
        webhook_id (`str`):
            The unique identifier of the webhook to enable.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved token, which is the recommended
            method for authentication (see https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        [`WebhookInfo`]:
            Info about the enabled webhook.

    Example:
        ```python
        >>> from huggingface_hub import enable_webhook
        >>> enabled_webhook = enable_webhook("654bbbc16f2ec14d77f109cc")
        >>> enabled_webhook
        WebhookInfo(
            id="654bbbc16f2ec14d77f109cc",
            job=None,
            url="https://webhook.site/a2176e82-5720-43ee-9e06-f91cb4c91548",
            watched=[WebhookWatchedItem(type="user", name="julien-c"), WebhookWatchedItem(type="org", name="HuggingFaceH4")],
            domains=["repo", "discussion"],
            secret="my-secret",
            disabled=False,
        )
        ```
    """
    # Use the endpoint configured on this client rather than the global default, so the
    # request goes to the same server as the other `self.endpoint`-based methods.
    response = get_session().post(
        f"{self.endpoint}/api/settings/webhooks/{webhook_id}/enable",
        headers=self._build_hf_headers(token=token),
    )
    hf_raise_for_status(response)
    webhook_data = response.json()["webhook"]

    watched_items = [WebhookWatchedItem(type=item["type"], name=item["name"]) for item in webhook_data["watched"]]

    webhook = WebhookInfo(
        id=webhook_data["id"],
        url=webhook_data.get("url"),
        job=JobSpec(**webhook_data["job"]) if webhook_data.get("job") else None,
        watched=watched_items,
        domains=webhook_data["domains"],
        secret=webhook_data.get("secret"),
        disabled=webhook_data["disabled"],
    )

    return webhook
@validate_hf_hub_args
def disable_webhook(self, webhook_id: str, *, token: Union[bool, str, None] = None) -> WebhookInfo:
    """Disable a webhook (makes it "disabled").

    Args:
        webhook_id (`str`):
            The unique identifier of the webhook to disable.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved token, which is the recommended
            method for authentication (see https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        [`WebhookInfo`]:
            Info about the disabled webhook.

    Example:
        ```python
        >>> from huggingface_hub import disable_webhook
        >>> disabled_webhook = disable_webhook("654bbbc16f2ec14d77f109cc")
        >>> disabled_webhook
        WebhookInfo(
            id="654bbbc16f2ec14d77f109cc",
            url="https://webhook.site/a2176e82-5720-43ee-9e06-f91cb4c91548",
            job=None,
            watched=[WebhookWatchedItem(type="user", name="julien-c"), WebhookWatchedItem(type="org", name="HuggingFaceH4")],
            domains=["repo", "discussion"],
            secret="my-secret",
            disabled=True,
        )
        ```
    """
    # Use the endpoint configured on this client rather than the global default, so the
    # request goes to the same server as the other `self.endpoint`-based methods.
    response = get_session().post(
        f"{self.endpoint}/api/settings/webhooks/{webhook_id}/disable",
        headers=self._build_hf_headers(token=token),
    )
    hf_raise_for_status(response)
    webhook_data = response.json()["webhook"]

    watched_items = [WebhookWatchedItem(type=item["type"], name=item["name"]) for item in webhook_data["watched"]]

    webhook = WebhookInfo(
        id=webhook_data["id"],
        url=webhook_data.get("url"),
        job=JobSpec(**webhook_data["job"]) if webhook_data.get("job") else None,
        watched=watched_items,
        domains=webhook_data["domains"],
        secret=webhook_data.get("secret"),
        disabled=webhook_data["disabled"],
    )

    return webhook
@validate_hf_hub_args
def delete_webhook(self, webhook_id: str, *, token: Union[bool, str, None] = None) -> None:
    """Delete a webhook.

    Args:
        webhook_id (`str`):
            The unique identifier of the webhook to delete.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved token, which is the recommended
            method for authentication (see https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `None`

    Example:
        ```python
        >>> from huggingface_hub import delete_webhook
        >>> delete_webhook("654bbbc16f2ec14d77f109cc")
        ```
    """
    # Use the endpoint configured on this client rather than the global default, so the
    # request goes to the same server as the other `self.endpoint`-based methods.
    response = get_session().delete(
        f"{self.endpoint}/api/settings/webhooks/{webhook_id}",
        headers=self._build_hf_headers(token=token),
    )
    hf_raise_for_status(response)
- #############
- # Internals #
- #############
def _build_hf_headers(
    self,
    token: Union[bool, str, None] = None,
    library_name: Optional[str] = None,
    library_version: Optional[str] = None,
    user_agent: Union[Dict, str, None] = None,
) -> Dict[str, str]:
    """
    Thin wrapper around [`build_hf_headers`] that falls back on the values stored on this
    [`HfApi`] client (`token`, `library_name`, `library_version`, `user_agent`) for every
    argument the caller did not provide. The client's `headers` are always forwarded.
    """
    # An explicit `None` check is required: `False` is a meaningful value for `token`
    # and must not be replaced by the client-level token.
    resolved_token = self.token if token is None else token
    resolved_library_name = library_name or self.library_name
    resolved_library_version = library_version or self.library_version
    resolved_user_agent = user_agent or self.user_agent
    return build_hf_headers(
        token=resolved_token,
        library_name=resolved_library_name,
        library_version=resolved_library_version,
        user_agent=resolved_user_agent,
        headers=self.headers,
    )
def _prepare_folder_deletions(
    self,
    repo_id: str,
    repo_type: Optional[str],
    revision: Optional[str],
    path_in_repo: str,
    delete_patterns: Optional[Union[List[str], str]],
    token: Union[bool, str, None] = None,
) -> List[CommitOperationDelete]:
    """Build the Delete operations needed to remove remote files matching `delete_patterns`.

    Remote files are listed, restricted to those located under `path_in_repo`, and their paths
    relative to `path_in_repo` are matched against `delete_patterns`. One [`CommitOperationDelete`]
    is returned per matching file.

    Note: the root `.gitattributes` file is never scheduled for deletion, even when it matches
    `delete_patterns`, as it is essential to make a repo work properly on the Hub.
    """
    # Without patterns there is nothing to delete: skip the remote listing altogether.
    if delete_patterns is None:
        return []

    remote_files = self.list_repo_files(repo_id=repo_id, revision=revision, repo_type=repo_type, token=token)

    # Map "path relative to `path_in_repo`" -> "full path in repo".
    targets_repo_root = (not path_in_repo) or path_in_repo in (".", "./")
    if targets_repo_root:
        full_path_by_relpath = {name: name for name in remote_files}
    else:
        folder_prefix = path_in_repo.strip("/") + "/"  # harmonize
        full_path_by_relpath = {}
        for name in remote_files:
            if name.startswith(folder_prefix):
                full_path_by_relpath[name[len(folder_prefix) :]] = name

    # Patterns are matched on relative paths; operations carry the full path in the repo.
    deletions = []
    for relpath in filter_repo_objects(full_path_by_relpath.keys(), allow_patterns=delete_patterns):
        full_path = full_path_by_relpath[relpath]
        if full_path == ".gitattributes":
            continue
        deletions.append(CommitOperationDelete(path_in_repo=full_path, is_folder=False))
    return deletions
def _prepare_upload_folder_additions(
    self,
    folder_path: Union[str, Path],
    path_in_repo: str,
    allow_patterns: Optional[Union[List[str], str]] = None,
    ignore_patterns: Optional[Union[List[str], str]] = None,
    repo_type: Optional[str] = None,
    token: Union[bool, str, None] = None,
) -> List[CommitOperationAdd]:
    """Build the Add operations needed to upload a local folder in a single commit.

    Only the files accepted by `allow_patterns` (allowlist) and not rejected by
    `ignore_patterns` (denylist) are kept. Patterns are matched against paths relative to
    `folder_path`; `path_in_repo` is prepended afterwards.

    Raises:
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError):
            If `folder_path` is not a directory.
    """
    folder_path = Path(folder_path).expanduser().resolve()
    if not folder_path.is_dir():
        raise ValueError(f"Provided path: '{folder_path}' is not a directory")

    # Collect every regular file below the folder, keyed by its POSIX path relative to the folder.
    local_path_by_relpath = {}
    for candidate in sorted(folder_path.glob("**/*")):  # sorted to be deterministic
        if candidate.is_file():
            local_path_by_relpath[candidate.relative_to(folder_path).as_posix()] = candidate

    # Apply allow/ignore patterns on the relative paths.
    selected_relpaths = list(
        filter_repo_objects(
            local_path_by_relpath.keys(), allow_patterns=allow_patterns, ignore_patterns=ignore_patterns
        )
    )
    nb_selected = len(selected_relpaths)

    repo_prefix = path_in_repo.strip("/") + "/" if path_in_repo else ""

    # Validate the README metadata up front: failing now is cheaper than failing
    # after all the files have been hashed.
    if "README.md" in selected_relpaths:
        readme_content = local_path_by_relpath["README.md"].read_text(encoding="utf8")
        self._validate_yaml(content=readme_content, repo_type=repo_type, token=token)

    if nb_selected > 30:
        # Above 200 files the hint is promoted from an info message to a warning.
        emit = logger.warning if nb_selected > 200 else logger.info
        emit(
            "It seems you are trying to upload a large folder at once. This might take some time and then fail if "
            "the folder is too large. For such cases, it is recommended to upload in smaller batches or to use "
            "`HfApi().upload_large_folder(...)`/`hf upload-large-folder` instead. For more details, "
            "check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder."
        )

    logger.info(f"Start hashing {nb_selected} files.")
    additions = []
    for relpath in selected_relpaths:
        additions.append(
            CommitOperationAdd(
                path_or_fileobj=local_path_by_relpath[relpath],  # absolute path on disk
                path_in_repo=repo_prefix + relpath,  # "absolute" path in repo
            )
        )
    logger.info(f"Finished hashing {nb_selected} files.")
    return additions
def _validate_yaml(self, content: str, *, repo_type: Optional[str] = None, token: Union[bool, str, None] = None):
    """
    Ask the Hub to validate the YAML metadata of a `README.md` before hashing and uploading files.

    Warnings returned by the server are surfaced with `warnings.warn`; validation errors are
    raised as a `ValueError`.

    Args:
        content (`str`):
            Content of `README.md` to validate.
        repo_type (`str`, *optional*):
            The type of the repo the README belongs to. Must be one of `model`, `dataset` or `space`.
            Defaults to `model`.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Raises:
        - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
          if YAML is invalid
    """
    if repo_type is None:
        repo_type = constants.REPO_TYPE_MODEL
    request_headers = self._build_hf_headers(token=token)

    response = get_session().post(
        f"{self.endpoint}/api/validate-yaml",
        json={"content": content, "repoType": repo_type},
        headers=request_headers,
    )
    # The body is parsed before the status check because it is needed both for the
    # warnings below and for the error details of a bad request.
    payload = response.json()

    # Handle warnings (example: empty metadata)
    warning_lines = [f"- {item.get('message')}" for item in payload.get("warnings", [])]
    if warning_lines:
        warnings.warn("Warnings while validating metadata in README.md:\n" + "\n".join(warning_lines))

    # Raise on errors
    try:
        hf_raise_for_status(response)
    except BadRequestError as e:
        error_lines = "\n".join(f"- {item.get('message')}" for item in payload.get("errors", []))
        raise ValueError(f"Invalid metadata in README.md.\n{error_lines}") from e
def get_user_overview(self, username: str, token: Union[bool, str, None] = None) -> User:
    """
    Get an overview of a user on the Hub.

    Args:
        username (`str`):
            Username of the user to get an overview of.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `User`: A [`User`] object with the user's overview.

    Raises:
        [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError):
            HTTP 404 If the user does not exist on the Hub.
    """
    # Use the endpoint configured on this client rather than the global default, so the
    # request goes to the same server as the other `self.endpoint`-based methods.
    r = get_session().get(
        f"{self.endpoint}/api/users/{username}/overview", headers=self._build_hf_headers(token=token)
    )
    hf_raise_for_status(r)
    return User(**r.json())
@validate_hf_hub_args
def get_organization_overview(self, organization: str, token: Union[bool, str, None] = None) -> Organization:
    """
    Get an overview of an organization on the Hub.

    Args:
        organization (`str`):
            Name of the organization to get an overview of.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved token, which is the recommended method
            for authentication (see https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `Organization`: An [`Organization`] object with the organization's overview.

    Raises:
        [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError):
            HTTP 404 If the organization does not exist on the Hub.
    """
    # Use the endpoint configured on this client rather than the global default, so the
    # request goes to the same server as the other `self.endpoint`-based methods.
    r = get_session().get(
        f"{self.endpoint}/api/organizations/{organization}/overview",
        headers=self._build_hf_headers(token=token),
    )
    hf_raise_for_status(r)
    return Organization(**r.json())
def list_organization_members(self, organization: str, token: Union[bool, str, None] = None) -> Iterable[User]:
    """
    List the members of an organization on the Hub.

    This is a generator: members are fetched through `paginate` and yielded one at a time,
    so no request is sent until iteration starts.

    Args:
        organization (`str`):
            Name of the organization to get the members of.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `Iterable[User]`: An iterable of [`User`] objects with the members of the organization.

    Raises:
        [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError):
            HTTP 404 If the organization does not exist on the Hub.
    """
    # Use the endpoint configured on this client rather than the global default, so the
    # request goes to the same server as the other `self.endpoint`-based methods.
    for member in paginate(
        path=f"{self.endpoint}/api/organizations/{organization}/members",
        params={},
        headers=self._build_hf_headers(token=token),
    ):
        yield User(**member)
def list_user_followers(self, username: str, token: Union[bool, str, None] = None) -> Iterable[User]:
    """
    Get the list of followers of a user on the Hub.

    This is a generator: followers are fetched through `paginate` and yielded one at a time,
    so no request is sent until iteration starts.

    Args:
        username (`str`):
            Username of the user to get the followers of.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `Iterable[User]`: An iterable of [`User`] objects with the followers of the user.

    Raises:
        [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError):
            HTTP 404 If the user does not exist on the Hub.
    """
    # Use the endpoint configured on this client rather than the global default, so the
    # request goes to the same server as the other `self.endpoint`-based methods.
    for follower in paginate(
        path=f"{self.endpoint}/api/users/{username}/followers",
        params={},
        headers=self._build_hf_headers(token=token),
    ):
        yield User(**follower)
def list_user_following(self, username: str, token: Union[bool, str, None] = None) -> Iterable[User]:
    """
    Get the list of users followed by a user on the Hub.

    This is a generator: followed users are fetched through `paginate` and yielded one at a
    time, so no request is sent until iteration starts.

    Args:
        username (`str`):
            Username of the user to get the users followed by.
        token (`bool` or `str`, *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `Iterable[User]`: An iterable of [`User`] objects with the users followed by the user.

    Raises:
        [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError):
            HTTP 404 If the user does not exist on the Hub.
    """
    # Use the endpoint configured on this client rather than the global default, so the
    # request goes to the same server as the other `self.endpoint`-based methods.
    for followed_user in paginate(
        path=f"{self.endpoint}/api/users/{username}/following",
        params={},
        headers=self._build_hf_headers(token=token),
    ):
        yield User(**followed_user)
def list_papers(
    self,
    *,
    query: Optional[str] = None,
    token: Union[bool, str, None] = None,
) -> Iterable[PaperInfo]:
    """
    Search papers on the Hugging Face Hub and iterate over the results.

    This is a generator: the search request is only sent once iteration starts.

    Args:
        query (`str`, *optional*):
            A search query string to find papers.
            If provided, it is sent as the `q` parameter of the search request.
        token (Union[bool, str, None], *optional*):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `Iterable[PaperInfo]`: an iterable of [`huggingface_hub.hf_api.PaperInfo`] objects.

    Example:
        ```python
        >>> from huggingface_hub import HfApi

        >>> api = HfApi()

        # Search papers with the query "attention"
        >>> papers = list(api.list_papers(query="attention"))
        ```
    """
    search_url = f"{self.endpoint}/api/papers/search"
    # The `q` parameter is only sent for a non-empty query.
    search_params = {"q": query} if query else {}
    response = get_session().get(
        search_url,
        params=search_params,
        headers=self._build_hf_headers(token=token),
    )
    hf_raise_for_status(response)
    yield from (PaperInfo(**entry) for entry in response.json())
def paper_info(self, id: str) -> PaperInfo:
    """
    Get information for a paper on the Hub.

    The request is sent without any headers, hence without authentication.

    Args:
        id (`str`):
            ArXiv id of the paper.

    Returns:
        `PaperInfo`: A `PaperInfo` object.

    Raises:
        [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError):
            HTTP 404 If the paper does not exist on the Hub.
    """
    response = get_session().get(f"{self.endpoint}/api/papers/{id}")
    hf_raise_for_status(response)
    paper_payload = response.json()
    return PaperInfo(**paper_payload)
def auth_check(
    self, repo_id: str, *, repo_type: Optional[str] = None, token: Union[bool, str, None] = None
) -> None:
    """
    Check if the provided user token has access to a specific repository on the Hugging Face Hub.

    The method queries the `auth-check` route of the repository and returns `None` when the
    request succeeds. Any failure is reported through an exception.

    Args:
        repo_id (`str`):
            The repository to check for access. Format should be `"user/repo_name"`.
            Example: `"user/my-cool-model"`.
        repo_type (`str`, *optional*):
            The type of the repository. Should be one of `"model"`, `"dataset"`, or `"space"`.
            If not specified, the default is `"model"`.
        token `(Union[bool, str, None]`, *optional*):
            A valid user access token. If not provided, the locally saved token will be used, which is the
            recommended authentication method. Set to `False` to disable authentication.
            Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication.

    Raises:
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError):
            Raised if `repo_type` is not one of the supported repo types.
        [`~utils.RepositoryNotFoundError`]:
            Raised if the repository does not exist, is private, or the user does not have access. This can
            occur if the `repo_id` or `repo_type` is incorrect or if the repository is private but the user
            is not authenticated.
        [`~utils.GatedRepoError`]:
            Raised if the repository exists but is gated and the user is not authorized to access it.

    Example:
        Check if the user has access to a repository:

        ```python
        >>> from huggingface_hub import auth_check
        >>> from huggingface_hub.utils import GatedRepoError, RepositoryNotFoundError

        >>> try:
        ...     auth_check("user/my-cool-model")
        ... except GatedRepoError:
        ...     # Handle gated repository error
        ...     print("You do not have permission to access this gated repository.")
        ... except RepositoryNotFoundError:
        ...     # Handle repository not found error
        ...     print("The repository was not found or you do not have access.")
        ```

        In this example:
        - If the user has access, the method completes successfully.
        - If the repository is gated or does not exist, appropriate exceptions are raised, allowing the user
          to handle them accordingly.
    """
    # Headers are built first, so token resolution happens before `repo_type` validation.
    request_headers = self._build_hf_headers(token=token)

    effective_repo_type = constants.REPO_TYPE_MODEL if repo_type is None else repo_type
    if effective_repo_type not in constants.REPO_TYPES:
        raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}")

    # The route uses the plural form of the repo type.
    check_url = f"{self.endpoint}/api/{effective_repo_type}s/{repo_id}/auth-check"
    response = get_session().get(check_url, headers=request_headers)
    hf_raise_for_status(response)
def run_job(
    self,
    *,
    image: str,
    command: List[str],
    env: Optional[Dict[str, Any]] = None,
    secrets: Optional[Dict[str, Any]] = None,
    flavor: Optional[SpaceHardware] = None,
    timeout: Optional[Union[int, float, str]] = None,
    namespace: Optional[str] = None,
    token: Union[bool, str, None] = None,
) -> JobInfo:
    """
    Run compute Jobs on Hugging Face infrastructure.

    Args:
        image (`str`):
            The Docker image to use.
            Examples: `"ubuntu"`, `"python:3.12"`, `"pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel"`.
            Example with an image from a Space: `"hf.co/spaces/lhoestq/duckdb"`.

        command (`List[str]`):
            The command to run. Example: `["echo", "hello"]`.

        env (`Dict[str, Any]`, *optional*):
            Defines the environment variables for the Job.

        secrets (`Dict[str, Any]`, *optional*):
            Defines the secret environment variables for the Job.

        flavor (`str`, *optional*):
            Flavor for the hardware, as in Hugging Face Spaces. See [`SpaceHardware`] for possible values.
            Defaults to `"cpu-basic"`.

        timeout (`Union[int, float, str]`, *optional*):
            Max duration for the Job: int/float with s (seconds, default), m (minutes), h (hours) or d (days).
            Example: `300` or `"5m"` for 5 minutes.

        namespace (`str`, *optional*):
            The namespace where the Job will be created. Defaults to the current user's namespace.

        token `(Union[bool, str, None]`, *optional*):
            A valid user access token. If not provided, the locally saved token will be used, which is the
            recommended authentication method. Set to `False` to disable authentication.
            Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication.

    Returns:
        [`JobInfo`]: Info about the created Job, built from the server response.

    Example:
        Run your first Job:

        ```python
        >>> from huggingface_hub import run_job
        >>> run_job(image="python:3.12", command=["python", "-c" ,"print('Hello from HF compute!')"])
        ```

        Run a GPU Job:

        ```python
        >>> from huggingface_hub import run_job
        >>> image = "pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel"
        >>> command = ["python", "-c", "import torch; print(f'This code ran with the following GPU: {torch.cuda.get_device_name()}')"]
        >>> run_job(image=image, command=command, flavor="a10g-small")
        ```
    """
    # Without an explicit namespace, the Job is created under the authenticated user's name.
    if namespace is None:
        namespace = self.whoami(token=token)["name"]
    job_spec = _create_job_spec(
        image=image,
        command=command,
        env=env,
        secrets=secrets,
        flavor=flavor,
        timeout=timeout,
    )
    # Post to the endpoint configured on this client, not a hard-coded host: the returned
    # `JobInfo` is tagged with `self.endpoint`, so the request must target the same server.
    response = get_session().post(
        f"{self.endpoint}/api/jobs/{namespace}",
        json=job_spec,
        headers=self._build_hf_headers(token=token),
    )
    hf_raise_for_status(response)
    job_info = response.json()
    return JobInfo(**job_info, endpoint=self.endpoint)
def fetch_job_logs(
    self,
    *,
    job_id: str,
    namespace: Optional[str] = None,
    token: Union[bool, str, None] = None,
) -> Iterable[str]:
    """
    Fetch all the logs from a compute Job on Hugging Face infrastructure.

    This is a generator: log lines are yielded as they are read from the streamed response.
    The logs request is attempted up to 5 times, with a growing pause between attempts.

    Args:
        job_id (`str`):
            ID of the Job.

        namespace (`str`, *optional*):
            The namespace where the Job is running. Defaults to the current user's namespace.

        token `(Union[bool, str, None]`, *optional*):
            A valid user access token. If not provided, the locally saved token will be used, which is the
            recommended authentication method. Set to `False` to disable authentication.
            Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication.

    Returns:
        `Iterable[str]`: the log lines of the Job, without the initial "Job started" message.

    Example:

        ```python
        >>> from huggingface_hub import fetch_job_logs, run_job
        >>> job = run_job(image="python:3.12", command=["python", "-c" ,"print('Hello from HF compute!')"])
        >>> for log in fetch_job_logs(job_id=job.id):
        ...     print(log)
        Hello from HF compute!
        ```
    """
    if namespace is None:
        namespace = self.whoami(token=token)["name"]
    logging_finished = logging_started = False
    job_finished = False
    # - We need to retry because sometimes the /logs doesn't return logs when the job just started.
    #   (for example it can return only two lines: one for "Job started" and one empty line)
    # - Timeouts can happen in case of build errors
    # - ChunkedEncodingError can happen in case of stopped logging in the middle of streaming
    # - Infinite empty log stream can happen in case of build error
    #   (the logs stream is infinite and empty except for the Job started message)
    # - there is a ": keep-alive" every 30 seconds

    # We don't use http_backoff since we need to check ourselves if ConnectionError.__context__ is a TimeoutError
    max_retries = 5
    min_wait_time = 1
    max_wait_time = 10
    sleep_time = 0
    for _ in range(max_retries):
        # No pause before the first attempt, then 1s, 2s, 4s, ... capped at `max_wait_time`.
        time.sleep(sleep_time)
        sleep_time = min(max_wait_time, max(min_wait_time, sleep_time * 2))
        try:
            # Query the endpoint configured on this client instead of a hard-coded host.
            resp = get_session().get(
                f"{self.endpoint}/api/jobs/{namespace}/{job_id}/logs",
                headers=self._build_hf_headers(token=token),
                stream=True,
                timeout=120,
            )
            for line in resp.iter_lines(chunk_size=1):
                line = line.decode("utf-8")
                # Only lines of the form `data: {...}` carry a log entry; other lines are skipped.
                if line and line.startswith("data: {"):
                    data = json.loads(line[len("data: ") :])
                    # timestamp = data["timestamp"]
                    if not data["data"].startswith("===== Job started"):
                        logging_started = True
                        yield data["data"]
            # The stream ended normally: we are done only if at least one real log line was seen.
            logging_finished = logging_started
        except requests.exceptions.ChunkedEncodingError:
            # Response ended prematurely
            break
        except KeyboardInterrupt:
            break
        except requests.exceptions.ConnectionError as err:
            is_timeout = err.__context__ and isinstance(getattr(err.__context__, "__cause__", None), TimeoutError)
            # Only a timeout that occurs before any log was received is retried.
            if logging_started or not is_timeout:
                raise
        if logging_finished or job_finished:
            break
        # Check the Job status: once the Job is no longer running, one last attempt
        # is made to read the logs before giving up.
        job_status = (
            get_session()
            .get(
                f"{self.endpoint}/api/jobs/{namespace}/{job_id}",
                headers=self._build_hf_headers(token=token),
            )
            .json()
        )
        if "status" in job_status and job_status["status"]["stage"] not in ("RUNNING", "UPDATING"):
            job_finished = True
def list_jobs(
    self,
    *,
    timeout: Optional[int] = None,
    namespace: Optional[str] = None,
    token: Union[bool, str, None] = None,
) -> List[JobInfo]:
    """
    List compute Jobs on Hugging Face infrastructure.

    Args:
        timeout (`int`, *optional*):
            Timeout (in seconds) for the request to the Hub. No timeout is set by default.

        namespace (`str`, *optional*):
            The namespace from where it lists the jobs. Defaults to the current user's namespace.

        token `(Union[bool, str, None]`, *optional*):
            A valid user access token. If not provided, the locally saved token will be used, which is the
            recommended authentication method. Set to `False` to disable authentication.
            Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication.

    Returns:
        `List[JobInfo]`: one [`JobInfo`] per Job returned by the Hub for the namespace.
    """
    if namespace is None:
        # Resolve the namespace with this instance (its endpoint/token), not the module-level default client.
        namespace = self.whoami(token=token)["name"]
    response = get_session().get(
        f"{self.endpoint}/api/jobs/{namespace}",
        headers=self._build_hf_headers(token=token),
        timeout=timeout,
    )
    # Same error handling as the other Jobs methods.
    hf_raise_for_status(response)
    return [JobInfo(**job_info, endpoint=self.endpoint) for job_info in response.json()]
def inspect_job(
    self,
    *,
    job_id: str,
    namespace: Optional[str] = None,
    token: Union[bool, str, None] = None,
) -> JobInfo:
    """
    Inspect a compute Job on Hugging Face infrastructure.

    Args:
        job_id (`str`):
            ID of the Job.

        namespace (`str`, *optional*):
            The namespace where the Job is running. Defaults to the current user's namespace.

        token `(Union[bool, str, None]`, *optional*):
            A valid user access token. If not provided, the locally saved token will be used, which is the
            recommended authentication method. Set to `False` to disable authentication.
            Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication.

    Returns:
        [`JobInfo`]: the Job description built from the Hub response.

    Example:

        ```python
        >>> from huggingface_hub import inspect_job, run_job
        >>> job = run_job(image="python:3.12", command=["python", "-c" ,"print('Hello from HF compute!')"])
        >>> inspect_job(job_id=job.id)
        JobInfo(
            id='68780d00bbe36d38803f645f',
            created_at=datetime.datetime(2025, 7, 16, 20, 35, 12, 808000, tzinfo=datetime.timezone.utc),
            docker_image='python:3.12',
            space_id=None,
            command=['python', '-c', "print('Hello from HF compute!')"],
            arguments=[],
            environment={},
            secrets={},
            flavor='cpu-basic',
            status=JobStatus(stage='RUNNING', message=None)
        )
        ```
    """
    if namespace is None:
        namespace = self.whoami(token=token)["name"]
    response = get_session().get(
        f"{self.endpoint}/api/jobs/{namespace}/{job_id}",
        headers=self._build_hf_headers(token=token),
    )
    # Same error handling as the other Jobs methods.
    hf_raise_for_status(response)
    return JobInfo(**response.json(), endpoint=self.endpoint)
def cancel_job(
    self,
    *,
    job_id: str,
    namespace: Optional[str] = None,
    token: Union[bool, str, None] = None,
) -> None:
    """
    Cancel a compute Job on Hugging Face infrastructure.

    Args:
        job_id (`str`):
            ID of the Job.

        namespace (`str`, *optional*):
            The namespace where the Job is running. Defaults to the current user's namespace.

        token `(Union[bool, str, None]`, *optional*):
            A valid user access token. If not provided, the locally saved token will be used, which is the
            recommended authentication method. Set to `False` to disable authentication.
            Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication.
    """
    if namespace is None:
        namespace = self.whoami(token=token)["name"]
    response = get_session().post(
        f"{self.endpoint}/api/jobs/{namespace}/{job_id}/cancel",
        headers=self._build_hf_headers(token=token),
    )
    # Same error handling as the other Jobs methods.
    hf_raise_for_status(response)
@experimental
def run_uv_job(
    self,
    script: str,
    *,
    script_args: Optional[List[str]] = None,
    dependencies: Optional[List[str]] = None,
    python: Optional[str] = None,
    image: Optional[str] = None,
    env: Optional[Dict[str, Any]] = None,
    secrets: Optional[Dict[str, Any]] = None,
    flavor: Optional[SpaceHardware] = None,
    timeout: Optional[Union[int, float, str]] = None,
    namespace: Optional[str] = None,
    token: Union[bool, str, None] = None,
    _repo: Optional[str] = None,
) -> JobInfo:
    """
    Run a UV script Job on Hugging Face infrastructure.

    The `uv run` command (and the environment variables / secrets it needs) is prepared first, then the
    Job is submitted through [`run_job`].

    Args:
        script (`str`):
            Path or URL of the UV script, or a command.

        script_args (`List[str]`, *optional*)
            Arguments to pass to the script or command.

        dependencies (`List[str]`, *optional*)
            Dependencies to use to run the UV script.

        python (`str`, *optional*)
            Use a specific Python version. Default is 3.12.

        image (`str`, *optional*, defaults to "ghcr.io/astral-sh/uv:python3.12-bookworm"):
            Use a custom Docker image with `uv` installed.

        env (`Dict[str, Any]`, *optional*):
            Defines the environment variables for the Job.

        secrets (`Dict[str, Any]`, *optional*):
            Defines the secret environment variables for the Job.

        flavor (`str`, *optional*):
            Flavor for the hardware, as in Hugging Face Spaces. See [`SpaceHardware`] for possible values.
            Defaults to `"cpu-basic"`.

        timeout (`Union[int, float, str]`, *optional*):
            Max duration for the Job: int/float with s (seconds, default), m (minutes), h (hours) or d (days).
            Example: `300` or `"5m"` for 5 minutes.

        namespace (`str`, *optional*):
            The namespace where the Job will be created. Defaults to the current user's namespace.

        token `(Union[bool, str, None]`, *optional*):
            A valid user access token. If not provided, the locally saved token will be used, which is the
            recommended authentication method. Set to `False` to disable authentication.
            Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication.

    Example:

        Run a script from a URL:

        ```python
        >>> from huggingface_hub import run_uv_job
        >>> script = "https://raw.githubusercontent.com/huggingface/trl/refs/heads/main/trl/scripts/sft.py"
        >>> script_args = ["--model_name_or_path", "Qwen/Qwen2-0.5B", "--dataset_name", "trl-lib/Capybara", "--push_to_hub"]
        >>> run_uv_job(script, script_args=script_args, dependencies=["trl"], flavor="a10g-small")
        ```

        Run a local script:

        ```python
        >>> from huggingface_hub import run_uv_job
        >>> script = "my_sft.py"
        >>> script_args = ["--model_name_or_path", "Qwen/Qwen2-0.5B", "--dataset_name", "trl-lib/Capybara", "--push_to_hub"]
        >>> run_uv_job(script, script_args=script_args, dependencies=["trl"], flavor="a10g-small")
        ```

        Run a command:

        ```python
        >>> from huggingface_hub import run_uv_job
        >>> script = "lighteval"
        >>> script_args= ["endpoint", "inference-providers", "model_name=openai/gpt-oss-20b,provider=auto", "lighteval|gsm8k|0|0"]
        >>> run_uv_job(script, script_args=script_args, dependencies=["lighteval"], flavor="a10g-small")
        ```
    """
    # Fall back to the defaults for anything the caller left unset (or empty).
    if not image:
        image = "ghcr.io/astral-sh/uv:python3.12-bookworm"
    if not env:
        env = {}
    if not secrets:
        secrets = {}

    # Turn the script (URL, local file or command) into a runnable `uv run` command.
    uv_command, job_env, job_secrets = self._create_uv_command_env_and_secrets(
        script=script,
        script_args=script_args,
        dependencies=dependencies,
        python=python,
        env=env,
        secrets=secrets,
        namespace=namespace,
        token=token,
        _repo=_repo,
    )

    # Submit it like any other Job.
    return self.run_job(
        image=image,
        command=uv_command,
        env=job_env,
        secrets=job_secrets,
        flavor=flavor,
        timeout=timeout,
        namespace=namespace,
        token=token,
    )
def create_scheduled_job(
    self,
    *,
    image: str,
    command: List[str],
    schedule: str,
    suspend: Optional[bool] = None,
    concurrency: Optional[bool] = None,
    env: Optional[Dict[str, Any]] = None,
    secrets: Optional[Dict[str, Any]] = None,
    flavor: Optional[SpaceHardware] = None,
    timeout: Optional[Union[int, float, str]] = None,
    namespace: Optional[str] = None,
    token: Union[bool, str, None] = None,
) -> ScheduledJobInfo:
    """
    Create scheduled compute Jobs on Hugging Face infrastructure.

    Args:
        image (`str`):
            The Docker image to use.
            Examples: `"ubuntu"`, `"python:3.12"`, `"pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel"`.
            Example with an image from a Space: `"hf.co/spaces/lhoestq/duckdb"`.

        command (`List[str]`):
            The command to run. Example: `["echo", "hello"]`.

        schedule (`str`):
            One of "@annually", "@yearly", "@monthly", "@weekly", "@daily", "@hourly", or a
            CRON schedule expression (e.g., '0 9 * * 1' for 9 AM every Monday).

        suspend (`bool`, *optional*):
            If True, the scheduled Job is suspended (paused). Defaults to False.

        concurrency (`bool`, *optional*):
            If True, multiple instances of this Job can run concurrently. Defaults to False.

        env (`Dict[str, Any]`, *optional*):
            Defines the environment variables for the Job.

        secrets (`Dict[str, Any]`, *optional*):
            Defines the secret environment variables for the Job.

        flavor (`str`, *optional*):
            Flavor for the hardware, as in Hugging Face Spaces. See [`SpaceHardware`] for possible values.
            Defaults to `"cpu-basic"`.

        timeout (`Union[int, float, str]`, *optional*):
            Max duration for the Job: int/float with s (seconds, default), m (minutes), h (hours) or d (days).
            Example: `300` or `"5m"` for 5 minutes.

        namespace (`str`, *optional*):
            The namespace where the Job will be created. Defaults to the current user's namespace.

        token `(Union[bool, str, None]`, *optional*):
            A valid user access token. If not provided, the locally saved token will be used, which is the
            recommended authentication method. Set to `False` to disable authentication.
            Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication.

    Returns:
        [`ScheduledJobInfo`]: the scheduled Job description built from the Hub response.

    Example:
        Create your first scheduled Job:

        ```python
        >>> from huggingface_hub import create_scheduled_job
        >>> create_scheduled_job(image="python:3.12", command=["python", "-c" ,"print('Hello from HF compute!')"], schedule="@hourly")
        ```

        Use a CRON schedule expression:

        ```python
        >>> from huggingface_hub import create_scheduled_job
        >>> create_scheduled_job(image="python:3.12", command=["python", "-c" ,"print('this runs every 5min')"], schedule="*/5 * * * *")
        ```

        Create a scheduled GPU Job:

        ```python
        >>> from huggingface_hub import create_scheduled_job
        >>> image = "pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel"
        >>> command = ["python", "-c", "import torch; print(f'This code ran with the following GPU: {torch.cuda.get_device_name()}')"]
        >>> create_scheduled_job(image=image, command=command, flavor="a10g-small", schedule="@hourly")
        ```
    """
    if namespace is None:
        namespace = self.whoami(token=token)["name"]

    # prepare payload to send to HF Jobs API
    job_spec = _create_job_spec(
        image=image,
        command=command,
        env=env,
        secrets=secrets,
        flavor=flavor,
        timeout=timeout,
    )
    input_json: Dict[str, Any] = {
        "jobSpec": job_spec,
        "schedule": schedule,
    }
    # Only send the optional flags when explicitly set, so that the server-side defaults apply otherwise.
    if concurrency is not None:
        input_json["concurrency"] = concurrency
    if suspend is not None:
        input_json["suspend"] = suspend
    # Use the configured endpoint, like the other scheduled Jobs methods.
    response = get_session().post(
        f"{self.endpoint}/api/scheduled-jobs/{namespace}",
        json=input_json,
        headers=self._build_hf_headers(token=token),
    )
    hf_raise_for_status(response)
    scheduled_job_info = response.json()
    return ScheduledJobInfo(**scheduled_job_info)
def list_scheduled_jobs(
    self,
    *,
    timeout: Optional[int] = None,
    namespace: Optional[str] = None,
    token: Union[bool, str, None] = None,
) -> List[ScheduledJobInfo]:
    """
    List scheduled compute Jobs on Hugging Face infrastructure.

    Args:
        timeout (`float`, *optional*):
            Timeout forwarded to the request made to the Hub.

        namespace (`str`, *optional*):
            The namespace from where it lists the jobs. Defaults to the current user's namespace.

        token `(Union[bool, str, None]`, *optional*):
            A valid user access token. If not provided, the locally saved token will be used, which is the
            recommended authentication method. Set to `False` to disable authentication.
            Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication.

    Returns:
        `List[ScheduledJobInfo]`: one [`ScheduledJobInfo`] per item of the Hub response.
    """
    owner = self.whoami(token=token)["name"] if namespace is None else namespace
    listing_url = f"{self.endpoint}/api/scheduled-jobs/{owner}"
    http_response = get_session().get(
        listing_url,
        headers=self._build_hf_headers(token=token),
        timeout=timeout,
    )
    hf_raise_for_status(http_response)

    scheduled_jobs: List[ScheduledJobInfo] = []
    for payload in http_response.json():
        scheduled_jobs.append(ScheduledJobInfo(**payload))
    return scheduled_jobs
def inspect_scheduled_job(
    self,
    *,
    scheduled_job_id: str,
    namespace: Optional[str] = None,
    token: Union[bool, str, None] = None,
) -> ScheduledJobInfo:
    """
    Inspect a scheduled compute Job on Hugging Face infrastructure.

    Args:
        scheduled_job_id (`str`):
            ID of the scheduled Job.

        namespace (`str`, *optional*):
            The namespace where the scheduled Job is. Defaults to the current user's namespace.

        token `(Union[bool, str, None]`, *optional*):
            A valid user access token. If not provided, the locally saved token will be used, which is the
            recommended authentication method. Set to `False` to disable authentication.
            Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication.

    Returns:
        [`ScheduledJobInfo`]: the scheduled Job description built from the Hub response.

    Example:

        ```python
        >>> from huggingface_hub import inspect_scheduled_job, create_scheduled_job
        >>> scheduled_job = create_scheduled_job(image="python:3.12", command=["python", "-c" ,"print('Hello from HF compute!')"], schedule="@hourly")
        >>> inspect_scheduled_job(scheduled_job_id=scheduled_job.id)
        ```
    """
    owner = self.whoami(token=token)["name"] if namespace is None else namespace
    details_url = f"{self.endpoint}/api/scheduled-jobs/{owner}/{scheduled_job_id}"
    http_response = get_session().get(details_url, headers=self._build_hf_headers(token=token))
    hf_raise_for_status(http_response)
    payload = http_response.json()
    return ScheduledJobInfo(**payload)
def delete_scheduled_job(
    self,
    *,
    scheduled_job_id: str,
    namespace: Optional[str] = None,
    token: Union[bool, str, None] = None,
) -> None:
    """
    Delete a scheduled compute Job on Hugging Face infrastructure.

    Args:
        scheduled_job_id (`str`):
            ID of the scheduled Job.

        namespace (`str`, *optional*):
            The namespace where the scheduled Job is. Defaults to the current user's namespace.

        token `(Union[bool, str, None]`, *optional*):
            A valid user access token. If not provided, the locally saved token will be used, which is the
            recommended authentication method. Set to `False` to disable authentication.
            Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication.
    """
    owner = self.whoami(token=token)["name"] if namespace is None else namespace
    target_url = f"{self.endpoint}/api/scheduled-jobs/{owner}/{scheduled_job_id}"
    deletion = get_session().delete(target_url, headers=self._build_hf_headers(token=token))
    hf_raise_for_status(deletion)
def suspend_scheduled_job(
    self,
    *,
    scheduled_job_id: str,
    namespace: Optional[str] = None,
    token: Union[bool, str, None] = None,
) -> None:
    """
    Suspend (pause) a scheduled compute Job on Hugging Face infrastructure.

    Args:
        scheduled_job_id (`str`):
            ID of the scheduled Job.

        namespace (`str`, *optional*):
            The namespace where the scheduled Job is. Defaults to the current user's namespace.

        token `(Union[bool, str, None]`, *optional*):
            A valid user access token. If not provided, the locally saved token will be used, which is the
            recommended authentication method. Set to `False` to disable authentication.
            Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication.
    """
    if namespace is None:
        namespace = self.whoami(token=token)["name"]
    response = get_session().post(
        f"{self.endpoint}/api/scheduled-jobs/{namespace}/{scheduled_job_id}/suspend",
        headers=self._build_hf_headers(token=token),
    )
    # Same error handling as `delete_scheduled_job` and the other scheduled Jobs methods.
    hf_raise_for_status(response)
def resume_scheduled_job(
    self,
    *,
    scheduled_job_id: str,
    namespace: Optional[str] = None,
    token: Union[bool, str, None] = None,
) -> None:
    """
    Resume (unpause) a scheduled compute Job on Hugging Face infrastructure.

    Args:
        scheduled_job_id (`str`):
            ID of the scheduled Job.

        namespace (`str`, *optional*):
            The namespace where the scheduled Job is. Defaults to the current user's namespace.

        token `(Union[bool, str, None]`, *optional*):
            A valid user access token. If not provided, the locally saved token will be used, which is the
            recommended authentication method. Set to `False` to disable authentication.
            Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication.
    """
    if namespace is None:
        namespace = self.whoami(token=token)["name"]
    response = get_session().post(
        f"{self.endpoint}/api/scheduled-jobs/{namespace}/{scheduled_job_id}/resume",
        headers=self._build_hf_headers(token=token),
    )
    # Same error handling as `delete_scheduled_job` and the other scheduled Jobs methods.
    hf_raise_for_status(response)
@experimental
def create_scheduled_uv_job(
    self,
    script: str,
    *,
    script_args: Optional[List[str]] = None,
    schedule: str,
    suspend: Optional[bool] = None,
    concurrency: Optional[bool] = None,
    dependencies: Optional[List[str]] = None,
    python: Optional[str] = None,
    image: Optional[str] = None,
    env: Optional[Dict[str, Any]] = None,
    secrets: Optional[Dict[str, Any]] = None,
    flavor: Optional[SpaceHardware] = None,
    timeout: Optional[Union[int, float, str]] = None,
    namespace: Optional[str] = None,
    token: Union[bool, str, None] = None,
    _repo: Optional[str] = None,
) -> ScheduledJobInfo:
    """
    Create a scheduled UV script Job on Hugging Face infrastructure.

    The `uv run` command (and the environment variables / secrets it needs) is prepared first, then the
    scheduled Job is created through [`create_scheduled_job`].

    Args:
        script (`str`):
            Path or URL of the UV script, or a command.

        script_args (`List[str]`, *optional*)
            Arguments to pass to the script, or a command.

        schedule (`str`):
            One of "@annually", "@yearly", "@monthly", "@weekly", "@daily", "@hourly", or a
            CRON schedule expression (e.g., '0 9 * * 1' for 9 AM every Monday).

        suspend (`bool`, *optional*):
            If True, the scheduled Job is suspended (paused). Defaults to False.

        concurrency (`bool`, *optional*):
            If True, multiple instances of this Job can run concurrently. Defaults to False.

        dependencies (`List[str]`, *optional*)
            Dependencies to use to run the UV script.

        python (`str`, *optional*)
            Use a specific Python version. Default is 3.12.

        image (`str`, *optional*, defaults to "ghcr.io/astral-sh/uv:python3.12-bookworm"):
            Use a custom Docker image with `uv` installed.

        env (`Dict[str, Any]`, *optional*):
            Defines the environment variables for the Job.

        secrets (`Dict[str, Any]`, *optional*):
            Defines the secret environment variables for the Job.

        flavor (`str`, *optional*):
            Flavor for the hardware, as in Hugging Face Spaces. See [`SpaceHardware`] for possible values.
            Defaults to `"cpu-basic"`.

        timeout (`Union[int, float, str]`, *optional*):
            Max duration for the Job: int/float with s (seconds, default), m (minutes), h (hours) or d (days).
            Example: `300` or `"5m"` for 5 minutes.

        namespace (`str`, *optional*):
            The namespace where the Job will be created. Defaults to the current user's namespace.

        token `(Union[bool, str, None]`, *optional*):
            A valid user access token. If not provided, the locally saved token will be used, which is the
            recommended authentication method. Set to `False` to disable authentication.
            Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication.

    Example:

        Schedule a script from a URL:

        ```python
        >>> from huggingface_hub import create_scheduled_uv_job
        >>> script = "https://raw.githubusercontent.com/huggingface/trl/refs/heads/main/trl/scripts/sft.py"
        >>> script_args = ["--model_name_or_path", "Qwen/Qwen2-0.5B", "--dataset_name", "trl-lib/Capybara", "--push_to_hub"]
        >>> create_scheduled_uv_job(script, script_args=script_args, dependencies=["trl"], flavor="a10g-small", schedule="@weekly")
        ```

        Schedule a local script:

        ```python
        >>> from huggingface_hub import create_scheduled_uv_job
        >>> script = "my_sft.py"
        >>> script_args = ["--model_name_or_path", "Qwen/Qwen2-0.5B", "--dataset_name", "trl-lib/Capybara", "--push_to_hub"]
        >>> create_scheduled_uv_job(script, script_args=script_args, dependencies=["trl"], flavor="a10g-small", schedule="@weekly")
        ```

        Schedule a command:

        ```python
        >>> from huggingface_hub import create_scheduled_uv_job
        >>> script = "lighteval"
        >>> script_args= ["endpoint", "inference-providers", "model_name=openai/gpt-oss-20b,provider=auto", "lighteval|gsm8k|0|0"]
        >>> create_scheduled_uv_job(script, script_args=script_args, dependencies=["lighteval"], flavor="a10g-small", schedule="@weekly")
        ```
    """
    # Default image ships with `uv` pre-installed.
    if not image:
        image = "ghcr.io/astral-sh/uv:python3.12-bookworm"

    # Turn the script (URL, local file or command) into a runnable `uv run` command.
    uv_command, job_env, job_secrets = self._create_uv_command_env_and_secrets(
        script=script,
        script_args=script_args,
        dependencies=dependencies,
        python=python,
        env=env,
        secrets=secrets,
        namespace=namespace,
        token=token,
        _repo=_repo,
    )

    # Register it as a scheduled Job.
    return self.create_scheduled_job(
        image=image,
        command=uv_command,
        schedule=schedule,
        suspend=suspend,
        concurrency=concurrency,
        env=job_env,
        secrets=job_secrets,
        flavor=flavor,
        timeout=timeout,
        namespace=namespace,
        token=token,
    )
- def _create_uv_command_env_and_secrets(
- self,
- *,
- script: str,
- script_args: Optional[List[str]],
- dependencies: Optional[List[str]],
- python: Optional[str],
- env: Optional[Dict[str, Any]],
- secrets: Optional[Dict[str, Any]],
- namespace: Optional[str],
- token: Union[bool, str, None],
- _repo: Optional[str],
- ) -> Tuple[List[str], Dict[str, Any], Dict[str, Any]]:
- env = env or {}
- secrets = secrets or {}
- # Build command
- uv_args = []
- if dependencies:
- for dependency in dependencies:
- uv_args += ["--with", dependency]
- if python:
- uv_args += ["--python", python]
- script_args = script_args or []
- if namespace is None:
- namespace = self.whoami(token=token)["name"]
- is_url = script.startswith("http://") or script.startswith("https://")
- if is_url or not Path(script).is_file():
- # Direct URL execution or command - no upload needed
- command = ["uv", "run"] + uv_args + [script] + script_args
- else:
- # Local file - upload to HF
- script_path = Path(script)
- filename = script_path.name
- # Parse repo
- if _repo:
- repo_id = _repo
- if "/" not in repo_id:
- repo_id = f"{namespace}/{repo_id}"
- else:
- repo_id = f"{namespace}/hf-cli-jobs-uv-run-scripts"
- # Create repo if needed
- try:
- self.repo_info(repo_id, repo_type="dataset")
- logger.debug(f"Using existing repository: {repo_id}")
- except RepositoryNotFoundError:
- logger.info(f"Creating repository: {repo_id}")
- create_repo(repo_id, repo_type="dataset", private=True, exist_ok=True)
- # Upload script
- logger.info(f"Uploading {script_path.name} to {repo_id}...")
- with open(script_path, "r") as f:
- script_content = f.read()
- commit_hash = self.upload_file(
- path_or_fileobj=script_content.encode(),
- path_in_repo=filename,
- repo_id=repo_id,
- repo_type="dataset",
- ).oid
- script_url = f"{self.endpoint}/datasets/{repo_id}/resolve/{commit_hash}/{filename}"
- repo_url = f"{self.endpoint}/datasets/{repo_id}"
- logger.debug(f"✓ Script uploaded to: {repo_url}/blob/main/{filename}")
- # Create and upload minimal README
- timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S UTC")
- readme_content = dedent(
- f"""
- ---
- tags:
- - hf-cli-jobs-uv-script
- - ephemeral
- viewer: false
- ---
- # UV Script: {filename}
- Executed via `hf jobs uv run` on {timestamp}
- ## Run this script
- ```bash
- hf jobs uv run {filename}
- ```
- ---
- *Created with [hf jobs](https://huggingface.co/docs/huggingface_hub/main/en/guides/jobs)*
- """
- )
- self.upload_file(
- path_or_fileobj=readme_content.encode(),
- path_in_repo="README.md",
- repo_id=repo_id,
- repo_type="dataset",
- )
- secrets["UV_SCRIPT_HF_TOKEN"] = token or self.token or get_token()
- env["UV_SCRIPT_URL"] = script_url
- pre_command = (
- dedent(
- """
- import urllib.request
- import os
- from pathlib import Path
- o = urllib.request.build_opener()
- o.addheaders = [("Authorization", "Bearer " + os.environ["UV_SCRIPT_HF_TOKEN"])]
- Path("/tmp/script.py").write_bytes(o.open(os.environ["UV_SCRIPT_URL"]).read())
- """
- )
- .strip()
- .replace('"', r"\"")
- .split("\n")
- )
- pre_command = ["python", "-c", '"' + "; ".join(pre_command) + '"']
- command = ["uv", "run"] + uv_args + ["/tmp/script.py"] + script_args
- command = ["bash", "-c", " ".join(pre_command) + " && " + " ".join(command)]
- return command, env, secrets
def _parse_revision_from_pr_url(pr_url: str) -> str:
    """Extract the git revision (`refs/pr/<num>`) of a Pull Request from its URL.

    The URL is matched against `_REGEX_DISCUSSION_URL`; the first captured group is used as the PR number.

    Raises:
        `RuntimeError`: if `pr_url` does not match the expected discussion URL pattern.

    Example:
    ```py
    >>> _parse_revision_from_pr_url("https://huggingface.co/bigscience/bloom/discussions/2")
    "refs/pr/2"
    ```
    """
    matched = re.match(_REGEX_DISCUSSION_URL, pr_url)
    if matched is not None:
        return f"refs/pr/{matched[1]}"
    raise RuntimeError(f"Unexpected response from the hub, expected a Pull Request URL but got: '{pr_url}'")
# Default client, and module-level aliases of its bound methods.
# They allow calling e.g. `whoami()` directly instead of `HfApi().whoami()`.
api = HfApi()

whoami = api.whoami
auth_check = api.auth_check
get_token_permission = api.get_token_permission

list_models = api.list_models
model_info = api.model_info

list_datasets = api.list_datasets
dataset_info = api.dataset_info

list_spaces = api.list_spaces
space_info = api.space_info

list_papers = api.list_papers
paper_info = api.paper_info

repo_exists = api.repo_exists
revision_exists = api.revision_exists
file_exists = api.file_exists
repo_info = api.repo_info
list_repo_files = api.list_repo_files
list_repo_refs = api.list_repo_refs
list_repo_commits = api.list_repo_commits
list_repo_tree = api.list_repo_tree
get_paths_info = api.get_paths_info

get_model_tags = api.get_model_tags
get_dataset_tags = api.get_dataset_tags

create_commit = api.create_commit
create_repo = api.create_repo
delete_repo = api.delete_repo
update_repo_visibility = api.update_repo_visibility
update_repo_settings = api.update_repo_settings
move_repo = api.move_repo
upload_file = api.upload_file
upload_folder = api.upload_folder
delete_file = api.delete_file
delete_folder = api.delete_folder
delete_files = api.delete_files
upload_large_folder = api.upload_large_folder
preupload_lfs_files = api.preupload_lfs_files
create_branch = api.create_branch
delete_branch = api.delete_branch
create_tag = api.create_tag
delete_tag = api.delete_tag
get_full_repo_name = api.get_full_repo_name

# Danger-zone API
super_squash_history = api.super_squash_history
list_lfs_files = api.list_lfs_files
permanently_delete_lfs_files = api.permanently_delete_lfs_files

# Safetensors helpers
get_safetensors_metadata = api.get_safetensors_metadata
parse_safetensors_file_metadata = api.parse_safetensors_file_metadata

# Background jobs
run_as_future = api.run_as_future

# Activity API
list_liked_repos = api.list_liked_repos
list_repo_likers = api.list_repo_likers
unlike = api.unlike

# Community API
get_discussion_details = api.get_discussion_details
get_repo_discussions = api.get_repo_discussions
create_discussion = api.create_discussion
create_pull_request = api.create_pull_request
change_discussion_status = api.change_discussion_status
comment_discussion = api.comment_discussion
edit_discussion_comment = api.edit_discussion_comment
rename_discussion = api.rename_discussion
merge_pull_request = api.merge_pull_request

# Space API
add_space_secret = api.add_space_secret
delete_space_secret = api.delete_space_secret
get_space_variables = api.get_space_variables
add_space_variable = api.add_space_variable
delete_space_variable = api.delete_space_variable
get_space_runtime = api.get_space_runtime
request_space_hardware = api.request_space_hardware
set_space_sleep_time = api.set_space_sleep_time
pause_space = api.pause_space
restart_space = api.restart_space
duplicate_space = api.duplicate_space
request_space_storage = api.request_space_storage
delete_space_storage = api.delete_space_storage

# Inference Endpoint API
list_inference_endpoints = api.list_inference_endpoints
create_inference_endpoint = api.create_inference_endpoint
get_inference_endpoint = api.get_inference_endpoint
update_inference_endpoint = api.update_inference_endpoint
delete_inference_endpoint = api.delete_inference_endpoint
pause_inference_endpoint = api.pause_inference_endpoint
resume_inference_endpoint = api.resume_inference_endpoint
scale_to_zero_inference_endpoint = api.scale_to_zero_inference_endpoint
create_inference_endpoint_from_catalog = api.create_inference_endpoint_from_catalog
list_inference_catalog = api.list_inference_catalog

# Collections API
get_collection = api.get_collection
list_collections = api.list_collections
create_collection = api.create_collection
update_collection_metadata = api.update_collection_metadata
delete_collection = api.delete_collection
add_collection_item = api.add_collection_item
update_collection_item = api.update_collection_item
delete_collection_item = api.delete_collection_item

# Access requests API
list_pending_access_requests = api.list_pending_access_requests
list_accepted_access_requests = api.list_accepted_access_requests
list_rejected_access_requests = api.list_rejected_access_requests
cancel_access_request = api.cancel_access_request
accept_access_request = api.accept_access_request
reject_access_request = api.reject_access_request
grant_access = api.grant_access

# Webhooks API
create_webhook = api.create_webhook
disable_webhook = api.disable_webhook
delete_webhook = api.delete_webhook
enable_webhook = api.enable_webhook
get_webhook = api.get_webhook
list_webhooks = api.list_webhooks
update_webhook = api.update_webhook

# User API
get_user_overview = api.get_user_overview
get_organization_overview = api.get_organization_overview
list_organization_members = api.list_organization_members
list_user_followers = api.list_user_followers
list_user_following = api.list_user_following

# Jobs API
run_job = api.run_job
fetch_job_logs = api.fetch_job_logs
list_jobs = api.list_jobs
inspect_job = api.inspect_job
cancel_job = api.cancel_job
run_uv_job = api.run_uv_job
create_scheduled_job = api.create_scheduled_job
list_scheduled_jobs = api.list_scheduled_jobs
inspect_scheduled_job = api.inspect_scheduled_job
delete_scheduled_job = api.delete_scheduled_job
suspend_scheduled_job = api.suspend_scheduled_job
resume_scheduled_job = api.resume_scheduled_job
create_scheduled_uv_job = api.create_scheduled_uv_job
|