{"@context":{"@language":"en","@vocab":"https://schema.org/","arrayShape":"cr:arrayShape","citeAs":"cr:citeAs","column":"cr:column","conformsTo":"dct:conformsTo","cr":"http://mlcommons.org/croissant/","data":{"@id":"cr:data","@type":"@json"},"dataBiases":"cr:dataBiases","dataCollection":"cr:dataCollection","dataType":{"@id":"cr:dataType","@type":"@vocab"},"dct":"http://purl.org/dc/terms/","extract":"cr:extract","field":"cr:field","fileProperty":"cr:fileProperty","fileObject":"cr:fileObject","fileSet":"cr:fileSet","format":"cr:format","includes":"cr:includes","isArray":"cr:isArray","isLiveDataset":"cr:isLiveDataset","jsonPath":"cr:jsonPath","key":"cr:key","md5":"cr:md5","parentField":"cr:parentField","path":"cr:path","personalSensitiveInformation":"cr:personalSensitiveInformation","recordSet":"cr:recordSet","references":"cr:references","regex":"cr:regex","repeated":"cr:repeated","replace":"cr:replace","sc":"https://schema.org/","separator":"cr:separator","source":"cr:source","subField":"cr:subField","transform":"cr:transform","containedIn":"cr:containedIn"},"@type":"sc:Dataset","distribution":[{"@type":"cr:FileObject","@id":"repo","name":"repo","description":"The Hugging Face git repository.","contentUrl":"https://huggingface.co/datasets/ash56/ShiftySpeech/tree/refs%2Fconvert%2Fparquet","encodingFormat":"git+https","sha256":"https://github.com/mlcommons/croissant/issues/80"},{"@type":"cr:FileSet","@id":"parquet-files-for-config-real","containedIn":{"@id":"repo"},"encodingFormat":"application/x-parquet","includes":"real/*/*.parquet"},{"@type":"cr:FileSet","@id":"parquet-files-for-config-tts","containedIn":{"@id":"repo"},"encodingFormat":"application/x-parquet","includes":"tts/*/*.parquet"},{"@type":"cr:FileSet","@id":"parquet-files-for-config-vocoders","containedIn":{"@id":"repo"},"encodingFormat":"application/x-parquet","includes":"vocoders/*/*.parquet"}],"recordSet":[{"@type":"cr:RecordSet","dataType":"cr:Split","key":{"@id":"real_splits/split_name"},"@id":"real_splits","name":"real_splits","description":"Splits for the real config.","field":[{"@type":"cr:Field","@id":"real_splits/split_name","dataType":"sc:Text"}],"data":[{"real_splits/split_name":"test"}]},{"@type":"cr:RecordSet","@id":"real","description":"ash56/ShiftySpeech - 'real' subset (first 5GB)\n\nAdditional information:\n- 1 skipped column: flac","field":[{"@type":"cr:Field","@id":"real/split","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-real"},"extract":{"fileProperty":"fullpath"},"transform":{"regex":"real/(?:partial-)?(test)/.+parquet$"}},"references":{"field":{"@id":"real_splits/split_name"}}},{"@type":"cr:Field","@id":"real/__key__","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-real"},"extract":{"column":"__key__"}}},{"@type":"cr:Field","@id":"real/__url__","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-real"},"extract":{"column":"__url__"}}}]},{"@type":"cr:RecordSet","dataType":"cr:Split","key":{"@id":"tts_splits/split_name"},"@id":"tts_splits","name":"tts_splits","description":"Splits for the tts config.","field":[{"@type":"cr:Field","@id":"tts_splits/split_name","dataType":"sc:Text"}],"data":[{"tts_splits/split_name":"test"}]},{"@type":"cr:RecordSet","@id":"tts","description":"ash56/ShiftySpeech - 'tts' subset (first 5GB)\n\nAdditional information:\n- 1 skipped column: flac","field":[{"@type":"cr:Field","@id":"tts/split","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-tts"},"extract":{"fileProperty":"fullpath"},"transform":{"regex":"tts/(?:partial-)?(test)/.+parquet$"}},"references":{"field":{"@id":"tts_splits/split_name"}}},{"@type":"cr:Field","@id":"tts/__key__","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-tts"},"extract":{"column":"__key__"}}},{"@type":"cr:Field","@id":"tts/__url__","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-tts"},"extract":{"column":"__url__"}}}]},{"@type":"cr:RecordSet","dataType":"cr:Split","key":{"@id":"vocoders_splits/split_name"},"@id":"vocoders_splits","name":"vocoders_splits","description":"Splits for the vocoders config.","field":[{"@type":"cr:Field","@id":"vocoders_splits/split_name","dataType":"sc:Text"}],"data":[{"vocoders_splits/split_name":"test"}]},{"@type":"cr:RecordSet","@id":"vocoders","description":"ash56/ShiftySpeech - 'vocoders' subset (first 5GB)\n\nAdditional information:\n- 1 skipped column: flac","field":[{"@type":"cr:Field","@id":"vocoders/split","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-vocoders"},"extract":{"fileProperty":"fullpath"},"transform":{"regex":"vocoders/(?:partial-)?(test)/.+parquet$"}},"references":{"field":{"@id":"vocoders_splits/split_name"}}},{"@type":"cr:Field","@id":"vocoders/__key__","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-vocoders"},"extract":{"column":"__key__"}}},{"@type":"cr:Field","@id":"vocoders/__url__","dataType":"sc:Text","source":{"fileSet":{"@id":"parquet-files-for-config-vocoders"},"extract":{"column":"__url__"}}}]}],"conformsTo":"http://mlcommons.org/croissant/1.1","name":"ShiftySpeech","description":"This repository introduces:  🌀 ShiftySpeech: A Large-Scale Synthetic Speech Dataset with Distribution Shifts\n\n\t\n\t\t\n\t\t🔥 Key Features\n\t\n\n\n3000+ hours of synthetic speech\nDiverse Distribution Shifts: The dataset spans 7 key distribution shifts, including:  \n📖 Reading Style  \n🎙️ Podcast  \n🎥 YouTube  \n🗣️ Languages (Three different languages)  \n🌎 Demographics (including variations in age, accent, and gender)\n\n\nMultiple Speech Generation Systems: Includes data synthesized from various TTS… See the full description on the dataset page: https://huggingface.co/datasets/ash56/ShiftySpeech.","alternateName":["ash56/ShiftySpeech"],"creator":{"@type":"Person","name":"Ashi Garg","url":"https://huggingface.co/ash56"},"keywords":["English","Chinese","Japanese","apache-2.0","1M - 10M","webdataset","Audio","Text","Datasets","WebDataset","Croissant","arxiv:2502.05674","🇺🇸 Region: US","audio","synthetic-speech-detection","deepfake","deepfake-audio","security","voice-spoofing","anti-spoofing"],"license":"https://choosealicense.com/licenses/apache-2.0/","url":"https://huggingface.co/datasets/ash56/ShiftySpeech"}