id stringlengths 4 123 | downloads int64 0 3.44M | downloadsAllTime int64 0 143M | likes int64 0 9.75k | tags listlengths 1 7.92k | organization stringlengths 2 42 | has_audio bool 2
classes | has_speech bool 2
classes | has_music bool 2
classes | has_robot bool 2
classes | has_bio bool 2
classes | has_med bool 2
classes | has_series bool 2
classes | has_video bool 2
classes | has_image bool 2
classes | has_text bool 2
classes | has_science bool 2
classes | is_biomed bool 2
classes | data_download_timestamp timestamp[us, tz=UTC]date 2026-07-04 05:40:15 2026-07-04 05:40:15 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Glint-Research/Fable-5-traces | 51,152 | 51,152 | 506 | [
"task_categories:text-generation",
"annotations_creators:machine-generated",
"language:en",
"license:agpl-3.0",
"size_categories:1K<n<10K",
"format:json",
"format:agent-traces",
"modality:tabular",
"modality:text",
"library:datasets",
"library:dask",
"library:polars",
"library:mlcroissant",
... | Glint-Research | false | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
mlabonne/open-perfectblend | 3,045 | 14,746 | 138 | [
"license:apache-2.0",
"size_categories:1M<n<10M",
"format:parquet",
"modality:text",
"library:datasets",
"library:dask",
"library:mlcroissant",
"library:polars",
"arxiv:2409.20370",
"region:us"
] | mlabonne | false | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
armand0e/claude-fable-5-claude-code | 15,763 | 15,763 | 266 | [
"task_categories:text-generation",
"size_categories:n<1K",
"format:json",
"format:agent-traces",
"modality:tabular",
"modality:text",
"library:datasets",
"library:dask",
"library:polars",
"library:mlcroissant",
"region:us",
"agent-traces",
"format:agent-traces",
"claude",
"distillation",... | armand0e | false | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
bcbl190626/SpanishBCBL | 3,686 | 3,686 | 48 | [
"task_categories:other",
"language:es",
"license:cc-by-nc-4.0",
"arxiv:2502.07429",
"region:us",
"neuroscience",
"meg",
"eeg",
"brain-computer-interface",
"bci",
"brain-to-text",
"typing",
"motor",
"electrophysiology"
] | bcbl190626 | false | false | false | false | false | false | false | false | false | true | true | false | 2026-07-04T05:40:15.973000 |
markov-ai/gaming-500-hours | 8,696 | 8,696 | 44 | [
"size_categories:n<1K",
"format:json",
"modality:tabular",
"modality:text",
"modality:video",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"region:us"
] | markov-ai | false | false | false | false | false | false | false | true | false | true | false | false | 2026-07-04T05:40:15.973000 |
Crownelius/Complete-FABLE.5-traces-2M | 4,300 | 4,300 | 56 | [
"task_categories:text-generation",
"task_ids:language-modeling",
"annotations_creators:machine-generated",
"language_creators:found",
"language_creators:machine-generated",
"multilinguality:monolingual",
"language:en",
"license:mit",
"size_categories:1M<n<10M",
"format:parquet",
"modality:tabula... | Crownelius | false | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
scholarweave/arxiv-latex | 17,543 | 17,543 | 52 | [
"task_categories:text-generation",
"task_categories:feature-extraction",
"language:en",
"license:other",
"size_categories:1M<n<10M",
"modality:text",
"region:us",
"science",
"arxiv",
"latex",
"academic"
] | scholarweave | false | false | false | false | false | false | false | false | false | true | true | false | 2026-07-04T05:40:15.973000 |
LiquidAI/ifstruct-v1.0 | 114 | 114 | 33 | [
"benchmark:official",
"benchmark:eval-yaml",
"task_categories:text-generation",
"language:en",
"license:apache-2.0",
"size_categories:1K<n<10K",
"format:parquet",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"region:us",
"structured-output... | LiquidAI | false | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
Qwen/AgentWorldBench | 1,684 | 1,684 | 62 | [
"task_categories:text-generation",
"language:en",
"license:apache-2.0",
"size_categories:1K<n<10K",
"format:json",
"modality:tabular",
"modality:text",
"library:datasets",
"library:dask",
"library:polars",
"library:mlcroissant",
"arxiv:2606.24597",
"region:us",
"world-model",
"agent",
... | Qwen | false | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
LocalLaws/LOCUS-v1 | 2,005 | 2,677 | 86 | [
"task_categories:text-classification",
"language:en",
"license:cc-by-nc-4.0",
"size_categories:1M<n<10M",
"format:parquet",
"format:optimized-parquet",
"modality:tabular",
"modality:text",
"library:datasets",
"library:dask",
"library:polars",
"library:mlcroissant",
"arxiv:2606.19334",
"reg... | LocalLaws | false | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
WithinUsAI/claude_mythos_distilled_25k | 3,828 | 4,433 | 139 | [
"language:en",
"license:apache-2.0",
"size_categories:10K<n<100K",
"format:json",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"region:us",
"synthetic",
"claude",
"mythos",
"distillation",
"cybersecurity",
"coding",
"reasoning",
"a... | WithinUsAI | false | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
lordx64/agentic-distill-fable-5-sft | 1,550 | 1,550 | 55 | [
"task_categories:text-generation",
"language:en",
"license:agpl-3.0",
"size_categories:1K<n<10K",
"format:parquet",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"region:us",
"agentic",
"chain-of-thought",
"distillation",
"claude",
"cla... | lordx64 | false | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
ginigen-ai/Metacognition-Bench | 140 | 140 | 22 | [
"task_categories:text-generation",
"task_categories:question-answering",
"language:en",
"license:apache-2.0",
"size_categories:n<1K",
"region:us",
"metacognition",
"self-correction",
"hallucination-detection",
"reasoning",
"benchmark",
"trap-escape",
"error-recovery",
"metacognition-adapte... | ginigen-ai | false | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
openai/gsm8k | 934,366 | 13,056,002 | 1,418 | [
"benchmark:official",
"benchmark:eval-yaml",
"task_categories:text-generation",
"annotations_creators:crowdsourced",
"language_creators:crowdsourced",
"multilinguality:monolingual",
"source_datasets:original",
"language:en",
"license:mit",
"size_categories:10K<n<100K",
"format:parquet",
"modal... | openai | false | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
XDOF/ABC-130k | 572,683 | 572,683 | 71 | [
"task_categories:robotics",
"language:en",
"license:apache-2.0",
"size_categories:n>1T",
"arxiv:2606.27375",
"region:us",
"robotics",
"manipulation",
"imitation-learning",
"bimanual",
"teleoperation",
"mcap"
] | XDOF | false | false | false | true | false | false | false | false | false | false | false | false | 2026-07-04T05:40:15.973000 |
Rapidata/svg-benchmark | 872 | 872 | 26 | [
"task_categories:text-to-image",
"task_categories:image-classification",
"task_categories:reinforcement-learning",
"language:en",
"license:cc-by-4.0",
"size_categories:100K<n<1M",
"format:parquet",
"format:optimized-parquet",
"modality:image",
"modality:text",
"library:datasets",
"library:dask... | Rapidata | false | false | false | false | false | false | false | false | true | true | false | false | 2026-07-04T05:40:15.973000 |
ByteDance-Seed/EdgeBench | 519 | 519 | 13 | [
"task_categories:text-generation",
"language:en",
"license:cc-by-4.0",
"size_categories:n<1K",
"region:us",
"benchmark",
"code-agents",
"evaluation",
"long-horizon"
] | ByteDance-Seed | false | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
BitRobot/HIW-500 | 67,892 | 67,892 | 39 | [
"language:en",
"license:cc-by-4.0",
"region:us",
"robotics",
"humanoid"
] | BitRobot | false | false | false | true | false | false | false | false | false | false | false | false | 2026-07-04T05:40:15.973000 |
PawanKrd/claude-fable-5-code | 860 | 860 | 31 | [
"task_categories:text-generation",
"language:en",
"size_categories:n<1K",
"format:json",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"region:us",
"code",
"claude",
"fable-5"
] | PawanKrd | false | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
DavydenkoGr/AFTER | 1,174 | 1,174 | 13 | [
"language:en",
"license:apache-2.0",
"arxiv:2606.23127",
"region:us",
"benchmark",
"agents",
"skill-evolution",
"evaluation",
"software-engineering",
"data-science",
"data-engineering",
"infrastructure",
"generative-ai",
"project-management"
] | DavydenkoGr | false | false | false | false | false | false | false | false | false | false | true | false | 2026-07-04T05:40:15.973000 |
allenai/olmOCR-bench | 7,828 | 53,267 | 258 | [
"benchmark:official",
"benchmark:eval-yaml",
"language:en",
"license:odc-by",
"size_categories:1K<n<10K",
"modality:document",
"modality:text",
"arxiv:2502.18443",
"region:us",
"text"
] | allenai | false | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
angrygiraffe/claude-opus-4.6-4.7-reasoning-8.7k | 8,565 | 17,152 | 427 | [
"task_categories:text-generation",
"task_categories:question-answering",
"language:en",
"license:apache-2.0",
"size_categories:10K<n<100K",
"format:json",
"modality:text",
"library:datasets",
"library:dask",
"library:mlcroissant",
"region:us",
"sft",
"chain-of-thought",
"coding",
"math",... | angrygiraffe | false | false | false | false | false | false | false | false | false | true | true | false | 2026-07-04T05:40:15.973000 |
RekaAI/CS2-10k | 56,741 | 56,741 | 13 | [
"task_categories:other",
"license:cc-by-nc-4.0",
"size_categories:100K<n<1M",
"modality:video",
"library:webdataset",
"region:us",
"counter-strike",
"cs2",
"gaming",
"egocentric",
"first-person",
"video",
"world-models",
"imitation-learning",
"action-prediction",
"webdataset"
] | RekaAI | false | false | false | false | false | false | false | true | false | false | false | false | 2026-07-04T05:40:15.973000 |
prathoshap/vagdhenu-data | 2,174 | 2,174 | 11 | [
"task_categories:text-to-speech",
"language:sa",
"license:cc-by-4.0",
"size_categories:1K<n<10K",
"format:audiofolder",
"modality:audio",
"modality:text",
"library:datasets",
"library:mlcroissant",
"region:us"
] | prathoshap | true | true | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
nvidia/Open-SWE-Traces | 4,730 | 4,748 | 34 | [
"license:cc-by-4.0",
"size_categories:100K<n<1M",
"format:parquet",
"modality:text",
"library:datasets",
"library:dask",
"library:polars",
"library:mlcroissant",
"arxiv:2606.16038",
"region:us",
"code",
"synthetic",
"tools",
"agents",
"software"
] | nvidia | false | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
CodeDevX/Vibe-Coding-Instruct | 2,594 | 2,594 | 176 | [
"task_categories:text-generation",
"language:en",
"license:apache-2.0",
"size_categories:1M<n<10M",
"format:json",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"region:us",
"custom",
"vibecodinginstruct"
] | CodeDevX | false | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
AletheiaResearch/GLM-5.2-Agent | 1,472 | 1,472 | 26 | [
"task_categories:text-generation",
"size_categories:n<1K",
"format:json",
"format:agent-traces",
"modality:tabular",
"modality:text",
"library:datasets",
"library:dask",
"library:polars",
"library:mlcroissant",
"region:eu",
"agent-traces",
"format:agent-traces",
"pi",
"distillation",
"... | AletheiaResearch | false | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
macrodata/WGO-Bench | 814 | 814 | 10 | [
"task_categories:robotics",
"task_categories:video-classification",
"language:en",
"license:cc-by-nc-sa-4.0",
"size_categories:n<1K",
"format:parquet",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"region:us",
"temporal-segmentation",
"sub... | macrodata | false | false | false | true | false | false | false | true | false | true | false | false | 2026-07-04T05:40:15.973000 |
nvidia/PhysicalAI-Autonomous-Vehicles | 207,013 | 2,571,417 | 933 | [
"license:other",
"region:us"
] | nvidia | false | false | false | false | false | false | false | false | false | false | false | false | 2026-07-04T05:40:15.973000 |
xlangai/osworld_v2_tasks | 985 | 998 | 12 | [
"license:apache-2.0",
"size_categories:n<1K",
"format:json",
"modality:tabular",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"region:us"
] | xlangai | false | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
sbintuitions/joyo-kanji-yomi-benchmark | 55 | 55 | 9 | [
"task_categories:text-to-speech",
"language:ja",
"license:mit",
"size_categories:10K<n<100K",
"format:json",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"arxiv:2606.25369",
"region:us",
"tts-evaluation",
"japanese",
"kanji",
"chinese-... | sbintuitions | false | true | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
Voxel51/SceneFun3D | 1,363 | 1,363 | 9 | [
"task_categories:object-detection",
"annotations_creators:expert-generated",
"annotations_creators:machine-generated",
"language:en",
"license:cc-by-nc-sa-4.0",
"size_categories:n<1K",
"modality:video",
"modality:3d",
"library:fiftyone",
"region:us",
"fiftyone",
"3d",
"point-cloud",
"fo3d"... | Voxel51 | false | false | false | true | false | false | false | true | false | false | false | false | 2026-07-04T05:40:15.973000 |
Anthropic/hh-rlhf | 28,650 | 1,938,361 | 1,808 | [
"license:mit",
"size_categories:100K<n<1M",
"format:json",
"modality:text",
"library:datasets",
"library:dask",
"library:mlcroissant",
"library:polars",
"arxiv:2204.05862",
"region:us",
"human-feedback"
] | Anthropic | false | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
HuggingFaceFW/fineweb | 317,272 | 8,692,067 | 2,913 | [
"task_categories:text-generation",
"language:en",
"license:odc-by",
"size_categories:10B<n<100B",
"modality:tabular",
"modality:text",
"arxiv:2306.01116",
"arxiv:2109.07445",
"arxiv:2406.17557",
"doi:10.57967/hf/2493",
"region:us"
] | HuggingFaceFW | false | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
google/WaxalNLP | 32,148 | 131,479 | 243 | [
"task_categories:automatic-speech-recognition",
"task_categories:text-to-speech",
"language_creators:creator_1",
"multilinguality:multilingual",
"source_datasets:UGSpeechData",
"source_datasets:DigitalUmuganda/AfriVoice",
"source_datasets:original",
"language:ach",
"language:aka",
"language:amh",
... | google | true | true | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
uw-math-ai/math-graph | 110 | 239 | 8 | [
"task_categories:text-retrieval",
"task_categories:feature-extraction",
"language:en",
"license:cc-by-4.0",
"size_categories:10M<n<100M",
"format:csv",
"modality:tabular",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"arxiv:2606.25363",
"r... | uw-math-ai | false | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
HuggingFaceFW/fineweb-edu | 384,321 | 7,819,755 | 1,172 | [
"task_categories:text-generation",
"language:en",
"license:odc-by",
"size_categories:1B<n<10B",
"format:parquet",
"modality:tabular",
"modality:text",
"library:datasets",
"library:dask",
"library:polars",
"library:mlcroissant",
"arxiv:2406.17557",
"arxiv:2404.14219",
"arxiv:2401.10020",
... | HuggingFaceFW | false | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
cais/hle | 28,280 | 363,142 | 850 | [
"benchmark:official",
"license:mit",
"size_categories:1K<n<10K",
"format:parquet",
"modality:image",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"region:us"
] | cais | false | false | false | false | false | false | false | false | true | true | false | false | 2026-07-04T05:40:15.973000 |
badlogicgames/pi-mono | 2,367 | 25,328 | 174 | [
"task_categories:text-generation",
"language:en",
"language:code",
"license:other",
"region:us",
"agent-traces",
"coding-agent",
"pi-share-hf"
] | badlogicgames | false | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
kelexine/fable-5-sft-traces | 766 | 766 | 12 | [
"task_categories:text-generation",
"language:en",
"license:agpl-3.0",
"size_categories:1K<n<10K",
"format:parquet",
"format:optimized-parquet",
"modality:tabular",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"region:us",
"agentic",
"rea... | kelexine | false | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
OpenOneRec/Explorer_LLM_Rec_Competition | 11,897 | 11,897 | 8 | [
"region:us"
] | OpenOneRec | false | false | false | false | false | false | false | false | false | false | false | false | 2026-07-04T05:40:15.973000 |
wikimedia/wikipedia | 180,998 | 2,500,885 | 1,261 | [
"task_categories:text-generation",
"task_categories:fill-mask",
"task_ids:language-modeling",
"task_ids:masked-language-modeling",
"language:ab",
"language:ace",
"language:ady",
"language:af",
"language:alt",
"language:am",
"language:ami",
"language:an",
"language:ang",
"language:anp",
"... | wikimedia | false | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
roneneldan/TinyStories | 81,467 | 1,515,983 | 1,049 | [
"task_categories:text-generation",
"language:en",
"license:cdla-sharing-1.0",
"size_categories:1M<n<10M",
"format:parquet",
"modality:text",
"library:datasets",
"library:dask",
"library:polars",
"library:mlcroissant",
"arxiv:2305.07759",
"region:us"
] | roneneldan | false | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
Lichess/chess-puzzles | 4,350 | 17,438 | 35 | [
"license:cc0-1.0",
"size_categories:1M<n<10M",
"format:parquet",
"format:optimized-parquet",
"modality:tabular",
"modality:text",
"library:datasets",
"library:dask",
"library:polars",
"library:mlcroissant",
"region:us",
"chess",
"lichess",
"puzzles"
] | Lichess | false | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
nvidia/Nemotron-Personas-USA | 13,440 | 146,712 | 331 | [
"task_categories:text-generation",
"language:en",
"license:cc-by-4.0",
"size_categories:1M<n<10M",
"format:parquet",
"modality:text",
"library:datasets",
"library:dask",
"library:mlcroissant",
"library:polars",
"library:datadesigner",
"region:us",
"synthetic",
"personas",
"NVIDIA",
"da... | nvidia | false | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
ruggsea/infini-news-corpus | 26,305 | 124,676 | 13 | [
"task_categories:text-generation",
"task_categories:text-classification",
"task_categories:text-retrieval",
"annotations_creators:machine-generated",
"multilinguality:multilingual",
"source_datasets:original",
"language:eng",
"language:spa",
"language:rus",
"language:deu",
"language:ita",
"lan... | ruggsea | false | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
Ujjwal-Tyagi/ai-ml-foundations-book-collection | 3,269 | 7,353 | 64 | [
"task_categories:text-generation",
"task_categories:text-classification",
"task_categories:question-answering",
"task_categories:summarization",
"task_categories:sentence-similarity",
"task_categories:feature-extraction",
"task_categories:zero-shot-classification",
"task_categories:text-retrieval",
... | Ujjwal-Tyagi | false | false | false | false | false | false | false | false | true | true | true | false | 2026-07-04T05:40:15.973000 |
Meddies/meddies-persona-vie | 1,288 | 1,705 | 14 | [
"task_categories:other",
"annotations_creators:machine-generated",
"language_creators:machine-generated",
"multilinguality:monolingual",
"source_datasets:HoangHa/meddies-persona",
"language:vi",
"license:cc-by-nc-4.0",
"size_categories:100K<n<1M",
"format:parquet",
"modality:text",
"library:data... | Meddies | false | false | false | false | false | true | false | false | false | true | false | true | 2026-07-04T05:40:15.973000 |
agents-last-exam/agents-last-exam | 8,292 | 8,336 | 197 | [
"language:en",
"license:cc-by-4.0",
"size_categories:n<1K",
"format:parquet",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"region:us",
"computer-use-agents",
"agent-benchmark",
"benchmark",
"evaluation"
] | agents-last-exam | false | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
ArtificialAnalysis/ITBench-AA | 47,632 | 47,635 | 46 | [
"task_categories:question-answering",
"language:en",
"license:cc-by-4.0",
"size_categories:n<1K",
"format:json",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"region:us",
"sre",
"kubernetes",
"root-cause-analysis",
"agents",
"it-operat... | ArtificialAnalysis | false | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
open-thoughts/OpenThoughts-Agent-SFT-100K | 725 | 725 | 15 | [
"language:en",
"license:apache-2.0",
"size_categories:10K<n<100K",
"format:parquet",
"format:optimized-parquet",
"modality:text",
"library:datasets",
"library:dask",
"library:polars",
"library:mlcroissant",
"region:us",
"agents",
"terminal",
"code",
"software-engineering",
"sft"
] | open-thoughts | false | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
BAAI-Agents/SWITCH | 2,287 | 2,287 | 6 | [
"task_categories:visual-question-answering",
"task_categories:text-generation",
"task_categories:video-to-video",
"task_categories:image-to-text",
"language:en",
"license:cc-by-nc-4.0",
"size_categories:1K<n<10K",
"format:json",
"modality:image",
"modality:text",
"modality:video",
"library:dat... | BAAI-Agents | false | false | false | false | false | false | false | true | true | true | false | false | 2026-07-04T05:40:15.973000 |
hotdogs/uka-fable-reasoning | 489 | 489 | 12 | [
"language:en",
"license:agpl-3.0",
"size_categories:10K<n<100K",
"modality:text",
"region:us",
"reasoning",
"agentic",
"sft",
"chain-of-thought",
"multi-turn",
"tool-use",
"chatml"
] | hotdogs | false | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
fka/prompts.chat | 33,414 | 618,255 | 9,752 | [
"task_categories:question-answering",
"task_categories:text-generation",
"license:cc0-1.0",
"size_categories:1K<n<10K",
"format:csv",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"region:us",
"ChatGPT",
"prompts",
"AI",
"GPT",
"Claude"... | fka | false | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
OpenAssistant/oasst1 | 15,576 | 413,426 | 1,540 | [
"language:en",
"language:es",
"language:ru",
"language:de",
"language:pl",
"language:th",
"language:vi",
"language:sv",
"language:bn",
"language:da",
"language:he",
"language:it",
"language:fa",
"language:sk",
"language:id",
"language:nb",
"language:el",
"language:nl",
"language:... | OpenAssistant | false | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
lmsys/lmsys-chat-1m | 5,981 | 333,053 | 930 | [
"size_categories:1M<n<10M",
"format:parquet",
"modality:text",
"library:datasets",
"library:dask",
"library:polars",
"library:mlcroissant",
"arxiv:2309.11998",
"region:us"
] | lmsys | false | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
galileo-ai/ragbench | 6,593 | 95,766 | 120 | [
"license:cc-by-4.0",
"size_categories:10K<n<100K",
"format:parquet",
"modality:tabular",
"modality:text",
"library:datasets",
"library:pandas",
"library:mlcroissant",
"library:polars",
"region:us"
] | galileo-ai | false | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
ai4bharat/IndicVoices | 13,249 | 133,380 | 74 | [
"license:cc-by-4.0",
"size_categories:1M<n<10M",
"format:parquet",
"format:optimized-parquet",
"modality:audio",
"modality:text",
"library:datasets",
"library:dask",
"library:polars",
"library:mlcroissant",
"arxiv:2403.01926",
"region:us"
] | ai4bharat | true | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
SWE-bench/SWE-bench_Verified | 71,328 | 1,071,619 | 101 | [
"benchmark:official",
"benchmark:eval-yaml",
"size_categories:n<1K",
"format:parquet",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"region:us"
] | SWE-bench | false | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
nvidia/PhysicalAI-Autonomous-Vehicles-NuRec | 22,353 | 124,205 | 193 | [
"license:other",
"region:us"
] | nvidia | false | false | false | false | false | false | false | false | false | false | false | false | 2026-07-04T05:40:15.973000 |
thedeoxen/refcontrol-flux-kontext-dataset | 77 | 5,111 | 13 | [
"license:apache-2.0",
"size_categories:1K<n<10K",
"format:imagefolder",
"modality:image",
"library:datasets",
"library:mlcroissant",
"region:us"
] | thedeoxen | false | false | false | false | false | false | false | false | true | false | false | false | 2026-07-04T05:40:15.973000 |
larlarHF/TikTok-10M | 122 | 349 | 5 | [
"license:other",
"size_categories:10M<n<100M",
"format:parquet",
"modality:image",
"modality:tabular",
"modality:text",
"modality:video",
"library:datasets",
"library:dask",
"library:polars",
"library:mlcroissant",
"region:us",
"dataset",
"video",
"social-media",
"tiktok",
"multimoda... | larlarHF | true | false | false | false | false | false | false | true | true | true | false | false | 2026-07-04T05:40:15.973000 |
eddmpython/dartlab-data | 60,774 | 139,329 | 8 | [
"task_categories:table-question-answering",
"task_categories:text-classification",
"language:ko",
"language:en",
"license:apache-2.0",
"size_categories:1M<n<10M",
"region:us",
"finance",
"disclosure",
"dart",
"edgar",
"sec",
"xbrl",
"korea",
"financial-statements",
"corporate-filings",... | eddmpython | false | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
nvidia/Nemotron-Personas-Korea | 13,125 | 106,790 | 515 | [
"task_categories:text-generation",
"language:ko",
"license:cc-by-4.0",
"size_categories:1M<n<10M",
"format:parquet",
"format:optimized-parquet",
"modality:image",
"modality:text",
"library:datasets",
"library:dask",
"library:polars",
"library:mlcroissant",
"library:datadesigner",
"region:u... | nvidia | false | false | false | false | false | false | false | false | true | true | false | false | 2026-07-04T05:40:15.973000 |
WithinUsAI/GPT_5.5_Distilled | 1,040 | 1,252 | 26 | [
"license:apache-2.0",
"size_categories:10K<n<100K",
"format:json",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"region:us"
] | WithinUsAI | false | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
11-47/claude_opus_4.8_max_thinking_5k_v2 | 602 | 602 | 6 | [
"size_categories:1K<n<10K",
"format:json",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"region:us"
] | 11-47 | false | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
anthonytec2/OctoSense | 18,150 | 18,150 | 10 | [
"task_categories:depth-estimation",
"task_categories:image-segmentation",
"task_categories:robotics",
"license:mit",
"size_categories:n<1K",
"format:parquet",
"format:optimized-parquet",
"modality:image",
"modality:text",
"modality:video",
"modality:geospatial",
"modality:3d",
"modality:time... | anthonytec2 | false | false | false | true | false | false | true | true | true | true | false | false | 2026-07-04T05:40:15.973000 |
ajibawa-2023/Shell-Code-Large | 393 | 393 | 19 | [
"task_categories:text-generation",
"language:en",
"license:mit",
"size_categories:100K<n<1M",
"format:json",
"modality:text",
"library:datasets",
"library:dask",
"library:polars",
"library:mlcroissant",
"region:us",
"Shell",
"Code",
"LLM",
"Training"
] | ajibawa-2023 | false | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
TonicAI/Privacy-Bench | 327 | 327 | 17 | [
"task_categories:token-classification",
"task_categories:text-generation",
"language:en",
"license:cc-by-4.0",
"size_categories:10K<n<100K",
"format:json",
"modality:text",
"library:datasets",
"library:dask",
"library:polars",
"library:mlcroissant",
"region:us",
"pii",
"de-identification",... | TonicAI | false | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
yigitekin/BeyondMasks | 473 | 473 | 5 | [
"license:cc-by-4.0",
"region:us"
] | yigitekin | false | false | false | false | false | false | false | false | false | false | false | false | 2026-07-04T05:40:15.973000 |
Arcadia-2026/razavi-bench | 254 | 254 | 5 | [
"task_categories:question-answering",
"task_categories:visual-question-answering",
"language:en",
"license:cc-by-4.0",
"size_categories:n<1K",
"format:json",
"modality:image",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"region:us",
"anal... | Arcadia-2026 | false | false | false | false | false | false | false | false | true | true | false | false | 2026-07-04T05:40:15.973000 |
qwedsacf/competition_math | 9,675 | 99,190 | 137 | [
"annotations_creators:expert-generated",
"language_creators:expert-generated",
"multilinguality:monolingual",
"source_datasets:original",
"language:en",
"license:mit",
"size_categories:10K<n<100K",
"format:parquet",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"li... | qwedsacf | false | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
gaia-benchmark/GAIA | 20,160 | 319,441 | 708 | [
"language:en",
"size_categories:n<1K",
"format:parquet",
"modality:audio",
"modality:document",
"modality:image",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"arxiv:2311.12983",
"region:us"
] | gaia-benchmark | true | false | false | false | false | false | false | false | true | true | false | false | 2026-07-04T05:40:15.973000 |
opendatalab/OmniDocBench | 19,791 | 160,940 | 96 | [
"size_categories:1K<n<10K",
"format:imagefolder",
"modality:image",
"library:datasets",
"library:mlcroissant",
"arxiv:2412.07626",
"region:us"
] | opendatalab | false | false | false | false | false | false | false | false | true | false | false | false | 2026-07-04T05:40:15.973000 |
FreedomIntelligence/medical-o1-reasoning-SFT | 10,648 | 186,466 | 1,134 | [
"task_categories:question-answering",
"task_categories:text-generation",
"language:en",
"language:zh",
"license:apache-2.0",
"size_categories:10K<n<100K",
"format:json",
"modality:text",
"library:datasets",
"library:pandas",
"library:mlcroissant",
"library:polars",
"arxiv:2412.18925",
"reg... | FreedomIntelligence | false | false | false | false | true | true | false | false | false | true | false | true | 2026-07-04T05:40:15.973000 |
nvidia/Aegis-AI-Content-Safety-Dataset-2.0 | 3,558 | 63,592 | 99 | [
"task_categories:text-classification",
"language:en",
"license:cc-by-4.0",
"size_categories:10K<n<100K",
"format:json",
"modality:text",
"library:datasets",
"library:pandas",
"library:mlcroissant",
"library:polars",
"region:us",
"safety",
"content moderation",
"LLM safety",
"toxicity det... | nvidia | false | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
MCAA1-MSU/anv_data_ke | 1,878 | 8,660 | 16 | [
"size_categories:100K<n<1M",
"modality:audio",
"modality:text",
"region:us"
] | MCAA1-MSU | true | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
ScaleAI/SWE-bench_Pro | 68,282 | 1,119,332 | 145 | [
"benchmark:official",
"benchmark:eval-yaml",
"size_categories:n<1K",
"format:parquet",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"region:us"
] | ScaleAI | false | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
ma-xu/fine-t2i | 28,561 | 203,291 | 113 | [
"task_categories:image-to-text",
"task_categories:text-to-image",
"language:en",
"license:apache-2.0",
"size_categories:100K<n<1M",
"format:webdataset",
"modality:image",
"modality:text",
"library:datasets",
"library:webdataset",
"library:mlcroissant",
"arxiv:2602.09439",
"region:us",
"tex... | ma-xu | false | false | false | false | false | false | false | false | true | true | false | false | 2026-07-04T05:40:15.973000 |
HuggingFaceFW/finephrase | 337,413 | 1,862,578 | 133 | [
"task_categories:text-generation",
"task_ids:language-modeling",
"annotations_creators:machine-generated",
"language_creators:found",
"source_datasets:HuggingFaceFW/fineweb-edu/sample-350BT",
"language:en",
"license:odc-by",
"size_categories:1B<n<10B",
"modality:tabular",
"modality:text",
"regio... | HuggingFaceFW | false | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
agibot-world/AgiBotWorld2026 | 34,775 | 114,216 | 47 | [
"task_categories:robotics",
"language:en",
"license:cc-by-nc-sa-4.0",
"size_categories:1K<n<10K",
"modality:image",
"modality:text",
"region:us",
"agibot",
"imitation-learning",
"embodied-ai",
"lerobot",
"real-world",
"dual-arm"
] | agibot-world | false | false | false | true | false | false | false | false | true | true | false | false | 2026-07-04T05:40:15.973000 |
queyuecanyang/MIRACLE | 1,615 | 1,641 | 4 | [
"task_categories:visual-question-answering",
"task_categories:image-to-text",
"language:en",
"license:cc-by-4.0",
"size_categories:1K<n<10K",
"modality:image",
"modality:tabular",
"modality:text",
"region:us",
"multimodal",
"benchmark",
"vision-language"
] | queyuecanyang | false | false | false | false | false | false | false | false | true | true | false | false | 2026-07-04T05:40:15.973000 |
nvidia/SWE-Zero-openhands-trajectories | 2,114 | 5,107 | 15 | [
"license:cc-by-4.0",
"size_categories:100K<n<1M",
"format:parquet",
"modality:text",
"library:datasets",
"library:dask",
"library:polars",
"library:mlcroissant",
"arxiv:2604.01496",
"region:us",
"code",
"synthetic",
"tools",
"agents",
"software"
] | nvidia | false | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
jasperai/monet | 211,619 | 515,038 | 139 | [
"task_categories:text-to-image",
"task_categories:image-feature-extraction",
"task_categories:zero-shot-image-classification",
"language:en",
"license:apache-2.0",
"size_categories:100M<n<1B",
"arxiv:2605.21272",
"region:us",
"text-to-image",
"image-text",
"multimodal",
"captioning",
"synthe... | jasperai | false | false | false | false | false | false | false | false | true | true | false | false | 2026-07-04T05:40:15.973000 |
ai4bharat/SpeechArenaBench | 184 | 280 | 6 | [
"language:bn",
"language:gu",
"language:hi",
"language:kn",
"language:ml",
"language:mr",
"language:or",
"language:ta",
"language:te",
"language:ur",
"license:mit",
"size_categories:100K<n<1M",
"format:parquet",
"format:optimized-parquet",
"modality:audio",
"modality:text",
"library:... | ai4bharat | true | true | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
openbmb/UltraData-SFT-2605 | 31,289 | 54,346 | 357 | [
"task_categories:text-generation",
"task_categories:question-answering",
"language:en",
"language:zh",
"license:apache-2.0",
"size_categories:10M<n<100M",
"modality:text",
"arxiv:2602.09003",
"region:us",
"llm",
"sft",
"supervised-fine-tuning",
"post-training",
"deep-thinking",
"reasonin... | openbmb | false | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
ai4privacy/pii-masking-openpii-1.5m | 1,921 | 2,108 | 13 | [
"task_categories:token-classification",
"task_categories:text-generation",
"source_datasets:extended",
"language:en",
"language:fr",
"language:de",
"language:es",
"language:it",
"language:nl",
"language:pt",
"language:bg",
"language:cs",
"language:da",
"language:el",
"language:et",
"la... | ai4privacy | false | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
BitRobot/HIW-500-LeRobot | 24,807 | 24,807 | 17 | [
"task_categories:robotics",
"language:en",
"license:cc-by-4.0",
"size_categories:10M<n<100M",
"format:parquet",
"modality:tabular",
"modality:text",
"modality:timeseries",
"modality:video",
"library:datasets",
"library:dask",
"library:polars",
"library:mlcroissant",
"library:lerobot",
"r... | BitRobot | false | false | false | true | false | false | true | true | false | true | false | false | 2026-07-04T05:40:15.973000 |
ratschlab/TCGA_virtual_spatial_transcriptomics_atlas | 316 | 316 | 5 | [
"language:en",
"license:cc-by-nc-sa-4.0",
"size_categories:1K<n<10K",
"modality:image",
"region:us",
"spatial-transcriptomics",
"histology",
"pathology",
"transcriptomics",
"machine-learning",
"TCGA",
"computational-pathology",
"foundation-model",
"multimodal",
"virtual-spatial-transcrip... | ratschlab | false | false | false | false | false | false | false | false | true | false | false | false | 2026-07-04T05:40:15.973000 |
MicroAtlas/MicroAtlas-2B | 679 | 679 | 5 | [
"region:us"
] | MicroAtlas | false | false | false | false | false | false | false | false | false | false | false | false | 2026-07-04T05:40:15.973000 |
agents-last-exam/agents-last-exam-data-archive | 205 | 205 | 9 | [
"language:en",
"license:cc-by-4.0",
"region:us",
"computer-use-agents",
"agent-benchmark",
"benchmark",
"evaluation"
] | agents-last-exam | false | false | false | false | false | false | false | false | false | false | false | false | 2026-07-04T05:40:15.973000 |
cfahlgren1/Fable-5-traces | 1,409 | 1,409 | 15 | [
"license:agpl-3.0",
"size_categories:n<1K",
"format:json",
"format:agent-traces",
"modality:tabular",
"modality:text",
"library:datasets",
"library:dask",
"library:polars",
"library:mlcroissant",
"region:us"
] | cfahlgren1 | false | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
Infatoshi/kernelbench-mega-traces | 523 | 523 | 10 | [
"license:mit",
"size_categories:n<1K",
"format:json",
"format:agent-traces",
"modality:tabular",
"modality:text",
"library:datasets",
"library:dask",
"library:polars",
"library:mlcroissant",
"region:us",
"agent-traces",
"claude",
"kernelbench",
"gpu-kernels"
] | Infatoshi | false | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
Goku-2M/GOKU-2M | 2,980 | 2,980 | 6 | [
"task_categories:text-to-video",
"task_categories:video-to-video",
"language:en",
"license:cc-by-nc-4.0",
"size_categories:1M<n<10M",
"modality:image",
"modality:video",
"arxiv:2606.30599",
"region:us",
"video-editing",
"instruction-based-editing",
"video"
] | Goku-2M | false | false | false | false | false | false | false | true | true | true | false | false | 2026-07-04T05:40:15.973000 |
hotdogs/uka-glm-5.2 | 370 | 370 | 5 | [
"task_categories:text-generation",
"task_ids:language-modeling",
"annotations_creators:machine-generated",
"language_creators:machine-generated",
"multilinguality:monolingual",
"source_datasets:DavidrPatton/Fable-5-GLM-5.2-Traces",
"source_datasets:AletheiaResearch/GLM-5.2-Agent",
"language:en",
"la... | hotdogs | false | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
nvidia/Cosmos3-DROID | 15,567 | 15,567 | 6 | [
"license:openmdw-1.1",
"size_categories:1K<n<10K",
"modality:video",
"library:datasets",
"library:mlcroissant",
"arxiv:2403.12945",
"region:us"
] | nvidia | false | false | false | false | false | false | false | true | false | false | false | false | 2026-07-04T05:40:15.973000 |
cais/mmlu | 427,487 | 42,140,118 | 780 | [
"task_categories:question-answering",
"task_ids:multiple-choice-qa",
"annotations_creators:no-annotation",
"language_creators:expert-generated",
"multilinguality:monolingual",
"source_datasets:original",
"language:en",
"license:mit",
"size_categories:100K<n<1M",
"format:parquet",
"modality:text"... | cais | false | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
coastalcph/lex_glue | 45,384 | 804,965 | 78 | [
"task_categories:question-answering",
"task_categories:text-classification",
"task_ids:multi-class-classification",
"task_ids:multi-label-classification",
"task_ids:multiple-choice-qa",
"task_ids:topic-classification",
"annotations_creators:found",
"language_creators:found",
"multilinguality:monolin... | coastalcph | false | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
ILSVRC/imagenet-1k | 106,990 | 2,085,347 | 848 | [
"task_categories:image-classification",
"task_ids:multi-class-image-classification",
"annotations_creators:crowdsourced",
"language_creators:crowdsourced",
"multilinguality:monolingual",
"source_datasets:original",
"language:en",
"license:other",
"size_categories:1M<n<10M",
"format:parquet",
"fo... | ILSVRC | false | false | false | false | false | false | false | false | true | false | false | false | 2026-07-04T05:40:15.973000 |
yahma/alpaca-cleaned | 26,379 | 1,018,545 | 849 | [
"task_categories:text-generation",
"language:en",
"license:cc-by-4.0",
"size_categories:10K<n<100K",
"format:json",
"modality:text",
"library:datasets",
"library:pandas",
"library:mlcroissant",
"library:polars",
"region:us",
"instruction-finetuning"
] | yahma | false | false | false | false | false | false | false | false | false | true | false | false | 2026-07-04T05:40:15.973000 |
End of preview. Expand in Data Studio
No dataset card yet
- Downloads last month
- 435