KodCode-V1 is the largest fully-synthetic open-source dataset providing verifiable solutions and tests for coding tasks.

KodCode
community
Project Website | 📄 Technical Report | 💾 Github Repo | 🤗 KodCode-V1 (For RL) | 🤗 KodCode-V1-SFT-R1 (for SFT)\n
","classNames":"hf-sanitized hf-sanitized-o3r0_J3KhAMSlQSuDWXP1"},"users":[{"_id":"653df1323479e9ebbe3eb6cc","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/653df1323479e9ebbe3eb6cc/K_g-r1iMRNKj99LXPuYF3.jpeg","isPro":true,"fullname":"Zhangchen Xu","user":"zhangchenxu","type":"user"},{"_id":"637c88b6d55081513c5690d8","avatarUrl":"/avatars/6766e23ebf46b46d6c8b48351c571907.svg","isPro":false,"fullname":"Yang Liu","user":"nlpyang","type":"user"},{"_id":"605e8dfd5abeb13e714c4c18","avatarUrl":"/avatars/bc27a0ed17b2bd4311e89d3028fa327b.svg","isPro":false,"fullname":"yueqin yin","user":"yyqoni","type":"user"}],"collections":[{"slug":"KodCode/kodcode-v1-67c28236e5911f17dbe1769a","title":"KodCode-V1","description":"KodCode-V1 is the largest fully-synthetic open-source dataset providing verifiable solutions and tests for coding tasks.","gating":false,"lastUpdated":"2025-03-06T20:50:21.396Z","owner":{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/653df1323479e9ebbe3eb6cc/PXYB67R9dgSLPARIhySGr.jpeg","fullname":"KodCode","name":"KodCode","type":"org","isHf":false,"isMod":false,"isEnterprise":false,"followerCount":12},"items":[{"_id":"67c2823ca237c1ac224d9d38","position":0,"type":"dataset","note":{"html":"For RL.","text":"For RL."},"author":"KodCode","downloads":3021,"gated":false,"id":"KodCode/KodCode-V1","lastModified":"2025-03-09T20:50:44.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":446878,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":64,"isLikedByUser":false},{"_id":"67c2aacb6748b10a82a92538","position":2,"type":"dataset","note":{"html":"for SFT.","text":"for 
SFT."},"author":"KodCode","downloads":3308,"gated":false,"id":"KodCode/KodCode-V1-SFT-R1","lastModified":"2025-03-09T20:53:53.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":442933,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":17,"isLikedByUser":false},{"_id":"67ca0a8d3943728b3be5d134","position":3,"type":"paper","id":"2503.02951","title":"KodCode: A Diverse, Challenging, and Verifiable Synthetic Dataset for\n Coding","thumbnailUrl":"https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2503.02951.png","upvotes":26,"publishedAt":"2025-03-04T19:17:36.000Z","isUpvotedByUser":false}],"position":0,"theme":"orange","private":false,"shareUrl":"https://huggingface.co/collections/KodCode/kodcode-v1-67c28236e5911f17dbe1769a","upvotes":2,"isUpvotedByUser":false}],"datasets":[{"author":"KodCode","downloads":3308,"gated":false,"id":"KodCode/KodCode-V1-SFT-R1","lastModified":"2025-03-09T20:53:53.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":442933,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":17,"isLikedByUser":false},{"author":"KodCode","downloads":3021,"gated":false,"id":"KodCode/KodCode-V1","lastModified":"2025-03-09T20:50:44.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":446878,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":64,"isLikedByUser":false}],"models":[],"spaces":[],"repoFilterModels":{"sortKey":"modified"},"repoFilterDatasets":{"sortKey":"modified"},"repoFilterSpaces":{"sortKey":"modified"},"lastOrgActivities":[{"time":"2025-03-09T20:53:55.933Z","user":"zhangchenxu","userAvatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/653df1323479e9ebbe3eb6cc/K_g-r1iMRNKj99LXPuYF3.jpeg","orgAvatarUrl":"http
s://cdn-avatars.huggingface.co/v1/production/uploads/653df1323479e9ebbe3eb6cc/PXYB67R9dgSLPARIhySGr.jpeg","type":"update","repoData":{"author":"KodCode","downloads":3308,"gated":false,"id":"KodCode/KodCode-V1-SFT-R1","lastModified":"2025-03-09T20:53:53.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":442933,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":17,"isLikedByUser":false},"repoId":"KodCode/KodCode-V1-SFT-R1","repoType":"dataset","org":"KodCode"},{"time":"2025-03-09T20:52:38.280Z","user":"zhangchenxu","userAvatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/653df1323479e9ebbe3eb6cc/K_g-r1iMRNKj99LXPuYF3.jpeg","org":"KodCode","orgAvatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/653df1323479e9ebbe3eb6cc/PXYB67R9dgSLPARIhySGr.jpeg","type":"discussion","discussionData":{"num":2,"author":{"_id":"5f1158120c833276f61f1a84","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1608042047613-5f1158120c833276f61f1a84.jpeg","fullname":"Niels Rogge","name":"nielsr","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":787},"repo":{"name":"KodCode/KodCode-V1-SFT-R1","type":"dataset"},"title":"Add coding task category to KodCode dataset 
card","status":"merged","createdAt":"2025-03-09T20:39:28.000Z","isPullRequest":true,"numComments":2,"pinned":false,"repoOwner":{"name":"KodCode","isParticipating":true,"type":"org","isDiscussionAuthor":false}},"repoId":"KodCode/KodCode-V1-SFT-R1","repoType":"dataset","eventId":"67cdff96235c7677a4f883e1"},{"time":"2025-03-09T20:51:52.606Z","user":"zhangchenxu","userAvatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/653df1323479e9ebbe3eb6cc/K_g-r1iMRNKj99LXPuYF3.jpeg","org":"KodCode","orgAvatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/653df1323479e9ebbe3eb6cc/PXYB67R9dgSLPARIhySGr.jpeg","type":"discussion","discussionData":{"num":2,"author":{"_id":"5f1158120c833276f61f1a84","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1608042047613-5f1158120c833276f61f1a84.jpeg","fullname":"Niels Rogge","name":"nielsr","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":787},"repo":{"name":"KodCode/KodCode-V1","type":"dataset"},"title":"Update license to CC BY-NC 4.0","status":"merged","createdAt":"2025-03-09T20:38:42.000Z","isPullRequest":true,"numComments":2,"pinned":false,"repoOwner":{"name":"KodCode","isParticipating":true,"type":"org","isDiscussionAuthor":false}},"repoId":"KodCode/KodCode-V1","repoType":"dataset","eventId":"67cdff68547e3ec05ea1371e"}],"acceptLanguages":["en","*"],"blogPosts":[]}">
AI & ML interests
Better coding data for all 🧡
Recent Activity
View all activity
Organization Card
🐱 KodCode: A Diverse, Challenging, and Verifiable Synthetic Dataset for Coding
KodCode is the largest fully-synthetic open-source dataset providing verifiable solutions and tests for coding tasks. It contains 12 distinct subsets spanning various domains (from algorithmic to package-specific knowledge) and difficulty levels (from basic coding exercises to interview and competitive programming challenges). KodCode is designed for both supervised fine-tuning (SFT) and RL tuning.
🕸️ Project Website | 📄 Technical Report | 💾 Github Repo | 🤗 KodCode-V1 (For RL) | 🤗 KodCode-V1-SFT-R1 (for SFT)
Collections
1
models
None public yet