Skip to content

Build a Mini Git Clone

Git stores snapshots of files.

A small Git-like tool needs only a few core ideas:

repository
objects
hashes
commits
branches
checkout

In this project, we will build a tiny version of Git. It will not be compatible with real Git. The goal is to understand the storage model.

Our tool will support:

init
add
commit
log
checkout

The Repository Directory

A real Git repository stores data in .git.

Our mini version will store data in .mini-git.

project/
  file.txt
  .mini-git/
    objects/
    index
    HEAD

The objects directory stores file contents and commits.

The index file stores staged files.

The HEAD file stores the latest commit hash.

Initialize a Repository

The init command creates the repository structure.

/// Creates the `.mini-git` repository layout in the current working directory.
///
/// Makes `.mini-git/objects` (including missing parents) and ensures that
/// `.mini-git/HEAD` exists. Returns any filesystem error from creating the
/// directory or the file.
fn initRepo() !void {
    try std.fs.cwd().makePath(".mini-git/objects");

    // `truncate = false` keeps an existing HEAD intact, so running `init`
    // again inside a repository does not forget the latest commit. A newly
    // created HEAD is already empty, so nothing needs to be written.
    const head = try std.fs.cwd().createFile(".mini-git/HEAD", .{
        .truncate = false,
    });
    defer head.close();
}

This gives us a place to store objects.

Hashing Data

Git identifies objects by hash. We will do the same with SHA-256.

/// Computes the SHA-256 digest of `bytes` and writes the 32 raw digest bytes
/// into `out`.
fn hashBytes(bytes: []const u8, out: *[32]u8) void {
    var hasher = std.crypto.hash.sha2.Sha256.init(.{});
    hasher.update(bytes);
    hasher.final(out);
}

To use the hash as a filename, encode it as hex.

/// Encodes `hash` as 64 lowercase hexadecimal characters.
///
/// The encoded text is written into `buffer`, and the returned slice points
/// at `buffer`, so it is only valid while `buffer` is alive and unmodified.
fn hexHash(hash: [32]u8, buffer: *[64]u8) []const u8 {
    // `bytesToHex` takes (input, case) and returns the encoded array by
    // value; it has no output-buffer parameter, so copy the result into the
    // caller's buffer.
    buffer.* = std.fmt.bytesToHex(hash, .lower);
    return buffer;
}

A file object will be stored at:

.mini-git/objects/<hash>

Store an Object

/// Writes `data` to `.mini-git/objects/<hex hash of data>` and returns the
/// raw 32-byte hash.
///
/// The file is created (or overwritten) with exactly the bytes in `data`.
/// Returns any error from formatting the path or from the filesystem.
fn storeObject(data: []const u8) ![32]u8 {
    var digest: [32]u8 = undefined;
    hashBytes(data, &digest);

    // 18 bytes of prefix plus 64 hex characters fit in the 128-byte buffer.
    var hex_storage: [64]u8 = undefined;
    var path_storage: [128]u8 = undefined;
    const object_path = try std.fmt.bufPrint(
        &path_storage,
        ".mini-git/objects/{s}",
        .{hexHash(digest, &hex_storage)},
    );

    const object_file = try std.fs.cwd().createFile(object_path, .{});
    defer object_file.close();
    try object_file.writeAll(data);

    return digest;
}

This stores raw bytes under their hash.

If the same file content appears twice, it gets the same hash.

That is content-addressed storage.

The Index

Before committing, Git stages files.

Our index will be a plain text file:

path hash

Example:

hello.txt 2cf24dba...
main.zig b94d27b9...

The add command reads a file, stores its content as an object, and records the path and hash in .mini-git/index.

/// Stages the file at `path`: stores its contents as an object and appends a
/// `<path> <hex hash>` line to `.mini-git/index`.
///
/// The file is read with a 10 MiB limit passed to `readFileAlloc`. Existing
/// index entries are kept, so adding the same path twice produces two lines.
fn addFile(allocator: std.mem.Allocator, path: []const u8) !void {
    const max_file_size = 10 * 1024 * 1024;

    const contents = try std.fs.cwd().readFileAlloc(allocator, path, max_file_size);
    defer allocator.free(contents);

    const digest = try storeObject(contents);
    var hex_storage: [64]u8 = undefined;
    const hex = hexHash(digest, &hex_storage);

    // Open without truncating so earlier entries survive, then move to the
    // end of the file to append.
    const index_file = try std.fs.cwd().createFile(".mini-git/index", .{ .truncate = false });
    defer index_file.close();
    try index_file.seekFromEnd(0);

    const out = index_file.writer();
    try out.print("{s} {s}\n", .{ path, hex });
}

This is simple but imperfect. If you add the same path twice, the index will contain duplicates. A better version would replace old entries.

Commit Objects

A commit records:

parent commit hash
list of staged files
message

Our commit format will be plain text:

parent <hash>
message <message>

file <path> <hash>
file <path> <hash>

Create a commit by reading the index and HEAD:

/// Reads the whole file at `path` (relative to the current working directory)
/// into memory allocated from `allocator`, passing a 1 MiB size limit to
/// `readFileAlloc`. The caller owns the returned slice and must free it.
fn readSmallFile(allocator: std.mem.Allocator, path: []const u8) ![]u8 {
    const max_bytes: usize = 1024 * 1024;
    const dir = std.fs.cwd();
    return dir.readFileAlloc(allocator, path, max_bytes);
}
/// Creates a commit object from the current index and points HEAD at it.
///
/// The commit object is plain text:
///
///     parent <hash of previous commit, empty for the first commit>
///     message <message>
///
///     file <path> <hash>
///     file <path> <hash>
///
/// Each non-empty line of `.mini-git/index` (`<path> <hash>`) becomes one
/// `file` line. The object is stored under its hash, and the hex hash is
/// written to `.mini-git/HEAD`.
///
/// Returns an error if the index cannot be read, if HEAD cannot be read for a
/// reason other than not existing, or if storing the object or writing HEAD
/// fails.
fn commit(allocator: std.mem.Allocator, message: []const u8) !void {
    // A missing HEAD just means "no parent yet"; any other failure is a real
    // error and must not be silently turned into a parentless commit.
    const head_contents: ?[]u8 = readSmallFile(allocator, ".mini-git/HEAD") catch |err| switch (err) {
        error.FileNotFound => null,
        else => return err,
    };
    defer if (head_contents) |buf| allocator.free(buf);

    // Tolerate stray whitespace (e.g. a trailing newline from a hand-edited
    // HEAD) so the parent line stays a single clean line.
    const parent: []const u8 = if (head_contents) |buf|
        std.mem.trim(u8, buf, " \t\r\n")
    else
        "";

    const index = try readSmallFile(allocator, ".mini-git/index");
    defer allocator.free(index);

    var commit_text = std.ArrayList(u8).init(allocator);
    defer commit_text.deinit();
    const writer = commit_text.writer();

    try writer.print("parent {s}\nmessage {s}\n\n", .{ parent, message });

    // Index lines are "<path> <hash>"; commit entries must be prefixed with
    // "file " so that readers can tell them apart from the header lines.
    var entries = std.mem.tokenizeScalar(u8, index, '\n');
    while (entries.next()) |entry| {
        try writer.print("file {s}\n", .{entry});
    }

    const hash = try storeObject(commit_text.items);

    var hex_buffer: [64]u8 = undefined;
    const name = hexHash(hash, &hex_buffer);

    const head = try std.fs.cwd().createFile(".mini-git/HEAD", .{});
    defer head.close();

    try head.writeAll(name);
}

Now the latest commit is stored in HEAD.

Show the Log

The log starts at HEAD, reads the commit object, prints it, then follows the parent.

/// Loads the object stored at `.mini-git/objects/<hash>` into memory
/// allocated from `allocator`, passing a 1 MiB size limit to `readFileAlloc`.
/// The caller owns the returned slice and must free it.
fn readObject(allocator: std.mem.Allocator, hash: []const u8) ![]u8 {
    const max_object_size: usize = 1024 * 1024;

    const object_path = try std.fmt.allocPrint(allocator, ".mini-git/objects/{s}", .{hash});
    // The path is only needed for the read below.
    defer allocator.free(object_path);

    const contents = try std.fs.cwd().readFileAlloc(allocator, object_path, max_object_size);
    return contents;
}
/// Prints the commit history, starting at the commit named in
/// `.mini-git/HEAD` and following each commit's `parent` line.
///
/// For every commit, prints `commit <hash>` followed by the raw commit
/// object. The walk stops when HEAD is empty, when a commit's first line is
/// not a `parent ` line, or when the parent hash is empty.
///
/// Returns an error if HEAD or any commit object in the chain cannot be read.
fn log(allocator: std.mem.Allocator) !void {
    var current = try readSmallFile(allocator, ".mini-git/HEAD");
    defer allocator.free(current);

    while (current.len > 0) {
        const commit_data = try readObject(allocator, current);
        defer allocator.free(commit_data);

        std.debug.print("commit {s}\n{s}\n", .{ current, commit_data });

        const parent_line_end = std.mem.indexOfScalar(u8, commit_data, '\n') orelse break;
        const parent_line = commit_data[0..parent_line_end];

        if (!std.mem.startsWith(u8, parent_line, "parent ")) {
            break;
        }

        const parent = parent_line["parent ".len..];

        if (parent.len == 0) {
            break;
        }

        // Copy the parent hash BEFORE releasing `current`. `parent` points
        // into `commit_data`, which is freed at the end of this iteration,
        // and if the copy fails the deferred free above must still see a
        // live `current` (freeing first would cause a double free).
        const next = try allocator.dupe(u8, parent);
        allocator.free(current);
        current = next;
    }
}

This follows the commit chain backward.

Checkout

Checkout restores files from a commit.

The commit contains lines like:

file hello.txt 2cf24dba...

For each line, read the object and write it back to the working tree.

/// Restores the working-tree files recorded in the commit `commit_hash`.
///
/// Every line of the commit object that starts with `file ` is treated as
/// `file <path> <hash>`: the object `<hash>` is read and written to `<path>`,
/// replacing any existing file. Other lines are ignored.
///
/// Returns `error.InvalidCommit` if a `file` line has no path or no hash, and
/// any filesystem error from reading objects or writing files.
fn checkout(allocator: std.mem.Allocator, commit_hash: []const u8) !void {
    const commit_data = try readObject(allocator, commit_hash);
    defer allocator.free(commit_data);

    var lines = std.mem.splitScalar(u8, commit_data, '\n');

    while (lines.next()) |line| {
        if (!std.mem.startsWith(u8, line, "file ")) {
            continue;
        }

        // The hash never contains spaces, so split on the LAST space: this
        // keeps paths that themselves contain spaces in one piece.
        const entry = line["file ".len..];
        const separator = std.mem.lastIndexOfScalar(u8, entry, ' ') orelse return error.InvalidCommit;
        const path = entry[0..separator];
        const hash = entry[separator + 1 ..];

        if (path.len == 0 or hash.len == 0) {
            return error.InvalidCommit;
        }

        const data = try readObject(allocator, hash);
        defer allocator.free(data);

        // A tracked file may live in a directory that no longer exists in
        // the working tree; recreate it so createFile does not fail.
        if (std.fs.path.dirname(path)) |dir| {
            try std.fs.cwd().makePath(dir);
        }

        const file = try std.fs.cwd().createFile(path, .{});
        defer file.close();

        try file.writeAll(data);
    }
}

This restores the tracked files from that commit.

Command Dispatch

Now the program needs to read command-line arguments.

/// Entry point: parses `mini-git <command> [argument]` and dispatches to the
/// matching command. Prints a usage line when the command or its required
/// argument is missing, and an "unknown command" line for anything else.
pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();
    const allocator = gpa.allocator();

    var args = try std.process.argsWithAllocator(allocator);
    defer args.deinit();

    // The first argument is the program name.
    _ = args.next();

    const command = args.next() orelse {
        std.debug.print("usage: mini-git <command>\n", .{});
        return;
    };

    if (std.mem.eql(u8, command, "init")) {
        return initRepo();
    }

    if (std.mem.eql(u8, command, "add")) {
        if (args.next()) |path| {
            return addFile(allocator, path);
        }
        std.debug.print("usage: mini-git add <path>\n", .{});
        return;
    }

    if (std.mem.eql(u8, command, "commit")) {
        if (args.next()) |message| {
            return commit(allocator, message);
        }
        std.debug.print("usage: mini-git commit <message>\n", .{});
        return;
    }

    if (std.mem.eql(u8, command, "log")) {
        return log(allocator);
    }

    if (std.mem.eql(u8, command, "checkout")) {
        if (args.next()) |hash| {
            return checkout(allocator, hash);
        }
        std.debug.print("usage: mini-git checkout <hash>\n", .{});
        return;
    }

    std.debug.print("unknown command: {s}\n", .{command});
}

Try It

Create a file:

echo "hello" > hello.txt

Initialize the repository:

zig build run -- init

Add the file:

zig build run -- add hello.txt

Commit it:

zig build run -- commit "first commit"

Show the log:

zig build run -- log

Change the file:

echo "changed" > hello.txt

Add and commit again:

zig build run -- add hello.txt
zig build run -- commit "second commit"

Now log shows two commits.

Checkout an older commit by hash:

zig build run -- checkout <commit-hash>

The file content changes back to the version stored in that commit.

What This Mini Git Does Not Do

This project is intentionally small.

It does not store directories as tree objects.

It does not compress objects.

It does not deduplicate index entries.

It does not handle branches.

It does not merge.

It does not track deletions.

It does not detect dirty working trees.

It does not use Git’s real object format.

Those missing features are exactly where real Git becomes deeper.

The Core Idea

Even this small version shows the heart of Git.

Files become objects.

Objects are named by hashes.

Commits point to previous commits.

HEAD points to the latest commit.

Checkout restores files from stored objects.

That is the basic model:

working files
    add
objects + index
    commit
commit object
    HEAD

Once this model is clear, real Git internals become much easier to read.