commit 43a3ba2a00
Frans Kaashoek, 2025-01-31 12:47:59 -05:00
77 changed files with 75991 additions and 0 deletions

.check-build (Executable file, 137 lines)
@@ -0,0 +1,137 @@
#!/usr/bin/env bash
set -eu
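# Example invocation (this is what the Makefile's check-% rule runs):
#   ./.check-build git://g.csail.mit.edu/6.5840-golabs-2024 lab3a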
REFERENCE_FILES=(
# lab 1
src/mrapps/crash.go
src/mrapps/indexer.go
src/mrapps/mtiming.go
src/mrapps/nocrash.go
src/mrapps/rtiming.go
src/mrapps/wc.go
src/main/mrsequential.go
src/main/mrcoordinator.go
src/main/mrworker.go
# lab 2
src/kvsrv/test_test.go
src/kvsrv/config.go
# lab 3
src/raft/persister.go
src/raft/test_test.go
src/raft/config.go
src/labrpc/labrpc.go
# lab 4
src/kvraft/test_test.go
src/kvraft/config.go
# lab 5a
src/shardctrler/test_test.go
src/shardctrler/config.go
# lab 5b
src/shardkv/test_test.go
src/shardkv/config.go
)
main() {
upstream="$1"
labnum="$2"
# make sure we have a reference copy of the lab, in FETCH_HEAD
git fetch "$upstream" 2>/dev/null || die "unable to git fetch $upstream"
# copy existing directory
tmpdir="$(mktemp -d)"
find src -type s -delete # cp can't copy sockets
cp -r src "$tmpdir"
orig="$PWD"
cd "$tmpdir"
# check out reference files
for f in "${REFERENCE_FILES[@]}"; do
mkdir -p "$(dirname "$f")"
git --git-dir="$orig/.git" show "FETCH_HEAD:$f" > "$f"
done
case $labnum in
"lab1") check_lab1;;
"lab2") check_lab2;;
"lab3a"|"lab3b"|"lab3c"|"lab3d") check_lab3;;
"lab4a"|"lab4b") check_lab4;;
"lab5a") check_lab5a;;
"lab5b") check_lab5b;;
*) die "unknown lab: $labnum";;
esac
cd
rm -rf "$tmpdir"
}
check_lab1() {
check_cmd cd src/mrapps
check_cmd go build -buildmode=plugin wc.go
check_cmd go build -buildmode=plugin indexer.go
check_cmd go build -buildmode=plugin mtiming.go
check_cmd go build -buildmode=plugin rtiming.go
check_cmd go build -buildmode=plugin crash.go
check_cmd go build -buildmode=plugin nocrash.go
check_cmd cd ../main
check_cmd go build mrcoordinator.go
check_cmd go build mrworker.go
check_cmd go build mrsequential.go
}
check_lab2() {
check_cmd cd src/kvsrv
check_cmd go test -c
}
check_lab3() {
check_cmd cd src/raft
check_cmd go test -c
}
check_lab4() {
check_cmd cd src/kvraft
check_cmd go test -c
}
check_lab5a() {
check_cmd cd src/shardctrler
check_cmd go test -c
}
check_lab5b() {
check_cmd cd src/shardkv
check_cmd go test -c
# also check other labs/parts
cd "$tmpdir"
check_lab5a
cd "$tmpdir"
check_lab4
cd "$tmpdir"
check_lab3
}
check_cmd() {
if ! "$@" >/dev/null 2>&1; then
echo "We tried building your source code with testing-related files reverted to original versions, and the build failed. This copy of your code is preserved in $tmpdir for debugging purposes. Please make sure the code you are trying to hand in does not make changes to test code." >&2
echo >&2
echo "The build failed while trying to run the following command:" >&2
echo >&2
echo "$ $@" >&2
echo " (cwd: ${PWD#$tmpdir/})" >&2
exit 1
fi
}
die() {
echo "$1" >&2
exit 1
}
main "$@"

.gitignore (vendored, Normal file, 4 lines)
@@ -0,0 +1,4 @@
pkg/
api.key
.api.key.trimmed
*-handin.tar.gz

Makefile (Normal file, 34 lines)
@@ -0,0 +1,34 @@
# This is the Makefile helping you submit the labs.
# Just create 6.5840/api.key with your API key in it,
# and submit your lab with the following command:
# $ make [lab1|lab2|lab3a|lab3b|lab3c|lab3d|lab4a|lab4b|lab5a|lab5b]
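# For example, `make lab3a` runs the build check below and then produces
# lab3a-handin.tar.gz for manual upload to Gradescope.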
LABS=" lab1 lab2 lab3a lab3b lab3c lab3d lab4a lab4b lab5a lab5b "
%: check-%
@echo "Preparing $@-handin.tar.gz"
@if echo $(LABS) | grep -q " $@ " ; then \
echo "Tarring up your submission..." ; \
COPYFILE_DISABLE=1 tar cvzf $@-handin.tar.gz \
"--exclude=src/main/pg-*.txt" \
"--exclude=src/main/diskvd" \
"--exclude=src/mapreduce/824-mrinput-*.txt" \
"--exclude=src/mapreduce/5840-mrinput-*.txt" \
"--exclude=src/main/mr-*" \
"--exclude=mrtmp.*" \
"--exclude=src/main/diff.out" \
"--exclude=src/main/mrcoordinator" \
"--exclude=src/main/mrsequential" \
"--exclude=src/main/mrworker" \
"--exclude=*.so" \
Makefile src; \
if test `stat -c "%s" "$@-handin.tar.gz" 2>/dev/null || stat -f "%z" "$@-handin.tar.gz"` -ge 20971520 ; then echo "File exceeds 20MB."; rm $@-handin.tar.gz; exit; fi; \
echo "$@-handin.tar.gz successfully created. Please upload the tarball manually on Gradescope."; \
else \
echo "Bad target $@. Usage: make [$(LABS)]"; \
fi
.PHONY: check-%
check-%:
@echo "Checking that your submission builds correctly..."
@./.check-build git://g.csail.mit.edu/6.5840-golabs-2024 $(patsubst check-%,%,$@)

src/.gitignore (vendored, Normal file, 12 lines)
@@ -0,0 +1,12 @@
*.*/
main/mr-tmp/
mrtmp.*
824-mrinput-*.txt
/main/diff.out
/mapreduce/x.txt
/pbservice/x.txt
/kvpaxos/x.txt
*.so
/main/mrcoordinator
/main/mrsequential
/main/mrworker

src/go.mod (Normal file, 5 lines)
@@ -0,0 +1,5 @@
module 6.5840
go 1.21
require github.com/anishathalye/porcupine v1.0.0

src/go.sum (Normal file, 2 lines)
@@ -0,0 +1,2 @@
github.com/anishathalye/porcupine v1.0.0 h1:93eF6d26IMDky+G4h8FcLuYp1oO+no8a//I7asq/oKI=
github.com/anishathalye/porcupine v1.0.0/go.mod h1:WM0SsFjWNl2Y4BqHr/E/ll2yY1GY1jqn+W7Z/84Zoog=

src/kvraft1/client.go (Normal file, 31 lines)
@@ -0,0 +1,31 @@
package kvraft
import (
"6.5840/kvsrv1/rpc"
"6.5840/kvtest1"
"6.5840/tester1"
)
type Clerk struct {
clnt *tester.Clnt
servers []string
// You will have to modify this struct.
}
func MakeClerk(clnt *tester.Clnt, servers []string) kvtest.IKVClerk {
ck := &Clerk{clnt: clnt, servers: servers}
// You'll have to add code here.
return ck
}
func (ck *Clerk) Get(key string) (string, rpc.Tversion, rpc.Err) {
// You will have to modify this function.
return "", 0, ""
}
func (ck *Clerk) Put(key string, value string, version rpc.Tversion) rpc.Err {
// You will have to modify this function.
return ""
}
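// Note: unlike the single-server clerk in kvsrv1, this clerk talks to a set
// of replicas. A common pattern (an assumption here, not a requirement) is to
// remember which entry of ck.servers last replied successfully and, on
// rpc.ErrWrongLeader or a lost reply, to retry the same request at the next
// server until one accepts it.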

src/kvraft1/kvraft_test.go (Normal file, 405 lines)
@@ -0,0 +1,405 @@
package kvraft
import (
// "log"
"strconv"
"testing"
"time"
"6.5840/kvsrv1/rpc"
"6.5840/kvtest1"
)
const (
NSEC = 1
NCLNT = 10
)
// The basic test is as follows: one or more clients submit Put/Get
// operations to a set of servers for some period of time using
// kvtest.OneClientPut. After the period is over, the test checks that the
// values put and got form a linearizable history. If unreliable is set,
// RPCs may fail. If crash is set, the servers crash after the period
// is over and restart. If partitions is set, the test repartitions
// the network concurrently with the clients and servers. If
// maxraftstate is a positive number, the size of the state for Raft
// (i.e., log size) shouldn't exceed 8*maxraftstate. If maxraftstate
// is negative, snapshots shouldn't be used.
func (ts *Test) GenericTest() {
const (
NITER = 3
T = NSEC * time.Second
NKEYS = 100
)
// const T = 1 * time.Millisecond
defer ts.Cleanup()
ch_partitioner := make(chan bool)
ch_spawn := make(chan struct{})
ck := ts.MakeClerk()
res := kvtest.ClntRes{}
default_key := []string{"k"} // if not running with randomkeys
if ts.randomkeys {
default_key = kvtest.MakeKeys(NKEYS)
}
for i := 0; i < NITER; i++ {
// log.Printf("Iteration %v\n", i)
go func() {
rs := ts.SpawnClientsAndWait(ts.nclients, T, func(cli int, ck kvtest.IKVClerk, done chan struct{}) kvtest.ClntRes {
return ts.OneClientPut(cli, ck, default_key, done)
})
if !ts.randomkeys {
ts.CheckPutConcurrent(ck, default_key[0], rs, &res)
}
ch_spawn <- struct{}{}
}()
if ts.partitions {
// Allow the clients to perform some operations without interruption
time.Sleep(1 * time.Second)
go ts.Partitioner(Gid, ch_partitioner)
}
<-ch_spawn // wait for clients to be done
ts.CheckPorcupine()
if ts.partitions {
ch_partitioner <- true
// log.Printf("wait for partitioner\n")
<-ch_partitioner
// reconnect network and submit a request. A client may
// have submitted a request in a minority. That request
// won't return until that server discovers a new term
// has started.
ts.Group(Gid).ConnectAll()
// wait for a while so that we have a new term
time.Sleep(kvtest.ElectionTimeout)
}
if ts.crash {
// log.Printf("shutdown servers\n")
for i := 0; i < ts.nservers; i++ {
ts.Group(Gid).ShutdownServer(i)
}
// Wait for a while for servers to shutdown, since
// shutdown isn't a real crash and isn't instantaneous
time.Sleep(kvtest.ElectionTimeout)
// log.Printf("restart servers\n")
// crash and re-start all
for i := 0; i < ts.nservers; i++ {
ts.Group(Gid).StartServer(i)
}
ts.Group(Gid).ConnectAll()
}
if ts.maxraftstate > 0 {
// Check maximum after the servers have processed all client
// requests and had time to checkpoint.
sz := ts.Config.Group(Gid).LogSize()
if sz > 8*ts.maxraftstate {
ts.Fatalf("logs were not trimmed (%v > 8*%v)", sz, ts.maxraftstate)
}
}
if ts.maxraftstate < 0 {
// Check that snapshots are not used
ssz := ts.Group(Gid).SnapshotSize()
if ssz > 0 {
ts.t.Fatalf("snapshot too large (%v), should not be used when maxraftstate = %d", ssz, ts.maxraftstate)
}
}
}
}
// check that ops are committed fast enough, better than 1 per heartbeat interval
func (ts *Test) GenericTestSpeed() {
const numOps = 1000
defer ts.Cleanup()
ck := ts.MakeClerk()
// wait until first op completes, so we know a leader is elected
// and KV servers are ready to process client requests
ck.Get("x")
start := time.Now()
for i := 0; i < numOps; i++ {
if err := ck.Put("k", strconv.Itoa(i), rpc.Tversion(i)); err != rpc.OK {
ts.t.Fatalf("Put err %v", err)
}
}
dur := time.Since(start)
if _, ver, err := ck.Get("k"); err != rpc.OK {
ts.t.Fatalf("Get err %v", err)
} else if ver != numOps {
ts.t.Fatalf("Get too few ops %v", ver)
}
// heartbeat interval should be ~100 ms; require at least 3 ops per heartbeat interval
const heartbeatInterval = 100 * time.Millisecond
const opsPerInterval = 3
const timePerOp = heartbeatInterval / opsPerInterval
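// e.g., with numOps=1000 and timePerOp=100ms/3, the loop must finish in
// under ~33 seconds overall, i.e. about 33ms per Put on average.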
if dur > numOps*timePerOp {
ts.t.Fatalf("Operations completed too slowly %v/op > %v/op\n", dur/numOps, timePerOp)
}
}
func TestBasic4A(t *testing.T) {
ts := MakeTest(t, "4A basic", 1, 5, true, false, false, -1, false)
ts.GenericTest()
}
func TestSpeed4A(t *testing.T) {
ts := MakeTest(t, "4A speed", 1, 3, true, false, false, -1, false)
ts.GenericTestSpeed()
}
func TestConcurrent4A(t *testing.T) {
ts := MakeTest(t, "4A many clients", 5, 5, true, false, false, -1, false)
ts.GenericTest()
}
func TestUnreliable4A(t *testing.T) {
ts := MakeTest(t, "4A unreliable net, many clients", 5, 5, false, false, false, -1, false)
ts.GenericTest()
}
// Submit a request in the minority partition and check that the request
// doesn't go through until the partition heals. The leader in the original
// network ends up in the minority partition.
func TestOnePartition4A(t *testing.T) {
ts := MakeTest(t, "4A progress in majority", 0, 5, false, false, false, -1, false)
defer ts.Cleanup()
ck := ts.MakeClerk()
ver0 := ts.PutAtLeastOnce(ck, "1", "13", rpc.Tversion(0), -1)
p1, p2 := ts.Group(Gid).MakePartition()
ts.Group(Gid).Partition(p1, p2)
ckp1 := ts.MakeClerkTo(p1) // connect ckp1 to p1
ckp2a := ts.MakeClerkTo(p2) // connect ckp2a to p2
ckp2b := ts.MakeClerkTo(p2) // connect ckp2b to p2
ver1 := ts.PutAtLeastOnce(ckp1, "1", "14", ver0+1, -1)
ts.CheckGet(ckp1, "1", "14", ver1)
ts.End()
done0 := make(chan rpc.Tversion)
done1 := make(chan rpc.Tversion)
ts.Begin("Test: no progress in minority (4A)")
go func() {
ver := ts.PutAtLeastOnce(ckp2a, "1", "15", ver1+1, -1)
done0 <- ver
}()
go func() {
_, ver, _ := ts.Get(ckp2b, "1", -1) // different clerk in p2
done1 <- ver
}()
select {
case ver := <-done0:
t.Fatalf("Put in minority completed %v", ver)
case ver := <-done1:
t.Fatalf("Get in minority completed %v", ver)
case <-time.After(time.Second):
}
ts.CheckGet(ckp1, "1", "14", ver1)
ver2 := ts.PutAtLeastOnce(ckp1, "1", "16", ver1+1, -1)
ts.CheckGet(ckp1, "1", "16", ver2)
ts.End()
ts.Begin("Test: completion after heal (4A)")
ts.Group(Gid).ConnectAll()
ckp2a.(*kvtest.TestClerk).Clnt.ConnectAll()
ckp2b.(*kvtest.TestClerk).Clnt.ConnectAll()
time.Sleep(kvtest.ElectionTimeout)
select {
case <-done0:
case <-time.After(30 * 100 * time.Millisecond):
t.Fatalf("Put did not complete")
}
select {
case <-done1:
case <-time.After(30 * 100 * time.Millisecond):
t.Fatalf("Get did not complete")
default:
}
ts.CheckGet(ck, "1", "15", ver2+1)
}
func TestManyPartitionsOneClient4A(t *testing.T) {
ts := MakeTest(t, "4A partitions, one client", 1, 5, false, false, true, -1, false)
ts.GenericTest()
}
func TestManyPartitionsManyClients4A(t *testing.T) {
ts := MakeTest(t, "4A partitions, many clients (4A)", 5, 5, false, false, true, -1, false)
ts.GenericTest()
}
func TestPersistOneClient4A(t *testing.T) {
ts := MakeTest(t, "4A restarts, one client 4A ", 1, 5, false, true, false, -1, false)
ts.GenericTest()
}
func TestPersistConcurrent4A(t *testing.T) {
ts := MakeTest(t, "4A restarts, many clients", 5, 5, false, true, false, -1, false)
ts.GenericTest()
}
func TestPersistConcurrentUnreliable4A(t *testing.T) {
ts := MakeTest(t, "4A unreliable net, restarts, many clients ", 5, 5, true, true, false, -1, false)
ts.GenericTest()
}
func TestPersistPartition4A(t *testing.T) {
ts := MakeTest(t, "4A restarts, partitions, many clients", 5, 5, false, true, true, -1, false)
ts.GenericTest()
}
func TestPersistPartitionUnreliable4A(t *testing.T) {
ts := MakeTest(t, "4A unreliable net, restarts, partitions, many clients", 5, 5, true, true, true, -1, false)
ts.GenericTest()
}
func TestPersistPartitionUnreliableLinearizable4A(t *testing.T) {
ts := MakeTest(t, "4A unreliable net, restarts, partitions, random keys, many clients", 15, 7, true, true, true, -1, true)
ts.GenericTest()
}
// if one server falls behind and then rejoins, does it
// recover by using the InstallSnapshot RPC?
// also checks that the majority discards committed log entries
// even if the minority doesn't respond.
func TestSnapshotRPC4B(t *testing.T) {
ts := MakeTest(t, "4B SnapshotsRPC", 0, 3, false, false, false, 1000, false)
defer ts.Cleanup()
ck := ts.MakeClerk()
ts.Begin("Test: InstallSnapshot RPC (4B)")
vera := ts.PutAtLeastOnce(ck, "a", "A", rpc.Tversion(0), -1)
ts.CheckGet(ck, "a", "A", vera)
verb := rpc.Tversion(0)
// a bunch of puts into the majority partition.
ts.Group(Gid).Partition([]int{0, 1}, []int{2})
{
ck1 := ts.MakeClerkTo([]int{0, 1})
for i := 0; i < 50; i++ {
verb = ts.PutAtLeastOnce(ck1, strconv.Itoa(i), strconv.Itoa(i), rpc.Tversion(0), -1)
}
time.Sleep(kvtest.ElectionTimeout)
verb = ts.PutAtLeastOnce(ck1, "b", "B", verb, -1)
}
// check that the majority partition has thrown away
// most of its log entries.
sz := ts.Group(Gid).LogSize()
if sz > 8*ts.maxraftstate {
t.Fatalf("logs were not trimmed (%v > 8*%v)", sz, ts.maxraftstate)
}
// now make a group that requires the participation of the
// lagging server, so that it has to catch up.
ts.Group(Gid).Partition([]int{0, 2}, []int{1})
{
ck1 := ts.MakeClerkTo([]int{0, 2})
ts.PutAtLeastOnce(ck1, "c", "C", rpc.Tversion(0), -1)
ts.PutAtLeastOnce(ck1, "d", "D", rpc.Tversion(0), -1)
ts.CheckGet(ck1, "a", "A", vera)
ts.CheckGet(ck1, "b", "B", verb)
ts.CheckGet(ck1, "1", "1", rpc.Tversion(1))
ts.CheckGet(ck1, "49", "49", rpc.Tversion(1))
}
// now everybody
ts.Group(Gid).Partition([]int{0, 1, 2}, []int{})
vere := ts.PutAtLeastOnce(ck, "e", "E", rpc.Tversion(0), -1)
ts.CheckGet(ck, "c", "C", 1)
ts.CheckGet(ck, "e", "E", vere)
ts.CheckGet(ck, "1", "1", rpc.Tversion(1))
}
// are the snapshots not too huge? 500 bytes is a generous bound for the
// operations we're doing here.
func TestSnapshotSize4B(t *testing.T) {
ts := MakeTest(t, "4B snapshot size is reasonable", 0, 3, false, false, false, 1000, false)
defer ts.Cleanup()
maxsnapshotstate := 500
ck := ts.MakeClerk()
ver := rpc.Tversion(0)
for i := 0; i < 200; i++ {
ver = ts.PutAtLeastOnce(ck, "x", "0", ver, -1)
ts.CheckGet(ck, "x", "0", ver)
ver = ts.PutAtLeastOnce(ck, "x", "1", ver+1, -1)
ts.CheckGet(ck, "x", "1", ver)
ver += 1
}
// check that servers have thrown away most of their log entries
sz := ts.Group(Gid).LogSize()
if sz > 8*ts.maxraftstate {
t.Fatalf("logs were not trimmed (%v > 8*%v)", sz, ts.maxraftstate)
}
// check that the snapshots are not unreasonably large
ssz := ts.Group(Gid).SnapshotSize()
if ssz > maxsnapshotstate {
t.Fatalf("snapshot too large (%v > %v)", ssz, maxsnapshotstate)
}
}
func TestSpeed4B(t *testing.T) {
ts := MakeTest(t, "4B speed", 1, 3, true, false, false, 1000, false)
ts.GenericTestSpeed()
}
func TestSnapshotRecover4B(t *testing.T) {
ts := MakeTest(t, "4B restarts, snapshots, one client", 1, 5, true, true, false, 1000, false)
ts.GenericTest()
}
func TestSnapshotRecoverManyClients4B(t *testing.T) {
ts := MakeTest(t, "4B restarts, snapshots, many clients ", 20, 5, true, true, false, 1000, false)
ts.GenericTest()
}
func TestSnapshotUnreliable4B(t *testing.T) {
ts := MakeTest(t, "4B unreliable net, snapshots, many clients", 5, 5, false, false, false, 1000, false)
ts.GenericTest()
}
func TestSnapshotUnreliableRecover4B(t *testing.T) {
ts := MakeTest(t, "4B unreliable net, restarts, snapshots, many clients", 5, 5, false, true, false, 1000, false)
ts.GenericTest()
}
func TestSnapshotUnreliableRecoverConcurrentPartition4B(t *testing.T) {
ts := MakeTest(t, "4B unreliable net, restarts, partitions, snapshots, many clients", 5, 5, false, true, true, 1000, false)
ts.GenericTest()
}
func TestSnapshotUnreliableRecoverConcurrentPartitionLinearizable4B(t *testing.T) {
ts := MakeTest(t, "4B unreliable net, restarts, partitions, snapshots, random keys, many clients", 15, 7, false, true, true, 1000, true)
ts.GenericTest()
}

src/kvraft1/rsm/rsm.go (Normal file, 88 lines)
@@ -0,0 +1,88 @@
package rsm
import (
"sync"
"6.5840/kvsrv1/rpc"
"6.5840/labrpc"
"6.5840/raft"
)
type Op struct {
// Your definitions here.
// Field names must start with capital letters,
// otherwise RPC will break.
}
// A server (i.e., ../server.go) that wants to replicate itself calls
// MakeRSM and must implement the StateMachine interface. This
// interface allows the rsm package to interact with the server for
// server-specific operations: the server must implement DoOp to
// execute an operation (e.g., a Get or Put request), and
// Snapshot/Restore to snapshot and restore the server's state.
type StateMachine interface {
DoOp(any) any
Snapshot() []byte
Restore([]byte)
}
type RSM struct {
mu sync.Mutex
me int
rf *raft.Raft
applyCh chan raft.ApplyMsg
maxraftstate int // snapshot if log grows this big
sm StateMachine
// Your definitions here.
}
// servers[] contains the ports of the set of
// servers that will cooperate via Raft to
// form the fault-tolerant key/value service.
// me is the index of the current server in servers[].
// the k/v server should store snapshots through the underlying Raft
// implementation, which should call persister.SaveStateAndSnapshot() to
// atomically save the Raft state along with the snapshot.
// The RSM should snapshot when Raft's saved state exceeds maxraftstate bytes,
// in order to allow Raft to garbage-collect its log. if maxraftstate is -1,
// you don't need to snapshot.
//
// MakeRSM() must return quickly, so it should start goroutines for
// any long-running work.
func MakeRSM(servers []*labrpc.ClientEnd, me int, persister *raft.Persister, maxraftstate int, sm StateMachine) *RSM {
rsm := &RSM{
me: me,
maxraftstate: maxraftstate,
applyCh: make(chan raft.ApplyMsg),
sm: sm,
}
rsm.rf = raft.Make(servers, me, persister, rsm.applyCh)
return rsm
}
func (rsm *RSM) Raft() *raft.Raft {
return rsm.rf
}
// submit a command to Raft, and wait for it to be committed.
// perform() will tell us via ClientStatus and lastApplied
// whether our command was executed or not.
//
// returns (executeError, executeResult)
// if executeError==ErrWrongLeader, the client should find the new leader
// and try again.
func (rsm *RSM) Submit(req any) (rpc.Err, any) {
rsm.mu.Lock()
defer rsm.mu.Unlock()
// Submit creates an Op structure to run a command through Raft;
// for example: op := Op{Id: rsm.nextId, Req: req}, where req is
// the argument to Submit and rsm.nextId a unique id for the op.
// your code here
return rpc.ErrWrongLeader, nil // i'm dead, try another server.
}
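// A common shape for the Submit flow (an illustrative sketch, not part of the
// handout: nextId, the per-index wait, and the apply-reading goroutine are all
// assumptions, and it presumes the usual Raft.Start(cmd) (index, term,
// isLeader) API from the Raft lab):
//
//	op := Op{Id: rsm.nextId, Req: req}
//	index, _, isLeader := rsm.rf.Start(op)
//	if !isLeader {
//		return rpc.ErrWrongLeader, nil
//	}
//	// release rsm.mu and wait for the goroutine that reads applyCh to
//	// report what was committed at `index`; if it is our op, return
//	// (rpc.OK, result), otherwise a leader change overwrote the slot and
//	// we return rpc.ErrWrongLeader so the client retries elsewhere.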

@@ -0,0 +1,63 @@
package rsm
import (
//"log"
"testing"
)
// test that each server executes increments and updates its counter.
func TestBasic(t *testing.T) {
ts := makeTest(t, -1)
defer ts.cleanup()
ts.Begin("Test RSM basic")
for i := 0; i < 10; i++ {
r := ts.one()
if r.N != i+1 {
ts.Fatalf("expected %d instead of %d", i, r.N)
}
ts.checkCounter(r.N, NSRV)
}
}
// test that each server executes increments after disconnecting and
// reconnecting leader
func TestLeaderFailure(t *testing.T) {
ts := makeTest(t, -1)
defer ts.cleanup()
r := ts.one()
ts.checkCounter(r.N, NSRV)
l := ts.disconnectLeader()
r = ts.one()
ts.checkCounter(r.N, NSRV-1)
ts.connect(l)
ts.checkCounter(r.N, NSRV)
}
// test snapshot and restore
func TestSnapshot(t *testing.T) {
const N = 100
ts := makeTest(t, 1000)
defer ts.cleanup()
for i := 0; i < N; i++ {
ts.one()
}
ts.checkCounter(N, NSRV)
// rsm must have made snapshots by now; shut down all servers and
// restart them from a snapshot
ts.g.Shutdown()
ts.g.StartServers()
// make restarted servers do one increment
ts.one()
ts.checkCounter(N+1, NSRV)
}

src/kvraft1/rsm/server.go (Normal file, 77 lines)
@@ -0,0 +1,77 @@
package rsm
import (
"bytes"
"log"
"sync"
"6.5840/labgob"
// "6.5840/kvtest1"
"6.5840/labrpc"
"6.5840/raft"
)
type Inc struct {
}
type Rep struct {
N int
}
type rsmSrv struct {
ts *Test
me int
rsm *RSM
mu sync.Mutex
counter int
}
func makeRsmSrv(ts *Test, srv int, ends []*labrpc.ClientEnd, persister *raft.Persister, snapshot bool) *rsmSrv {
//log.Printf("mksrv %d", srv)
labgob.Register(Op{})
labgob.Register(Inc{})
labgob.Register(Rep{})
s := &rsmSrv{
ts: ts,
me: srv,
}
s.rsm = MakeRSM(ends, srv, persister, ts.maxraftstate, s)
return s
}
func (rs *rsmSrv) DoOp(req any) any {
//log.Printf("%d: DoOp: %v", rs.me, req)
rs.counter += 1
return &Rep{rs.counter}
}
func (rs *rsmSrv) Snapshot() []byte {
//log.Printf("%d: snapshot", rs.me)
w := new(bytes.Buffer)
e := labgob.NewEncoder(w)
e.Encode(rs.counter)
return w.Bytes()
}
func (rs *rsmSrv) Restore(data []byte) {
r := bytes.NewBuffer(data)
d := labgob.NewDecoder(r)
if d.Decode(&rs.counter) != nil {
log.Fatalf("%v couldn't decode counter", rs.me)
}
//log.Printf("%d: restore %d", rs.me, rs.counter)
}
func (rs *rsmSrv) Kill() {
rs.mu.Lock()
defer rs.mu.Unlock()
//log.Printf("kill %d", rs.me)
//rs.rsm.Kill()
rs.rsm = nil
}
func (rs *rsmSrv) Raft() *raft.Raft {
rs.mu.Lock()
defer rs.mu.Unlock()
return rs.rsm.Raft()
}

src/kvraft1/rsm/test.go (Normal file, 113 lines)
@@ -0,0 +1,113 @@
package rsm
import (
//"log"
"testing"
"time"
"6.5840/kvsrv1/rpc"
"6.5840/labrpc"
"6.5840/raft"
"6.5840/tester1"
)
type Test struct {
*tester.Config
t *testing.T
g *tester.ServerGrp
maxraftstate int
srvs []*rsmSrv
leader int
}
const (
NSRV = 3
NSEC = 10
)
func makeTest(t *testing.T, maxraftstate int) *Test {
ts := &Test{
t: t,
maxraftstate: maxraftstate,
srvs: make([]*rsmSrv, NSRV),
}
ts.Config = tester.MakeConfig(t, NSRV, true, maxraftstate, ts.mksrv)
ts.g = ts.Group(tester.GRP0)
return ts
}
func (ts *Test) cleanup() {
ts.End()
ts.Config.Cleanup()
ts.CheckTimeout()
}
func (ts *Test) mksrv(ends []*labrpc.ClientEnd, grp tester.Tgid, srv int, persister *raft.Persister, maxraftstate int) tester.IKVServer {
s := makeRsmSrv(ts, srv, ends, persister, false)
ts.srvs[srv] = s
return s
}
func (ts *Test) one() *Rep {
// try all the servers; maybe one is the leader, but give up after NSEC seconds
t0 := time.Now()
for time.Since(t0).Seconds() < NSEC {
index := ts.leader
for range ts.srvs {
if ts.g.IsConnected(index) {
s := ts.srvs[index]
if s.rsm != nil {
err, rep := s.rsm.Submit(Inc{})
if err == rpc.OK {
ts.leader = index
//log.Printf("leader = %d", ts.leader)
return rep.(*Rep)
}
}
}
index = (index + 1) % len(ts.srvs)
}
time.Sleep(50 * time.Millisecond)
//log.Printf("try again: no leader")
}
ts.Fatalf("one: took too long")
return nil
}
func (ts *Test) checkCounter(v int, nsrv int) {
to := 10 * time.Millisecond
n := 0
for iters := 0; iters < 30; iters++ {
n = ts.countValue(v)
if n >= nsrv {
return
}
time.Sleep(to)
if to < time.Second {
to *= 2
}
}
ts.Fatalf("checkCounter: only %d srvs have %v instead of %d", n, v, nsrv)
}
func (ts *Test) countValue(v int) int {
i := 0
for _, s := range ts.srvs {
if s.counter == v {
i += 1
}
}
return i
}
func (ts *Test) disconnectLeader() int {
//log.Printf("disconnect %d", ts.leader)
ts.g.DisconnectAll(ts.leader)
return ts.leader
}
func (ts *Test) connect(i int) {
//log.Printf("connect %d", i)
ts.g.ConnectOne(i)
}

src/kvraft1/test.go (Normal file, 86 lines)
@@ -0,0 +1,86 @@
package kvraft
import (
"testing"
"6.5840/kvtest1"
"6.5840/tester1"
)
type Test struct {
t *testing.T
*kvtest.Test
part string
nclients int
nservers int
crash bool
partitions bool
maxraftstate int
randomkeys bool
}
const Gid = tester.GRP0
func MakeTest(t *testing.T, part string, nclients, nservers int, reliable bool, crash bool, partitions bool, maxraftstate int, randomkeys bool) *Test {
cfg := tester.MakeConfig(t, nservers, reliable, maxraftstate, StartKVServer)
ts := &Test{
t: t,
part: part,
nclients: nclients,
nservers: nservers,
crash: crash,
partitions: partitions,
maxraftstate: maxraftstate,
randomkeys: randomkeys,
}
ts.Test = kvtest.MakeTest(t, cfg, randomkeys, ts)
ts.Begin(ts.makeTitle())
return ts
}
func (ts *Test) MakeClerk() kvtest.IKVClerk {
clnt := ts.Config.MakeClient()
ck := MakeClerk(clnt, ts.Group(Gid).SrvNames())
return &kvtest.TestClerk{ck, clnt}
}
func (ts *Test) DeleteClerk(ck kvtest.IKVClerk) {
tck := ck.(*kvtest.TestClerk)
ts.DeleteClient(tck.Clnt)
}
func (ts *Test) MakeClerkTo(to []int) kvtest.IKVClerk {
ns := ts.Config.Group(Gid).SrvNamesTo(to)
clnt := ts.Config.MakeClientTo(ns)
ck := MakeClerk(clnt, ts.Group(Gid).SrvNames())
return &kvtest.TestClerk{ck, clnt}
}
func (ts *Test) cleanup() {
ts.Test.Cleanup()
}
func (ts *Test) makeTitle() string {
title := "Test: "
if ts.crash {
// peers re-start, and thus persistence must work.
title = title + "restarts, "
}
if ts.partitions {
// the network may partition
title = title + "partitions, "
}
if ts.maxraftstate != -1 {
title = title + "snapshots, "
}
if ts.randomkeys {
title = title + "random keys, "
}
if ts.nclients > 1 {
title = title + "many clients"
} else {
title = title + "one client"
}
title = title + " (" + ts.part + ")" // 4A or 4B
return title
}

src/kvsrv1/client.go (Normal file, 57 lines)
@@ -0,0 +1,57 @@
package kvsrv
import (
"6.5840/kvsrv1/rpc"
"6.5840/kvtest1"
"6.5840/tester1"
)
type Clerk struct {
clnt *tester.Clnt
server string
}
func MakeClerk(clnt *tester.Clnt, server string) kvtest.IKVClerk {
ck := &Clerk{clnt: clnt, server: server}
// You may add code here.
return ck
}
// Get fetches the current value and version for a key. It returns
// ErrNoKey if the key does not exist. It keeps trying forever in the
// face of all other errors.
//
// You can send an RPC with code like this:
// ok := ck.clnt.Call(ck.server, "KVServer.Get", &args, &reply)
//
// the types of args and reply (including whether they are pointers)
// must match the declared types of the RPC handler function's
// arguments, and reply must be passed as a pointer.
func (ck *Clerk) Get(key string) (string, rpc.Tversion, rpc.Err) {
// You will have to modify this function.
return "", 0, rpc.ErrNoKey
}
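// An illustrative sketch of the retry loop described above (not the assigned
// solution; the structure is up to you). It assumes the GetArgs/GetReply types
// from 6.5840/kvsrv1/rpc and uses a fresh reply struct on each attempt (see
// labgob's warning about decoding into non-default values):
//
//	for {
//		args := rpc.GetArgs{Key: key}
//		reply := rpc.GetReply{}
//		if ck.clnt.Call(ck.server, "KVServer.Get", &args, &reply) {
//			return reply.Value, reply.Version, reply.Err
//		}
//		// no reply (lost request or reply); try again
//	}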
// Put updates key with value only if the version in the
// request matches the version of the key at the server. If the
// version numbers don't match, the server should return
// ErrVersion. If Put receives an ErrVersion on its first RPC, Put
// should return ErrVersion, since the Put was definitely not
// performed at the server. If the server returns ErrVersion on a
// resend RPC, then Put must return ErrMaybe to the application, since
// its earlier RPC might have been processed by the server successfully
// but the response was lost, and the Clerk doesn't know if
// the Put was performed or not.
//
// You can send an RPC with code like this:
// ok := ck.clnt.Call(ck.server, "KVServer.Put", &args, &reply)
//
// the types of args and reply (including whether they are pointers)
// must match the declared types of the RPC handler function's
// arguments, and reply must be passed as a pointer.
func (ck *Clerk) Put(key, value string, version rpc.Tversion) rpc.Err {
// You will have to modify this function.
return rpc.ErrNoKey
}
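// An illustrative sketch of the first-try/resend rule described above (not the
// assigned solution). ErrVersion on the very first RPC is returned as
// ErrVersion; ErrVersion on a resend becomes ErrMaybe, because the earlier,
// lost attempt may already have been applied:
//
//	args := rpc.PutArgs{Key: key, Value: value, Version: version}
//	first := true
//	for {
//		reply := rpc.PutReply{}
//		if ck.clnt.Call(ck.server, "KVServer.Put", &args, &reply) {
//			if reply.Err == rpc.ErrVersion && !first {
//				return rpc.ErrMaybe
//			}
//			return reply.Err
//		}
//		first = false // no reply; resend the same request
//	}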

src/kvsrv1/kvsrv_test.go (Normal file, 162 lines)
@@ -0,0 +1,162 @@
package kvsrv
import (
// "log"
"runtime"
"testing"
"time"
"6.5840/kvsrv1/rpc"
"6.5840/kvtest1"
)
// Test Put with a single client and a reliable network
func TestReliablePut(t *testing.T) {
const Val = "6.5840"
const Ver = 0
ts := MakeTestKV(t, true)
defer ts.Cleanup()
ts.Begin("One client and reliable Put")
ck := ts.MakeClerk()
if err := ck.Put("k", Val, Ver); err != rpc.OK {
t.Fatalf("Put err %v", err)
}
if val, ver, err := ck.Get("k"); err != rpc.OK {
t.Fatalf("Get err %v; expected OK", err)
} else if val != Val {
t.Fatalf("Get value err %v; expected %v", val, Val)
} else if ver != Ver+1 {
t.Fatalf("Get wrong version %v; expected %v", ver, Ver+1)
}
if err := ck.Put("k", Val, 0); err != rpc.ErrVersion {
t.Fatalf("expected Put to fail with ErrVersion; got err=%v", err)
}
if err := ck.Put("y", Val, rpc.Tversion(1)); err != rpc.ErrNoKey {
t.Fatalf("expected Put to fail with ErrNoKey; got err=%v", err)
}
if _, _, err := ck.Get("y"); err != rpc.ErrNoKey {
t.Fatalf("expected Get to fail with ErrNoKey; got err=%v", err)
}
}
// Many clients putting on same key.
func TestPutConcurrentReliable(t *testing.T) {
const (
PORCUPINETIME = 10 * time.Second
NCLNT = 10
NSEC = 1
)
ts := MakeTestKV(t, true)
defer ts.Cleanup()
ts.Begin("Test: many clients racing to put values to the same key")
rs := ts.SpawnClientsAndWait(NCLNT, NSEC*time.Second, func(me int, ck kvtest.IKVClerk, done chan struct{}) kvtest.ClntRes {
return ts.OneClientPut(me, ck, []string{"k"}, done)
})
ck := ts.MakeClerk()
ts.CheckPutConcurrent(ck, "k", rs, &kvtest.ClntRes{})
ts.CheckPorcupineT(PORCUPINETIME)
}
// Check if memory used on server is reasonable
func TestMemPutManyClientsReliable(t *testing.T) {
const (
NCLIENT = 100_000
MEM = 1000
)
ts := MakeTestKV(t, true)
defer ts.Cleanup()
v := kvtest.RandValue(MEM)
cks := make([]kvtest.IKVClerk, NCLIENT)
for i := range cks {
cks[i] = ts.MakeClerk()
}
// force allocation of ends for server in each client
for i := 0; i < NCLIENT; i++ {
if err := cks[i].Put("k", "", 1); err != rpc.ErrNoKey {
t.Fatalf("Put failed %v", err)
}
}
ts.Begin("Test: memory use many put clients")
// allow threads started by labrpc to start
time.Sleep(1 * time.Second)
runtime.GC()
runtime.GC()
var st runtime.MemStats
runtime.ReadMemStats(&st)
m0 := st.HeapAlloc
for i := 0; i < NCLIENT; i++ {
if err := cks[i].Put("k", v, rpc.Tversion(i)); err != rpc.OK {
t.Fatalf("Put failed %v", err)
}
}
runtime.GC()
time.Sleep(1 * time.Second)
runtime.GC()
runtime.ReadMemStats(&st)
m1 := st.HeapAlloc
f := (float64(m1) - float64(m0)) / NCLIENT
if m1 > m0+(NCLIENT*200) {
t.Fatalf("error: server using too much memory %d %d (%.2f per client)\n", m0, m1, f)
}
}
// Test with one client and unreliable network. If Clerk.Put returns
// ErrMaybe, the Put must have happened, since the test uses only one
// client.
func TestUnreliableNet(t *testing.T) {
const NTRY = 100
ts := MakeTestKV(t, false)
defer ts.Cleanup()
ts.Begin("One client")
ck := ts.MakeClerk()
retried := false
for try := 0; try < NTRY; try++ {
for i := 0; true; i++ {
if err := ts.PutJson(ck, "k", i, rpc.Tversion(try), 0); err != rpc.ErrMaybe {
if i > 0 && err != rpc.ErrVersion {
t.Fatalf("Put shouldn't have happen more than once %v", err)
}
break
}
// Try put again; it should fail with ErrVersion
retried = true
}
v := 0
if ver := ts.GetJson(ck, "k", 0, &v); ver != rpc.Tversion(try+1) {
t.Fatalf("Wrong version %d expect %d", ver, try+1)
}
if v != 0 {
t.Fatalf("Wrong value %d expect %d", v, 0)
}
}
if !retried {
t.Fatalf("Clerk.Put never returned ErrMaybe")
}
ts.CheckPorcupine()
}

src/kvsrv1/lock/lock.go (Normal file, 30 lines)
@@ -0,0 +1,30 @@
package lock
import (
"6.5840/kvtest1"
)
type Lock struct {
// IKVClerk is a Go interface for k/v clerks: the interface hides
// the specific Clerk type of ck but promises that ck supports
// Put and Get. The tester passes the clerk in when calling
// MakeLock().
ck kvtest.IKVClerk
// You may add code here
}
// The tester calls MakeLock() and passes in a k/v clerk; your code can
// perform a Put or Get by calling lk.ck.Put() or lk.ck.Get().
func MakeLock(ck kvtest.IKVClerk, l string) *Lock {
lk := &Lock{ck: ck}
// You may add code here
return lk
}
func (lk *Lock) Acquire() {
// Your code here
}
func (lk *Lock) Release() {
// Your code here
}
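// One well-known approach (an illustrative sketch; the sentinel value and the
// per-client id are assumptions): store the holder's identity under the lock
// key l, with "" meaning free. Acquire loops: Get(l); if the value is "", try
// Put(l, myId, version). ErrVersion just means another client won the race, so
// retry; ErrMaybe requires another Get to see whether our Put actually took
// effect. Release writes "" back using the current version.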

@@ -0,0 +1,89 @@
package lock
import (
"fmt"
// "log"
"strconv"
"testing"
"time"
"6.5840/kvsrv1"
"6.5840/kvsrv1/rpc"
"6.5840/kvtest1"
)
const (
NACQUIRE = 10
NCLNT = 10
NSEC = 2
)
func oneClient(t *testing.T, me int, ck kvtest.IKVClerk, done chan struct{}) kvtest.ClntRes {
lk := MakeLock(ck, "l")
ck.Put("l0", "", 0)
for i := 1; true; i++ {
select {
case <-done:
return kvtest.ClntRes{i, 0}
default:
lk.Acquire()
// log.Printf("%d: acquired lock", me)
b := strconv.Itoa(me)
val, ver, err := ck.Get("l0")
if err == rpc.OK {
if val != "" {
t.Fatalf("%d: two clients acquired lock %v", me, val)
}
} else {
t.Fatalf("%d: get failed %v", me, err)
}
err = ck.Put("l0", string(b), ver)
if !(err == rpc.OK || err == rpc.ErrMaybe) {
t.Fatalf("%d: put failed %v", me, err)
}
time.Sleep(10 * time.Millisecond)
err = ck.Put("l0", "", ver+1)
if !(err == rpc.OK || err == rpc.ErrMaybe) {
t.Fatalf("%d: put failed %v", me, err)
}
// log.Printf("%d: release lock", me)
lk.Release()
}
}
return kvtest.ClntRes{}
}
// Run test clients
func runClients(t *testing.T, nclnt int, reliable bool) {
ts := kvsrv.MakeTestKV(t, reliable)
defer ts.Cleanup()
ts.Begin(fmt.Sprintf("Test: %d lock clients", nclnt))
ts.SpawnClientsAndWait(nclnt, NSEC*time.Second, func(me int, myck kvtest.IKVClerk, done chan struct{}) kvtest.ClntRes {
return oneClient(t, me, myck, done)
})
}
func TestOneClientReliable(t *testing.T) {
runClients(t, 1, true)
}
func TestManyClientsReliable(t *testing.T) {
runClients(t, NCLNT, true)
}
func TestOneClientUnreliable(t *testing.T) {
runClients(t, 1, false)
}
func TestManyClientsUnreliable(t *testing.T) {
runClients(t, NCLNT, false)
}

src/kvsrv1/rpc/rpc.go (Normal file, 39 lines)
@@ -0,0 +1,39 @@
package rpc
type Err string
const (
// Err's returned by server and Clerk
OK = "OK"
ErrNoKey = "ErrNoKey"
ErrVersion = "ErrVersion"
// Err returned by Clerk only
ErrMaybe = "ErrMaybe"
// For future kvraft lab
ErrWrongLeader = "ErrWrongLeader"
ErrWrongGroup = "ErrWrongGroup"
)
type Tversion uint64
type PutArgs struct {
Key string
Value string
Version Tversion
}
type PutReply struct {
Err Err
}
type GetArgs struct {
Key string
}
type GetReply struct {
Value string
Version Tversion
Err Err
}

src/kvsrv1/server.go (Normal file, 63 lines)
@@ -0,0 +1,63 @@
package kvsrv
import (
"log"
"sync"
"6.5840/kvsrv1/rpc"
"6.5840/labrpc"
"6.5840/raft"
"6.5840/tester1"
)
const Debug = false
func DPrintf(format string, a ...interface{}) (n int, err error) {
if Debug {
log.Printf(format, a...)
}
return
}
type KVServer struct {
mu sync.Mutex
// Your definitions here.
}
func MakeKVServer() *KVServer {
kv := &KVServer{}
// Your code here.
return kv
}
// Get returns the value and version for args.Key, if args.Key
// exists. Otherwise, Get returns ErrNoKey.
func (kv *KVServer) Get(args *rpc.GetArgs, reply *rpc.GetReply) {
// Your code here.
}
// Update the value for a key if args.Version matches the version of
// the key on the server. If the versions don't match, return ErrVersion.
// If the key doesn't exist, Put installs the value, but only if
// args.Version is 0; otherwise it returns ErrNoKey.
func (kv *KVServer) Put(args *rpc.PutArgs, reply *rpc.PutReply) {
// Your code here.
}
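// One possible internal representation (illustrative only, not required): a
// map from key to a value/version pair, protected by kv.mu. Get copies the
// pair into the reply; a successful Put overwrites the value and increments
// the version.
//
//	type entry struct {
//		Value   string
//		Version rpc.Tversion
//	}
//	// in KVServer: data map[string]entry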
// You can ignore for this lab
func (kv *KVServer) Kill() {
}
// You can ignore for this lab
func (kv *KVServer) Raft() *raft.Raft {
return nil
}
// You can ignore all arguments; they are for replicated KVservers in lab 4
func StartKVServer(ends []*labrpc.ClientEnd, gid tester.Tgid, srv int, persister *raft.Persister, maxraftstate int) tester.IKVServer {
kv := MakeKVServer()
return kv
}

src/kvsrv1/test.go (Normal file, 36 lines)
@@ -0,0 +1,36 @@
package kvsrv
import (
// "log"
"testing"
"6.5840/kvtest1"
"6.5840/tester1"
)
type TestKV struct {
*kvtest.Test
t *testing.T
reliable bool
}
func MakeTestKV(t *testing.T, reliable bool) *TestKV {
cfg := tester.MakeConfig(t, 1, reliable, -1, StartKVServer)
ts := &TestKV{
t: t,
reliable: reliable,
}
ts.Test = kvtest.MakeTest(t, cfg, false, ts)
return ts
}
func (ts *TestKV) MakeClerk() kvtest.IKVClerk {
clnt := ts.Config.MakeClient()
ck := MakeClerk(clnt, tester.ServerName(tester.GRP0, 0))
return &kvtest.TestClerk{ck, clnt}
}
func (ts *TestKV) DeleteClerk(ck kvtest.IKVClerk) {
tck := ck.(*kvtest.TestClerk)
ts.DeleteClient(tck.Clnt)
}

src/kvtest1/kvtest.go (Normal file, 360 lines)
@@ -0,0 +1,360 @@
package kvtest
import (
"encoding/json"
// "log"
"math/rand"
"strconv"
"testing"
"time"
"6.5840/kvsrv1/rpc"
"6.5840/tester1"
)
// The tester generously allows solutions to complete elections in one second
// (much more than the paper's range of timeouts).
const ElectionTimeout = 1 * time.Second
func RandValue(n int) string {
const letterBytes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
b := make([]byte, n)
for i := range b {
b[i] = letterBytes[rand.Int63()%int64(len(letterBytes))]
}
return string(b)
}
type IKVClerk interface {
Get(string) (string, rpc.Tversion, rpc.Err)
Put(string, string, rpc.Tversion) rpc.Err
}
type TestClerk struct {
IKVClerk
Clnt *tester.Clnt
}
type IClerkMaker interface {
MakeClerk() IKVClerk
DeleteClerk(IKVClerk)
}
type Test struct {
*tester.Config
t *testing.T
oplog *OpLog
mck IClerkMaker
randomkeys bool
}
func MakeTest(t *testing.T, cfg *tester.Config, randomkeys bool, mck IClerkMaker) *Test {
ts := &Test{
Config: cfg,
t: t,
mck: mck,
oplog: &OpLog{},
randomkeys: randomkeys,
}
return ts
}
func (ts *Test) Cleanup() {
ts.Config.End()
ts.Config.Cleanup()
}
func (ts *Test) ConnectClnts(clnts []*tester.Clnt) {
for _, c := range clnts {
c.ConnectAll()
}
}
func (ts *Test) MakeClerk() IKVClerk {
return ts.mck.MakeClerk()
}
func (ts *Test) PutAtLeastOnce(ck IKVClerk, key, value string, ver rpc.Tversion, me int) rpc.Tversion {
for true {
if err := ts.Put(ck, key, value, ver, me); err == rpc.OK {
break
}
ver += 1
}
return ver
}
func (ts *Test) CheckGet(ck IKVClerk, key, value string, version rpc.Tversion) {
val, ver, err := ts.Get(ck, key, 0)
if err != rpc.OK {
ts.Fatalf("CheckGet err %v", err)
}
if val != value || ver != version {
ts.Fatalf("Get(%v): expected:\n%v %v\nreceived:\n%v %v", key, value, version, val, ver)
}
}
type ClntRes struct {
Nok int
Nmaybe int
}
func (ts *Test) CheckPutConcurrent(ck IKVClerk, key string, rs []ClntRes, res *ClntRes) {
e := EntryV{}
ver0 := ts.GetJson(ck, key, -1, &e)
for _, r := range rs {
res.Nok += r.Nok
res.Nmaybe += r.Nmaybe
}
if !ts.IsReliable() && ver0 > rpc.Tversion(res.Nok+res.Nmaybe) {
ts.Fatalf("Wrong number of puts: server %d clnts %v", ver0, res)
}
if ts.IsReliable() && ver0 != rpc.Tversion(res.Nok) {
ts.Fatalf("Wrong number of puts: server %d clnts %v", ver0, res)
}
}
// a client runs the function fn and then signals that it is done
func (ts *Test) runClient(me int, ca chan ClntRes, done chan struct{}, mkc IClerkMaker, fn Fclnt) {
ck := mkc.MakeClerk()
v := fn(me, ck, done)
ca <- v
mkc.DeleteClerk(ck)
}
type Fclnt func(int, IKVClerk, chan struct{}) ClntRes
// spawn nclnt clients
func (ts *Test) SpawnClientsAndWait(nclnt int, t time.Duration, fn Fclnt) []ClntRes {
ca := make([]chan ClntRes, nclnt)
done := make(chan struct{})
for cli := 0; cli < nclnt; cli++ {
ca[cli] = make(chan ClntRes)
go ts.runClient(cli, ca[cli], done, ts.mck, fn)
}
time.Sleep(t)
for i := 0; i < nclnt; i++ {
done <- struct{}{}
}
rs := make([]ClntRes, nclnt)
for cli := 0; cli < nclnt; cli++ {
rs[cli] = <-ca[cli]
}
return rs
}
func (ts *Test) GetJson(ck IKVClerk, key string, me int, v any) rpc.Tversion {
if val, ver, err := Get(ts.Config, ck, key, ts.oplog, me); err == rpc.OK {
if err := json.Unmarshal([]byte(val), v); err != nil {
ts.Fatalf("Unmarshal err %v", ver)
}
return ver
} else {
ts.Fatalf("%d: Get %q err %v", me, key, err)
return 0
}
}
func (ts *Test) PutJson(ck IKVClerk, key string, v any, ver rpc.Tversion, me int) rpc.Err {
b, err := json.Marshal(v)
if err != nil {
ts.Fatalf("%d: marshal %v", me, err)
}
return Put(ts.Config, ck, key, string(b), ver, ts.oplog, me)
}
func (ts *Test) PutAtLeastOnceJson(ck IKVClerk, key string, value any, ver rpc.Tversion, me int) rpc.Tversion {
for true {
if err := ts.PutJson(ck, key, value, 0, me); err != rpc.ErrMaybe {
break
}
ver += 1
}
return ver
}
type EntryV struct {
Id int
V rpc.Tversion
}
// Keep trying until one put succeeds, while other clients are
// trying to put to the same key
func (ts *Test) OnePut(me int, ck IKVClerk, key string, ver rpc.Tversion) (rpc.Tversion, bool) {
for true {
err := ts.PutJson(ck, key, EntryV{me, ver}, ver, me)
if !(err == rpc.OK || err == rpc.ErrVersion || err == rpc.ErrMaybe) {
ts.Fatalf("Wrong error %v", err)
}
e := EntryV{}
ver0 := ts.GetJson(ck, key, me, &e)
if err == rpc.OK && ver0 == ver+1 { // my put?
if e.Id != me && e.V != ver {
ts.Fatalf("Wrong value %v", e)
}
}
ver = ver0
if err == rpc.OK || err == rpc.ErrMaybe {
return ver, err == rpc.OK
}
}
return 0, false
}
// repartition the servers periodically
func (ts *Test) Partitioner(gid tester.Tgid, ch chan bool) {
defer func() { ch <- true }()
for true {
select {
case <-ch:
return
default:
a := make([]int, ts.Group(gid).N())
for i := 0; i < ts.Group(gid).N(); i++ {
a[i] = (rand.Int() % 2)
}
pa := make([][]int, 2)
for i := 0; i < 2; i++ {
pa[i] = make([]int, 0)
for j := 0; j < ts.Group(gid).N(); j++ {
if a[j] == i {
pa[i] = append(pa[i], j)
}
}
}
ts.Group(gid).Partition(pa[0], pa[1])
time.Sleep(ElectionTimeout + time.Duration(rand.Int63()%200)*time.Millisecond)
}
}
}
// One of perhaps many clients doing OnePuts until it receives a done signal.
func (ts *Test) OneClientPut(cli int, ck IKVClerk, ka []string, done chan struct{}) ClntRes {
res := ClntRes{}
verm := make(map[string]rpc.Tversion)
for _, k := range ka {
verm[k] = rpc.Tversion(0)
}
ok := false
for true {
select {
case <-done:
return res
default:
k := ka[0]
if ts.randomkeys {
k = ka[rand.Int()%len(ka)]
}
verm[k], ok = ts.OnePut(cli, ck, k, verm[k])
if ok {
res.Nok += 1
} else {
res.Nmaybe += 1
}
}
}
return res
}
func MakeKeys(n int) []string {
keys := make([]string, n)
for i := 0; i < n; i++ {
keys[i] = "k" + strconv.Itoa(i) // ensure multiple shards
}
return keys
}
func (ts *Test) SpreadPuts(ck IKVClerk, n int) ([]string, []string) {
ka := MakeKeys(n)
va := make([]string, n)
for i := 0; i < n; i++ {
va[i] = tester.Randstring(20)
ck.Put(ka[i], va[i], rpc.Tversion(0))
}
for i := 0; i < n; i++ {
ts.CheckGet(ck, ka[i], va[i], rpc.Tversion(1))
}
return ka, va
}
type entry struct {
Id int
N int
}
// At each iteration i, OneClientAppend attempts to append a tuple (me, i)
// to a key "k" shared with other clients. The client implements the
// append by first performing a Clerk.Get and then a Clerk.Put with
// the version number returned from the Get. If another client
// performs an append between the Get and the Put, the clerk may
// return ErrVersion and the client can retry. If the clerk returns
// ErrMaybe, the client's Put may have succeeded or not; in both
// cases, the client moves on to the next iteration. When running with
// many clients, the server's value for key "k" has the shape [(i, 1),
// (i, 2), (j, 1), (j, 3)...]: that is, each client has entries with
// increasing N, but some Ns may have been skipped.
func (ts *Test) OneClientAppend(me int, ck IKVClerk, done chan struct{}) ClntRes {
nmay := 0
nok := 0
for i := 0; true; i++ {
select {
case <-done:
return ClntRes{nok, nmay}
default:
// keep trying to put my i when err == ErrVersion
for true {
es := []entry{}
ver := ts.GetJson(ck, "k", me, &es)
es = append(es, entry{me, i})
if err := ts.PutJson(ck, "k", es, ver, me); err == rpc.OK {
nok += 1
break
} else if err == rpc.ErrMaybe {
// DPrintf("put %v err %v", ver, err)
nmay += 1
break
}
}
}
}
return ClntRes{}
}
type EntryN struct {
Id int
N int
}
// CheckAppends reads the latest value for key "k" and checks that it has the
// correct tuples.
func (ts *Test) CheckAppends(es []EntryN, nclnt int, rs []ClntRes, ver rpc.Tversion) {
expect := make(map[int]int)
skipped := make(map[int]int)
for i := 0; i < nclnt; i++ {
expect[i] = 0
skipped[i] = 0
}
for _, e := range es {
if expect[e.Id] > e.N { // old put?
ts.Fatalf("%d: wrong expecting %v but got %v", e.Id, expect[e.Id], e.N)
} else if expect[e.Id] == e.N {
expect[e.Id] += 1
} else { // missing entries because of failed put
s := (e.N - expect[e.Id])
expect[e.Id] = e.N + 1
skipped[e.Id] += s
}
}
if len(es)+1 != int(ver) {
ts.Fatalf("%d appends in val != puts on server %d", len(es), ver)
}
for c, n := range expect {
if skipped[c] > rs[c].Nmaybe {
ts.Fatalf("%d: skipped puts %d on server > %d maybe", c, skipped[c], rs[c].Nmaybe)
}
if n > rs[c].Nok+rs[c].Nmaybe {
ts.Fatalf("%d: %d puts on server > ok+maybe %d", c, n, rs[c].Nok+rs[c].Nmaybe)
}
}
}

src/kvtest1/porcupine.go (Normal file, 150 lines)
@@ -0,0 +1,150 @@
package kvtest
import (
"fmt"
"io/ioutil"
//"log"
"sync"
"testing"
"time"
"github.com/anishathalye/porcupine"
"6.5840/kvsrv1/rpc"
"6.5840/models1"
"6.5840/tester1"
)
const linearizabilityCheckTimeout = 1 * time.Second
type OpLog struct {
operations []porcupine.Operation
sync.Mutex
}
func (log *OpLog) Len() int {
log.Lock()
defer log.Unlock()
return len(log.operations)
}
func (log *OpLog) Append(op porcupine.Operation) {
log.Lock()
defer log.Unlock()
log.operations = append(log.operations, op)
}
func (log *OpLog) Read() []porcupine.Operation {
log.Lock()
defer log.Unlock()
ops := make([]porcupine.Operation, len(log.operations))
copy(ops, log.operations)
return ops
}
// to make sure timestamps use the monotonic clock, instead of computing
// absolute timestamps with `time.Now().UnixNano()` (which uses the wall
// clock), we measure time relative to `t0` using `time.Since(t0)`, which uses
// the monotonic clock
var t0 = time.Now()
func Get(cfg *tester.Config, ck IKVClerk, key string, log *OpLog, cli int) (string, rpc.Tversion, rpc.Err) {
start := int64(time.Since(t0))
val, ver, err := ck.Get(key)
end := int64(time.Since(t0))
cfg.Op()
if log != nil {
log.Append(porcupine.Operation{
Input: models.KvInput{Op: 0, Key: key},
Output: models.KvOutput{Value: val, Version: uint64(ver), Err: string(err)},
Call: start,
Return: end,
ClientId: cli,
})
}
return val, ver, err
}
func Put(cfg *tester.Config, ck IKVClerk, key string, value string, version rpc.Tversion, log *OpLog, cli int) rpc.Err {
start := int64(time.Since(t0))
err := ck.Put(key, value, version)
end := int64(time.Since(t0))
cfg.Op()
if log != nil {
log.Append(porcupine.Operation{
Input: models.KvInput{Op: 1, Key: key, Value: value, Version: uint64(version)},
Output: models.KvOutput{Err: string(err)},
Call: start,
Return: end,
ClientId: cli,
})
}
return err
}
// Checks that the log of Clerk.Put's and Clerk.Get's is linearizable (see
// linearizability-faq.txt)
func checkPorcupine(t *testing.T, opLog *OpLog, nsec time.Duration) {
//log.Printf("oplog len %v %v", ts.oplog.Len(), ts.oplog)
res, info := porcupine.CheckOperationsVerbose(models.KvModel, opLog.Read(), nsec)
if res == porcupine.Illegal {
file, err := ioutil.TempFile("", "porcupine-*.html")
if err != nil {
fmt.Printf("info: failed to create temp file for visualization")
} else {
err = porcupine.Visualize(models.KvModel, info, file)
if err != nil {
fmt.Printf("info: failed to write history visualization to %s\n", file.Name())
} else {
fmt.Printf("info: wrote history visualization to %s\n", file.Name())
}
}
t.Fatal("history is not linearizable")
} else if res == porcupine.Unknown {
fmt.Println("info: linearizability check timed out, assuming history is ok")
}
}
// Porcupine
func (ts *Test) Get(ck IKVClerk, key string, cli int) (string, rpc.Tversion, rpc.Err) {
start := int64(time.Since(t0))
val, ver, err := ck.Get(key)
end := int64(time.Since(t0))
ts.Op()
if ts.oplog != nil {
ts.oplog.Append(porcupine.Operation{
Input: models.KvInput{Op: 0, Key: key},
Output: models.KvOutput{Value: val, Version: uint64(ver), Err: string(err)},
Call: start,
Return: end,
ClientId: cli,
})
}
return val, ver, err
}
// Porcupine
func (ts *Test) Put(ck IKVClerk, key string, value string, version rpc.Tversion, cli int) rpc.Err {
start := int64(time.Since(t0))
err := ck.Put(key, value, version)
end := int64(time.Since(t0))
ts.Op()
if ts.oplog != nil {
ts.oplog.Append(porcupine.Operation{
Input: models.KvInput{Op: 1, Key: key, Value: value, Version: uint64(version)},
Output: models.KvOutput{Err: string(err)},
Call: start,
Return: end,
ClientId: cli,
})
}
return err
}
func (ts *Test) CheckPorcupine() {
checkPorcupine(ts.t, ts.oplog, linearizabilityCheckTimeout)
}
func (ts *Test) CheckPorcupineT(nsec time.Duration) {
checkPorcupine(ts.t, ts.oplog, nsec)
}

src/labgob/labgob.go (Normal file, 177 lines)
@@ -0,0 +1,177 @@
package labgob
//
// trying to send non-capitalized fields over RPC produces a range of
// misbehavior, including both mysterious incorrect computation and
// outright crashes. so this wrapper around Go's encoding/gob warns
// about non-capitalized field names.
//
import "encoding/gob"
import "io"
import "reflect"
import "fmt"
import "sync"
import "unicode"
import "unicode/utf8"
var mu sync.Mutex
var errorCount int // for TestCapital
var checked map[reflect.Type]bool
type LabEncoder struct {
gob *gob.Encoder
}
func NewEncoder(w io.Writer) *LabEncoder {
enc := &LabEncoder{}
enc.gob = gob.NewEncoder(w)
return enc
}
func (enc *LabEncoder) Encode(e interface{}) error {
checkValue(e)
return enc.gob.Encode(e)
}
func (enc *LabEncoder) EncodeValue(value reflect.Value) error {
checkValue(value.Interface())
return enc.gob.EncodeValue(value)
}
type LabDecoder struct {
gob *gob.Decoder
}
func NewDecoder(r io.Reader) *LabDecoder {
dec := &LabDecoder{}
dec.gob = gob.NewDecoder(r)
return dec
}
func (dec *LabDecoder) Decode(e interface{}) error {
checkValue(e)
checkDefault(e)
return dec.gob.Decode(e)
}
func Register(value interface{}) {
checkValue(value)
gob.Register(value)
}
func RegisterName(name string, value interface{}) {
checkValue(value)
gob.RegisterName(name, value)
}
func checkValue(value interface{}) {
checkType(reflect.TypeOf(value))
}
func checkType(t reflect.Type) {
k := t.Kind()
mu.Lock()
// only complain once, and avoid recursion.
if checked == nil {
checked = map[reflect.Type]bool{}
}
if checked[t] {
mu.Unlock()
return
}
checked[t] = true
mu.Unlock()
switch k {
case reflect.Struct:
for i := 0; i < t.NumField(); i++ {
f := t.Field(i)
rune, _ := utf8.DecodeRuneInString(f.Name)
if unicode.IsUpper(rune) == false {
// ta da
fmt.Printf("labgob error: lower-case field %v of %v in RPC or persist/snapshot will break your Raft\n",
f.Name, t.Name())
mu.Lock()
errorCount += 1
mu.Unlock()
}
checkType(f.Type)
}
return
case reflect.Slice, reflect.Array, reflect.Ptr:
checkType(t.Elem())
return
case reflect.Map:
checkType(t.Elem())
checkType(t.Key())
return
default:
return
}
}
//
// warn if the value contains non-default values,
// as it would if one sent an RPC but the reply
// struct was already modified. if the RPC reply
// contains default values, GOB won't overwrite
// the non-default value.
//
func checkDefault(value interface{}) {
if value == nil {
return
}
checkDefault1(reflect.ValueOf(value), 1, "")
}
func checkDefault1(value reflect.Value, depth int, name string) {
if depth > 3 {
return
}
t := value.Type()
k := t.Kind()
switch k {
case reflect.Struct:
for i := 0; i < t.NumField(); i++ {
vv := value.Field(i)
name1 := t.Field(i).Name
if name != "" {
name1 = name + "." + name1
}
checkDefault1(vv, depth+1, name1)
}
return
case reflect.Ptr:
if value.IsNil() {
return
}
checkDefault1(value.Elem(), depth+1, name)
return
case reflect.Bool,
reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64,
reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64,
reflect.Uintptr, reflect.Float32, reflect.Float64,
reflect.String:
if reflect.DeepEqual(reflect.Zero(t).Interface(), value.Interface()) == false {
mu.Lock()
if errorCount < 1 {
what := name
if what == "" {
what = t.Name()
}
// this warning typically arises if code re-uses the same RPC reply
// variable for multiple RPC calls, or if code restores persisted
// state into variables that already have non-default values.
fmt.Printf("labgob warning: Decoding into a non-default variable/field %v may not work\n",
what)
}
errorCount += 1
mu.Unlock()
}
return
}
}

src/labgob/test_test.go (Normal file, 166 lines)
@@ -0,0 +1,166 @@
package labgob
import "testing"
import "bytes"
type T1 struct {
T1int0 int
T1int1 int
T1string0 string
T1string1 string
}
type T2 struct {
T2slice []T1
T2map map[int]*T1
T2t3 interface{}
}
type T3 struct {
T3int999 int
}
// test that we didn't break GOB.
func TestGOB(t *testing.T) {
e0 := errorCount
w := new(bytes.Buffer)
Register(T3{})
{
x0 := 0
x1 := 1
t1 := T1{}
t1.T1int1 = 1
t1.T1string1 = "6.5840"
t2 := T2{}
t2.T2slice = []T1{T1{}, t1}
t2.T2map = map[int]*T1{}
t2.T2map[99] = &T1{1, 2, "x", "y"}
t2.T2t3 = T3{999}
e := NewEncoder(w)
e.Encode(x0)
e.Encode(x1)
e.Encode(t1)
e.Encode(t2)
}
data := w.Bytes()
{
var x0 int
var x1 int
var t1 T1
var t2 T2
r := bytes.NewBuffer(data)
d := NewDecoder(r)
if d.Decode(&x0) != nil ||
d.Decode(&x1) != nil ||
d.Decode(&t1) != nil ||
d.Decode(&t2) != nil {
t.Fatalf("Decode failed")
}
if x0 != 0 {
t.Fatalf("wrong x0 %v\n", x0)
}
if x1 != 1 {
t.Fatalf("wrong x1 %v\n", x1)
}
if t1.T1int0 != 0 {
t.Fatalf("wrong t1.T1int0 %v\n", t1.T1int0)
}
if t1.T1int1 != 1 {
t.Fatalf("wrong t1.T1int1 %v\n", t1.T1int1)
}
if t1.T1string0 != "" {
t.Fatalf("wrong t1.T1string0 %v\n", t1.T1string0)
}
if t1.T1string1 != "6.5840" {
t.Fatalf("wrong t1.T1string1 %v\n", t1.T1string1)
}
if len(t2.T2slice) != 2 {
t.Fatalf("wrong t2.T2slice len %v\n", len(t2.T2slice))
}
if t2.T2slice[1].T1int1 != 1 {
t.Fatalf("wrong slice value\n")
}
if len(t2.T2map) != 1 {
t.Fatalf("wrong t2.T2map len %v\n", len(t2.T2map))
}
if t2.T2map[99].T1string1 != "y" {
t.Fatalf("wrong map value\n")
}
t3 := (t2.T2t3).(T3)
if t3.T3int999 != 999 {
t.Fatalf("wrong t2.T2t3.T3int999\n")
}
}
if errorCount != e0 {
t.Fatalf("there were errors, but should not have been")
}
}
type T4 struct {
Yes int
no int
}
// make sure we check capitalization
// labgob prints one warning during this test.
func TestCapital(t *testing.T) {
e0 := errorCount
v := []map[*T4]int{}
w := new(bytes.Buffer)
e := NewEncoder(w)
e.Encode(v)
data := w.Bytes()
var v1 []map[T4]int
r := bytes.NewBuffer(data)
d := NewDecoder(r)
d.Decode(&v1)
if errorCount != e0+1 {
t.Fatalf("failed to warn about lower-case field")
}
}
// check that we warn when someone sends a default value over
// RPC but the target into which we're decoding holds a non-default
// value, which GOB seems not to overwrite as you'd expect.
//
// labgob does not print a warning.
func TestDefault(t *testing.T) {
e0 := errorCount
type DD struct {
X int
}
// send a default value...
dd1 := DD{}
w := new(bytes.Buffer)
e := NewEncoder(w)
e.Encode(dd1)
data := w.Bytes()
// and receive it into memory that already
// holds non-default values.
reply := DD{99}
r := bytes.NewBuffer(data)
d := NewDecoder(r)
d.Decode(&reply)
if errorCount != e0+1 {
t.Fatalf("failed to warn about decoding into non-default value")
}
}

src/labrpc/labrpc.go (Normal file, 536 lines)
@@ -0,0 +1,536 @@
package labrpc
//
// channel-based RPC, for 6.5840 labs.
//
// simulates a network that can lose requests, lose replies,
// delay messages, and entirely disconnect particular hosts.
//
// we will use the original labrpc.go to test your code for grading.
// so, while you can modify this code to help you debug, please
// test against the original before submitting.
//
// adapted from Go net/rpc/server.go.
//
// sends labgob-encoded values to ensure that RPCs
// don't include references to program objects.
//
// net := MakeNetwork() -- holds network, clients, servers.
// end := net.MakeEnd(endname) -- create a client end-point, to talk to one server.
// net.AddServer(servername, server) -- adds a named server to network.
// net.DeleteServer(servername) -- eliminate the named server.
// net.Connect(endname, servername) -- connect a client to a server.
// net.Enable(endname, enabled) -- enable/disable a client.
// net.Reliable(bool) -- false means drop/delay messages
//
// end.Call("Raft.AppendEntries", &args, &reply) -- send an RPC, wait for reply.
// the "Raft" is the name of the server struct to be called.
// the "AppendEntries" is the name of the method to be called.
// Call() returns true to indicate that the server executed the request
// and the reply is valid.
// Call() returns false if the network lost the request or reply
// or the server is down.
// It is OK to have multiple Call()s in progress at the same time on the
// same ClientEnd.
// Concurrent calls to Call() may be delivered to the server out of order,
// since the network may re-order messages.
// Call() is guaranteed to return (perhaps after a delay) *except* if the
// handler function on the server side does not return.
// the server RPC handler function must declare its args and reply arguments
// as pointers, so that their types exactly match the types of the arguments
// to Call().
//
// srv := MakeServer()
// srv.AddService(svc) -- a server can have multiple services, e.g. Raft and k/v
// pass srv to net.AddServer()
//
// svc := MakeService(receiverObject) -- obj's methods will handle RPCs
// much like Go's rpcs.Register()
// pass svc to srv.AddService()
//
import "6.5840/labgob"
import "bytes"
import "reflect"
import "sync"
import "log"
import "strings"
import "math/rand"
import "time"
import "sync/atomic"
const (
SHORTDELAY = 27 // ms
LONGDELAY = 7000 // ms
MAXDELAY = LONGDELAY + 100
)
type reqMsg struct {
endname interface{} // name of sending ClientEnd
svcMeth string // e.g. "Raft.AppendEntries"
argsType reflect.Type
args []byte
replyCh chan replyMsg
}
type replyMsg struct {
ok bool
reply []byte
}
type ClientEnd struct {
endname interface{} // this end-point's name
ch chan reqMsg // copy of Network.endCh
done chan struct{} // closed when Network is cleaned up
}
// send an RPC, wait for the reply.
// the return value indicates success; false means that
// no reply was received from the server.
func (e *ClientEnd) Call(svcMeth string, args interface{}, reply interface{}) bool {
req := reqMsg{}
req.endname = e.endname
req.svcMeth = svcMeth
req.argsType = reflect.TypeOf(args)
req.replyCh = make(chan replyMsg)
qb := new(bytes.Buffer)
qe := labgob.NewEncoder(qb)
if err := qe.Encode(args); err != nil {
panic(err)
}
req.args = qb.Bytes()
//
// send the request.
//
select {
case e.ch <- req:
// the request has been sent.
case <-e.done:
// entire Network has been destroyed.
return false
}
//
// wait for the reply.
//
rep := <-req.replyCh
if rep.ok {
rb := bytes.NewBuffer(rep.reply)
rd := labgob.NewDecoder(rb)
if err := rd.Decode(reply); err != nil {
log.Fatalf("ClientEnd.Call(): decode reply: %v\n", err)
}
return true
} else {
return false
}
}
type Network struct {
mu sync.Mutex
reliable bool
longDelays bool // pause a long time on sends to a disabled connection
longReordering bool // sometimes delay replies a long time
ends map[interface{}]*ClientEnd // ends, by name
enabled map[interface{}]bool // by end name
servers map[interface{}]*Server // servers, by name
connections map[interface{}]interface{} // endname -> servername
endCh chan reqMsg
done chan struct{} // closed when Network is cleaned up
count int32 // total RPC count, for statistics
bytes int64 // total bytes sent, for statistics
}
func MakeNetwork() *Network {
rn := &Network{}
rn.reliable = true
rn.ends = map[interface{}]*ClientEnd{}
rn.enabled = map[interface{}]bool{}
rn.servers = map[interface{}]*Server{}
rn.connections = map[interface{}](interface{}){}
rn.endCh = make(chan reqMsg)
rn.done = make(chan struct{})
// single goroutine to handle all ClientEnd.Call()s
go func() {
for {
select {
case xreq := <-rn.endCh:
atomic.AddInt32(&rn.count, 1)
atomic.AddInt64(&rn.bytes, int64(len(xreq.args)))
go rn.processReq(xreq)
case <-rn.done:
return
}
}
}()
return rn
}
func (rn *Network) Cleanup() {
close(rn.done)
}
func (rn *Network) Reliable(yes bool) {
rn.mu.Lock()
defer rn.mu.Unlock()
rn.reliable = yes
}
func (rn *Network) IsReliable() bool {
rn.mu.Lock()
defer rn.mu.Unlock()
return rn.reliable
}
func (rn *Network) LongReordering(yes bool) {
rn.mu.Lock()
defer rn.mu.Unlock()
rn.longReordering = yes
}
func (rn *Network) LongDelays(yes bool) {
rn.mu.Lock()
defer rn.mu.Unlock()
rn.longDelays = yes
}
func (rn *Network) readEndnameInfo(endname interface{}) (enabled bool,
servername interface{}, server *Server, reliable bool, longreordering bool,
) {
rn.mu.Lock()
defer rn.mu.Unlock()
enabled = rn.enabled[endname]
servername = rn.connections[endname]
if servername != nil {
server = rn.servers[servername]
}
reliable = rn.reliable
longreordering = rn.longReordering
return
}
func (rn *Network) isServerDead(endname interface{}, servername interface{}, server *Server) bool {
rn.mu.Lock()
defer rn.mu.Unlock()
if rn.enabled[endname] == false || rn.servers[servername] != server {
return true
}
return false
}
func (rn *Network) processReq(req reqMsg) {
enabled, servername, server, reliable, longreordering := rn.readEndnameInfo(req.endname)
if enabled && servername != nil && server != nil {
if reliable == false {
// short delay
ms := (rand.Int() % SHORTDELAY)
time.Sleep(time.Duration(ms) * time.Millisecond)
}
if reliable == false && (rand.Int()%1000) < 100 {
// drop the request, return as if timeout
req.replyCh <- replyMsg{false, nil}
return
}
// execute the request (call the RPC handler).
// in a separate thread so that we can periodically check
// if the server has been killed and the RPC should get a
// failure reply.
ech := make(chan replyMsg)
go func() {
r := server.dispatch(req)
ech <- r
}()
// wait for handler to return,
// but stop waiting if DeleteServer() has been called,
// and return an error.
var reply replyMsg
replyOK := false
serverDead := false
for replyOK == false && serverDead == false {
select {
case reply = <-ech:
replyOK = true
case <-time.After(100 * time.Millisecond):
serverDead = rn.isServerDead(req.endname, servername, server)
if serverDead {
go func() {
<-ech // drain channel to let the goroutine created earlier terminate
}()
}
}
}
// do not reply if DeleteServer() has been called, i.e.
// the server has been killed. this is needed to avoid
// situation in which a client gets a positive reply
// to an Append, but the server persisted the update
// into the old Persister. config.go is careful to call
// DeleteServer() before superseding the Persister.
serverDead = rn.isServerDead(req.endname, servername, server)
if replyOK == false || serverDead == true {
// server was killed while we were waiting; return error.
req.replyCh <- replyMsg{false, nil}
} else if reliable == false && (rand.Int()%1000) < 100 {
// drop the reply, return as if timeout
req.replyCh <- replyMsg{false, nil}
} else if longreordering == true && rand.Intn(900) < 600 {
// delay the response for a while
ms := 200 + rand.Intn(1+rand.Intn(2000))
// Russ points out that this timer arrangement will decrease
// the number of goroutines, so that the race
// detector is less likely to get upset.
time.AfterFunc(time.Duration(ms)*time.Millisecond, func() {
atomic.AddInt64(&rn.bytes, int64(len(reply.reply)))
req.replyCh <- reply
})
} else {
atomic.AddInt64(&rn.bytes, int64(len(reply.reply)))
req.replyCh <- reply
}
} else {
// simulate no reply and eventual timeout.
ms := 0
if rn.longDelays {
// let Raft tests check that leader doesn't send
// RPCs synchronously.
ms = (rand.Int() % LONGDELAY)
} else {
// many kv tests require the client to try each
// server in fairly rapid succession.
ms = (rand.Int() % 100)
}
time.AfterFunc(time.Duration(ms)*time.Millisecond, func() {
req.replyCh <- replyMsg{false, nil}
})
}
}
// create a client end-point.
// start the thread that listens and delivers.
func (rn *Network) MakeEnd(endname interface{}) *ClientEnd {
rn.mu.Lock()
defer rn.mu.Unlock()
if _, ok := rn.ends[endname]; ok {
log.Fatalf("MakeEnd: %v already exists\n", endname)
}
e := &ClientEnd{}
e.endname = endname
e.ch = rn.endCh
e.done = rn.done
rn.ends[endname] = e
rn.enabled[endname] = false
rn.connections[endname] = nil
return e
}
func (rn *Network) DeleteEnd(endname interface{}) {
rn.mu.Lock()
defer rn.mu.Unlock()
if _, ok := rn.ends[endname]; !ok {
log.Fatalf("MakeEnd: %v doesn't exists\n", endname)
}
delete(rn.ends, endname)
delete(rn.enabled, endname)
delete(rn.connections, endname)
}
func (rn *Network) AddServer(servername interface{}, rs *Server) {
rn.mu.Lock()
defer rn.mu.Unlock()
rn.servers[servername] = rs
}
func (rn *Network) DeleteServer(servername interface{}) {
rn.mu.Lock()
defer rn.mu.Unlock()
rn.servers[servername] = nil
}
// connect a ClientEnd to a server.
// a ClientEnd can only be connected once in its lifetime.
func (rn *Network) Connect(endname interface{}, servername interface{}) {
rn.mu.Lock()
defer rn.mu.Unlock()
rn.connections[endname] = servername
}
// enable/disable a ClientEnd.
func (rn *Network) Enable(endname interface{}, enabled bool) {
rn.mu.Lock()
defer rn.mu.Unlock()
rn.enabled[endname] = enabled
}
// get a server's count of incoming RPCs.
func (rn *Network) GetCount(servername interface{}) int {
rn.mu.Lock()
defer rn.mu.Unlock()
svr := rn.servers[servername]
return svr.GetCount()
}
func (rn *Network) GetTotalCount() int {
x := atomic.LoadInt32(&rn.count)
return int(x)
}
func (rn *Network) GetTotalBytes() int64 {
x := atomic.LoadInt64(&rn.bytes)
return x
}
// a server is a collection of services, all sharing
// the same rpc dispatcher. so that e.g. both a Raft
// and a k/v server can listen to the same rpc endpoint.
type Server struct {
mu sync.Mutex
services map[string]*Service
count int // incoming RPCs
}
func MakeServer() *Server {
rs := &Server{}
rs.services = map[string]*Service{}
return rs
}
func (rs *Server) AddService(svc *Service) {
rs.mu.Lock()
defer rs.mu.Unlock()
rs.services[svc.name] = svc
}
func (rs *Server) dispatch(req reqMsg) replyMsg {
rs.mu.Lock()
rs.count += 1
// split Raft.AppendEntries into service and method
dot := strings.LastIndex(req.svcMeth, ".")
serviceName := req.svcMeth[:dot]
methodName := req.svcMeth[dot+1:]
service, ok := rs.services[serviceName]
rs.mu.Unlock()
if ok {
return service.dispatch(methodName, req)
} else {
choices := []string{}
for k, _ := range rs.services {
choices = append(choices, k)
}
log.Fatalf("labrpc.Server.dispatch(): unknown service %v in %v.%v; expecting one of %v\n",
serviceName, serviceName, methodName, choices)
return replyMsg{false, nil}
}
}
func (rs *Server) GetCount() int {
rs.mu.Lock()
defer rs.mu.Unlock()
return rs.count
}
// an object with methods that can be called via RPC.
// a single server may have more than one Service.
type Service struct {
name string
rcvr reflect.Value
typ reflect.Type
methods map[string]reflect.Method
}
func MakeService(rcvr interface{}) *Service {
svc := &Service{}
svc.typ = reflect.TypeOf(rcvr)
svc.rcvr = reflect.ValueOf(rcvr)
svc.name = reflect.Indirect(svc.rcvr).Type().Name()
svc.methods = map[string]reflect.Method{}
for m := 0; m < svc.typ.NumMethod(); m++ {
method := svc.typ.Method(m)
mtype := method.Type
mname := method.Name
//fmt.Printf("%v pp %v ni %v 1k %v 2k %v no %v\n",
// mname, method.PkgPath, mtype.NumIn(), mtype.In(1).Kind(), mtype.In(2).Kind(), mtype.NumOut())
if method.PkgPath != "" || // capitalized?
mtype.NumIn() != 3 ||
//mtype.In(1).Kind() != reflect.Ptr ||
mtype.In(2).Kind() != reflect.Ptr ||
mtype.NumOut() != 0 {
// the method is not suitable for a handler
//fmt.Printf("bad method: %v\n", mname)
} else {
// the method looks like a handler
svc.methods[mname] = method
}
}
return svc
}
func (svc *Service) dispatch(methname string, req reqMsg) replyMsg {
if method, ok := svc.methods[methname]; ok {
// prepare space into which to read the argument.
// the Value's type will be a pointer to req.argsType.
args := reflect.New(req.argsType)
// decode the argument.
ab := bytes.NewBuffer(req.args)
ad := labgob.NewDecoder(ab)
ad.Decode(args.Interface())
// allocate space for the reply.
replyType := method.Type.In(2)
replyType = replyType.Elem()
replyv := reflect.New(replyType)
// call the method.
function := method.Func
function.Call([]reflect.Value{svc.rcvr, args.Elem(), replyv})
// encode the reply.
rb := new(bytes.Buffer)
re := labgob.NewEncoder(rb)
re.EncodeValue(replyv)
return replyMsg{true, rb.Bytes()}
} else {
choices := []string{}
for k, _ := range svc.methods {
choices = append(choices, k)
}
log.Fatalf("labrpc.Service.dispatch(): unknown method %v in %v; expecting one of %v\n",
methname, req.svcMeth, choices)
return replyMsg{false, nil}
}
}

597
src/labrpc/test_test.go Normal file
View File

@ -0,0 +1,597 @@
package labrpc
import "testing"
import "strconv"
import "sync"
import "runtime"
import "time"
import "fmt"
type JunkArgs struct {
X int
}
type JunkReply struct {
X string
}
type JunkServer struct {
mu sync.Mutex
log1 []string
log2 []int
}
func (js *JunkServer) Handler1(args string, reply *int) {
js.mu.Lock()
defer js.mu.Unlock()
js.log1 = append(js.log1, args)
*reply, _ = strconv.Atoi(args)
}
func (js *JunkServer) Handler2(args int, reply *string) {
js.mu.Lock()
defer js.mu.Unlock()
js.log2 = append(js.log2, args)
*reply = "handler2-" + strconv.Itoa(args)
}
func (js *JunkServer) Handler3(args int, reply *int) {
js.mu.Lock()
defer js.mu.Unlock()
time.Sleep(20 * time.Second)
*reply = -args
}
// args is a pointer
func (js *JunkServer) Handler4(args *JunkArgs, reply *JunkReply) {
reply.X = "pointer"
}
// args is not a pointer
func (js *JunkServer) Handler5(args JunkArgs, reply *JunkReply) {
reply.X = "no pointer"
}
func (js *JunkServer) Handler6(args string, reply *int) {
js.mu.Lock()
defer js.mu.Unlock()
*reply = len(args)
}
func (js *JunkServer) Handler7(args int, reply *string) {
js.mu.Lock()
defer js.mu.Unlock()
*reply = ""
for i := 0; i < args; i++ {
*reply = *reply + "y"
}
}
func TestBasic(t *testing.T) {
runtime.GOMAXPROCS(4)
rn := MakeNetwork()
defer rn.Cleanup()
e := rn.MakeEnd("end1-99")
js := &JunkServer{}
svc := MakeService(js)
rs := MakeServer()
rs.AddService(svc)
rn.AddServer("server99", rs)
rn.Connect("end1-99", "server99")
rn.Enable("end1-99", true)
{
reply := ""
e.Call("JunkServer.Handler2", 111, &reply)
if reply != "handler2-111" {
t.Fatalf("wrong reply from Handler2")
}
}
{
reply := 0
e.Call("JunkServer.Handler1", "9099", &reply)
if reply != 9099 {
t.Fatalf("wrong reply from Handler1")
}
}
}
func TestTypes(t *testing.T) {
runtime.GOMAXPROCS(4)
rn := MakeNetwork()
defer rn.Cleanup()
e := rn.MakeEnd("end1-99")
js := &JunkServer{}
svc := MakeService(js)
rs := MakeServer()
rs.AddService(svc)
rn.AddServer("server99", rs)
rn.Connect("end1-99", "server99")
rn.Enable("end1-99", true)
{
var args JunkArgs
var reply JunkReply
// args must match type (pointer or not) of handler.
e.Call("JunkServer.Handler4", &args, &reply)
if reply.X != "pointer" {
t.Fatalf("wrong reply from Handler4")
}
}
{
var args JunkArgs
var reply JunkReply
// args must match type (pointer or not) of handler.
e.Call("JunkServer.Handler5", args, &reply)
if reply.X != "no pointer" {
t.Fatalf("wrong reply from Handler5")
}
}
}
//
// does net.Enable(endname, false) really disconnect a client?
//
func TestDisconnect(t *testing.T) {
runtime.GOMAXPROCS(4)
rn := MakeNetwork()
defer rn.Cleanup()
e := rn.MakeEnd("end1-99")
js := &JunkServer{}
svc := MakeService(js)
rs := MakeServer()
rs.AddService(svc)
rn.AddServer("server99", rs)
rn.Connect("end1-99", "server99")
{
reply := ""
e.Call("JunkServer.Handler2", 111, &reply)
if reply != "" {
t.Fatalf("unexpected reply from Handler2")
}
}
rn.Enable("end1-99", true)
{
reply := 0
e.Call("JunkServer.Handler1", "9099", &reply)
if reply != 9099 {
t.Fatalf("wrong reply from Handler1")
}
}
}
//
// test net.GetCount()
//
func TestCounts(t *testing.T) {
runtime.GOMAXPROCS(4)
rn := MakeNetwork()
defer rn.Cleanup()
e := rn.MakeEnd("end1-99")
js := &JunkServer{}
svc := MakeService(js)
rs := MakeServer()
rs.AddService(svc)
rn.AddServer(99, rs)
rn.Connect("end1-99", 99)
rn.Enable("end1-99", true)
for i := 0; i < 17; i++ {
reply := ""
e.Call("JunkServer.Handler2", i, &reply)
wanted := "handler2-" + strconv.Itoa(i)
if reply != wanted {
t.Fatalf("wrong reply %v from Handler1, expecting %v", reply, wanted)
}
}
n := rn.GetCount(99)
if n != 17 {
t.Fatalf("wrong GetCount() %v, expected 17\n", n)
}
}
//
// test net.GetTotalBytes()
//
func TestBytes(t *testing.T) {
runtime.GOMAXPROCS(4)
rn := MakeNetwork()
defer rn.Cleanup()
e := rn.MakeEnd("end1-99")
js := &JunkServer{}
svc := MakeService(js)
rs := MakeServer()
rs.AddService(svc)
rn.AddServer(99, rs)
rn.Connect("end1-99", 99)
rn.Enable("end1-99", true)
for i := 0; i < 17; i++ {
args := "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
args = args + args
args = args + args
reply := 0
e.Call("JunkServer.Handler6", args, &reply)
wanted := len(args)
if reply != wanted {
t.Fatalf("wrong reply %v from Handler6, expecting %v", reply, wanted)
}
}
n := rn.GetTotalBytes()
if n < 4828 || n > 6000 {
t.Fatalf("wrong GetTotalBytes() %v, expected about 5000\n", n)
}
for i := 0; i < 17; i++ {
args := 107
reply := ""
e.Call("JunkServer.Handler7", args, &reply)
wanted := args
if len(reply) != wanted {
t.Fatalf("wrong reply len=%v from Handler6, expecting %v", len(reply), wanted)
}
}
nn := rn.GetTotalBytes() - n
if nn < 1800 || nn > 2500 {
t.Fatalf("wrong GetTotalBytes() %v, expected about 2000\n", nn)
}
}
//
// test RPCs from concurrent ClientEnds
//
func TestConcurrentMany(t *testing.T) {
runtime.GOMAXPROCS(4)
rn := MakeNetwork()
defer rn.Cleanup()
js := &JunkServer{}
svc := MakeService(js)
rs := MakeServer()
rs.AddService(svc)
rn.AddServer(1000, rs)
ch := make(chan int)
nclients := 20
nrpcs := 10
for ii := 0; ii < nclients; ii++ {
go func(i int) {
n := 0
defer func() { ch <- n }()
e := rn.MakeEnd(i)
rn.Connect(i, 1000)
rn.Enable(i, true)
for j := 0; j < nrpcs; j++ {
arg := i*100 + j
reply := ""
e.Call("JunkServer.Handler2", arg, &reply)
wanted := "handler2-" + strconv.Itoa(arg)
if reply != wanted {
t.Fatalf("wrong reply %v from Handler1, expecting %v", reply, wanted)
}
n += 1
}
}(ii)
}
total := 0
for ii := 0; ii < nclients; ii++ {
x := <-ch
total += x
}
if total != nclients*nrpcs {
t.Fatalf("wrong number of RPCs completed, got %v, expected %v", total, nclients*nrpcs)
}
n := rn.GetCount(1000)
if n != total {
t.Fatalf("wrong GetCount() %v, expected %v\n", n, total)
}
}
//
// test unreliable
//
func TestUnreliable(t *testing.T) {
runtime.GOMAXPROCS(4)
rn := MakeNetwork()
defer rn.Cleanup()
rn.Reliable(false)
js := &JunkServer{}
svc := MakeService(js)
rs := MakeServer()
rs.AddService(svc)
rn.AddServer(1000, rs)
ch := make(chan int)
nclients := 300
for ii := 0; ii < nclients; ii++ {
go func(i int) {
n := 0
defer func() { ch <- n }()
e := rn.MakeEnd(i)
rn.Connect(i, 1000)
rn.Enable(i, true)
arg := i * 100
reply := ""
ok := e.Call("JunkServer.Handler2", arg, &reply)
if ok {
wanted := "handler2-" + strconv.Itoa(arg)
if reply != wanted {
t.Fatalf("wrong reply %v from Handler1, expecting %v", reply, wanted)
}
n += 1
}
}(ii)
}
total := 0
for ii := 0; ii < nclients; ii++ {
x := <-ch
total += x
}
if total == nclients || total == 0 {
t.Fatalf("all RPCs succeeded despite unreliable")
}
}
//
// test concurrent RPCs from a single ClientEnd
//
func TestConcurrentOne(t *testing.T) {
runtime.GOMAXPROCS(4)
rn := MakeNetwork()
defer rn.Cleanup()
js := &JunkServer{}
svc := MakeService(js)
rs := MakeServer()
rs.AddService(svc)
rn.AddServer(1000, rs)
e := rn.MakeEnd("c")
rn.Connect("c", 1000)
rn.Enable("c", true)
ch := make(chan int)
nrpcs := 20
for ii := 0; ii < nrpcs; ii++ {
go func(i int) {
n := 0
defer func() { ch <- n }()
arg := 100 + i
reply := ""
e.Call("JunkServer.Handler2", arg, &reply)
wanted := "handler2-" + strconv.Itoa(arg)
if reply != wanted {
t.Fatalf("wrong reply %v from Handler2, expecting %v", reply, wanted)
}
n += 1
}(ii)
}
total := 0
for ii := 0; ii < nrpcs; ii++ {
x := <-ch
total += x
}
if total != nrpcs {
t.Fatalf("wrong number of RPCs completed, got %v, expected %v", total, nrpcs)
}
js.mu.Lock()
defer js.mu.Unlock()
if len(js.log2) != nrpcs {
t.Fatalf("wrong number of RPCs delivered")
}
n := rn.GetCount(1000)
if n != total {
t.Fatalf("wrong GetCount() %v, expected %v\n", n, total)
}
}
//
// regression: an RPC that's delayed during Enabled=false
// should not delay subsequent RPCs (e.g. after Enabled=true).
//
func TestRegression1(t *testing.T) {
runtime.GOMAXPROCS(4)
rn := MakeNetwork()
defer rn.Cleanup()
js := &JunkServer{}
svc := MakeService(js)
rs := MakeServer()
rs.AddService(svc)
rn.AddServer(1000, rs)
e := rn.MakeEnd("c")
rn.Connect("c", 1000)
// start some RPCs while the ClientEnd is disabled.
// they'll be delayed.
rn.Enable("c", false)
ch := make(chan bool)
nrpcs := 20
for ii := 0; ii < nrpcs; ii++ {
go func(i int) {
ok := false
defer func() { ch <- ok }()
arg := 100 + i
reply := ""
// this call ought to return false.
e.Call("JunkServer.Handler2", arg, &reply)
ok = true
}(ii)
}
time.Sleep(100 * time.Millisecond)
// now enable the ClientEnd and check that an RPC completes quickly.
t0 := time.Now()
rn.Enable("c", true)
{
arg := 99
reply := ""
e.Call("JunkServer.Handler2", arg, &reply)
wanted := "handler2-" + strconv.Itoa(arg)
if reply != wanted {
t.Fatalf("wrong reply %v from Handler2, expecting %v", reply, wanted)
}
}
dur := time.Since(t0).Seconds()
if dur > 0.03 {
t.Fatalf("RPC took too long (%v) after Enable", dur)
}
for ii := 0; ii < nrpcs; ii++ {
<-ch
}
js.mu.Lock()
defer js.mu.Unlock()
if len(js.log2) != 1 {
t.Fatalf("wrong number (%v) of RPCs delivered, expected 1", len(js.log2))
}
n := rn.GetCount(1000)
if n != 1 {
t.Fatalf("wrong GetCount() %v, expected %v\n", n, 1)
}
}
//
// if an RPC is stuck in a server, and the server
// is killed with DeleteServer(), does the RPC
// get un-stuck?
//
func TestKilled(t *testing.T) {
runtime.GOMAXPROCS(4)
rn := MakeNetwork()
defer rn.Cleanup()
e := rn.MakeEnd("end1-99")
js := &JunkServer{}
svc := MakeService(js)
rs := MakeServer()
rs.AddService(svc)
rn.AddServer("server99", rs)
rn.Connect("end1-99", "server99")
rn.Enable("end1-99", true)
doneCh := make(chan bool)
go func() {
reply := 0
ok := e.Call("JunkServer.Handler3", 99, &reply)
doneCh <- ok
}()
time.Sleep(1000 * time.Millisecond)
select {
case <-doneCh:
t.Fatalf("Handler3 should not have returned yet")
case <-time.After(100 * time.Millisecond):
}
rn.DeleteServer("server99")
select {
case x := <-doneCh:
if x != false {
t.Fatalf("Handler3 returned successfully despite DeleteServer()")
}
case <-time.After(100 * time.Millisecond):
t.Fatalf("Handler3 should return after DeleteServer()")
}
}
func TestBenchmark(t *testing.T) {
runtime.GOMAXPROCS(4)
rn := MakeNetwork()
defer rn.Cleanup()
e := rn.MakeEnd("end1-99")
js := &JunkServer{}
svc := MakeService(js)
rs := MakeServer()
rs.AddService(svc)
rn.AddServer("server99", rs)
rn.Connect("end1-99", "server99")
rn.Enable("end1-99", true)
t0 := time.Now()
n := 100000
for iters := 0; iters < n; iters++ {
reply := ""
e.Call("JunkServer.Handler2", 111, &reply)
if reply != "handler2-111" {
t.Fatalf("wrong reply from Handler2")
}
}
fmt.Printf("%v for %v\n", time.Since(t0), n)
// march 2016, rtm laptop, 22 microseconds per RPC
}

74
src/main/diskvd.go Normal file
View File

@ -0,0 +1,74 @@
package main
//
// start a diskvd server. it's a member of some replica
// group, which has other members, and it needs to know
// how to talk to the members of the shardmaster service.
// used by ../diskv/test_test.go
//
// arguments:
// -g groupid
// -m masterport1 -m masterport2 ...
// -s replicaport1 -s replicaport2 ...
// -i my-index-in-server-port-list
// -u unreliable
// -d directory
// -r restart
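//
// for example (socket paths and group id below are hypothetical):
//   ./diskvd -g 100 -m /tmp/me-m0 -m /tmp/me-m1 -s /tmp/me-s0 -s /tmp/me-s1 -i 0 -d /tmp/me-dir -r false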
import "time"
import "6.5840/diskv"
import "os"
import "fmt"
import "strconv"
import "runtime"
func usage() {
fmt.Printf("Usage: diskvd -g gid -m master... -s server... -i my-index -d dir\n")
os.Exit(1)
}
func main() {
var gid int64 = -1 // my replica group ID
masters := []string{} // ports of shardmasters
replicas := []string{} // ports of servers in my replica group
me := -1 // my index in replicas[]
unreliable := false
dir := "" // store persistent data here
restart := false
for i := 1; i+1 < len(os.Args); i += 2 {
a0 := os.Args[i]
a1 := os.Args[i+1]
if a0 == "-g" {
gid, _ = strconv.ParseInt(a1, 10, 64)
} else if a0 == "-m" {
masters = append(masters, a1)
} else if a0 == "-s" {
replicas = append(replicas, a1)
} else if a0 == "-i" {
me, _ = strconv.Atoi(a1)
} else if a0 == "-u" {
unreliable, _ = strconv.ParseBool(a1)
} else if a0 == "-d" {
dir = a1
} else if a0 == "-r" {
restart, _ = strconv.ParseBool(a1)
} else {
usage()
}
}
if gid < 0 || me < 0 || len(masters) < 1 || me >= len(replicas) || dir == "" {
usage()
}
runtime.GOMAXPROCS(4)
srv := diskv.StartServer(gid, masters, replicas, me, dir, restart)
srv.Setunreliable(unreliable)
// for safety, force quit after 10 minutes.
time.Sleep(10 * 60 * time.Second)
mep, _ := os.FindProcess(os.Getpid())
mep.Kill()
}

31
src/main/lockc.go Normal file
View File

@ -0,0 +1,31 @@
package main
//
// see comments in lockd.go
//
import "6.5840/lockservice"
import "os"
import "fmt"
func usage() {
fmt.Printf("Usage: lockc -l|-u primaryport backupport lockname\n")
os.Exit(1)
}
func main() {
if len(os.Args) == 5 {
ck := lockservice.MakeClerk(os.Args[2], os.Args[3])
var ok bool
if os.Args[1] == "-l" {
ok = ck.Lock(os.Args[4])
} else if os.Args[1] == "-u" {
ok = ck.Unlock(os.Args[4])
} else {
usage()
}
fmt.Printf("reply: %v\n", ok)
} else {
usage()
}
}

31
src/main/lockd.go Normal file
View File

@ -0,0 +1,31 @@
package main
// export GOPATH=~/6.5840
// go build lockd.go
// go build lockc.go
// ./lockd -p a b &
// ./lockd -b a b &
// ./lockc -l a b lx
// ./lockc -u a b lx
//
// on Athena, use /tmp/myname-a and /tmp/myname-b
// instead of a and b.
import "time"
import "6.5840/lockservice"
import "os"
import "fmt"
func main() {
if len(os.Args) == 4 && os.Args[1] == "-p" {
lockservice.StartServer(os.Args[2], os.Args[3], true)
} else if len(os.Args) == 4 && os.Args[1] == "-b" {
lockservice.StartServer(os.Args[2], os.Args[3], false)
} else {
fmt.Printf("Usage: lockd -p|-b primaryport backupport\n")
os.Exit(1)
}
for {
time.Sleep(100 * time.Second)
}
}

29
src/main/mrcoordinator.go Normal file
View File

@ -0,0 +1,29 @@
package main
//
// start the coordinator process, which is implemented
// in ../mr/coordinator.go
//
// go run mrcoordinator.go pg*.txt
//
// Please do not change this file.
//
import "6.5840/mr"
import "time"
import "os"
import "fmt"
func main() {
if len(os.Args) < 2 {
fmt.Fprintf(os.Stderr, "Usage: mrcoordinator inputfiles...\n")
os.Exit(1)
}
m := mr.MakeCoordinator(os.Args[1:], 10)
for m.Done() == false {
time.Sleep(time.Second)
}
time.Sleep(time.Second)
}

108
src/main/mrsequential.go Normal file
View File

@ -0,0 +1,108 @@
package main
//
// simple sequential MapReduce.
//
// go run mrsequential.go wc.so pg*.txt
//
import "fmt"
import "6.5840/mr"
import "plugin"
import "os"
import "log"
import "io/ioutil"
import "sort"
// for sorting by key.
type ByKey []mr.KeyValue
// for sorting by key.
func (a ByKey) Len() int { return len(a) }
func (a ByKey) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a ByKey) Less(i, j int) bool { return a[i].Key < a[j].Key }
func main() {
if len(os.Args) < 3 {
fmt.Fprintf(os.Stderr, "Usage: mrsequential xxx.so inputfiles...\n")
os.Exit(1)
}
mapf, reducef := loadPlugin(os.Args[1])
//
// read each input file,
// pass it to Map,
// accumulate the intermediate Map output.
//
intermediate := []mr.KeyValue{}
for _, filename := range os.Args[2:] {
file, err := os.Open(filename)
if err != nil {
log.Fatalf("cannot open %v", filename)
}
content, err := ioutil.ReadAll(file)
if err != nil {
log.Fatalf("cannot read %v", filename)
}
file.Close()
kva := mapf(filename, string(content))
intermediate = append(intermediate, kva...)
}
//
// a big difference from real MapReduce is that all the
// intermediate data is in one place, intermediate[],
// rather than being partitioned into NxM buckets.
//
sort.Sort(ByKey(intermediate))
oname := "mr-out-0"
ofile, _ := os.Create(oname)
//
// call Reduce on each distinct key in intermediate[],
// and print the result to mr-out-0.
//
i := 0
for i < len(intermediate) {
j := i + 1
for j < len(intermediate) && intermediate[j].Key == intermediate[i].Key {
j++
}
values := []string{}
for k := i; k < j; k++ {
values = append(values, intermediate[k].Value)
}
output := reducef(intermediate[i].Key, values)
// this is the correct format for each line of Reduce output.
fmt.Fprintf(ofile, "%v %v\n", intermediate[i].Key, output)
i = j
}
ofile.Close()
}
// load the application Map and Reduce functions
// from a plugin file, e.g. ../mrapps/wc.so
func loadPlugin(filename string) (func(string, string) []mr.KeyValue, func(string, []string) string) {
p, err := plugin.Open(filename)
if err != nil {
log.Fatalf("cannot load plugin %v", filename)
}
xmapf, err := p.Lookup("Map")
if err != nil {
log.Fatalf("cannot find Map in %v", filename)
}
mapf := xmapf.(func(string, string) []mr.KeyValue)
xreducef, err := p.Lookup("Reduce")
if err != nil {
log.Fatalf("cannot find Reduce in %v", filename)
}
reducef := xreducef.(func(string, []string) string)
return mapf, reducef
}

49
src/main/mrworker.go Normal file
View File

@ -0,0 +1,49 @@
package main
//
// start a worker process, which is implemented
// in ../mr/worker.go. typically there will be
// multiple worker processes, talking to one coordinator.
//
// go run mrworker.go wc.so
//
// Please do not change this file.
//
import "6.5840/mr"
import "plugin"
import "os"
import "fmt"
import "log"
func main() {
if len(os.Args) != 2 {
fmt.Fprintf(os.Stderr, "Usage: mrworker xxx.so\n")
os.Exit(1)
}
mapf, reducef := loadPlugin(os.Args[1])
mr.Worker(mapf, reducef)
}
// load the application Map and Reduce functions
// from a plugin file, e.g. ../mrapps/wc.so
func loadPlugin(filename string) (func(string, string) []mr.KeyValue, func(string, []string) string) {
p, err := plugin.Open(filename)
if err != nil {
log.Fatalf("cannot load plugin %v", filename)
}
xmapf, err := p.Lookup("Map")
if err != nil {
log.Fatalf("cannot find Map in %v", filename)
}
mapf := xmapf.(func(string, string) []mr.KeyValue)
xreducef, err := p.Lookup("Reduce")
if err != nil {
log.Fatalf("cannot find Reduce in %v", filename)
}
reducef := xreducef.(func(string, []string) string)
return mapf, reducef
}

44
src/main/pbc.go Normal file
View File

@ -0,0 +1,44 @@
package main
//
// pbservice client application
//
// export GOPATH=~/6.5840
// go build viewd.go
// go build pbd.go
// go build pbc.go
// ./viewd /tmp/rtm-v &
// ./pbd /tmp/rtm-v /tmp/rtm-1 &
// ./pbd /tmp/rtm-v /tmp/rtm-2 &
// ./pbc /tmp/rtm-v key1 value1
// ./pbc /tmp/rtm-v key1
//
// change "rtm" to your user name.
// start the pbd programs in separate windows and kill
// and restart them to exercise fault tolerance.
//
import "6.5840/pbservice"
import "os"
import "fmt"
func usage() {
fmt.Printf("Usage: pbc viewport key\n")
fmt.Printf(" pbc viewport key value\n")
os.Exit(1)
}
func main() {
if len(os.Args) == 3 {
// get
ck := pbservice.MakeClerk(os.Args[1], "")
v := ck.Get(os.Args[2])
fmt.Printf("%v\n", v)
} else if len(os.Args) == 4 {
// put
ck := pbservice.MakeClerk(os.Args[1], "")
ck.Put(os.Args[2], os.Args[3])
} else {
usage()
}
}

23
src/main/pbd.go Normal file
View File

@ -0,0 +1,23 @@
package main
//
// see directions in pbc.go
//
import "time"
import "6.5840/pbservice"
import "os"
import "fmt"
func main() {
if len(os.Args) != 3 {
fmt.Printf("Usage: pbd viewport myport\n")
os.Exit(1)
}
pbservice.StartServer(os.Args[1], os.Args[2])
for {
time.Sleep(100 * time.Second)
}
}

3495
src/main/pg-being_ernest.txt Normal file

File diff suppressed because it is too large Load Diff

8904
src/main/pg-dorian_gray.txt Normal file

File diff suppressed because it is too large Load Diff

7653
src/main/pg-frankenstein.txt Normal file

File diff suppressed because it is too large Load Diff

9569
src/main/pg-grimm.txt Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

9206
src/main/pg-tom_sawyer.txt Normal file

File diff suppressed because it is too large Load Diff

23
src/main/test-mr-many.sh Normal file
View File

@ -0,0 +1,23 @@
#!/usr/bin/env bash
if [ $# -ne 1 ]; then
echo "Usage: $0 numTrials"
exit 1
fi
trap 'kill -INT -$pid; exit 1' INT
# Note: because the socketID is based on the current userID,
# ./test-mr.sh cannot be run in parallel
runs=$1
chmod +x test-mr.sh
for i in $(seq 1 $runs); do
timeout -k 2s 900s ./test-mr.sh &
pid=$!
if ! wait $pid; then
echo '***' FAILED TESTS IN TRIAL $i
exit 1
fi
done
echo '***' PASSED ALL $i TESTING TRIALS

338
src/main/test-mr.sh Normal file
View File

@ -0,0 +1,338 @@
#!/usr/bin/env bash
#
# map-reduce tests
#
# un-comment this to run the tests with the Go race detector.
# RACE=-race
if [[ "$OSTYPE" = "darwin"* ]]
then
if go version | grep 'go1.17.[012345]'
then
# -race with plug-ins on x86 MacOS 12 with
# go1.17 before 1.17.6 sometimes crash.
RACE=
echo '*** Turning off -race since it may not work on a Mac'
echo ' with ' `go version`
fi
fi
ISQUIET=$1
maybe_quiet() {
if [ "$ISQUIET" == "quiet" ]; then
"$@" > /dev/null 2>&1
else
"$@"
fi
}
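# for example, "./test-mr.sh quiet" hides the per-test output of the
# coordinator and workers; any other (or no) argument shows it.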
TIMEOUT=timeout
TIMEOUT2=""
if timeout 2s sleep 1 > /dev/null 2>&1
then
:
else
if gtimeout 2s sleep 1 > /dev/null 2>&1
then
TIMEOUT=gtimeout
else
# no timeout command
TIMEOUT=
echo '*** Cannot find timeout command; proceeding without timeouts.'
fi
fi
if [ "$TIMEOUT" != "" ]
then
TIMEOUT2=$TIMEOUT
TIMEOUT2+=" -k 2s 120s "
TIMEOUT+=" -k 2s 45s "
fi
# run the test in a fresh sub-directory.
rm -rf mr-tmp
mkdir mr-tmp || exit 1
cd mr-tmp || exit 1
rm -f mr-*
# make sure software is freshly built.
(cd ../../mrapps && go clean)
(cd .. && go clean)
(cd ../../mrapps && go build $RACE -buildmode=plugin wc.go) || exit 1
(cd ../../mrapps && go build $RACE -buildmode=plugin indexer.go) || exit 1
(cd ../../mrapps && go build $RACE -buildmode=plugin mtiming.go) || exit 1
(cd ../../mrapps && go build $RACE -buildmode=plugin rtiming.go) || exit 1
(cd ../../mrapps && go build $RACE -buildmode=plugin jobcount.go) || exit 1
(cd ../../mrapps && go build $RACE -buildmode=plugin early_exit.go) || exit 1
(cd ../../mrapps && go build $RACE -buildmode=plugin crash.go) || exit 1
(cd ../../mrapps && go build $RACE -buildmode=plugin nocrash.go) || exit 1
(cd .. && go build $RACE mrcoordinator.go) || exit 1
(cd .. && go build $RACE mrworker.go) || exit 1
(cd .. && go build $RACE mrsequential.go) || exit 1
failed_any=0
#########################################################
# first word-count
# generate the correct output
../mrsequential ../../mrapps/wc.so ../pg*txt || exit 1
sort mr-out-0 > mr-correct-wc.txt
rm -f mr-out*
echo '***' Starting wc test.
maybe_quiet $TIMEOUT ../mrcoordinator ../pg*txt &
pid=$!
# give the coordinator time to create the sockets.
sleep 1
# start multiple workers.
(maybe_quiet $TIMEOUT ../mrworker ../../mrapps/wc.so) &
(maybe_quiet $TIMEOUT ../mrworker ../../mrapps/wc.so) &
(maybe_quiet $TIMEOUT ../mrworker ../../mrapps/wc.so) &
# wait for the coordinator to exit.
wait $pid
# since workers are required to exit when a job is completely finished,
# and not before, that means the job has finished.
sort mr-out* | grep . > mr-wc-all
if cmp mr-wc-all mr-correct-wc.txt
then
echo '---' wc test: PASS
else
echo '---' wc output is not the same as mr-correct-wc.txt
echo '---' wc test: FAIL
failed_any=1
fi
# wait for remaining workers and coordinator to exit.
wait
#########################################################
# now indexer
rm -f mr-*
# generate the correct output
../mrsequential ../../mrapps/indexer.so ../pg*txt || exit 1
sort mr-out-0 > mr-correct-indexer.txt
rm -f mr-out*
echo '***' Starting indexer test.
maybe_quiet $TIMEOUT ../mrcoordinator ../pg*txt &
sleep 1
# start multiple workers
maybe_quiet $TIMEOUT ../mrworker ../../mrapps/indexer.so &
maybe_quiet $TIMEOUT ../mrworker ../../mrapps/indexer.so
sort mr-out* | grep . > mr-indexer-all
if cmp mr-indexer-all mr-correct-indexer.txt
then
echo '---' indexer test: PASS
else
echo '---' indexer output is not the same as mr-correct-indexer.txt
echo '---' indexer test: FAIL
failed_any=1
fi
wait
#########################################################
echo '***' Starting map parallelism test.
rm -f mr-*
maybe_quiet $TIMEOUT ../mrcoordinator ../pg*txt &
sleep 1
maybe_quiet $TIMEOUT ../mrworker ../../mrapps/mtiming.so &
maybe_quiet $TIMEOUT ../mrworker ../../mrapps/mtiming.so
NT=`cat mr-out* | grep '^times-' | wc -l | sed 's/ //g'`
if [ "$NT" != "2" ]
then
echo '---' saw "$NT" workers rather than 2
echo '---' map parallelism test: FAIL
failed_any=1
fi
if cat mr-out* | grep '^parallel.* 2' > /dev/null
then
echo '---' map parallelism test: PASS
else
echo '---' map workers did not run in parallel
echo '---' map parallelism test: FAIL
failed_any=1
fi
wait
#########################################################
echo '***' Starting reduce parallelism test.
rm -f mr-*
maybe_quiet $TIMEOUT ../mrcoordinator ../pg*txt &
sleep 1
maybe_quiet $TIMEOUT ../mrworker ../../mrapps/rtiming.so &
maybe_quiet $TIMEOUT ../mrworker ../../mrapps/rtiming.so
NT=`cat mr-out* | grep '^[a-z] 2' | wc -l | sed 's/ //g'`
if [ "$NT" -lt "2" ]
then
echo '---' too few parallel reduces.
echo '---' reduce parallelism test: FAIL
failed_any=1
else
echo '---' reduce parallelism test: PASS
fi
wait
#########################################################
echo '***' Starting job count test.
rm -f mr-*
maybe_quiet $TIMEOUT ../mrcoordinator ../pg*txt &
sleep 1
maybe_quiet $TIMEOUT ../mrworker ../../mrapps/jobcount.so &
maybe_quiet $TIMEOUT ../mrworker ../../mrapps/jobcount.so
maybe_quiet $TIMEOUT ../mrworker ../../mrapps/jobcount.so &
maybe_quiet $TIMEOUT ../mrworker ../../mrapps/jobcount.so
NT=`cat mr-out* | awk '{print $2}'`
if [ "$NT" -eq "8" ]
then
echo '---' job count test: PASS
else
echo '---' map jobs ran incorrect number of times "($NT != 8)"
echo '---' job count test: FAIL
failed_any=1
fi
wait
#########################################################
# test whether any worker or coordinator exits before the
# task has completed (i.e., all output files have been finalized)
rm -f mr-*
echo '***' Starting early exit test.
DF=anydone$$
rm -f $DF
(maybe_quiet $TIMEOUT ../mrcoordinator ../pg*txt; touch $DF) &
# give the coordinator time to create the sockets.
sleep 1
# start multiple workers.
(maybe_quiet $TIMEOUT ../mrworker ../../mrapps/early_exit.so; touch $DF) &
(maybe_quiet $TIMEOUT ../mrworker ../../mrapps/early_exit.so; touch $DF) &
(maybe_quiet $TIMEOUT ../mrworker ../../mrapps/early_exit.so; touch $DF) &
# wait for any of the coord or workers to exit.
# `jobs` ensures that any completed old processes from other tests
# are not waited upon.
jobs &> /dev/null
if [[ "$OSTYPE" = "darwin"* ]]
then
# bash on the Mac doesn't have wait -n
while [ ! -e $DF ]
do
sleep 0.2
done
else
# the -n causes wait to wait for just one child process,
# rather than waiting for all to finish.
wait -n
fi
rm -f $DF
# a process has exited. this means that the output should be finalized
# otherwise, either a worker or the coordinator exited early
sort mr-out* | grep . > mr-wc-all-initial
# wait for remaining workers and coordinator to exit.
wait
# compare initial and final outputs
sort mr-out* | grep . > mr-wc-all-final
if cmp mr-wc-all-final mr-wc-all-initial
then
echo '---' early exit test: PASS
else
echo '---' output changed after first worker exited
echo '---' early exit test: FAIL
failed_any=1
fi
rm -f mr-*
#########################################################
echo '***' Starting crash test.
# generate the correct output
../mrsequential ../../mrapps/nocrash.so ../pg*txt || exit 1
sort mr-out-0 > mr-correct-crash.txt
rm -f mr-out*
rm -f mr-done
((maybe_quiet $TIMEOUT2 ../mrcoordinator ../pg*txt); touch mr-done ) &
sleep 1
# start multiple workers
maybe_quiet $TIMEOUT2 ../mrworker ../../mrapps/crash.so &
# mimic rpc.go's coordinatorSock()
SOCKNAME=/var/tmp/5840-mr-`id -u`
( while [ -e $SOCKNAME -a ! -f mr-done ]
do
maybe_quiet $TIMEOUT2 ../mrworker ../../mrapps/crash.so
sleep 1
done ) &
( while [ -e $SOCKNAME -a ! -f mr-done ]
do
maybe_quiet $TIMEOUT2 ../mrworker ../../mrapps/crash.so
sleep 1
done ) &
while [ -e $SOCKNAME -a ! -f mr-done ]
do
maybe_quiet $TIMEOUT2 ../mrworker ../../mrapps/crash.so
sleep 1
done
wait
rm $SOCKNAME
sort mr-out* | grep . > mr-crash-all
if cmp mr-crash-all mr-correct-crash.txt
then
echo '---' crash test: PASS
else
echo '---' crash output is not the same as mr-correct-crash.txt
echo '---' crash test: FAIL
failed_any=1
fi
#########################################################
if [ $failed_any -eq 0 ]; then
echo '***' PASSED ALL TESTS
else
echo '***' FAILED SOME TESTS
exit 1
fi

23
src/main/viewd.go Normal file
View File

@ -0,0 +1,23 @@
package main
//
// see directions in pbc.go
//
import "time"
import "6.5840/viewservice"
import "os"
import "fmt"
func main() {
if len(os.Args) != 2 {
fmt.Printf("Usage: viewd port\n")
os.Exit(1)
}
viewservice.StartServer(os.Args[1])
for {
time.Sleep(100 * time.Second)
}
}

80
src/models1/kv.go Normal file
View File

@ -0,0 +1,80 @@
package models
import "github.com/anishathalye/porcupine"
import "fmt"
import "sort"
type KvInput struct {
Op uint8 // 0 => get, 1 => put
Key string
Value string
Version uint64
}
type KvOutput struct {
Value string
Version uint64
Err string
}
type KvState struct {
Value string
Version uint64
}
var KvModel = porcupine.Model{
Partition: func(history []porcupine.Operation) [][]porcupine.Operation {
m := make(map[string][]porcupine.Operation)
for _, v := range history {
key := v.Input.(KvInput).Key
m[key] = append(m[key], v)
}
keys := make([]string, 0, len(m))
for k := range m {
keys = append(keys, k)
}
sort.Strings(keys)
ret := make([][]porcupine.Operation, 0, len(keys))
for _, k := range keys {
ret = append(ret, m[k])
}
return ret
},
Init: func() interface{} {
// note: we are modeling a single key's value here;
// we're partitioning by key, so this is okay
return KvState{"", 0}
},
Step: func(state, input, output interface{}) (bool, interface{}) {
inp := input.(KvInput)
out := output.(KvOutput)
st := state.(KvState)
switch inp.Op {
case 0:
// get
return out.Value == st.Value, state
case 1:
// put
if st.Version == inp.Version {
return out.Err == "OK" || out.Err == "ErrMaybe", KvState{inp.Value, st.Version + 1}
} else {
return out.Err == "ErrVersion" || out.Err == "ErrMaybe", st
}
default:
return false, "<invalid>"
}
},
DescribeOperation: func(input, output interface{}) string {
inp := input.(KvInput)
out := output.(KvOutput)
switch inp.Op {
case 0:
return fmt.Sprintf("get('%s') -> ('%s', '%d', '%s')", inp.Key, out.Value, out.Version, out.Err)
case 1:
return fmt.Sprintf("put('%s', '%s', '%d') -> ('%s')", inp.Key, inp.Value, inp.Version, out.Err)
default:
return "<invalid>"
}
},
}
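// a worked example of the semantics encoded in Step above (the values
// are illustrative): starting from KvState{"", 0},
//   put(value="a", version=0) -> "OK"          new state KvState{"a", 1}
//   put(value="b", version=0) -> "ErrVersion"  state unchanged
//   get()                     -> "a"
// "ErrMaybe" is accepted in either case, since a dropped reply leaves
// the checker unsure whether the put was applied.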

70
src/mr/coordinator.go Normal file
View File

@ -0,0 +1,70 @@
package mr
import "log"
import "net"
import "os"
import "net/rpc"
import "net/http"
type Coordinator struct {
// Your definitions here.
}
// Your code here -- RPC handlers for the worker to call.
//
// an example RPC handler.
//
// the RPC argument and reply types are defined in rpc.go.
//
func (c *Coordinator) Example(args *ExampleArgs, reply *ExampleReply) error {
reply.Y = args.X + 1
return nil
}
//
// start a thread that listens for RPCs from worker.go
//
func (c *Coordinator) server() {
rpc.Register(c)
rpc.HandleHTTP()
//l, e := net.Listen("tcp", ":1234")
sockname := coordinatorSock()
os.Remove(sockname)
l, e := net.Listen("unix", sockname)
if e != nil {
log.Fatal("listen error:", e)
}
go http.Serve(l, nil)
}
//
// main/mrcoordinator.go calls Done() periodically to find out
// if the entire job has finished.
//
func (c *Coordinator) Done() bool {
ret := false
// Your code here.
return ret
}
//
// create a Coordinator.
// main/mrcoordinator.go calls this function.
// nReduce is the number of reduce tasks to use.
//
func MakeCoordinator(files []string, nReduce int) *Coordinator {
c := Coordinator{}
// Your code here.
c.server()
return &c
}

36
src/mr/rpc.go Normal file
View File

@ -0,0 +1,36 @@
package mr
//
// RPC definitions.
//
// remember to capitalize all names.
//
import "os"
import "strconv"
//
// example to show how to declare the arguments
// and reply for an RPC.
//
type ExampleArgs struct {
X int
}
type ExampleReply struct {
Y int
}
// Add your RPC definitions here.
// Cook up a unique-ish UNIX-domain socket name
// in /var/tmp, for the coordinator.
// Can't use the current directory since
// Athena AFS doesn't support UNIX-domain sockets.
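// for example, a (hypothetical) uid of 1000 yields "/var/tmp/5840-mr-1000".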
func coordinatorSock() string {
s := "/var/tmp/5840-mr-"
s += strconv.Itoa(os.Getuid())
return s
}

91
src/mr/worker.go Normal file
View File

@ -0,0 +1,91 @@
package mr
import "fmt"
import "log"
import "net/rpc"
import "hash/fnv"
//
// Map functions return a slice of KeyValue.
//
type KeyValue struct {
Key string
Value string
}
//
// use ihash(key) % NReduce to choose the reduce
// task number for each KeyValue emitted by Map.
//
func ihash(key string) int {
h := fnv.New32a()
h.Write([]byte(key))
return int(h.Sum32() & 0x7fffffff)
}
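// for example, a worker might partition intermediate pairs into nReduce
// buckets (the names below are illustrative, not part of the lab API):
//
//   bucket := ihash(kv.Key) % nReduce
//   buckets[bucket] = append(buckets[bucket], kv)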
//
// main/mrworker.go calls this function.
//
func Worker(mapf func(string, string) []KeyValue,
reducef func(string, []string) string) {
// Your worker implementation here.
// uncomment to send the Example RPC to the coordinator.
// CallExample()
}
//
// example function to show how to make an RPC call to the coordinator.
//
// the RPC argument and reply types are defined in rpc.go.
//
func CallExample() {
// declare an argument structure.
args := ExampleArgs{}
// fill in the argument(s).
args.X = 99
// declare a reply structure.
reply := ExampleReply{}
// send the RPC request, wait for the reply.
// the "Coordinator.Example" tells the
// receiving server that we'd like to call
// the Example() method of struct Coordinator.
ok := call("Coordinator.Example", &args, &reply)
if ok {
// reply.Y should be 100.
fmt.Printf("reply.Y %v\n", reply.Y)
} else {
fmt.Printf("call failed!\n")
}
}
//
// send an RPC request to the coordinator, wait for the response.
// usually returns true.
// returns false if something goes wrong.
//
func call(rpcname string, args interface{}, reply interface{}) bool {
// c, err := rpc.DialHTTP("tcp", "127.0.0.1"+":1234")
sockname := coordinatorSock()
c, err := rpc.DialHTTP("unix", sockname)
if err != nil {
log.Fatal("dialing:", err)
}
defer c.Close()
err = c.Call(rpcname, args, reply)
if err == nil {
return true
}
fmt.Println(err)
return false
}

55
src/mrapps/crash.go Normal file
View File

@ -0,0 +1,55 @@
package main
//
// a MapReduce pseudo-application that sometimes crashes,
// and sometimes takes a long time,
// to test MapReduce's ability to recover.
//
// go build -buildmode=plugin crash.go
//
import "6.5840/mr"
import crand "crypto/rand"
import "math/big"
import "strings"
import "os"
import "sort"
import "strconv"
import "time"
func maybeCrash() {
max := big.NewInt(1000)
rr, _ := crand.Int(crand.Reader, max)
if rr.Int64() < 330 {
// crash!
os.Exit(1)
} else if rr.Int64() < 660 {
// delay for a while.
maxms := big.NewInt(10 * 1000)
ms, _ := crand.Int(crand.Reader, maxms)
time.Sleep(time.Duration(ms.Int64()) * time.Millisecond)
}
}
func Map(filename string, contents string) []mr.KeyValue {
maybeCrash()
kva := []mr.KeyValue{}
kva = append(kva, mr.KeyValue{"a", filename})
kva = append(kva, mr.KeyValue{"b", strconv.Itoa(len(filename))})
kva = append(kva, mr.KeyValue{"c", strconv.Itoa(len(contents))})
kva = append(kva, mr.KeyValue{"d", "xyzzy"})
return kva
}
func Reduce(key string, values []string) string {
maybeCrash()
// sort values to ensure deterministic output.
vv := make([]string, len(values))
copy(vv, values)
sort.Strings(vv)
val := strings.Join(vv, " ")
return val
}

36
src/mrapps/early_exit.go Normal file
View File

@ -0,0 +1,36 @@
package main
//
// a MapReduce pseudo-application for the early-exit test: some reduce
// tasks sleep for a while, to catch workers that exit before the whole
// job has finished.
//
// go build -buildmode=plugin early_exit.go
//
import (
"strconv"
"strings"
"time"
"6.5840/mr"
)
// The map function is called once for each file of input.
// This map function just returns 1 for each file
func Map(filename string, contents string) []mr.KeyValue {
kva := []mr.KeyValue{}
kva = append(kva, mr.KeyValue{filename, "1"})
return kva
}
// The reduce function is called once for each key generated by the
// map tasks, with a list of all the values created for that key by
// any map task.
func Reduce(key string, values []string) string {
// some reduce tasks sleep for a while, to check whether a worker
// accidentally exits before the whole job has finished.
if strings.Contains(key, "sherlock") || strings.Contains(key, "tom") {
time.Sleep(time.Duration(3 * time.Second))
}
// return the number of occurrences of this file.
return strconv.Itoa(len(values))
}

39
src/mrapps/indexer.go Normal file
View File

@ -0,0 +1,39 @@
package main
//
// an indexing application "plugin" for MapReduce.
//
// go build -buildmode=plugin indexer.go
//
import "fmt"
import "6.5840/mr"
import "strings"
import "unicode"
import "sort"
// The mapping function is called once for each piece of the input.
// In this framework, the key is the name of the file that is being processed,
// and the value is the file's contents. The return value should be a slice of
// key/value pairs, each represented by a mr.KeyValue.
func Map(document string, value string) (res []mr.KeyValue) {
m := make(map[string]bool)
words := strings.FieldsFunc(value, func(x rune) bool { return !unicode.IsLetter(x) })
for _, w := range words {
m[w] = true
}
for w := range m {
kv := mr.KeyValue{w, document}
res = append(res, kv)
}
return
}
// The reduce function is called once for each key generated by Map, with a
// list of that key's string value (merged across all inputs). The return value
// should be a single output value for that key.
func Reduce(key string, values []string) string {
sort.Strings(values)
return fmt.Sprintf("%d %s", len(values), strings.Join(values, ","))
}

46
src/mrapps/jobcount.go Normal file
View File

@ -0,0 +1,46 @@
package main
//
// a MapReduce pseudo-application that counts the number of times map/reduce
// tasks are run, to test whether jobs are assigned multiple times even when
// there is no failure.
//
// go build -buildmode=plugin jobcount.go
//
import "6.5840/mr"
import "math/rand"
import "strings"
import "strconv"
import "time"
import "fmt"
import "os"
import "io/ioutil"
var count int
func Map(filename string, contents string) []mr.KeyValue {
me := os.Getpid()
f := fmt.Sprintf("mr-worker-jobcount-%d-%d", me, count)
count++
err := ioutil.WriteFile(f, []byte("x"), 0666)
if err != nil {
panic(err)
}
time.Sleep(time.Duration(2000+rand.Intn(3000)) * time.Millisecond)
return []mr.KeyValue{mr.KeyValue{"a", "x"}}
}
func Reduce(key string, values []string) string {
files, err := ioutil.ReadDir(".")
if err != nil {
panic(err)
}
invocations := 0
for _, f := range files {
if strings.HasPrefix(f.Name(), "mr-worker-jobcount") {
invocations++
}
}
return strconv.Itoa(invocations)
}

91
src/mrapps/mtiming.go Normal file
View File

@ -0,0 +1,91 @@
package main
//
// a MapReduce pseudo-application to test that workers
// execute map tasks in parallel.
//
// go build -buildmode=plugin mtiming.go
//
import "6.5840/mr"
import "strings"
import "fmt"
import "os"
import "syscall"
import "time"
import "sort"
import "io/ioutil"
func nparallel(phase string) int {
// create a file so that other workers will see that
// we're running at the same time as them.
pid := os.Getpid()
myfilename := fmt.Sprintf("mr-worker-%s-%d", phase, pid)
err := ioutil.WriteFile(myfilename, []byte("x"), 0666)
if err != nil {
panic(err)
}
// are any other workers running?
// find their PIDs by scanning directory for mr-worker-XXX files.
dd, err := os.Open(".")
if err != nil {
panic(err)
}
names, err := dd.Readdirnames(1000000)
if err != nil {
panic(err)
}
ret := 0
for _, name := range names {
var xpid int
pat := fmt.Sprintf("mr-worker-%s-%%d", phase)
n, err := fmt.Sscanf(name, pat, &xpid)
if n == 1 && err == nil {
err := syscall.Kill(xpid, 0)
if err == nil {
// if err == nil, xpid is alive.
ret += 1
}
}
}
dd.Close()
time.Sleep(1 * time.Second)
err = os.Remove(myfilename)
if err != nil {
panic(err)
}
return ret
}
func Map(filename string, contents string) []mr.KeyValue {
t0 := time.Now()
ts := float64(t0.Unix()) + (float64(t0.Nanosecond()) / 1000000000.0)
pid := os.Getpid()
n := nparallel("map")
kva := []mr.KeyValue{}
kva = append(kva, mr.KeyValue{
fmt.Sprintf("times-%v", pid),
fmt.Sprintf("%.1f", ts)})
kva = append(kva, mr.KeyValue{
fmt.Sprintf("parallel-%v", pid),
fmt.Sprintf("%d", n)})
return kva
}
func Reduce(key string, values []string) string {
//n := nparallel("reduce")
// sort values to ensure deterministic output.
vv := make([]string, len(values))
copy(vv, values)
sort.Strings(vv)
val := strings.Join(vv, " ")
return val
}

47
src/mrapps/nocrash.go Normal file
View File

@ -0,0 +1,47 @@
package main
//
// same as crash.go but doesn't actually crash.
//
// go build -buildmode=plugin nocrash.go
//
import "6.5840/mr"
import crand "crypto/rand"
import "math/big"
import "strings"
import "os"
import "sort"
import "strconv"
func maybeCrash() {
max := big.NewInt(1000)
rr, _ := crand.Int(crand.Reader, max)
if false && rr.Int64() < 500 {
// crash!
os.Exit(1)
}
}
func Map(filename string, contents string) []mr.KeyValue {
maybeCrash()
kva := []mr.KeyValue{}
kva = append(kva, mr.KeyValue{"a", filename})
kva = append(kva, mr.KeyValue{"b", strconv.Itoa(len(filename))})
kva = append(kva, mr.KeyValue{"c", strconv.Itoa(len(contents))})
kva = append(kva, mr.KeyValue{"d", "xyzzy"})
return kva
}
func Reduce(key string, values []string) string {
maybeCrash()
// sort values to ensure deterministic output.
vv := make([]string, len(values))
copy(vv, values)
sort.Strings(vv)
val := strings.Join(vv, " ")
return val
}

84
src/mrapps/rtiming.go Normal file
View File

@ -0,0 +1,84 @@
package main
//
// a MapReduce pseudo-application to test that workers
// execute reduce tasks in parallel.
//
// go build -buildmode=plugin rtiming.go
//
import "6.5840/mr"
import "fmt"
import "os"
import "syscall"
import "time"
import "io/ioutil"
func nparallel(phase string) int {
// create a file so that other workers will see that
// we're running at the same time as them.
pid := os.Getpid()
myfilename := fmt.Sprintf("mr-worker-%s-%d", phase, pid)
err := ioutil.WriteFile(myfilename, []byte("x"), 0666)
if err != nil {
panic(err)
}
// are any other workers running?
// find their PIDs by scanning directory for mr-worker-XXX files.
dd, err := os.Open(".")
if err != nil {
panic(err)
}
names, err := dd.Readdirnames(1000000)
if err != nil {
panic(err)
}
ret := 0
for _, name := range names {
var xpid int
pat := fmt.Sprintf("mr-worker-%s-%%d", phase)
n, err := fmt.Sscanf(name, pat, &xpid)
if n == 1 && err == nil {
err := syscall.Kill(xpid, 0)
if err == nil {
// if err == nil, xpid is alive.
ret += 1
}
}
}
dd.Close()
time.Sleep(1 * time.Second)
err = os.Remove(myfilename)
if err != nil {
panic(err)
}
return ret
}
func Map(filename string, contents string) []mr.KeyValue {
kva := []mr.KeyValue{}
kva = append(kva, mr.KeyValue{"a", "1"})
kva = append(kva, mr.KeyValue{"b", "1"})
kva = append(kva, mr.KeyValue{"c", "1"})
kva = append(kva, mr.KeyValue{"d", "1"})
kva = append(kva, mr.KeyValue{"e", "1"})
kva = append(kva, mr.KeyValue{"f", "1"})
kva = append(kva, mr.KeyValue{"g", "1"})
kva = append(kva, mr.KeyValue{"h", "1"})
kva = append(kva, mr.KeyValue{"i", "1"})
kva = append(kva, mr.KeyValue{"j", "1"})
return kva
}
func Reduce(key string, values []string) string {
n := nparallel("reduce")
val := fmt.Sprintf("%d", n)
return val
}

40
src/mrapps/wc.go Normal file
View File

@ -0,0 +1,40 @@
package main
//
// a word-count application "plugin" for MapReduce.
//
// go build -buildmode=plugin wc.go
//
import "6.5840/mr"
import "unicode"
import "strings"
import "strconv"
// The map function is called once for each file of input. The first
// argument is the name of the input file, and the second is the
// file's complete contents. You should ignore the input file name,
// and look only at the contents argument. The return value is a slice
// of key/value pairs.
func Map(filename string, contents string) []mr.KeyValue {
// function to detect word separators.
ff := func(r rune) bool { return !unicode.IsLetter(r) }
// split contents into an array of words.
words := strings.FieldsFunc(contents, ff)
kva := []mr.KeyValue{}
for _, w := range words {
kv := mr.KeyValue{w, "1"}
kva = append(kva, kv)
}
return kva
}
// The reduce function is called once for each key generated by the
// map tasks, with a list of all the values created for that key by
// any map task.
func Reduce(key string, values []string) string {
// return the number of occurrences of this word.
return strconv.Itoa(len(values))
}

648
src/raft/config.go Normal file
View File

@ -0,0 +1,648 @@
package raft
//
// support for Raft tester.
//
// we will use the original config.go to test your code for grading.
// so, while you can modify this code to help you debug, please
// test with the original before submitting.
//
import "6.5840/labgob"
import "6.5840/labrpc"
import "bytes"
import "log"
import "sync"
import "sync/atomic"
import "testing"
import "runtime"
import "math/rand"
import crand "crypto/rand"
import "math/big"
import "encoding/base64"
import "time"
import "fmt"
func randstring(n int) string {
b := make([]byte, 2*n)
crand.Read(b)
s := base64.URLEncoding.EncodeToString(b)
return s[0:n]
}
func makeSeed() int64 {
max := big.NewInt(int64(1) << 62)
bigx, _ := crand.Int(crand.Reader, max)
x := bigx.Int64()
return x
}
type config struct {
mu sync.Mutex
t *testing.T
finished int32
net *labrpc.Network
n int
rafts []*Raft
applyErr []string // from apply channel readers
connected []bool // whether each server is on the net
saved []*Persister
endnames [][]string // the port file names each sends to
logs []map[int]interface{} // copy of each server's committed entries
lastApplied []int
start time.Time // time at which make_config() was called
// begin()/end() statistics
t0 time.Time // time at which test_test.go called cfg.begin()
rpcs0 int // rpcTotal() at start of test
cmds0 int // number of agreements
bytes0 int64
maxIndex int
maxIndex0 int
}
var ncpu_once sync.Once
func make_config(t *testing.T, n int, unreliable bool, snapshot bool) *config {
ncpu_once.Do(func() {
if runtime.NumCPU() < 2 {
fmt.Printf("warning: only one CPU, which may conceal locking bugs\n")
}
rand.Seed(makeSeed())
})
runtime.GOMAXPROCS(4)
cfg := &config{}
cfg.t = t
cfg.net = labrpc.MakeNetwork()
cfg.n = n
cfg.applyErr = make([]string, cfg.n)
cfg.rafts = make([]*Raft, cfg.n)
cfg.connected = make([]bool, cfg.n)
cfg.saved = make([]*Persister, cfg.n)
cfg.endnames = make([][]string, cfg.n)
cfg.logs = make([]map[int]interface{}, cfg.n)
cfg.lastApplied = make([]int, cfg.n)
cfg.start = time.Now()
cfg.setunreliable(unreliable)
cfg.net.LongDelays(true)
applier := cfg.applier
if snapshot {
applier = cfg.applierSnap
}
// create a full set of Rafts.
for i := 0; i < cfg.n; i++ {
cfg.logs[i] = map[int]interface{}{}
cfg.start1(i, applier)
}
// connect everyone
for i := 0; i < cfg.n; i++ {
cfg.connect(i)
}
return cfg
}
// shut down a Raft server but save its persistent state.
func (cfg *config) crash1(i int) {
cfg.disconnect(i)
cfg.net.DeleteServer(i) // disable client connections to the server.
cfg.mu.Lock()
defer cfg.mu.Unlock()
// a fresh persister, in case old instance
// continues to update the Persister.
// but copy old persister's content so that we always
// pass Make() the last persisted state.
if cfg.saved[i] != nil {
cfg.saved[i] = cfg.saved[i].Copy()
}
rf := cfg.rafts[i]
if rf != nil {
cfg.mu.Unlock()
rf.Kill()
cfg.mu.Lock()
cfg.rafts[i] = nil
}
if cfg.saved[i] != nil {
raftlog := cfg.saved[i].ReadRaftState()
snapshot := cfg.saved[i].ReadSnapshot()
cfg.saved[i] = &Persister{}
cfg.saved[i].Save(raftlog, snapshot)
}
}
func (cfg *config) checkLogs(i int, m ApplyMsg) (string, bool) {
err_msg := ""
v := m.Command
for j := 0; j < len(cfg.logs); j++ {
if old, oldok := cfg.logs[j][m.CommandIndex]; oldok && old != v {
log.Printf("%v: log %v; server %v\n", i, cfg.logs[i], cfg.logs[j])
// some server has already committed a different value for this entry!
err_msg = fmt.Sprintf("commit index=%v server=%v %v != server=%v %v",
m.CommandIndex, i, m.Command, j, old)
}
}
_, prevok := cfg.logs[i][m.CommandIndex-1]
cfg.logs[i][m.CommandIndex] = v
if m.CommandIndex > cfg.maxIndex {
cfg.maxIndex = m.CommandIndex
}
return err_msg, prevok
}
// applier reads messages from applyCh and checks that they match the log
// contents.
func (cfg *config) applier(i int, applyCh chan ApplyMsg) {
for m := range applyCh {
if m.CommandValid == false {
// ignore other types of ApplyMsg
} else {
cfg.mu.Lock()
err_msg, prevok := cfg.checkLogs(i, m)
cfg.mu.Unlock()
if m.CommandIndex > 1 && prevok == false {
err_msg = fmt.Sprintf("server %v apply out of order %v", i, m.CommandIndex)
}
if err_msg != "" {
log.Fatalf("apply error: %v", err_msg)
cfg.applyErr[i] = err_msg
// keep reading after error so that Raft doesn't block
// holding locks...
}
}
}
}
// returns "" or error string
func (cfg *config) ingestSnap(i int, snapshot []byte, index int) string {
if snapshot == nil {
log.Fatalf("nil snapshot")
return "nil snapshot"
}
r := bytes.NewBuffer(snapshot)
d := labgob.NewDecoder(r)
var lastIncludedIndex int
var xlog []interface{}
if d.Decode(&lastIncludedIndex) != nil ||
d.Decode(&xlog) != nil {
log.Fatalf("snapshot decode error")
return "snapshot Decode() error"
}
if index != -1 && index != lastIncludedIndex {
err := fmt.Sprintf("server %v snapshot doesn't match m.SnapshotIndex", i)
return err
}
cfg.logs[i] = map[int]interface{}{}
for j := 0; j < len(xlog); j++ {
cfg.logs[i][j] = xlog[j]
}
cfg.lastApplied[i] = lastIncludedIndex
return ""
}
const SnapShotInterval = 10
// periodically snapshot raft state
func (cfg *config) applierSnap(i int, applyCh chan ApplyMsg) {
cfg.mu.Lock()
rf := cfg.rafts[i]
cfg.mu.Unlock()
if rf == nil {
return // ???
}
for m := range applyCh {
err_msg := ""
if m.SnapshotValid {
cfg.mu.Lock()
err_msg = cfg.ingestSnap(i, m.Snapshot, m.SnapshotIndex)
cfg.mu.Unlock()
} else if m.CommandValid {
if m.CommandIndex != cfg.lastApplied[i]+1 {
err_msg = fmt.Sprintf("server %v apply out of order, expected index %v, got %v", i, cfg.lastApplied[i]+1, m.CommandIndex)
}
if err_msg == "" {
cfg.mu.Lock()
var prevok bool
err_msg, prevok = cfg.checkLogs(i, m)
cfg.mu.Unlock()
if m.CommandIndex > 1 && prevok == false {
err_msg = fmt.Sprintf("server %v apply out of order %v", i, m.CommandIndex)
}
}
cfg.mu.Lock()
cfg.lastApplied[i] = m.CommandIndex
cfg.mu.Unlock()
if (m.CommandIndex+1)%SnapShotInterval == 0 {
w := new(bytes.Buffer)
e := labgob.NewEncoder(w)
e.Encode(m.CommandIndex)
var xlog []interface{}
for j := 0; j <= m.CommandIndex; j++ {
xlog = append(xlog, cfg.logs[i][j])
}
e.Encode(xlog)
rf.Snapshot(m.CommandIndex, w.Bytes())
}
} else {
// Ignore other types of ApplyMsg.
}
if err_msg != "" {
log.Fatalf("apply error: %v", err_msg)
cfg.applyErr[i] = err_msg
// keep reading after error so that Raft doesn't block
// holding locks...
}
}
}
// start or re-start a Raft.
// if one already exists, "kill" it first.
// allocate new outgoing port file names, and a new
// state persister, to isolate the previous instance of
// this server, since we cannot really kill it.
func (cfg *config) start1(i int, applier func(int, chan ApplyMsg)) {
cfg.crash1(i)
// a fresh set of outgoing ClientEnd names.
// so that old crashed instance's ClientEnds can't send.
cfg.endnames[i] = make([]string, cfg.n)
for j := 0; j < cfg.n; j++ {
cfg.endnames[i][j] = randstring(20)
}
// a fresh set of ClientEnds.
ends := make([]*labrpc.ClientEnd, cfg.n)
for j := 0; j < cfg.n; j++ {
ends[j] = cfg.net.MakeEnd(cfg.endnames[i][j])
cfg.net.Connect(cfg.endnames[i][j], j)
}
cfg.mu.Lock()
cfg.lastApplied[i] = 0
// a fresh persister, so old instance doesn't overwrite
// new instance's persisted state.
// but copy old persister's content so that we always
// pass Make() the last persisted state.
if cfg.saved[i] != nil {
cfg.saved[i] = cfg.saved[i].Copy()
snapshot := cfg.saved[i].ReadSnapshot()
if snapshot != nil && len(snapshot) > 0 {
// mimic KV server and process snapshot now.
// ideally Raft should send it up on applyCh...
err := cfg.ingestSnap(i, snapshot, -1)
if err != "" {
cfg.t.Fatal(err)
}
}
} else {
cfg.saved[i] = MakePersister()
}
cfg.mu.Unlock()
applyCh := make(chan ApplyMsg)
rf := Make(ends, i, cfg.saved[i], applyCh)
cfg.mu.Lock()
cfg.rafts[i] = rf
cfg.mu.Unlock()
go applier(i, applyCh)
svc := labrpc.MakeService(rf)
srv := labrpc.MakeServer()
srv.AddService(svc)
cfg.net.AddServer(i, srv)
}
func (cfg *config) checkTimeout() {
// enforce a two minute real-time limit on each test
if !cfg.t.Failed() && time.Since(cfg.start) > 120*time.Second {
cfg.t.Fatal("test took longer than 120 seconds")
}
}
func (cfg *config) checkFinished() bool {
z := atomic.LoadInt32(&cfg.finished)
return z != 0
}
func (cfg *config) cleanup() {
atomic.StoreInt32(&cfg.finished, 1)
for i := 0; i < len(cfg.rafts); i++ {
if cfg.rafts[i] != nil {
cfg.rafts[i].Kill()
}
}
cfg.net.Cleanup()
cfg.checkTimeout()
}
// attach server i to the net.
func (cfg *config) connect(i int) {
// fmt.Printf("connect(%d)\n", i)
cfg.connected[i] = true
// outgoing ClientEnds
for j := 0; j < cfg.n; j++ {
if cfg.connected[j] {
endname := cfg.endnames[i][j]
cfg.net.Enable(endname, true)
}
}
// incoming ClientEnds
for j := 0; j < cfg.n; j++ {
if cfg.connected[j] {
endname := cfg.endnames[j][i]
cfg.net.Enable(endname, true)
}
}
}
// detach server i from the net.
func (cfg *config) disconnect(i int) {
// fmt.Printf("disconnect(%d)\n", i)
cfg.connected[i] = false
// outgoing ClientEnds
for j := 0; j < cfg.n; j++ {
if cfg.endnames[i] != nil {
endname := cfg.endnames[i][j]
cfg.net.Enable(endname, false)
}
}
// incoming ClientEnds
for j := 0; j < cfg.n; j++ {
if cfg.endnames[j] != nil {
endname := cfg.endnames[j][i]
cfg.net.Enable(endname, false)
}
}
}
func (cfg *config) rpcCount(server int) int {
return cfg.net.GetCount(server)
}
func (cfg *config) rpcTotal() int {
return cfg.net.GetTotalCount()
}
func (cfg *config) setunreliable(unrel bool) {
cfg.net.Reliable(!unrel)
}
func (cfg *config) bytesTotal() int64 {
return cfg.net.GetTotalBytes()
}
func (cfg *config) setlongreordering(longrel bool) {
cfg.net.LongReordering(longrel)
}
// check that one of the connected servers thinks
// it is the leader, and that no other connected
// server thinks otherwise.
//
// try a few times in case re-elections are needed.
func (cfg *config) checkOneLeader() int {
for iters := 0; iters < 10; iters++ {
ms := 450 + (rand.Int63() % 100)
time.Sleep(time.Duration(ms) * time.Millisecond)
leaders := make(map[int][]int)
for i := 0; i < cfg.n; i++ {
if cfg.connected[i] {
if term, leader := cfg.rafts[i].GetState(); leader {
leaders[term] = append(leaders[term], i)
}
}
}
lastTermWithLeader := -1
for term, leaders := range leaders {
if len(leaders) > 1 {
cfg.t.Fatalf("term %d has %d (>1) leaders", term, len(leaders))
}
if term > lastTermWithLeader {
lastTermWithLeader = term
}
}
if len(leaders) != 0 {
return leaders[lastTermWithLeader][0]
}
}
cfg.t.Fatalf("expected one leader, got none")
return -1
}
// check that everyone agrees on the term.
func (cfg *config) checkTerms() int {
term := -1
for i := 0; i < cfg.n; i++ {
if cfg.connected[i] {
xterm, _ := cfg.rafts[i].GetState()
if term == -1 {
term = xterm
} else if term != xterm {
cfg.t.Fatalf("servers disagree on term")
}
}
}
return term
}
// check that none of the connected servers
// thinks it is the leader.
func (cfg *config) checkNoLeader() {
for i := 0; i < cfg.n; i++ {
if cfg.connected[i] {
_, is_leader := cfg.rafts[i].GetState()
if is_leader {
cfg.t.Fatalf("expected no leader among connected servers, but %v claims to be leader", i)
}
}
}
}
// how many servers think a log entry is committed?
func (cfg *config) nCommitted(index int) (int, interface{}) {
count := 0
var cmd interface{} = nil
for i := 0; i < len(cfg.rafts); i++ {
if cfg.applyErr[i] != "" {
cfg.t.Fatal(cfg.applyErr[i])
}
cfg.mu.Lock()
cmd1, ok := cfg.logs[i][index]
cfg.mu.Unlock()
if ok {
if count > 0 && cmd != cmd1 {
cfg.t.Fatalf("committed values do not match: index %v, %v, %v",
index, cmd, cmd1)
}
count += 1
cmd = cmd1
}
}
return count, cmd
}
// wait for at least n servers to commit.
// but don't wait forever.
func (cfg *config) wait(index int, n int, startTerm int) interface{} {
to := 10 * time.Millisecond
for iters := 0; iters < 30; iters++ {
nd, _ := cfg.nCommitted(index)
if nd >= n {
break
}
time.Sleep(to)
if to < time.Second {
to *= 2
}
if startTerm > -1 {
for _, r := range cfg.rafts {
if t, _ := r.GetState(); t > startTerm {
// someone has moved on
// can no longer guarantee that we'll "win"
return -1
}
}
}
}
nd, cmd := cfg.nCommitted(index)
if nd < n {
cfg.t.Fatalf("only %d decided for index %d; wanted %d",
nd, index, n)
}
return cmd
}
// do a complete agreement.
// it might choose the wrong leader initially,
// and have to re-submit after giving up.
// entirely gives up after about 10 seconds.
// indirectly checks that the servers agree on the
// same value, since nCommitted() checks this,
// as do the threads that read from applyCh.
// returns index.
// if retry==true, may submit the command multiple
// times, in case a leader fails just after Start().
// if retry==false, calls Start() only once, in order
// to simplify the early Lab 3B tests.
func (cfg *config) one(cmd interface{}, expectedServers int, retry bool) int {
t0 := time.Now()
starts := 0
for time.Since(t0).Seconds() < 10 && cfg.checkFinished() == false {
// try all the servers, maybe one is the leader.
index := -1
for si := 0; si < cfg.n; si++ {
starts = (starts + 1) % cfg.n
var rf *Raft
cfg.mu.Lock()
if cfg.connected[starts] {
rf = cfg.rafts[starts]
}
cfg.mu.Unlock()
if rf != nil {
index1, _, ok := rf.Start(cmd)
if ok {
index = index1
break
}
}
}
if index != -1 {
// somebody claimed to be the leader and to have
// submitted our command; wait a while for agreement.
t1 := time.Now()
for time.Since(t1).Seconds() < 2 {
nd, cmd1 := cfg.nCommitted(index)
if nd > 0 && nd >= expectedServers {
// committed
if cmd1 == cmd {
// and it was the command we submitted.
return index
}
}
time.Sleep(20 * time.Millisecond)
}
if retry == false {
cfg.t.Fatalf("one(%v) failed to reach agreement", cmd)
}
} else {
time.Sleep(50 * time.Millisecond)
}
}
if cfg.checkFinished() == false {
cfg.t.Fatalf("one(%v) failed to reach agreement", cmd)
}
return -1
}
// start a Test.
// print the Test message.
// e.g. cfg.begin("Test (3B): RPC counts aren't too high")
func (cfg *config) begin(description string) {
fmt.Printf("%s ...\n", description)
cfg.t0 = time.Now()
cfg.rpcs0 = cfg.rpcTotal()
cfg.bytes0 = cfg.bytesTotal()
cfg.cmds0 = 0
cfg.maxIndex0 = cfg.maxIndex
}
// end a Test -- the fact that we got here means there
// was no failure.
// print the Passed message,
// and some performance numbers.
func (cfg *config) end() {
cfg.checkTimeout()
if cfg.t.Failed() == false {
cfg.mu.Lock()
t := time.Since(cfg.t0).Seconds() // real time
npeers := cfg.n // number of Raft peers
nrpc := cfg.rpcTotal() - cfg.rpcs0 // number of RPC sends
nbytes := cfg.bytesTotal() - cfg.bytes0 // number of bytes
ncmds := cfg.maxIndex - cfg.maxIndex0 // number of Raft agreements reported
cfg.mu.Unlock()
fmt.Printf(" ... Passed --")
fmt.Printf(" %4.1f %d %4d %7d %4d\n", t, npeers, nrpc, nbytes, ncmds)
}
}
// Maximum log size across all servers
func (cfg *config) LogSize() int {
logsize := 0
for i := 0; i < cfg.n; i++ {
n := cfg.saved[i].RaftStateSize()
if n > logsize {
logsize = n
}
}
return logsize
}
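// To show how the pieces above fit together, here is a hedged sketch of
// the way test_test.go typically drives this harness (the test name and
// command values are made up for illustration):
func exampleBasicAgreement(t *testing.T) {
	servers := 3
	cfg := make_config(t, servers, false, false)
	defer cfg.cleanup()
	cfg.begin("Test: example basic agreement")
	for i := 1; i <= 3; i++ {
		// submit a command and wait until all servers have committed it
		cfg.one(100+i, servers, false)
	}
	cfg.end()
}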

70
src/raft/persister.go Normal file
View File

@ -0,0 +1,70 @@
package raft
//
// support for Raft and kvraft to save persistent
// Raft state (log &c) and k/v server snapshots.
//
// we will use the original persister.go to test your code for grading.
// so, while you can modify this code to help you debug, please
// test with the original before submitting.
//
import "sync"
type Persister struct {
mu sync.Mutex
raftstate []byte
snapshot []byte
}
func MakePersister() *Persister {
return &Persister{}
}
func clone(orig []byte) []byte {
x := make([]byte, len(orig))
copy(x, orig)
return x
}
func (ps *Persister) Copy() *Persister {
ps.mu.Lock()
defer ps.mu.Unlock()
np := MakePersister()
np.raftstate = ps.raftstate
np.snapshot = ps.snapshot
return np
}
func (ps *Persister) ReadRaftState() []byte {
ps.mu.Lock()
defer ps.mu.Unlock()
return clone(ps.raftstate)
}
func (ps *Persister) RaftStateSize() int {
ps.mu.Lock()
defer ps.mu.Unlock()
return len(ps.raftstate)
}
// Save both Raft state and K/V snapshot as a single atomic action,
// to help avoid them getting out of sync.
func (ps *Persister) Save(raftstate []byte, snapshot []byte) {
ps.mu.Lock()
defer ps.mu.Unlock()
ps.raftstate = clone(raftstate)
ps.snapshot = clone(snapshot)
}
func (ps *Persister) ReadSnapshot() []byte {
ps.mu.Lock()
defer ps.mu.Unlock()
return clone(ps.snapshot)
}
func (ps *Persister) SnapshotSize() int {
ps.mu.Lock()
defer ps.mu.Unlock()
return len(ps.snapshot)
}
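// A small illustration of the intended calling pattern (a sketch; the
// real callers are Raft's persist()/readPersist() and the kvraft
// snapshot path):
func examplePersisterRoundTrip() {
	ps := MakePersister()
	ps.Save([]byte("raft-state-bytes"), []byte("snapshot-bytes"))
	state := ps.ReadRaftState() // returns a copy, so callers may modify it
	snap := ps.ReadSnapshot()
	_, _ = state, snap
}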

259
src/raft/raft.go Normal file
View File

@ -0,0 +1,259 @@
package raft
//
// this is an outline of the API that raft must expose to
// the service (or tester). see comments below for
// each of these functions for more details.
//
// rf = Make(...)
// create a new Raft server.
// rf.Start(command interface{}) (index, term, isleader)
// start agreement on a new log entry
// rf.GetState() (term, isLeader)
// ask a Raft for its current term, and whether it thinks it is leader
// ApplyMsg
// each time a new entry is committed to the log, each Raft peer
// should send an ApplyMsg to the service (or tester)
// in the same server.
//
import (
// "bytes"
"math/rand"
"sync"
"sync/atomic"
"time"
// "6.5840/labgob"
"6.5840/labrpc"
)
// as each Raft peer becomes aware that successive log entries are
// committed, the peer should send an ApplyMsg to the service (or
// tester) on the same server, via the applyCh passed to Make(). set
// CommandValid to true to indicate that the ApplyMsg contains a newly
// committed log entry.
//
// in part 3D you'll want to send other kinds of messages (e.g.,
// snapshots) on the applyCh, but set CommandValid to false for these
// other uses.
type ApplyMsg struct {
CommandValid bool
Command interface{}
CommandIndex int
// For 3D:
SnapshotValid bool
Snapshot []byte
SnapshotTerm int
SnapshotIndex int
}
// A Go object implementing a single Raft peer.
type Raft struct {
mu sync.Mutex // Lock to protect shared access to this peer's state
peers []*labrpc.ClientEnd // RPC end points of all peers
persister *Persister // Object to hold this peer's persisted state
me int // this peer's index into peers[]
dead int32 // set by Kill()
// Your data here (3A, 3B, 3C).
// Look at the paper's Figure 2 for a description of what
// state a Raft server must maintain.
}
// return currentTerm and whether this server
// believes it is the leader.
func (rf *Raft) GetState() (int, bool) {
var term int
var isleader bool
// Your code here (3A).
return term, isleader
}
// save Raft's persistent state to stable storage,
// where it can later be retrieved after a crash and restart.
// see paper's Figure 2 for a description of what should be persistent.
// before you've implemented snapshots, you should pass nil as the
// second argument to persister.Save().
// after you've implemented snapshots, pass the current snapshot
// (or nil if there's not yet a snapshot).
func (rf *Raft) persist() {
// Your code here (3C).
// Example:
// w := new(bytes.Buffer)
// e := labgob.NewEncoder(w)
// e.Encode(rf.xxx)
// e.Encode(rf.yyy)
// raftstate := w.Bytes()
// rf.persister.Save(raftstate, nil)
}
// restore previously persisted state.
func (rf *Raft) readPersist(data []byte) {
if data == nil || len(data) < 1 { // bootstrap without any state?
return
}
// Your code here (3C).
// Example:
// r := bytes.NewBuffer(data)
// d := labgob.NewDecoder(r)
// var xxx
// var yyy
// if d.Decode(&xxx) != nil ||
// d.Decode(&yyy) != nil {
// error...
// } else {
// rf.xxx = xxx
// rf.yyy = yyy
// }
}
// the service says it has created a snapshot that has
// all info up to and including index. this means the
// service no longer needs the log through (and including)
// that index. Raft should now trim its log as much as possible.
func (rf *Raft) Snapshot(index int, snapshot []byte) {
// Your code here (3D).
}
// example RequestVote RPC arguments structure.
// field names must start with capital letters!
type RequestVoteArgs struct {
// Your data here (3A, 3B).
}
// example RequestVote RPC reply structure.
// field names must start with capital letters!
type RequestVoteReply struct {
// Your data here (3A).
}
// example RequestVote RPC handler.
func (rf *Raft) RequestVote(args *RequestVoteArgs, reply *RequestVoteReply) {
// Your code here (3A, 3B).
}
// example code to send a RequestVote RPC to a server.
// server is the index of the target server in rf.peers[].
// expects RPC arguments in args.
// fills in *reply with RPC reply, so caller should
// pass &reply.
// the types of the args and reply passed to Call() must be
// the same as the types of the arguments declared in the
// handler function (including whether they are pointers).
//
// The labrpc package simulates a lossy network, in which servers
// may be unreachable, and in which requests and replies may be lost.
// Call() sends a request and waits for a reply. If a reply arrives
// within a timeout interval, Call() returns true; otherwise
// Call() returns false. Thus Call() may not return for a while.
// A false return can be caused by a dead server, a live server that
// can't be reached, a lost request, or a lost reply.
//
// Call() is guaranteed to return (perhaps after a delay) *except* if the
// handler function on the server side does not return. Thus there
// is no need to implement your own timeouts around Call().
//
// look at the comments in ../labrpc/labrpc.go for more details.
//
// if you're having trouble getting RPC to work, check that you've
// capitalized all field names in structs passed over RPC, and
// that the caller passes the address of the reply struct with &, not
// the struct itself.
func (rf *Raft) sendRequestVote(server int, args *RequestVoteArgs, reply *RequestVoteReply) bool {
ok := rf.peers[server].Call("Raft.RequestVote", args, reply)
return ok
}
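// Example (a sketch, not part of the required API): one common pattern is
// to send RequestVote to every other peer in parallel and tally the
// replies under a mutex. The reply fields you tally (e.g. a VoteGranted
// bool) are yours to define above, so this only counts successful RPCs.
func (rf *Raft) exampleBroadcastRequestVote() int {
	args := &RequestVoteArgs{} // fill in with your fields (term, candidate id, ...)
	var mu sync.Mutex
	var wg sync.WaitGroup
	replies := 0
	for peer := range rf.peers {
		if peer == rf.me {
			continue
		}
		wg.Add(1)
		go func(peer int) {
			defer wg.Done()
			reply := &RequestVoteReply{}
			if rf.sendRequestVote(peer, args, reply) {
				mu.Lock()
				replies++ // real code would also inspect the fields of reply
				mu.Unlock()
			}
		}(peer)
	}
	wg.Wait()
	return replies
}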
// the service using Raft (e.g. a k/v server) wants to start
// agreement on the next command to be appended to Raft's log. if this
// server isn't the leader, returns false. otherwise start the
// agreement and return immediately. there is no guarantee that this
// command will ever be committed to the Raft log, since the leader
// may fail or lose an election. even if the Raft instance has been killed,
// this function should return gracefully.
//
// the first return value is the index that the command will appear at
// if it's ever committed. the second return value is the current
// term. the third return value is true if this server believes it is
// the leader.
func (rf *Raft) Start(command interface{}) (int, int, bool) {
index := -1
term := -1
isLeader := true
// Your code here (3B).
return index, term, isLeader
}
// the tester doesn't halt goroutines created by Raft after each test,
// but it does call the Kill() method. your code can use killed() to
// check whether Kill() has been called. the use of atomic avoids the
// need for a lock.
//
// the issue is that long-running goroutines use memory and may chew
// up CPU time, perhaps causing later tests to fail and generating
// confusing debug output. any goroutine with a long-running loop
// should call killed() to check whether it should stop.
func (rf *Raft) Kill() {
atomic.StoreInt32(&rf.dead, 1)
// Your code here, if desired.
}
func (rf *Raft) killed() bool {
z := atomic.LoadInt32(&rf.dead)
return z == 1
}
func (rf *Raft) ticker() {
for rf.killed() == false {
// Your code here (3A)
// Check if a leader election should be started.
// pause for a random amount of time between 50 and 350
// milliseconds.
ms := 50 + (rand.Int63() % 300)
time.Sleep(time.Duration(ms) * time.Millisecond)
}
}
// the service or tester wants to create a Raft server. the ports
// of all the Raft servers (including this one) are in peers[]. this
// server's port is peers[me]. all the servers' peers[] arrays
// have the same order. persister is a place for this server to
// save its persistent state, and also initially holds the most
// recent saved state, if any. applyCh is a channel on which the
// tester or service expects Raft to send ApplyMsg messages.
// Make() must return quickly, so it should start goroutines
// for any long-running work.
func Make(peers []*labrpc.ClientEnd, me int,
persister *Persister, applyCh chan ApplyMsg) *Raft {
rf := &Raft{}
rf.peers = peers
rf.persister = persister
rf.me = me
// Your initialization code here (3A, 3B, 3C).
// initialize from state persisted before a crash
rf.readPersist(persister.ReadRaftState())
// start ticker goroutine to start elections
go rf.ticker()
return rf
}
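// For orientation, a hedged sketch of how a service (or the tester)
// typically consumes committed entries from applyCh; the tester's
// applier() in config.go is the authoritative version:
func exampleServiceLoop(applyCh chan ApplyMsg) {
	for m := range applyCh {
		if m.CommandValid {
			// apply m.Command to the service's state machine, and
			// remember m.CommandIndex (useful for snapshots in 3D).
		} else if m.SnapshotValid {
			// install m.Snapshot, which replaces state up to and
			// including m.SnapshotIndex.
		}
	}
}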

1270
src/raft/test_test.go Normal file

File diff suppressed because it is too large Load Diff

12
src/raft/util.go Normal file
View File

@ -0,0 +1,12 @@
package raft
import "log"
// Debugging
const Debug = false
func DPrintf(format string, a ...interface{}) {
if Debug {
log.Printf(format, a...)
}
}

51
src/shardkv1/client.go Normal file
View File

@ -0,0 +1,51 @@
package shardkv
//
// client code to talk to a sharded key/value service.
//
// the client uses the shardctrler's clerk to query for the current
// configuration and find the assignment of shards (keys) to groups,
// and then talks to the group that holds the key's shard.
//
import (
"6.5840/kvsrv1/rpc"
"6.5840/kvtest1"
"6.5840/shardkv1/shardctrler"
"6.5840/tester1"
)
type Clerk struct {
clnt *tester.Clnt
qck *shardctrler.QueryClerk
// You will have to modify this struct.
}
// The tester calls MakeClerk and passes in a clerk for the
// shardctrler with only the Query method.
func MakeClerk(clnt *tester.Clnt, qck *shardctrler.QueryClerk) kvtest.IKVClerk {
ck := &Clerk{
clnt: clnt,
qck: qck,
}
// You'll have to add code here.
return ck
}
// Get a key from a shardgrp. You can use shardcfg.Key2Shard(key) to
// find the shard responsible for the key and ck.qck.Query() to read
// the current configuration and lookup the servers in the group
// responsible for key. You can make a clerk for that group by
// calling shardgrp.MakeClerk(ck.clnt, servers).
func (ck *Clerk) Get(key string) (string, rpc.Tversion, rpc.Err) {
// You will have to modify this function.
return "", 0, ""
}
// Put a key to a shard group.
func (ck *Clerk) Put(key string, value string, version rpc.Tversion) rpc.Err {
// You will have to modify this function.
return ""
}

View File

@ -0,0 +1,275 @@
package shardcfg
import (
"encoding/json"
"hash/fnv"
"log"
"runtime/debug"
"slices"
"testing"
"6.5840/tester1"
)
type Tshid int
type Tnum int
const (
NShards = 12 // The number of shards.
NumFirst = Tnum(1)
)
const (
Gid1 = tester.Tgid(1)
)
// which shard is a key in?
// please use this function,
// and please do not change it.
func Key2Shard(key string) Tshid {
h := fnv.New32a()
h.Write([]byte(key))
shard := Tshid(Tshid(h.Sum32()) % NShards)
return shard
}
// A configuration -- an assignment of shards to groups.
// Please don't change this.
type ShardConfig struct {
Num Tnum // config number
Shards [NShards]tester.Tgid // shard -> gid
Groups map[tester.Tgid][]string // gid -> servers[]
}
func MakeShardConfig() *ShardConfig {
c := &ShardConfig{
Groups: make(map[tester.Tgid][]string),
}
return c
}
func (cfg *ShardConfig) String() string {
b, err := json.Marshal(cfg)
if err != nil {
log.Fatalf("Unmarshall err %v", err)
}
return string(b)
}
func FromString(s string) *ShardConfig {
scfg := &ShardConfig{}
if err := json.Unmarshal([]byte(s), scfg); err != nil {
log.Fatalf("Unmarshall err %v", err)
}
return scfg
}
func (cfg *ShardConfig) Copy() *ShardConfig {
c := MakeShardConfig()
c.Num = cfg.Num
c.Shards = cfg.Shards
for k, srvs := range cfg.Groups {
s := make([]string, len(srvs))
copy(s, srvs)
c.Groups[k] = s
}
return c
}
// mostgroup, mostn, leastgroup, leastn
func analyze(c *ShardConfig) (tester.Tgid, int, tester.Tgid, int) {
counts := map[tester.Tgid]int{}
for _, g := range c.Shards {
counts[g] += 1
}
mn := -1
var mg tester.Tgid = -1
ln := 257
var lg tester.Tgid = -1
	// Enforce deterministic ordering; map iteration
	// order is randomized in Go.
groups := make([]tester.Tgid, len(c.Groups))
i := 0
for k := range c.Groups {
groups[i] = k
i++
}
slices.Sort(groups)
for _, g := range groups {
if counts[g] < ln {
ln = counts[g]
lg = g
}
if counts[g] > mn {
mn = counts[g]
mg = g
}
}
return mg, mn, lg, ln
}
// return GID of group with least number of
// assigned shards.
func least(c *ShardConfig) tester.Tgid {
_, _, lg, _ := analyze(c)
return lg
}
// balance assignment of shards to groups.
// modifies c.
func (c *ShardConfig) Rebalance() {
// if no groups, un-assign all shards
if len(c.Groups) < 1 {
for s, _ := range c.Shards {
c.Shards[s] = 0
}
return
}
// assign all unassigned shards
for s, g := range c.Shards {
_, ok := c.Groups[g]
if ok == false {
lg := least(c)
c.Shards[s] = lg
}
}
// move shards from most to least heavily loaded
for {
mg, mn, lg, ln := analyze(c)
if mn < ln+2 {
break
}
// move 1 shard from mg to lg
for s, g := range c.Shards {
if g == mg {
c.Shards[s] = lg
break
}
}
}
}
func (cfg *ShardConfig) Join(servers map[tester.Tgid][]string) {
changed := false
for gid, servers := range servers {
_, ok := cfg.Groups[gid]
if ok {
log.Fatalf("re-Join %v", gid)
}
for xgid, xservers := range cfg.Groups {
for _, s1 := range xservers {
for _, s2 := range servers {
if s1 == s2 {
log.Fatalf("Join(%v) puts server %v in groups %v and %v", gid, s1, xgid, gid)
}
}
}
}
// new GID
// modify cfg to reflect the Join()
cfg.Groups[gid] = servers
changed = true
}
if changed == false {
log.Fatalf("Join but no change")
}
cfg.Num += 1
}
func (cfg *ShardConfig) Leave(gids []tester.Tgid) {
changed := false
for _, gid := range gids {
_, ok := cfg.Groups[gid]
if ok == false {
// already no GID!
debug.PrintStack()
log.Fatalf("Leave(%v) but not in config", gid)
} else {
			// modify cfg to reflect the Leave()
delete(cfg.Groups, gid)
changed = true
}
}
if changed == false {
debug.PrintStack()
log.Fatalf("Leave but no change")
}
cfg.Num += 1
}
func (cfg *ShardConfig) JoinBalance(servers map[tester.Tgid][]string) {
cfg.Join(servers)
cfg.Rebalance()
}
func (cfg *ShardConfig) LeaveBalance(gids []tester.Tgid) {
cfg.Leave(gids)
cfg.Rebalance()
}
func (cfg *ShardConfig) GidServers(sh Tshid) (tester.Tgid, []string, bool) {
gid := cfg.Shards[sh]
srvs, ok := cfg.Groups[gid]
return gid, srvs, ok
}
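// A common lookup pattern built from the helpers above (a sketch of what
// a shardkv clerk ends up doing): map a key to its shard, then to the
// group and servers currently responsible for it.
func (cfg *ShardConfig) exampleLookup(key string) (tester.Tgid, []string, bool) {
	sh := Key2Shard(key)
	return cfg.GidServers(sh)
}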
func (cfg *ShardConfig) IsMember(gid tester.Tgid) bool {
for _, g := range cfg.Shards {
if g == gid {
return true
}
}
return false
}
func (cfg *ShardConfig) CheckConfig(t *testing.T, groups []tester.Tgid) {
if len(cfg.Groups) != len(groups) {
fatalf(t, "wanted %v groups, got %v", len(groups), len(cfg.Groups))
}
// are the groups as expected?
for _, g := range groups {
_, ok := cfg.Groups[g]
if ok != true {
fatalf(t, "missing group %v", g)
}
}
// any un-allocated shards?
if len(groups) > 0 {
for s, g := range cfg.Shards {
_, ok := cfg.Groups[g]
if ok == false {
fatalf(t, "shard %v -> invalid group %v", s, g)
}
}
}
// more or less balanced sharding?
counts := map[tester.Tgid]int{}
for _, g := range cfg.Shards {
counts[g] += 1
}
min := 257
max := 0
for g, _ := range cfg.Groups {
if counts[g] > max {
max = counts[g]
}
if counts[g] < min {
min = counts[g]
}
}
if max > min+1 {
fatalf(t, "max %v too much larger than min %v", max, min)
}
}
func fatalf(t *testing.T, format string, args ...any) {
debug.PrintStack()
t.Fatalf(format, args...)
}

View File

@ -0,0 +1,62 @@
package shardcfg
import (
	"testing"
	"6.5840/tester1"
)
func check_same_config(t *testing.T, c1 ShardConfig, c2 ShardConfig) {
if c1.Num != c2.Num {
t.Fatalf("Num wrong")
}
if c1.Shards != c2.Shards {
t.Fatalf("Shards wrong")
}
if len(c1.Groups) != len(c2.Groups) {
t.Fatalf("number of Groups is wrong")
}
for gid, sa := range c1.Groups {
sa1, ok := c2.Groups[gid]
if ok == false || len(sa1) != len(sa) {
t.Fatalf("len(Groups) wrong")
}
if ok && len(sa1) == len(sa) {
for j := 0; j < len(sa); j++ {
if sa[j] != sa1[j] {
t.Fatalf("Groups wrong")
}
}
}
}
}
func TestBasic(t *testing.T) {
const (
Gid1 = 1
Gid2 = 2
)
cfg := MakeShardConfig()
	cfg.CheckConfig(t, []tester.Tgid{})
	cfg.JoinBalance(map[tester.Tgid][]string{Gid1: []string{"x", "y", "z"}})
	cfg.CheckConfig(t, []tester.Tgid{Gid1})
	cfg.JoinBalance(map[tester.Tgid][]string{Gid2: []string{"a", "b", "c"}})
	cfg.CheckConfig(t, []tester.Tgid{Gid1, Gid2})
sa1 := cfg.Groups[Gid1]
if len(sa1) != 3 || sa1[0] != "x" || sa1[1] != "y" || sa1[2] != "z" {
t.Fatalf("wrong servers for gid %v: %v\n", Gid1, sa1)
}
sa2 := cfg.Groups[Gid2]
if len(sa2) != 3 || sa2[0] != "a" || sa2[1] != "b" || sa2[2] != "c" {
t.Fatalf("wrong servers for gid %v: %v\n", Gid2, sa2)
}
	cfg.LeaveBalance([]tester.Tgid{Gid1})
	cfg.CheckConfig(t, []tester.Tgid{Gid2})
	cfg.LeaveBalance([]tester.Tgid{Gid2})
	cfg.CheckConfig(t, []tester.Tgid{})
}

View File

@ -0,0 +1,49 @@
package shardctrler
import (
// "log"
"sync/atomic"
"6.5840/kvsrv1/rpc"
"6.5840/tester1"
)
type Clerk struct {
clnt *tester.Clnt
servers []string
deposed *int32
// You will have to modify this struct.
}
// The shard controller can use MakeClerk to make a clerk for the kvraft
// group with the servers `servers`.
func MakeClerk(clnt *tester.Clnt, servers []string, deposed *int32) *Clerk {
ck := &Clerk{clnt: clnt, servers: servers, deposed: deposed}
// You may add code here.
return ck
}
func (ck *Clerk) isDeposed() bool {
z := atomic.LoadInt32(ck.deposed)
return z == 1
}
// You can reuse your kvraft Get
func (ck *Clerk) Get(key string) (string, rpc.Tversion, rpc.Err) {
args := rpc.GetArgs{}
args.Key = key
// You'll have to add code here.
return "", 0, ""
}
// You can reuse your kvraft Put
func (ck *Clerk) Put(key string, value string, version rpc.Tversion) rpc.Err {
args := rpc.PutArgs{}
args.Key = key
args.Value = value
args.Version = version
// You'll have to add code here.
return ""
}

View File

@ -0,0 +1,58 @@
package lock
import (
"log"
"time"
"6.5840/kvsrv1/rpc"
"6.5840/kvtest1"
)
type Lock struct {
kvtest.IKVClerk
l string
id string
ver rpc.Tversion
}
func MakeLock(ck kvtest.IKVClerk, l string) *Lock {
lk := &Lock{IKVClerk: ck}
	// You may add code here
return lk
}
func (lk *Lock) AcquireLeadership() {
for {
if val, ver, err := lk.Get(lk.l); err == rpc.OK {
if val == "" { // put only when lock is free
if err := lk.Put(lk.l, lk.id, ver); err == rpc.OK {
lk.ver = ver + 1
return
} else if err == rpc.ErrMaybe { // check if put succeeded?
if val, ver, err := lk.Get(lk.l); err == rpc.OK {
if val == lk.id {
lk.ver = ver
return
}
}
}
}
time.Sleep(1 * time.Millisecond)
}
}
}
// used for two testing purposes: 1) to make the ctrler that is the leader
// give up its leadership; 2) to take back leadership from a
// partitioned/deposed ctrler using a new ctrler.
func (lk *Lock) ReleaseLeadership() rpc.Err {
_, ver, err := lk.Get(lk.l)
if err != rpc.OK {
log.Printf("ResetLock: %v err %v", lk.l, err)
}
if err := lk.Put(lk.l, "", ver); err == rpc.OK || err == rpc.ErrMaybe {
return rpc.OK
} else {
return err
}
}
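// A hedged usage sketch (the lock key name "leader-lock" below is made up
// for illustration; the controller chooses the real key):
func exampleLeadership(ck kvtest.IKVClerk) {
	lk := MakeLock(ck, "leader-lock")
	lk.AcquireLeadership()
	// ... act as the controller while holding leadership ...
	if err := lk.ReleaseLeadership(); err != rpc.OK {
		log.Printf("ReleaseLeadership: %v", err)
	}
}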

View File

@ -0,0 +1,115 @@
package shardctrler
//
// Shardctrler implemented as a clerk.
//
import (
"sync/atomic"
"6.5840/kvraft1"
"6.5840/kvsrv1/rpc"
"6.5840/kvtest1"
"6.5840/shardkv1/shardcfg"
"6.5840/tester1"
)
const (
ErrDeposed = "ErrDeposed"
)
// The query clerk must support only Query(); it is intended for use
// by shardkv clerks to read the current configuration (see
// ../client.go).
type QueryClerk struct {
kvtest.IKVClerk
// Your data here.
}
// Make a query clerk for controller's kvraft group to invoke just
// Query()
func MakeQueryClerk(clnt *tester.Clnt, servers []string) *QueryClerk {
qck := &QueryClerk{
IKVClerk: kvraft.MakeClerk(clnt, servers),
}
// Your code here.
return qck
}
// Return the current configuration. You can use Get() to retrieve
// the string representing the configuration and shardcfg.FromString()
// to unmarshal the string into a ShardConfig.
func (qck *QueryClerk) Query() (*shardcfg.ShardConfig, rpc.Tversion) {
// Your code here.
return nil, 0
}
// ShardCtrlerClerk for the shard controller. It implements the
// methods for Init(), Join(), Leave(), etc.
type ShardCtrlerClerk struct {
clnt *tester.Clnt
deposed int32 // set by Stepdown()
// Your data here.
}
// Make a ShardCtrlerClerk for the shard controller, which stores its
// state in a kvraft group. You can call (and implement) the
// MakeClerk method in client.go to make a kvraft clerk for the kvraft
// group with the servers `servers`.
func MakeShardCtrlerClerk(clnt *tester.Clnt, servers []string) *ShardCtrlerClerk {
sck := &ShardCtrlerClerk{clnt: clnt}
// Your code here.
return sck
}
// Called once by the tester to supply the first configuration. You
// can marshal ShardConfig into a string using shardcfg.String(), and
// then Put it in the kvraft group for the controller at version 0.
// You can pick the key to name the configuration.
func (sck *ShardCtrlerClerk) Init(cfg *shardcfg.ShardConfig) rpc.Err {
// Your code here
return rpc.OK
}
// Add group gid. Use shardcfg.JoinBalance() to compute the new
// configuration; the supplied `srvrs` are the servers for the new
// group. You can find the servers for existing groups in the
// configuration (which you can retrieve using Query()) and you can
// make a clerk for a group by calling shardgrp.MakeClerk(sck.clnt,
// servers), and then invoke its Freeze/InstallShard methods.
func (sck *ShardCtrlerClerk) Join(gid tester.Tgid, srvs []string) rpc.Err {
// Your code here
return rpc.ErrNoKey
}
// Group gid leaves. You can use shardcfg.LeaveBalance() to compute
// the new configuration.
func (sck *ShardCtrlerClerk) Leave(gid tester.Tgid) rpc.Err {
// Your code here
return rpc.ErrNoKey
}
// the tester calls Stepdown() to force a ctrler to step down while it
// is perhaps in the middle of a join/move. for your convenience, we
// also supply an isDeposed() method to test sck.deposed in long-running
// loops.
func (sck *ShardCtrlerClerk) Stepdown() {
atomic.StoreInt32(&sck.deposed, 1)
}
func (sck *ShardCtrlerClerk) isDeposed() bool {
z := atomic.LoadInt32(&sck.deposed)
return z == 1
}
// Return the current configuration
func (sck *ShardCtrlerClerk) Query() (*shardcfg.ShardConfig, rpc.Tversion, rpc.Err) {
// Your code here.
return nil, 0, ""
}
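// A hedged sketch of the call sequence the tester expects once the
// methods above are implemented (the gid and server names are supplied
// by the caller and are placeholders here):
func exampleCtrlerFlow(sck *ShardCtrlerClerk, newGid tester.Tgid, srvs []string) {
	// read the configuration installed earlier by Init()
	if cfg, _, err := sck.Query(); err == rpc.OK && cfg != nil {
		_ = cfg.Num
	}
	// add a new replica group, then remove it again
	if err := sck.Join(newGid, srvs); err == rpc.OK {
		_ = sck.Leave(newGid)
	}
}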

View File

@ -0,0 +1,38 @@
package shardgrp
import (
"6.5840/kvsrv1/rpc"
"6.5840/shardkv1/shardcfg"
"6.5840/tester1"
)
type Clerk struct {
clnt *tester.Clnt
servers []string
leader int // last successful leader (index into servers[])
}
func MakeClerk(clnt *tester.Clnt, servers []string) *Clerk {
ck := &Clerk{clnt: clnt, servers: servers}
return ck
}
func (ck *Clerk) Get(cid shardcfg.Tnum, key string, n shardcfg.Tnum) (string, rpc.Tversion, rpc.Err) {
// Your code here
return "", 0, ""
}
func (ck *Clerk) Put(key string, value string, version rpc.Tversion, n shardcfg.Tnum) (bool, rpc.Err) {
// Your code here
return false, ""
}
func (ck *Clerk) Freeze(s shardcfg.Tshid, num shardcfg.Tnum) ([]byte, rpc.Err) {
return nil, ""
}
func (ck *Clerk) InstallShard(s shardcfg.Tshid, state []byte, num shardcfg.Tnum) rpc.Err {
return ""
}

View File

@ -0,0 +1,99 @@
package shardgrp
import (
	"sync/atomic"
	"6.5840/kvraft1/rsm"
	"6.5840/kvsrv1/rpc"
	"6.5840/labgob"
	"6.5840/labrpc"
	"6.5840/raft"
	"6.5840/shardkv1/shardgrp/shardrpc"
	"6.5840/tester1"
)
type KVServer struct {
gid tester.Tgid
me int
dead int32 // set by Kill()
rsm *rsm.RSM
}
func (kv *KVServer) DoOp(req any) any {
// Your code here
return nil
}
func (kv *KVServer) Snapshot() []byte {
// Your code here
return nil
}
func (kv *KVServer) Restore(data []byte) {
// Your code here
}
func (kv *KVServer) Get(args *shardrpc.GetArgs, reply *rpc.GetReply) {
// Your code here
}
func (kv *KVServer) Put(args *shardrpc.PutArgs, reply *rpc.PutReply) {
// Your code here
}
// Freeze the specified shard (i.e., reject future Get/Puts for this
// shard) and return the key/values stored in that shard.
func (kv *KVServer) Freeze(args *shardrpc.FreezeArgs, reply *shardrpc.FreezeReply) {
// Your code here
}
// Install the supplied state for the specified shard.
func (kv *KVServer) InstallShard(args *shardrpc.InstallShardArgs, reply *shardrpc.InstallShardReply) {
// Your code here
}
// the tester calls Kill() when a KVServer instance won't
// be needed again. for your convenience, we supply
// code to set kv.dead (without needing a lock),
// and a killed() method to test kv.dead in
// long-running loops. you can also add your own
// code to Kill(). you're not required to do anything
// about this, but it may be convenient (for example)
// to suppress debug output from a Kill()ed instance.
func (kv *KVServer) Kill() {
atomic.StoreInt32(&kv.dead, 1)
// Your code here, if desired.
}
// Return kv's raft struct
func (kv *KVServer) Raft() *raft.Raft {
return kv.rsm.Raft()
}
func (kv *KVServer) killed() bool {
z := atomic.LoadInt32(&kv.dead)
return z == 1
}
// StartKVServer() and MakeRSM() must return quickly, so they should
// start goroutines for any long-running work.
func StartKVServer(servers []*labrpc.ClientEnd, gid tester.Tgid, me int, persister *raft.Persister, maxraftstate int) tester.IKVServer {
// call labgob.Register on structures you want
// Go's RPC library to marshall/unmarshall.
labgob.Register(shardrpc.PutArgs{})
labgob.Register(shardrpc.GetArgs{})
labgob.Register(shardrpc.FreezeArgs{})
labgob.Register(shardrpc.InstallShardArgs{})
labgob.Register(shardrpc.DeleteShardArgs{})
labgob.Register(rsm.Op{})
kv := &KVServer{gid: gid, me: me}
kv.rsm = rsm.MakeRSM(servers, me, persister, maxraftstate, kv)
// Your code here
return kv
}

View File

@ -0,0 +1,50 @@
package shardrpc
import (
"6.5840/kvsrv1/rpc"
"6.5840/shardkv1/shardcfg"
)
// Same as Put in kvsrv1/rpc, but with a configuration number.
type PutArgs struct {
Key string
Value string
Version rpc.Tversion
Num shardcfg.Tnum
}
// Same as Get in kvsrv1/rpc, but with a configuration number.
type GetArgs struct {
Key string
Num shardcfg.Tnum
}
type FreezeArgs struct {
Shard shardcfg.Tshid
Num shardcfg.Tnum
}
type FreezeReply struct {
State []byte
Num shardcfg.Tnum
Err rpc.Err
}
type InstallShardArgs struct {
Shard shardcfg.Tshid
State []byte
Num shardcfg.Tnum
}
type InstallShardReply struct {
Err rpc.Err
}
type DeleteShardArgs struct {
Shard shardcfg.Tshid
Num shardcfg.Tnum
}
type DeleteShardReply struct {
Err rpc.Err
}

View File

@ -0,0 +1,304 @@
package shardkv
import (
"log"
"testing"
"time"
"6.5840/kvsrv1/rpc"
"6.5840/kvtest1"
"6.5840/shardkv1/shardcfg"
"6.5840/tester1"
// "6.5840/shardkv1/shardctrler"
)
const (
NGRP = 8
)
// Set up a k/v service with 1 shardgrp (group 0) for the controller to
// store its state and 1 shardgrp (group 1) to store all shards. Tests
// the controller's Init() and Query(), and shardkv's Get/Put without
// reconfiguration.
func TestStaticOneShardGroup5A(t *testing.T) {
ts := MakeTest(t, "Test (5A): one shard group ...", true, false)
defer ts.Cleanup()
// The tester's setupKVService() sets up a kvraft group for the
// controller to store configurations and calls the controller's
// Init() method to create the first configuration.
ts.setupKVService()
sck := ts.ShardCtrler() // get the controller clerk from tester
// Read the initial configuration and check it
cfg, v, err := sck.Query()
if err != rpc.OK {
ts.Fatalf("Query failed %v", err)
}
if v != 1 || cfg.Num != 1 || cfg.Shards[0] != shardcfg.Gid1 {
ts.Fatalf("Static wrong %v %v", cfg, v)
}
cfg.CheckConfig(t, []tester.Tgid{shardcfg.Gid1})
ck := ts.MakeClerk() // make a shardkv clerk
ka, va := ts.SpreadPuts(ck, shardcfg.NShards) // do some puts
n := len(ka)
for i := 0; i < n; i++ {
ts.CheckGet(ck, ka[i], va[i], rpc.Tversion(1)) // check the puts
}
}
// test shardctrler's join, which adds a new group Gid2 and must move
// shards to the new group; the old group should reject Get/Puts on
// shards that moved.
func TestJoinBasic5A(t *testing.T) {
ts := MakeTest(t, "Test (5A): a group joins...", true, false)
defer ts.Cleanup()
gid1 := ts.setupKVService()
ck := ts.MakeClerk()
ka, va := ts.SpreadPuts(ck, shardcfg.NShards)
sck := ts.ShardCtrler()
cfg, _, err := sck.Query()
if err != rpc.OK {
ts.t.Fatalf("Query: err %v", err)
}
gid2 := ts.newGid()
err = ts.joinGroups(sck, []tester.Tgid{gid2})
if err != rpc.OK {
ts.t.Fatalf("joinGroups: err %v", err)
}
cfg1, _, err := sck.Query()
if err != rpc.OK {
ts.t.Fatalf("Query 1: err %v", err)
}
if cfg.Num+1 != cfg1.Num {
ts.t.Fatalf("wrong num %d expected %d ", cfg1.Num, cfg.Num+1)
}
if !cfg1.IsMember(gid2) {
ts.t.Fatalf("%d isn't a member of %v", gid2, cfg1)
}
// check shards at shardcfg.Gid2
ts.checkShutdownSharding(gid1, gid2, ka, va)
for i := 0; i < len(ka); i++ {
ts.CheckGet(ck, ka[i], va[i], rpc.Tversion(1))
}
// check shards at shardcfg.Gid1
ts.checkShutdownSharding(gid2, gid1, ka, va)
for i := 0; i < len(ka); i++ {
ts.CheckGet(ck, ka[i], va[i], rpc.Tversion(1))
}
}
// test shardctrler's leave
func TestJoinLeaveBasic5A(t *testing.T) {
ts := MakeTest(t, "Test (5A): basic groups join/leave ...", true, false)
defer ts.Cleanup()
gid1 := ts.setupKVService()
ck := ts.MakeClerk()
ka, va := ts.SpreadPuts(ck, shardcfg.NShards)
sck := ts.ShardCtrler()
gid2 := ts.newGid()
err := ts.joinGroups(sck, []tester.Tgid{gid2})
if err != rpc.OK {
ts.t.Fatalf("joinGroups: err %v", err)
}
// check shards at shardcfg.Gid2
ts.checkShutdownSharding(gid1, gid2, ka, va)
for i := 0; i < len(ka); i++ {
ts.CheckGet(ck, ka[i], va[i], rpc.Tversion(1))
}
err = sck.Leave(shardcfg.Gid1)
if err != rpc.OK {
ts.t.Fatalf("Leave: err %v", err)
}
cfg, _, err := sck.Query()
if err != rpc.OK {
ts.t.Fatalf("Query err %v", err)
}
if cfg.IsMember(shardcfg.Gid1) {
ts.t.Fatalf("%d is a member of %v", shardcfg.Gid1, cfg)
}
ts.Group(shardcfg.Gid1).Shutdown()
for i := 0; i < len(ka); i++ {
ts.CheckGet(ck, ka[i], va[i], rpc.Tversion(1))
}
// bring the crashed shard/group back to life.
ts.Group(shardcfg.Gid1).StartServers()
// Rejoin
sck.Join(shardcfg.Gid1, ts.Group(shardcfg.Gid1).SrvNames())
for i := 0; i < len(ka); i++ {
ts.CheckGet(ck, ka[i], va[i], rpc.Tversion(1))
}
// check shards at shardcfg.Gid2
ts.checkShutdownSharding(gid2, gid1, ka, va)
}
// test many groups joining and leaving, reliable or unreliable
func joinLeave5A(t *testing.T, reliable bool, part string) {
ts := MakeTest(t, "Test (5A): many groups join/leave ...", reliable, false)
defer ts.Cleanup()
ts.setupKVService()
ck := ts.MakeClerk()
ka, va := ts.SpreadPuts(ck, shardcfg.NShards)
sck := ts.ShardCtrler()
grps := ts.groups(NGRP)
ts.joinGroups(sck, grps)
ts.checkShutdownSharding(grps[0], grps[1], ka, va)
for i := 0; i < len(ka); i++ {
ts.CheckGet(ck, ka[i], va[i], rpc.Tversion(1))
}
ts.leaveGroups(sck, grps)
for i := 0; i < len(ka); i++ {
ts.CheckGet(ck, ka[i], va[i], rpc.Tversion(1))
}
}
func TestManyJoinLeaveReliable5A(t *testing.T) {
joinLeave5A(t, true, "Test (5A): many groups join/leave reliable...")
}
func TestManyJoinLeaveUnreliable5A(t *testing.T) {
joinLeave5A(t, false, "Test (5A): many groups join/leave unreliable...")
}
// Test we can recover from complete shutdown using snapshots
func TestSnapshot5A(t *testing.T) {
const NGRP = 3
ts := MakeTest(t, "Test (5A): snapshots ...", true, false)
defer ts.Cleanup()
ts.setupKVService()
ck := ts.MakeClerk()
ka, va := ts.SpreadPuts(ck, shardcfg.NShards)
sck := ts.ShardCtrler()
grps := ts.groups(2)
ts.joinGroups(sck, grps)
// check shards at shardcfg.Gid2
ts.checkShutdownSharding(grps[0], grps[1], ka, va)
for i := 0; i < len(ka); i++ {
ts.CheckGet(ck, ka[i], va[i], rpc.Tversion(1))
}
for i := tester.Tgid(0); i < NGRP; i++ {
ts.Group(shardcfg.Gid1).Shutdown()
}
for i := tester.Tgid(0); i < NGRP; i++ {
ts.Group(shardcfg.Gid1).StartServers()
}
for i := 0; i < len(ka); i++ {
ts.CheckGet(ck, ka[i], va[i], rpc.Tversion(1))
}
}
// Test linearizability with groups joining/leaving and `nclnt`
// concurrent clerks put/get's in `unreliable` net.
func concurrentClerk(t *testing.T, nclnt int, reliable bool, part string) {
const (
NSEC = 20
)
ts := MakeTest(t, part, reliable, true)
defer ts.Cleanup()
ts.setupKVService()
ka := kvtest.MakeKeys(shardcfg.NShards)
ch := make(chan []kvtest.ClntRes)
start := time.Now()
go func(ch chan []kvtest.ClntRes) {
rs := ts.SpawnClientsAndWait(nclnt, NSEC*time.Second, func(me int, ck kvtest.IKVClerk, done chan struct{}) kvtest.ClntRes {
return ts.OneClientPut(me, ck, ka, done)
})
ch <- rs
}(ch)
sck := ts.ShardCtrler()
grps := ts.groups(NGRP)
ts.joinGroups(sck, grps)
ts.leaveGroups(sck, grps)
log.Printf("time joining/leaving %v", time.Since(start))
rsa := <-ch
log.Printf("rsa %v", rsa)
ts.CheckPorcupine()
}
// Test linearizability with groups joining/leaving and 1 concurrent clerks put/get's
func TestOneConcurrentClerkReliable5A(t *testing.T) {
concurrentClerk(t, 1, true, "Test (5A): one concurrent clerk reliable...")
}
// Test linearizability with groups joining/leaving and many concurrent clerks put/get's
func TestManyConcurrentClerkReliable5A(t *testing.T) {
const NCLNT = 10
concurrentClerk(t, NCLNT, true, "Test (5A): many concurrent clerks reliable...")
}
// Test linearizability with groups joining/leaving and 1 concurrent clerks put/get's
func TestOneConcurrentClerkUnreliable5A(t *testing.T) {
concurrentClerk(t, 1, false, "Test (5A): one concurrent clerk unreliable ...")
}
// Test linearizability with groups joining/leaving and many concurrent clerks put/get's
func TestManyConcurrentClerkUnreliable5A(t *testing.T) {
const NCLNT = 10
concurrentClerk(t, NCLNT, false, "Test (5A): many concurrent clerks unreliable...")
}
// test recovery of partitioned controllers
func TestRecoverCtrler5B(t *testing.T) {
	const (
		NPARTITION = 10
	)
	ts := MakeTest(t, "Test (5B): recover controller ...", true, false)
defer ts.Cleanup()
ts.setupKVService()
ck := ts.MakeClerk()
ka, va := ts.SpreadPuts(ck, shardcfg.NShards)
	for i := 0; i < NPARTITION; i++ {
ts.partitionCtrler(ck, ka, va)
}
}

303
src/shardkv1/test.go Normal file
View File

@ -0,0 +1,303 @@
package shardkv
import (
"fmt"
"log"
"math/rand"
"sync"
"testing"
"time"
"6.5840/kvraft1"
"6.5840/kvsrv1/rpc"
"6.5840/kvtest1"
"6.5840/labrpc"
"6.5840/shardkv1/shardcfg"
"6.5840/shardkv1/shardctrler"
"6.5840/shardkv1/shardgrp"
"6.5840/tester1"
)
type Test struct {
t *testing.T
*kvtest.Test
sck *shardctrler.ShardCtrlerClerk
part string
mu sync.Mutex
ngid tester.Tgid
}
const (
	Controler = tester.Tgid(0) // the controller uses group 0 as its kvraft group
NSRV = 3 // servers per group
INTERGRPDELAY = 200 // time in ms between group changes
)
// Set up a kvraft group (group 0) for the shard controller and make
// the controller clerk.
func MakeTest(t *testing.T, part string, reliable, randomkeys bool) *Test {
cfg := tester.MakeConfig(t, NSRV, reliable, -1, kvraft.StartKVServer)
ts := &Test{
ngid: shardcfg.Gid1 + 1, // Gid1 is in use
t: t,
}
ts.Test = kvtest.MakeTest(t, cfg, randomkeys, ts)
ts.sck = ts.makeShardCtrlerClerk()
ts.Begin(part)
return ts
}
func (ts *Test) MakeClerk() kvtest.IKVClerk {
clnt := ts.Config.MakeClient()
ck := MakeClerk(clnt, ts.makeQueryClerk())
return &kvtest.TestClerk{ck, clnt}
}
func (ts *Test) DeleteClerk(ck kvtest.IKVClerk) {
tck := ck.(*kvtest.TestClerk)
ts.DeleteClient(tck.Clnt)
}
func (ts *Test) ShardCtrler() *shardctrler.ShardCtrlerClerk {
return ts.sck
}
func (ts *Test) makeShardCtrlerClerk() *shardctrler.ShardCtrlerClerk {
ck, _ := ts.makeShardCtrlerClerkClnt()
return ck
}
func (ts *Test) makeShardCtrlerClerkClnt() (*shardctrler.ShardCtrlerClerk, *tester.Clnt) {
srvs := ts.Group(Controler).SrvNames()
clnt := ts.Config.MakeClient()
return shardctrler.MakeShardCtrlerClerk(clnt, srvs), clnt
}
func (ts *Test) makeQueryClerk() *shardctrler.QueryClerk {
srvs := ts.Group(Controler).SrvNames()
clnt := ts.Config.MakeClient()
return shardctrler.MakeQueryClerk(clnt, srvs)
}
func (ts *Test) newGid() tester.Tgid {
ts.mu.Lock()
defer ts.mu.Unlock()
gid := ts.ngid
ts.ngid += 1
return gid
}
func (ts *Test) groups(n int) []tester.Tgid {
grps := make([]tester.Tgid, n)
for i := 0; i < n; i++ {
grps[i] = ts.newGid()
}
return grps
}
// Set up the KV service with one group, Gid1. Gid1 should initialize
// itself to own all shards.
func (ts *Test) setupKVService() tester.Tgid {
scfg := shardcfg.MakeShardConfig()
ts.Config.MakeGroupStart(shardcfg.Gid1, NSRV, -1, shardgrp.StartKVServer)
scfg.JoinBalance(map[tester.Tgid][]string{shardcfg.Gid1: ts.Group(shardcfg.Gid1).SrvNames()})
if err := ts.sck.Init(scfg); err != rpc.OK {
ts.t.Fatalf("Init err %v", err)
}
//ts.sck.AcquireLeadership()
return shardcfg.Gid1
}
func (ts *Test) joinGroups(sck *shardctrler.ShardCtrlerClerk, gids []tester.Tgid) rpc.Err {
for i, gid := range gids {
ts.Config.MakeGroupStart(gid, NSRV, -1, shardgrp.StartKVServer)
if err := sck.Join(gid, ts.Group(gid).SrvNames()); err != rpc.OK {
return err
}
if i < len(gids)-1 {
time.Sleep(INTERGRPDELAY * time.Millisecond)
}
}
return rpc.OK
}
func (ts *Test) leaveGroups(sck *shardctrler.ShardCtrlerClerk, gids []tester.Tgid) rpc.Err {
for i, gid := range gids {
if err := sck.Leave(gid); err != rpc.OK {
return err
}
ts.Config.ExitGroup(gid)
if i < len(gids)-1 {
time.Sleep(INTERGRPDELAY * time.Millisecond)
}
}
return rpc.OK
}
func (ts *Test) checkLogs(gids []tester.Tgid) {
for _, gid := range gids {
n := ts.Group(gid).LogSize()
s := ts.Group(gid).SnapshotSize()
if ts.Group(gid).Maxraftstate >= 0 && n > 8*ts.Group(gid).Maxraftstate {
ts.t.Fatalf("persister.RaftStateSize() %v, but maxraftstate %v",
n, ts.Group(gid).Maxraftstate)
}
if ts.Group(gid).Maxraftstate < 0 && s > 0 {
ts.t.Fatalf("maxraftstate is -1, but snapshot is non-empty!")
}
}
}
// make sure that the data really is sharded by
// shutting down one shard and checking that some
// Get()s don't succeed.
func (ts *Test) checkShutdownSharding(down, up tester.Tgid, ka []string, va []string) {
const NSEC = 2
ts.Group(down).Shutdown()
ts.checkLogs([]tester.Tgid{down, up}) // forbid snapshots
n := len(ka)
ch := make(chan string)
for xi := 0; xi < n; xi++ {
ck1 := ts.MakeClerk()
go func(i int) {
v, _, _ := ck1.Get(ka[i])
if v != va[i] {
ch <- fmt.Sprintf("Get(%v): expected:\n%v\nreceived:\n%v", ka[i], va[i], v)
} else {
ch <- ""
}
}(xi)
}
// wait a bit, only about half the Gets should succeed.
ndone := 0
done := false
for done == false {
select {
case err := <-ch:
if err != "" {
ts.Fatalf(err)
}
ndone += 1
case <-time.After(time.Second * NSEC):
done = true
break
}
}
// log.Printf("%d completions out of %d with %d groups", ndone, n, ngrp)
if ndone >= n {
ts.Fatalf("expected less than %d completions with one shard dead\n", n)
}
// bring the crashed shard/group back to life.
ts.Group(down).StartServers()
}
// Run one controller and then partition it forever after some time.
// Run another controller that must finish the first controller's
// unfinished shard moves, if there are any.
func (ts *Test) partitionCtrler(ck kvtest.IKVClerk, ka, va []string) {
const (
MSEC = 20
RAND = 2000 // maybe measure?
)
ch := make(chan tester.Tgid)
sck, clnt := ts.makeShardCtrlerClerkClnt()
cfg, _, err := ts.ShardCtrler().Query()
num := cfg.Num
go func() {
for true {
ngid := ts.newGid()
//log.Printf("join %d", ngid)
//s := time.Now()
ch <- ngid
err := ts.joinGroups(sck, []tester.Tgid{ngid})
if err == rpc.OK {
err = ts.leaveGroups(sck, []tester.Tgid{ngid})
}
//log.Printf("join err %v time %v", err, time.Since(s))
if err == shardctrler.ErrDeposed {
log.Printf("disposed")
return
}
if err != rpc.OK {
ts.t.Fatalf("join/leave err %v", err)
}
time.Sleep(INTERGRPDELAY * time.Millisecond)
}
}()
lastgid := <-ch
d := time.Duration(rand.Int()%RAND) * time.Millisecond
time.Sleep(MSEC*time.Millisecond + d)
log.Printf("disconnect sck %v", d)
// partition sck forever
clnt.DisconnectAll()
// force sck to step down
sck.Stepdown()
// wait until sck has no more requests in the network
time.Sleep(labrpc.MAXDELAY)
cfg, _, err = ts.ShardCtrler().Query()
if err != rpc.OK {
ts.Fatalf("Query err %v", err)
}
recovery := false
present := cfg.IsMember(lastgid)
join := num == cfg.Num
leave := num+1 == cfg.Num
if !present && join {
recovery = true
}
if present && leave {
recovery = true
}
	// start a new controller to pick up where sck left off
sck0, clnt0 := ts.makeShardCtrlerClerkClnt()
if err != rpc.OK {
ts.Fatalf("Query err %v", err)
}
cfg, _, err = sck0.Query()
if recovery {
s := "join"
if leave {
s = "leave"
}
//log.Printf("%v in progress", s)
present = cfg.IsMember(lastgid)
if (join && !present) || (leave && present) {
ts.Fatalf("didn't recover %d correctly after %v", lastgid, s)
}
}
if present {
// cleanup if disconnected after join but before leave
ts.leaveGroups(sck0, []tester.Tgid{lastgid})
}
for i := 0; i < len(ka); i++ {
ts.CheckGet(ck, ka[i], va[i], rpc.Tversion(1))
}
ts.Config.DeleteClient(clnt)
ts.Config.DeleteClient(clnt0)
}

153
src/tester1/clnts.go Normal file
View File

@ -0,0 +1,153 @@
package tester
import (
//"log"
"os"
"sync"
"6.5840/labrpc"
)
type end struct {
name string
end *labrpc.ClientEnd
}
// Servers are named by ServerName() and clerks lazily make a
// per-clerk ClientEnd to a server. Each clerk has a Clnt with a map
// of the allocated ends for this clerk.
type Clnt struct {
mu sync.Mutex
net *labrpc.Network
ends map[string]end
	// if srvs is nil, the client can connect to all servers;
	// if len(srvs) == 0, the client cannot connect to any server
srvs []string
}
func makeClntTo(net *labrpc.Network, srvs []string) *Clnt {
return &Clnt{ends: make(map[string]end), net: net, srvs: srvs}
}
// caller must acquire lock
func (clnt *Clnt) allowedL(server string) bool {
if clnt.srvs == nil {
return true
}
for _, n := range clnt.srvs {
if n == server {
return true
}
}
return false
}
func (clnt *Clnt) makeEnd(server string) end {
clnt.mu.Lock()
defer clnt.mu.Unlock()
if end, ok := clnt.ends[server]; ok {
return end
}
name := Randstring(20)
//log.Printf("%p: makEnd %v %v allowed %t", clnt, name, server, clnt.allowedL(server))
end := end{name: name, end: clnt.net.MakeEnd(name)}
clnt.net.Connect(name, server)
if clnt.allowedL(server) {
clnt.net.Enable(name, true)
} else {
clnt.net.Enable(name, false)
}
clnt.ends[server] = end
return end
}
func (clnt *Clnt) Call(server, method string, args interface{}, reply interface{}) bool {
end := clnt.makeEnd(server)
ok := end.end.Call(method, args, reply)
//log.Printf("%p: Call e %v m %v %v %v ok %v", clnt, end.name, method, args, reply, ok)
return ok
}
func (clnt *Clnt) ConnectAll() {
clnt.mu.Lock()
defer clnt.mu.Unlock()
for _, e := range clnt.ends {
// log.Printf("%p: ConnectAll: enable %v", clnt, e.name)
clnt.net.Enable(e.name, true)
}
clnt.srvs = nil
}
func (clnt *Clnt) DisconnectAll() {
clnt.mu.Lock()
defer clnt.mu.Unlock()
for _, e := range clnt.ends {
//log.Printf("%p: Disconnectall: disable %v", clnt, e.name)
clnt.net.Enable(e.name, false)
}
clnt.srvs = make([]string, 0)
}
func (clnt *Clnt) remove() {
clnt.mu.Lock()
defer clnt.mu.Unlock()
for _, e := range clnt.ends {
os.Remove(e.name)
}
}
type Clnts struct {
mu sync.Mutex
net *labrpc.Network
clerks map[*Clnt]struct{}
}
func makeClnts(net *labrpc.Network) *Clnts {
clnts := &Clnts{net: net, clerks: make(map[*Clnt]struct{})}
return clnts
}
func (clnts *Clnts) makeEnd(servername string) *labrpc.ClientEnd {
name := Randstring(20)
end := clnts.net.MakeEnd(name)
clnts.net.Connect(name, servername)
clnts.net.Enable(name, true)
return end
}
// Create a Clnt for a clerk. MakeClient allows connections to all
// servers; MakeClientTo allows connections only to the servers named
// in srvs.
func (clnts *Clnts) MakeClient() *Clnt {
return clnts.MakeClientTo(nil)
}
func (clnts *Clnts) MakeClientTo(srvs []string) *Clnt {
clnts.mu.Lock()
defer clnts.mu.Unlock()
clnt := makeClntTo(clnts.net, srvs)
clnts.clerks[clnt] = struct{}{}
return clnt
}
func (clnts *Clnts) cleanup() {
clnts.mu.Lock()
defer clnts.mu.Unlock()
for clnt := range clnts.clerks {
clnt.remove()
}
}
func (clnts *Clnts) DeleteClient(clnt *Clnt) {
clnts.mu.Lock()
defer clnts.mu.Unlock()
clnt.remove()
delete(clnts.clerks, clnt)
}
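
// The sketch below is illustrative only (not called by any test): it shows
// the intended clerk-side flow -- make a Clnt that may reach only the named
// servers, issue RPCs by server name, cut it off from the network, and
// finally delete it. The method name "KVServer.Get" and the empty args/reply
// are placeholders.
func exampleClntUsage(clnts *Clnts, args, reply interface{}) {
	// restrict this clerk to group 0's first server
	clnt := clnts.MakeClientTo([]string{ServerName(GRP0, 0)})
	ok := clnt.Call(ServerName(GRP0, 0), "KVServer.Get", args, reply)
	_ = ok

	clnt.DisconnectAll() // now no server is reachable
	clnt.ConnectAll()    // reachable again (srvs reset to nil)

	clnts.DeleteClient(clnt) // remove the clerk's ends
}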

180
src/tester1/config.go Normal file
View File

@ -0,0 +1,180 @@
package tester
import (
crand "crypto/rand"
"encoding/base64"
"fmt"
// "log"
"math/big"
"math/rand"
"runtime"
"runtime/debug"
"sync"
"sync/atomic"
"testing"
"time"
"6.5840/labrpc"
"6.5840/raft"
)
const GRP0 = 0
type IKVServer interface {
Raft() *raft.Raft
Kill()
}
type Config struct {
*Clnts // The clnts in the test
*Groups // The server groups in the test
t *testing.T
net *labrpc.Network // The network shared by clnts and servers
start time.Time // time at which make_config() was called
// begin()/end() statistics
t0 time.Time // time at which test_test.go called cfg.begin()
rpcs0 int // rpcTotal() at start of test
ops int32 // number of clerk get/put/append method calls
}
func MakeConfig(t *testing.T, n int, reliable bool, maxraftstate int, mks FstartServer) *Config {
ncpu_once.Do(func() {
if runtime.NumCPU() < 2 {
fmt.Printf("warning: only one CPU, which may conceal locking bugs\n")
}
rand.Seed(makeSeed())
})
runtime.GOMAXPROCS(4)
cfg := &Config{}
cfg.t = t
cfg.net = labrpc.MakeNetwork()
cfg.Groups = newGroups(cfg.net)
cfg.MakeGroupStart(GRP0, n, maxraftstate, mks)
cfg.Clnts = makeClnts(cfg.net)
cfg.start = time.Now()
cfg.net.Reliable(reliable)
return cfg
}
func (cfg *Config) SetReliable(reliable bool) {
cfg.net.Reliable(reliable)
}
func (cfg *Config) IsReliable() bool {
return cfg.net.IsReliable()
}
func (cfg *Config) SetLongReordering(longrel bool) {
cfg.net.LongReordering(longrel)
}
func (cfg *Config) SetLongDelays(longdel bool) {
cfg.net.LongDelays(longdel)
}
func (cfg *Config) Group(gid Tgid) *ServerGrp {
return cfg.lookupGroup(gid)
}
func (cfg *Config) Cleanup() {
cfg.Clnts.cleanup()
cfg.Groups.cleanup()
cfg.net.Cleanup()
cfg.CheckTimeout()
}
func (cfg *Config) MakeGroupStart(gid Tgid, nsrv, maxraftstate int, mks FstartServer) {
cfg.MakeGroup(gid, nsrv, maxraftstate, mks)
cfg.Group(gid).StartServers()
}
func (cfg *Config) ExitGroup(gid Tgid) {
cfg.Group(gid).Shutdown()
cfg.Groups.delete(gid)
}
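
// A hedged sketch (not part of the tests): a sharded-KV style test can add a
// second Raft group at run time with MakeGroupStart and tear it down with
// ExitGroup. The gid 1, group size 3, and maxraftstate -1 are arbitrary
// illustration values; mks stands in for whatever server constructor the
// test uses.
func exampleAddGroup(cfg *Config, mks FstartServer) {
	cfg.MakeGroupStart(1, 3, -1, mks) // create and start group 1 with 3 servers
	// ... run traffic against the new group ...
	cfg.ExitGroup(1) // shut the group down and forget it
}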
var ncpu_once sync.Once
func (cfg *Config) RpcTotal() int {
return cfg.net.GetTotalCount()
}
func (cfg *Config) BytesTotal() int64 {
return cfg.net.GetTotalBytes()
}
// start a Test.
// print the Test message.
// e.g. cfg.Begin("Test (2B): RPC counts aren't too high")
func (cfg *Config) Begin(description string) {
rel := "reliable"
if !cfg.net.IsReliable() {
rel = "unreliable"
}
fmt.Printf("%s (%s network)...\n", description, rel)
cfg.t0 = time.Now()
cfg.rpcs0 = cfg.RpcTotal()
atomic.StoreInt32(&cfg.ops, 0)
}
func (cfg *Config) Op() {
atomic.AddInt32(&cfg.ops, 1)
}
// end a Test -- the fact that we got here means there
// was no failure.
// print the Passed message,
// and some performance numbers.
func (cfg *Config) End() {
cfg.CheckTimeout()
if !cfg.t.Failed() {
t := time.Since(cfg.t0).Seconds() // real time
npeers := cfg.Group(GRP0).N() // number of Raft peers
nrpc := cfg.RpcTotal() - cfg.rpcs0 // number of RPC sends
ops := atomic.LoadInt32(&cfg.ops) // number of clerk get/put/append calls
fmt.Printf(" ... Passed --")
fmt.Printf(" %4.1f %d %5d %4d\n", t, npeers, nrpc, ops)
}
}
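
// A hedged sketch of the Begin/Op/End accounting, not a real test: create a
// Config, announce the test, bump the op counter once per clerk call, and
// print the pass line. The group size 3 and maxraftstate -1 are arbitrary;
// mks stands in for the test's server constructor.
func exampleBeginEnd(t *testing.T, mks FstartServer) {
	cfg := MakeConfig(t, 3, true, -1, mks)
	defer cfg.Cleanup()

	cfg.Begin("Test: begin/end accounting example")
	for i := 0; i < 10; i++ {
		cfg.Op() // one clerk get/put/append would be counted here
	}
	cfg.End() // prints elapsed time, #peers, #RPCs, #ops
}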
func (cfg *Config) Fatalf(format string, args ...any) {
debug.PrintStack()
cfg.t.Fatalf(format, args...)
}
func Randstring(n int) string {
b := make([]byte, 2*n)
crand.Read(b)
s := base64.URLEncoding.EncodeToString(b)
return s[0:n]
}
func (cfg *Config) CheckTimeout() {
// enforce a two minute real-time limit on each test
if !cfg.t.Failed() && time.Since(cfg.start) > 120*time.Second {
cfg.t.Fatal("test took longer than 120 seconds")
}
}
func makeSeed() int64 {
max := big.NewInt(int64(1) << 62)
bigx, _ := crand.Int(crand.Reader, max)
x := bigx.Int64()
return x
}
// Randomize server handles
func random_handles(kvh []*labrpc.ClientEnd) []*labrpc.ClientEnd {
sa := make([]*labrpc.ClientEnd, len(kvh))
copy(sa, kvh)
for i := range sa {
j := rand.Intn(i + 1)
sa[i], sa[j] = sa[j], sa[i]
}
return sa
}

306
src/tester1/group.go Normal file
View File

@ -0,0 +1,306 @@
package tester
import (
//"log"
"strconv"
"sync"
"6.5840/labrpc"
"6.5840/raft"
)
type Tgid int
type FstartServer func(ends []*labrpc.ClientEnd, grp Tgid, srv int, persister *raft.Persister, maxraftstate int) IKVServer
// Each server has a name: the i'th server of group gid. If there is only a
// single server, its gid is 0 and its i is 0.
func ServerName(gid Tgid, i int) string {
return "server-" + strconv.Itoa(int(gid)) + "-" + strconv.Itoa(i)
}
// The tester may have many groups of servers (e.g., one per Raft group).
// Groups are named 0, 1, and so on.
type Groups struct {
mu sync.Mutex
net *labrpc.Network
grps map[Tgid]*ServerGrp
}
func newGroups(net *labrpc.Network) *Groups {
return &Groups{net: net, grps: make(map[Tgid]*ServerGrp)}
}
func (gs *Groups) MakeGroup(gid Tgid, nsrv, maxraftstate int, mks FstartServer) {
gs.mu.Lock()
defer gs.mu.Unlock()
gs.grps[gid] = makeSrvGrp(gs.net, gid, nsrv, maxraftstate, mks)
}
func (gs *Groups) lookupGroup(gid Tgid) *ServerGrp {
gs.mu.Lock()
defer gs.mu.Unlock()
return gs.grps[gid]
}
func (gs *Groups) delete(gid Tgid) {
gs.mu.Lock()
defer gs.mu.Unlock()
delete(gs.grps, gid)
}
func (gs *Groups) cleanup() {
gs.mu.Lock()
defer gs.mu.Unlock()
for _, sg := range gs.grps {
sg.cleanup()
}
}
type ServerGrp struct {
Maxraftstate int
net *labrpc.Network
srvs []*Server
servernames []string
gid Tgid
connected []bool // whether each server is on the net
mks FstartServer
}
func makeSrvGrp(net *labrpc.Network, gid Tgid, n, m int, mks FstartServer) *ServerGrp {
sg := &ServerGrp{
Maxraftstate: m,
net: net,
srvs: make([]*Server, n),
gid: gid,
connected: make([]bool, n),
mks: mks,
}
for i := range sg.srvs {
sg.srvs[i] = makeServer(net, gid, n)
}
sg.servernames = make([]string, n)
for i := 0; i < n; i++ {
sg.servernames[i] = ServerName(gid, i)
}
return sg
}
func (sg *ServerGrp) N() int {
return len(sg.srvs)
}
func (sg *ServerGrp) SrvNames() []string {
return sg.servernames
}
func (sg *ServerGrp) SrvNamesTo(to []int) []string {
ns := make([]string, 0, len(to))
for _, i := range to {
ns = append(ns, sg.servernames[i])
}
return ns
}
func (sg *ServerGrp) all() []int {
all := make([]int, len(sg.srvs))
for i := range sg.srvs {
all[i] = i
}
return all
}
func (sg *ServerGrp) ConnectAll() {
for i := range sg.srvs {
sg.ConnectOne(i)
}
}
func (sg *ServerGrp) ConnectOne(i int) {
sg.connect(i, sg.all())
}
func (sg *ServerGrp) cleanup() {
for _, s := range sg.srvs {
if s.kvsrv != nil {
s.kvsrv.Kill()
}
}
}
// attach server i to servers listed in to
// caller must hold cfg.mu
func (sg *ServerGrp) connect(i int, to []int) {
//log.Printf("connect peer %d to %v\n", i, to)
sg.connected[i] = true
// outgoing socket files
sg.srvs[i].connect(to)
// incoming socket files
for j := 0; j < len(to); j++ {
if sg.IsConnected(j) {
endname := sg.srvs[to[j]].endNames[i]
sg.net.Enable(endname, true)
}
}
}
// detach server from the servers listed in from
// caller must hold cfg.mu
func (sg *ServerGrp) disconnect(i int, from []int) {
// log.Printf("%p: disconnect peer %d from %v\n", sg, i, from)
sg.connected[i] = false
// outgoing socket files
sg.srvs[i].disconnect(from)
// incoming socket files
for j := 0; j < len(from); j++ {
s := sg.srvs[from[j]]
if s.endNames != nil {
endname := s.endNames[i]
// log.Printf("%p: disconnect: %v", sg, endname)
sg.net.Enable(endname, false)
}
}
}
func (sg *ServerGrp) DisconnectAll(i int) {
sg.disconnect(i, sg.all())
}
func (sg *ServerGrp) IsConnected(i int) bool {
return sg.connected[i]
}
// Maximum log size across all servers
func (sg *ServerGrp) LogSize() int {
logsize := 0
for _, s := range sg.srvs {
n := s.saved.RaftStateSize()
if n > logsize {
logsize = n
}
}
return logsize
}
// Maximum snapshot size across all servers
func (sg *ServerGrp) SnapshotSize() int {
snapshotsize := 0
for _, s := range sg.srvs {
n := s.saved.SnapshotSize()
if n > snapshotsize {
snapshotsize = n
}
}
return snapshotsize
}
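
// A hedged sketch (not from the real tests): after driving traffic, a
// snapshot test might assert that the Raft state stayed within a small
// multiple of Maxraftstate. The factor of 8 is an arbitrary illustration.
func exampleLogBound(cfg *Config, sg *ServerGrp) {
	if sg.Maxraftstate > 0 && sg.LogSize() > 8*sg.Maxraftstate {
		cfg.Fatalf("log size %v exceeds 8*maxraftstate %v", sg.LogSize(), 8*sg.Maxraftstate)
	}
}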
// To restart a server, first call ShutdownServer
func (sg *ServerGrp) StartServer(i int) {
srv := sg.srvs[i].startServer(sg.gid)
sg.srvs[i] = srv
srv.kvsrv = sg.mks(srv.clntEnds, sg.gid, i, srv.saved, sg.Maxraftstate)
kvsvc := labrpc.MakeService(srv.kvsrv)
labsrv := labrpc.MakeServer()
labsrv.AddService(kvsvc)
if len(sg.srvs) > 1 { // Run with raft?
rfsvc := labrpc.MakeService(srv.kvsrv.Raft())
labsrv.AddService(rfsvc)
}
sg.net.AddServer(ServerName(sg.gid, i), labsrv)
}
// create a full set of KV servers.
func (sg *ServerGrp) StartServers() {
sg.start()
sg.ConnectAll()
}
// Shut down a server by isolating it from the network
func (sg *ServerGrp) ShutdownServer(i int) {
sg.disconnect(i, sg.all())
// disable client connections to the server.
// it's important to do this before creating
// the new Persister in saved[i], to avoid
// the possibility of the server returning a
// positive reply to an Append but persisting
// the result in the superseded Persister.
sg.net.DeleteServer(ServerName(sg.gid, i))
sg.srvs[i].shutdownServer()
}
func (sg *ServerGrp) Shutdown() {
for i := range sg.srvs {
sg.ShutdownServer(i)
}
}
func (sg *ServerGrp) start() {
for i := range sg.srvs {
sg.StartServer(i)
}
}
func (sg *ServerGrp) GetState(i int) (int, bool) {
return sg.srvs[i].kvsrv.Raft().GetState()
}
func (sg *ServerGrp) Leader() (bool, int) {
for i := range sg.srvs {
_, is_leader := sg.GetState(i)
if is_leader {
return true, i
}
}
return false, 0
}
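
// A hedged sketch of the crash/restart pattern the failure tests use: find
// the current leader, shut it down (which copies its Persister), start a
// fresh instance from that copy, and reconnect it to the group.
func exampleCrashLeader(sg *ServerGrp) {
	ok, l := sg.Leader()
	if !ok {
		return // no leader right now
	}
	sg.ShutdownServer(l) // isolate + snapshot persisted state
	sg.StartServer(l)    // new instance from the copied Persister
	sg.ConnectOne(l)     // reattach it to the other servers
}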
// Partition servers into 2 groups and put current leader in minority
func (sg *ServerGrp) MakePartition() ([]int, []int) {
_, l := sg.Leader()
n := len(sg.srvs)
p1 := make([]int, n/2+1)
p2 := make([]int, n/2)
j := 0
for i := 0; i < n; i++ {
if i != l {
if j < len(p1) {
p1[j] = i
} else {
p2[j-len(p1)] = i
}
j++
}
}
p2[len(p2)-1] = l
return p1, p2
}
func (sg *ServerGrp) Partition(p1 []int, p2 []int) {
// log.Printf("partition servers into: %v %v\n", p1, p2)
for i := 0; i < len(p1); i++ {
sg.disconnect(p1[i], p2)
sg.connect(p1[i], p1)
}
for i := 0; i < len(p2); i++ {
sg.disconnect(p2[i], p1)
sg.connect(p2[i], p2)
}
}
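
// A hedged sketch (not used by the tests) of isolating the leader: ask
// MakePartition for a majority/minority split with the leader in the
// minority, apply it, and later heal the network with ConnectAll.
func exampleIsolateLeader(sg *ServerGrp) {
	p1, p2 := sg.MakePartition() // p2 is the minority holding the old leader
	sg.Partition(p1, p2)
	// ... operations against the majority p1 should still commit ...
	sg.ConnectAll() // heal the partition
}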
func (sg *ServerGrp) RpcCount(server int) int {
return sg.net.GetCount(ServerName(sg.gid, server))
}

80
src/tester1/srv.go Normal file
View File

@ -0,0 +1,80 @@
package tester
import (
// "log"
"6.5840/labrpc"
"6.5840/raft"
)
type Server struct {
net *labrpc.Network
saved *raft.Persister
kvsrv IKVServer
endNames []string
clntEnds []*labrpc.ClientEnd
}
func makeServer(net *labrpc.Network, gid Tgid, nsrv int) *Server {
srv := &Server{net: net}
srv.endNames = make([]string, nsrv)
srv.clntEnds = make([]*labrpc.ClientEnd, nsrv)
for j := 0; j < nsrv; j++ {
// a fresh set of ClientEnds.
srv.endNames[j] = Randstring(20)
srv.clntEnds[j] = net.MakeEnd(srv.endNames[j])
net.Connect(srv.endNames[j], ServerName(gid, j))
}
return srv
}
// To restart a server, first call ShutdownServer
func (s *Server) startServer(gid Tgid) *Server {
srv := makeServer(s.net, gid, len(s.endNames))
// a fresh persister, so the old instance doesn't overwrite
// the new instance's persisted state.
// give the fresh persister a copy of the old persister's
// state, so that we always pass StartKVServer()
// the last persisted state.
if s.saved != nil {
srv.saved = s.saved.Copy()
} else {
srv.saved = raft.MakePersister()
}
return srv
}
func (s *Server) connect(to []int) {
for j := 0; j < len(to); j++ {
endname := s.endNames[to[j]]
s.net.Enable(endname, true)
}
}
func (s *Server) disconnect(from []int) {
if s.endNames == nil {
return
}
for j := 0; j < len(from); j++ {
endname := s.endNames[from[j]]
s.net.Enable(endname, false)
}
}
// XXX lock s?
func (s *Server) shutdownServer() {
// a fresh persister, in case old instance
// continues to update the Persister.
// but copy old persister's content so that we always
// pass Make() the last persisted state.
if s.saved != nil {
s.saved = s.saved.Copy()
}
kv := s.kvsrv
if kv != nil {
kv.Kill()
s.kvsrv = nil
}
}
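
// A hedged sketch of the internal lifecycle that ServerGrp drives for one
// Server: shutdownServer kills the KV server and copies the Persister, and
// startServer returns a fresh Server seeded with that copy.
func exampleServerCycle(s *Server, gid Tgid) *Server {
	s.shutdownServer()        // kill the KV server, copy persisted state
	return s.startServer(gid) // fresh Server starting from the copy
}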