Protocol Buffers Encoding

用Go学习Protobuf编码

31 July 2017

Chapman Ou

protobuf编码

先看一个案例

// Test1 holds a single required int32 field, a, with field number 1.
message Test1 {
  required int32 a = 1;
}

编码

/*
* File Name:	encoding.go
* Description:
* Author:	Chapman Ou <ochapman.cn@gmail.com>
* Created:	2017-07-25
 */

package main

import (
	"fmt"

	"bitbucket.org/ochapman/protobuf-encoding/example"

	"github.com/golang/protobuf/proto"
)

// main marshals a Test1 message whose field A is 150 and prints the encoded
// bytes in Go-syntax form (%#v).
func main() {
	msg := &example.Test1{
		A: proto.Int32(150),
	}
	encoded, err := proto.Marshal(msg)
	if err != nil {
		// The Marshal error used to be discarded; stop here rather than
		// print a byte slice that does not represent the message.
		panic(fmt.Sprintf("marshal Test1: %v", err))
	}
	fmt.Printf("%#v", encoded)
}

编码后输出

[]byte{0x8, 0x96, 0x1}

protobuf是如何编码的

[]byte{0x8, 0x96, 0x1}

0x8, 0x96, 0x1分别是怎么来的

学习protobuf的编码,基础是可变整数表示法。

可变整数表示法(varints)

例子

数字"1",表示为

0000 0001

数字"300", 表示为

1010 1100 0000 0010

去掉每个字节的最高位

1010 1100 0000 0010
 010 1100  000 0010

低有效字节是放在前面, 所以把两组7位反转一下: (000 0010)放在(010 1100)前面

000 0010  010 1100
-> 100101100
-> 256 + 32 + 8 + 4 = 300

消息结构

(field_number << 3) | wire_type

例子

开始的例子,输出是

[]byte{0x8, 0x96, 0x1}

第一个字节"0x8"是key, 去掉最高位

000 1000

最后3位值为"0"(000), 即wire_type是varint
前面4位值为"1"(0001), 即field number是1

第二个字节是"0x96"(10010110), 因为最高位"1", 所以后面还有字节"0x01"

96 01 = 1001 0110  0000 0001
      → 000 0001  ++  001 0110 (drop the msb and reverse the groups of 7 bits)
      → 10010110
      → 2 + 4 + 16 + 128 = 150

有符号整数

// Test5 holds a single required int64 field, e, with field number 1.
message Test5 {
  required int64 e = 1;
}

编码

/*
* File Name:	encoding_standard.go
* Description:
* Author:	Chapman Ou <ochapman.cn@gmail.com>
* Created:	2017-08-12
 */
package main

import (
	"fmt"

	"bitbucket.org/ochapman/protobuf-encoding/example"

	"github.com/golang/protobuf/proto"
)

// main marshals a Test5 message whose int64 field E is -1 and prints the
// encoded bytes in Go-syntax form (%#v).
func main() {
	msg := &example.Test5{
		E: proto.Int64(-1),
	}
	encoded, err := proto.Marshal(msg)
	if err != nil {
		// The Marshal error used to be discarded; stop here rather than
		// print a byte slice that does not represent the message.
		panic(fmt.Sprintf("marshal Test5: %v", err))
	}
	fmt.Printf("%#v", encoded)
}

输出

[]byte{0x8, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x1}

有符号整数

// Test6 holds a single required sint64 field, f, with field number 1.
// Unlike Test5, the field is declared sint64 rather than int64.
message Test6 {
  required sint64 f = 1;
}

编码

/*
* File Name:	encoding_sint.go
* Description:
* Author:	Chapman Ou <ochapman.cn@gmail.com>
* Created:	2017-08-12
 */
package main

import (
	"fmt"

	"bitbucket.org/ochapman/protobuf-encoding/example"

	"github.com/golang/protobuf/proto"
)

// main marshals a Test6 message whose sint64 field F is -1 and prints the
// encoded bytes in Go-syntax form (%#v).
func main() {
	msg := &example.Test6{
		F: proto.Int64(-1),
	}
	encoded, err := proto.Marshal(msg)
	if err != nil {
		// The Marshal error used to be discarded; stop here rather than
		// print a byte slice that does not represent the message.
		panic(fmt.Sprintf("marshal Test6: %v", err))
	}
	fmt.Printf("%#v", encoded)
}

输出

[]byte{0x8, 0x1}

是不是极大地节省了存储空间?

ZigZag编码

sint32

(n << 1) ^ (n >> 31)

sint64

(n << 1) ^ (n >> 63)

Strings

// Test2 holds a single required string field, s, with field number 1.
message Test2 {
  required string s = 1;
}
/*
* File Name:	encoding_string.go
* Description:
* Author:	Chapman Ou <chapmanou@tencent.com>
* Created:	2017-07-30
 */
package main

import (
	"fmt"

	"bitbucket.org/ochapman/protobuf-encoding/example"

	"github.com/golang/protobuf/proto"
)

// main marshals a Test2 message whose string field S is "hello" and prints
// the encoded bytes in Go-syntax form (%#v).
func main() {
	msg := &example.Test2{
		S: proto.String("hello"),
	}
	encoded, err := proto.Marshal(msg)
	if err != nil {
		// The Marshal error used to be discarded; stop here rather than
		// print a byte slice that does not represent the message.
		panic(fmt.Sprintf("marshal Test2: %v", err))
	}
	fmt.Printf("%#v", encoded)
}

结果输出

[]byte{0xa, 0x5, 0x68, 0x65, 0x6c, 0x6c, 0x6f}

Strings

结果分析

[]byte{0xa, 0x5, 0x68, 0x65, 0x6c, 0x6c, 0x6f}

Embedded Messages

// Test1 holds a single required int32 field, a, with field number 1.
// It is the message type embedded by Test3.
message Test1 {
  required int32 a = 1;
}
// Test3 embeds a required Test1 message as field c, with field number 3.
message Test3 {
  required Test1 c = 3;
}
/*
* File Name:	encoding_embedded.go
* Description:
* Author:	Chapman Ou <chapmanou@tencent.com>
* Created:	2017-07-30
 */

package main

import (
	"fmt"

	"bitbucket.org/ochapman/protobuf-encoding/example"

	"github.com/golang/protobuf/proto"
)

// main builds a Test1 message (A = 150), embeds it in a Test3 message as
// field C, marshals the outer message, and prints the encoded bytes in
// Go-syntax form (%#v).
func main() {
	inner := &example.Test1{
		A: proto.Int32(150),
	}
	outer := &example.Test3{
		C: inner,
	}
	encoded, err := proto.Marshal(outer)
	if err != nil {
		// The Marshal error used to be discarded; stop here rather than
		// print a byte slice that does not represent the message.
		panic(fmt.Sprintf("marshal Test3: %v", err))
	}
	fmt.Printf("%#v", encoded)
}

Embedded Messages

结果分析

1a 03 08 96 01

Repeated

// Test4 holds a repeated int32 field, d, with field number 4.
// The field is declared with the packed=true option.
message Test4 {
  repeated int32 d = 4 [packed=true];
}
/*
* File Name:	encoding_repeated.go
* Description:
* Author:	Chapman Ou <chapmanou@tencent.com>
* Created:	2017-07-31
 */
package main

import (
	"fmt"

	"bitbucket.org/ochapman/protobuf-encoding/example"

	"github.com/golang/protobuf/proto"
)

// main marshals a Test4 message whose repeated field D holds 3, 270 and
// 86942, then prints the encoded bytes in Go-syntax form (%#v).
func main() {
	values := []int32{3, 270, 86942}
	msg := &example.Test4{
		D: values,
	}
	encoded, err := proto.Marshal(msg)
	if err != nil {
		// The Marshal error used to be discarded; stop here rather than
		// print a byte slice that does not represent the message.
		panic(fmt.Sprintf("marshal Test4: %v", err))
	}
	fmt.Printf("%#v", encoded)
}

Repeated

编码后

0x22 0x6 0x3 0x8e 0x2 0x9e 0xa7 0x5

分析

0x22            // Key (field number 4, wire type 2)
0x6             // 后面带字节的个数 (6 bytes)
0x3             // 第一个元素(varint 3)
0x8e 0x2        // 第二个元素(varint 270)
0x9e 0xa7 0x5   // 第三个元素(varint 86942)

小结

Thank you

Chapman Ou

Use the left and right arrow keys or click the left and right edges of the page to navigate between slides.
(Press 'H' or navigate to hide this message.)