Skip to content

Commit f2b20f4

Browse files
authored
feat(bigquery managedwriter): schema conversion support (#4357)
This is the first of multiple PRs to build up the functionality of a new thick client over the new BigQuery Storage API's write mechanism. This PR exposes schema conversion between the main bigquery package and the bigquery storage API. Towards: #4366
1 parent 58d4055 commit f2b20f4

File tree

4 files changed

+384
-0
lines changed

4 files changed

+384
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
// Copyright 2021 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// https://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
// Package adapt contains helpers for converting between the schema and data
// type representations of the main bigquery package and those used by the
// BigQuery Storage API.
//
// It is EXPERIMENTAL and subject to change or removal without notice.
package adapt
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
// Copyright 2021 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// https://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package adapt
16+
17+
import (
18+
"fmt"
19+
20+
"cloud.google.com/go/bigquery"
21+
storagepb "google.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta2"
22+
)
23+
24+
// fieldTypeMap maps field types of the main bigquery package to their
// equivalents in the storage API's TableFieldSchema proto.  It is also
// scanned in reverse to resolve proto types back to bigquery types.
var fieldTypeMap = map[bigquery.FieldType]storagepb.TableFieldSchema_Type{
	bigquery.StringFieldType:     storagepb.TableFieldSchema_STRING,
	bigquery.BytesFieldType:      storagepb.TableFieldSchema_BYTES,
	bigquery.IntegerFieldType:    storagepb.TableFieldSchema_INT64,
	bigquery.FloatFieldType:      storagepb.TableFieldSchema_DOUBLE,
	bigquery.BooleanFieldType:    storagepb.TableFieldSchema_BOOL,
	bigquery.TimestampFieldType:  storagepb.TableFieldSchema_TIMESTAMP,
	bigquery.RecordFieldType:     storagepb.TableFieldSchema_STRUCT,
	bigquery.DateFieldType:       storagepb.TableFieldSchema_DATE,
	bigquery.TimeFieldType:       storagepb.TableFieldSchema_TIME,
	bigquery.DateTimeFieldType:   storagepb.TableFieldSchema_DATETIME,
	bigquery.NumericFieldType:    storagepb.TableFieldSchema_NUMERIC,
	bigquery.BigNumericFieldType: storagepb.TableFieldSchema_BIGNUMERIC,
	bigquery.GeographyFieldType:  storagepb.TableFieldSchema_GEOGRAPHY,
}
39+
40+
func bqFieldToProto(in *bigquery.FieldSchema) (*storagepb.TableFieldSchema, error) {
41+
if in == nil {
42+
return nil, nil
43+
}
44+
out := &storagepb.TableFieldSchema{
45+
Name: in.Name,
46+
Description: in.Description,
47+
}
48+
49+
// Type conversion.
50+
typ, ok := fieldTypeMap[in.Type]
51+
if !ok {
52+
return nil, fmt.Errorf("could not convert field (%s) due to unknown type value: %s", in.Name, in.Type)
53+
}
54+
out.Type = typ
55+
56+
// Mode conversion. Repeated trumps required.
57+
out.Mode = storagepb.TableFieldSchema_NULLABLE
58+
if in.Repeated {
59+
out.Mode = storagepb.TableFieldSchema_REPEATED
60+
}
61+
if !in.Repeated && in.Required {
62+
out.Mode = storagepb.TableFieldSchema_REQUIRED
63+
}
64+
65+
for _, s := range in.Schema {
66+
subField, err := bqFieldToProto(s)
67+
if err != nil {
68+
return nil, err
69+
}
70+
out.Fields = append(out.Fields, subField)
71+
}
72+
return out, nil
73+
}
74+
75+
func protoToBQField(in *storagepb.TableFieldSchema) (*bigquery.FieldSchema, error) {
76+
if in == nil {
77+
return nil, nil
78+
}
79+
out := &bigquery.FieldSchema{
80+
Name: in.GetName(),
81+
Description: in.GetDescription(),
82+
Repeated: in.GetMode() == storagepb.TableFieldSchema_REPEATED,
83+
Required: in.GetMode() == storagepb.TableFieldSchema_REQUIRED,
84+
}
85+
86+
typeResolved := false
87+
for k, v := range fieldTypeMap {
88+
if v == in.GetType() {
89+
out.Type = k
90+
typeResolved = true
91+
break
92+
}
93+
}
94+
if !typeResolved {
95+
return nil, fmt.Errorf("could not convert proto type to bigquery type: %v", in.GetType().String())
96+
}
97+
98+
for _, s := range in.Fields {
99+
subField, err := protoToBQField(s)
100+
if err != nil {
101+
return nil, err
102+
}
103+
out.Schema = append(out.Schema, subField)
104+
}
105+
return out, nil
106+
}
107+
108+
// BQSchemaToStorageTableSchema converts a bigquery Schema into the protobuf-based TableSchema used
109+
// by the BigQuery Storage WriteClient.
110+
func BQSchemaToStorageTableSchema(in bigquery.Schema) (*storagepb.TableSchema, error) {
111+
if in == nil {
112+
return nil, nil
113+
}
114+
out := &storagepb.TableSchema{}
115+
for _, s := range in {
116+
converted, err := bqFieldToProto(s)
117+
if err != nil {
118+
return nil, err
119+
}
120+
out.Fields = append(out.Fields, converted)
121+
}
122+
return out, nil
123+
}
124+
125+
// StorageTableSchemaToBQSchema converts a TableSchema from the BigQuery Storage WriteClient
126+
// into the equivalent BigQuery Schema.
127+
func StorageTableSchemaToBQSchema(in *storagepb.TableSchema) (bigquery.Schema, error) {
128+
if in == nil {
129+
return nil, nil
130+
}
131+
var out bigquery.Schema
132+
for _, s := range in.Fields {
133+
converted, err := protoToBQField(s)
134+
if err != nil {
135+
return nil, err
136+
}
137+
out = append(out, converted)
138+
}
139+
return out, nil
140+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,203 @@
1+
// Copyright 2021 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// https://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package adapt
16+
17+
import (
18+
"testing"
19+
20+
"cloud.google.com/go/bigquery"
21+
"cloud.google.com/go/internal/testutil"
22+
"github.com/google/go-cmp/cmp"
23+
storagepb "google.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta2"
24+
"google.golang.org/protobuf/testing/protocmp"
25+
)
26+
27+
func TestFieldConversions(t *testing.T) {
28+
testCases := []struct {
29+
desc string
30+
bq *bigquery.FieldSchema
31+
proto *storagepb.TableFieldSchema
32+
}{
33+
{
34+
desc: "nil",
35+
bq: nil,
36+
proto: nil,
37+
},
38+
{
39+
desc: "string field",
40+
bq: &bigquery.FieldSchema{
41+
Name: "name",
42+
Type: bigquery.StringFieldType,
43+
Description: "description",
44+
},
45+
proto: &storagepb.TableFieldSchema{
46+
Name: "name",
47+
Type: storagepb.TableFieldSchema_STRING,
48+
Description: "description",
49+
Mode: storagepb.TableFieldSchema_NULLABLE,
50+
},
51+
},
52+
{
53+
desc: "required integer field",
54+
bq: &bigquery.FieldSchema{
55+
Name: "name",
56+
Type: bigquery.IntegerFieldType,
57+
Description: "description",
58+
Required: true,
59+
},
60+
proto: &storagepb.TableFieldSchema{
61+
Name: "name",
62+
Type: storagepb.TableFieldSchema_INT64,
63+
Description: "description",
64+
Mode: storagepb.TableFieldSchema_REQUIRED,
65+
},
66+
},
67+
{
68+
desc: "struct with repeated bytes subfield",
69+
bq: &bigquery.FieldSchema{
70+
Name: "name",
71+
Type: bigquery.RecordFieldType,
72+
Description: "description",
73+
Required: true,
74+
Schema: bigquery.Schema{
75+
&bigquery.FieldSchema{
76+
Name: "inner1",
77+
Repeated: true,
78+
Description: "repeat",
79+
Type: bigquery.BytesFieldType,
80+
},
81+
},
82+
},
83+
proto: &storagepb.TableFieldSchema{
84+
Name: "name",
85+
Type: storagepb.TableFieldSchema_STRUCT,
86+
Description: "description",
87+
Mode: storagepb.TableFieldSchema_REQUIRED,
88+
Fields: []*storagepb.TableFieldSchema{
89+
{
90+
Name: "inner1",
91+
Mode: storagepb.TableFieldSchema_REPEATED,
92+
Description: "repeat",
93+
Type: storagepb.TableFieldSchema_BYTES,
94+
},
95+
},
96+
},
97+
},
98+
}
99+
100+
for _, tc := range testCases {
101+
// first, bq to proto
102+
converted, err := bqFieldToProto(tc.bq)
103+
if err != nil {
104+
t.Errorf("case (%s) failed conversion from bq: %v", tc.desc, err)
105+
}
106+
if diff := cmp.Diff(converted, tc.proto, protocmp.Transform()); diff != "" {
107+
t.Errorf("conversion to proto diff (%s):\n%v", tc.desc, diff)
108+
}
109+
// reverse conversion, proto to bq
110+
reverse, err := protoToBQField(tc.proto)
111+
if err != nil {
112+
t.Errorf("case (%s) failed conversion from proto: %v", tc.desc, err)
113+
}
114+
if diff := cmp.Diff(reverse, tc.bq); diff != "" {
115+
t.Errorf("conversion to BQ diff (%s):\n%v", tc.desc, diff)
116+
}
117+
}
118+
}
119+
120+
func TestSchemaConversion(t *testing.T) {
121+
122+
testCases := []struct {
123+
description string
124+
bqSchema bigquery.Schema
125+
storageSchema *storagepb.TableSchema
126+
}{
127+
{
128+
description: "nil",
129+
bqSchema: nil,
130+
storageSchema: nil,
131+
},
132+
{
133+
description: "scalars",
134+
bqSchema: bigquery.Schema{
135+
{Name: "f1", Type: bigquery.StringFieldType},
136+
{Name: "f2", Type: bigquery.IntegerFieldType},
137+
{Name: "f3", Type: bigquery.BooleanFieldType},
138+
},
139+
storageSchema: &storagepb.TableSchema{
140+
Fields: []*storagepb.TableFieldSchema{
141+
{Name: "f1", Type: storagepb.TableFieldSchema_STRING, Mode: storagepb.TableFieldSchema_NULLABLE},
142+
{Name: "f2", Type: storagepb.TableFieldSchema_INT64, Mode: storagepb.TableFieldSchema_NULLABLE},
143+
{Name: "f3", Type: storagepb.TableFieldSchema_BOOL, Mode: storagepb.TableFieldSchema_NULLABLE},
144+
},
145+
},
146+
},
147+
{
148+
description: "array",
149+
bqSchema: bigquery.Schema{
150+
{Name: "arr", Type: bigquery.NumericFieldType, Repeated: true},
151+
{Name: "big", Type: bigquery.BigNumericFieldType, Required: true},
152+
},
153+
storageSchema: &storagepb.TableSchema{
154+
Fields: []*storagepb.TableFieldSchema{
155+
{Name: "arr", Type: storagepb.TableFieldSchema_NUMERIC, Mode: storagepb.TableFieldSchema_REPEATED},
156+
{Name: "big", Type: storagepb.TableFieldSchema_BIGNUMERIC, Mode: storagepb.TableFieldSchema_REQUIRED},
157+
},
158+
},
159+
},
160+
{
161+
description: "nested",
162+
bqSchema: bigquery.Schema{
163+
{Name: "struct1", Type: bigquery.RecordFieldType, Schema: []*bigquery.FieldSchema{
164+
{Name: "leaf1", Type: bigquery.DateFieldType},
165+
{Name: "leaf2", Type: bigquery.DateTimeFieldType},
166+
}},
167+
{Name: "field2", Type: bigquery.StringFieldType},
168+
},
169+
storageSchema: &storagepb.TableSchema{
170+
Fields: []*storagepb.TableFieldSchema{
171+
{Name: "struct1",
172+
Type: storagepb.TableFieldSchema_STRUCT,
173+
Mode: storagepb.TableFieldSchema_NULLABLE,
174+
Fields: []*storagepb.TableFieldSchema{
175+
{Name: "leaf1", Type: storagepb.TableFieldSchema_DATE, Mode: storagepb.TableFieldSchema_NULLABLE},
176+
{Name: "leaf2", Type: storagepb.TableFieldSchema_DATETIME, Mode: storagepb.TableFieldSchema_NULLABLE},
177+
}},
178+
{Name: "field2", Type: storagepb.TableFieldSchema_STRING, Mode: storagepb.TableFieldSchema_NULLABLE},
179+
},
180+
},
181+
},
182+
}
183+
for _, tc := range testCases {
184+
185+
// BQ -> Storage
186+
storageS, err := BQSchemaToStorageTableSchema(tc.bqSchema)
187+
if err != nil {
188+
t.Errorf("BQSchemaToStorageTableSchema(%s): %v", tc.description, err)
189+
}
190+
if diff := testutil.Diff(storageS, tc.storageSchema); diff != "" {
191+
t.Fatalf("BQSchemaToStorageTableSchema(%s): -got, +want:\n%s", tc.description, diff)
192+
}
193+
194+
// Storage -> BQ
195+
bqS, err := StorageTableSchemaToBQSchema(tc.storageSchema)
196+
if err != nil {
197+
t.Errorf("StorageTableSchemaToBQSchema(%s): %v", tc.description, err)
198+
}
199+
if diff := testutil.Diff(bqS, tc.bqSchema); diff != "" {
200+
t.Fatalf("StorageTableSchemaToBQSchema(%s): -got, +want:\n%s", tc.description, diff)
201+
}
202+
}
203+
}

bigquery/storage/managedwriter/doc.go

+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
// Copyright 2021 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// https://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
// Package managedwriter will be a thick client around the storage API's BigQueryWriteClient.
//
// It is EXPERIMENTAL and subject to change or removal without notice.
//
// Currently, the BigQueryWriteClient this library targets is exposed in the storage v1beta2
// endpoint, and is a successor to the streaming interface (whose primary backend method is
// tabledata.insertAll, surfaced as the Inserter abstraction in the cloud.google.com/go/bigquery
// package).
package managedwriter

0 commit comments

Comments
 (0)