Avro 规范允许使用不同的写入模式和读取模式,只要二者相互匹配即可。该规范还允许通过别名(aliases)来弥合读取模式与写入模式之间的差异。以下 Python 2.7 代码试图说明这一点。
import uuid
import avro.schema
import json
from avro.datafile import DataFileReader,DataFileWriter
from avro.io import DatumReader,DatumWriter
# NOTE: this is the asker's failing variant. The schemas are plain dicts and
# the writer schema is handed to DatumWriter; the working variant later in
# this file wraps each schema in avro.schema.parse(json.dumps(...)) and passes
# the writer schema to DataFileWriter instead.

# Schema used when writing the records.
write_schema = {
    "namespace": "example.avro",
    "type": "record",
    "name": "User",
    "fields": [
        {"name": "name", "type": "string"},
        {"name": "favorite_number", "type": ["int", "null"]},
        {"name": "favorite_color", "type": ["string", "null"]},
    ],
}

writer = DataFileWriter(open("users.avro", "wb"), DatumWriter(write_schema))
writer.append({"name": "Alyssa", "favorite_number": 256})
writer.append({"name": "Ben", "favorite_number": 7, "favorite_color": "red"})
writer.close()

# Schema used when reading: "first_name" is declared as an alias of the
# writer's "name" field.
read_schema = {
    "namespace": "example.avro",
    "type": "record",
    "name": "User",
    "fields": [
        {"name": "first_name", "type": "string", "aliases": ["name"]},
        {"name": "favorite_number", "type": ["int", "null"]},
        {"name": "favorite_color", "type": ["string", "null"]},
    ],
}

# 1. open avro and extract passport + data
reader = DataFileReader(open("users.avro", "rb"), DatumReader(write_schema, read_schema))
reader.close()
/Library/Frameworks/Python.framework/Versions/2.7/bin/python2.7 /Users/simonshapiro/python_beam/src/avrov_test.py
Traceback (most recent call last):
  File "/Users/simonshapiro/python_beam/src/avrov_test.py", line 67, in <module>
如果改用下面这一行、不指定不同的读写模式来运行:
# Read the file back with a default DatumReader (no explicit schemas given).
reader = DataFileReader(open("users.avro", "rb"), DatumReader())
它可以正常工作。
最佳答案
经过一番排查,我发现模式的设置方式不正确。以下代码可以按预期工作:
import uuid
import avro.schema
import json
from avro.datafile import DataFileReader, DataFileWriter
from avro.io import DatumReader, DatumWriter

# Writer schema: built as a dict, serialized to JSON and parsed into an Avro
# schema object.
write_schema = avro.schema.parse(json.dumps({
    "namespace": "example.avro",
    "type": "record",
    "name": "User",
    "fields": [
        {"name": "name", "type": "string"},
        {"name": "favorite_number", "type": ["int", "null"]},
        {"name": "favorite_color", "type": ["string", "null"]},
    ],
}))

# The writer schema is passed to DataFileWriter; DatumWriter takes no schema.
writer = DataFileWriter(open("users.avro", "wb"), DatumWriter(), write_schema)
writer.append({"name": "Alyssa", "favorite_number": 256})
writer.append({"name": "Ben", "favorite_number": 7, "favorite_color": "red"})
writer.close()

# Reader schema: "first_name" declares "name" as an alias and carries a
# default value.
read_schema = avro.schema.parse(json.dumps({
    "namespace": "example.avro",
    "type": "record",
    "name": "User",
    "fields": [
        {"name": "first_name", "type": "string", "default": "", "aliases": ["name"]},
        {"name": "favorite_number", "type": ["int", "null"]},
        {"name": "favorite_color", "type": ["string", "null"]},
    ],
}))

# 1. open avro and extract passport + data
reader = DataFileReader(open("users.avro", "rb"), DatumReader(write_schema, read_schema))
new_schema = reader.get_meta("avro.schema")
users = list(reader)
reader.close()