import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.column.ParquetProperties;
import org.apache.parquet.example.data.Group;
import org.apache.parquet.example.data.simple.SimpleGroupFactory;
import org.apache.parquet.format.converter.ParquetMetadataConverter;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.example.GroupWriteSupport;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;
import org.apache.parquet.hadoop.metadata.ParquetMetadata;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.PrimitiveType;
import org.apache.parquet.schema.Types;
import org.junit.Test;

/**
 * Read the schema of an existing parquet file.
 * @throws IOException
 */
@Test
public void testGetSchema() throws IOException {
    Configuration configuration = new Configuration(true);
    configuration.set("fs.defaultFS", "hdfs://10.0.1.xx:9000");
    Path parquetFilePath = new Path("/user/yanglei/parquet/douban.parquet");
    // Read only the file footer, which carries the metadata (including the schema).
    ParquetMetadata readFooter = ParquetFileReader.readFooter(
            configuration, parquetFilePath, ParquetMetadataConverter.NO_FILTER);
    MessageType schema = readFooter.getFileMetaData().getSchema();
    System.out.println(schema.toString());
}

// Message type "douban" with three required columns.
public static final MessageType FILE_SCHEMA = Types.buildMessage()
        .required(PrimitiveType.PrimitiveTypeName.BINARY).named("user_name")
        .required(PrimitiveType.PrimitiveTypeName.INT64).named("bookid")
        .required(PrimitiveType.PrimitiveTypeName.INT32).named("bookscore")
        .named("douban");

/**
 * Write a parquet file.
 * @throws Exception
 */
@Test
public void testWriteParquet() throws Exception {
    Configuration conf = new Configuration(true);
    conf.set("fs.defaultFS", "hdfs://10.0.1.xx:9000");
    String file = "/user/yanglei/parquet/douban.parquet";
    Path path = new Path(file);
    FileSystem fs = path.getFileSystem(conf);
    if (fs.exists(path)) {
        fs.delete(path, true);
    }
    // GroupWriteSupport picks the schema up from the Configuration.
    GroupWriteSupport.setSchema(FILE_SCHEMA, conf);
    SimpleGroupFactory f = new SimpleGroupFactory(FILE_SCHEMA);
    // Args after the codec: block size, page size, dictionary page size,
    // enable dictionary, validate records, writer version.
    ParquetWriter<Group> writer = new ParquetWriter<>(path, new GroupWriteSupport(),
            CompressionCodecName.GZIP, 1024, 1024, 512, true, false,
            ParquetProperties.WriterVersion.PARQUET_2_0, conf);
    for (int i = 0; i < 1000; i++) {
        writer.write(
                f.newGroup()
                        .append("user_name", String.valueOf(i))
                        .append("bookid", 64L)
                        .append("bookscore", i));
    }
    writer.close();
}
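/**
 * Read the rows back. A minimal sketch, not from the original post: it assumes
 * the same org.apache.parquet example classes used above, plus two extra
 * imports (org.apache.parquet.hadoop.ParquetReader and
 * org.apache.parquet.hadoop.example.GroupReadSupport), and uses the
 * ParquetReader.builder(ReadSupport, Path) overload.
 * @throws IOException
 */
@Test
public void testReadParquet() throws IOException {
    Configuration conf = new Configuration(true);
    conf.set("fs.defaultFS", "hdfs://10.0.1.xx:9000");
    Path path = new Path("/user/yanglei/parquet/douban.parquet");
    ParquetReader<Group> reader = ParquetReader
            .builder(new GroupReadSupport(), path)
            .withConf(conf)
            .build();
    Group group;
    // read() returns null once the file is exhausted.
    while ((group = reader.read()) != null) {
        System.out.println(group.getString("user_name", 0)
                + "\t" + group.getLong("bookid", 0)
                + "\t" + group.getInteger("bookscore", 0));
    }
    reader.close();
}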