Last active
September 24, 2018 01:23
-
-
Save thamht4190/cbe2bfa40daf8e0ea8da9133a3f122f9 to your computer and use it in GitHub Desktop.
Revisions
-
thamht4190 revised this gist
Sep 24, 2018 . 1 changed file with 145 additions and 91 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -28,6 +28,7 @@ #include "parquet/schema.h" #include "parquet/types.h" #include "parquet/util/logging.h" #include "parquet/util/macros.h" #include "parquet/util/memory.h" #include "parquet/util/visibility.h" @@ -43,9 +44,9 @@ static bool DEFAULT_USE_BUFFERED_STREAM = false; class PARQUET_EXPORT ColumnEncryptionProperties { public: class Builder { public: Builder(const std::string& path, bool encrypt) : path_(path), encrypt_(encrypt), encrypted_with_footer_key_(encrypt) {} Builder* key(const std::string& key) { DCHECK(key.length() == 16 || key.length() == 24 || key.length() == 32); @@ -67,11 +68,11 @@ class PARQUET_EXPORT ColumnEncryptionProperties { } std::shared_ptr<ColumnEncryptionProperties> build() { return std::make_shared<ColumnEncryptionProperties>( path_, encrypt_, encrypted_with_footer_key_, key_, key_metadata_); } private: std::string path_; bool encrypt_; bool encrypted_with_footer_key_; @@ -83,10 +84,14 @@ class PARQUET_EXPORT ColumnEncryptionProperties { ColumnEncryptionProperties(const ColumnEncryptionProperties& other) = default; ColumnEncryptionProperties(ColumnEncryptionProperties&& other) = default; ColumnEncryptionProperties(const std::string& path, bool encrypt, bool encrypted_with_footer_key, const std::string& key, const std::string& key_metadata) : path_(path), encrypt_(encrypt), encrypted_with_footer_key_(encrypted_with_footer_key), key_(key), key_metadata_(key_metadata) {} const std::string& path() const { return path_; } bool encrypted() const { return encrypt_; } @@ -102,36 +107,78 @@ class PARQUET_EXPORT ColumnEncryptionProperties { std::string key_metadata_; }; class PARQUET_EXPORT ColumnDecryptionProperties { public: class Builder { public: Builder() = default; Builder(const std::string& path) : path_(path) {} Builder* key(const std::string& key) { key_ = key; return this; } std::shared_ptr<ColumnDecryptionProperties> build() { return std::make_shared<ColumnDecryptionProperties>(path_, key_); } private: std::string path_; std::string key_; }; ColumnDecryptionProperties(const std::string& path, const std::string& key) : path_(path), key_(key) {} const std::string& path() { return path_; } const std::string& key() { return key_; } private: std::string path_; std::string key_; }; class PARQUET_EXPORT FileDecryptionProperties { public: class Builder { public: Builder* footer_key(const std::string& key) { footer_key_ = key; return this; } Builder* column_properties(const std::map<std::string, std::shared_ptr<ColumnDecryptionProperties>>& column_properties) { column_properties_ = column_properties; return this; } Builder* key_retriever(const std::shared_ptr<DecryptionKeyRetriever>& key_retriever) { key_retriever_ = key_retriever; return this; } Builder* aad(const std::string& aad) { aad_ = aad; return this; } std::shared_ptr<FileDecryptionProperties> build() { return std::make_shared<FileDecryptionProperties>(footer_key_, key_retriever_, aad_, column_properties_); } private: std::string footer_key_; std::shared_ptr<DecryptionKeyRetriever> key_retriever_; std::string aad_; std::map<std::string, std::shared_ptr<ColumnDecryptionProperties>> column_properties_; }; FileDecryptionProperties(const std::string& footer_key, const std::shared_ptr<DecryptionKeyRetriever>& key_retriever, const std::string& aad, const std::map<std::string, std::shared_ptr<ColumnDecryptionProperties>>& column_properties) : footer_key_(footer_key), key_retriever_(key_retriever), aad_(aad), column_properties_(column_properties) {} const std::string& GetColumnKey(const std::shared_ptr<schema::ColumnPath>& columnPath, const std::string& key_metadata = "") { if (key_metadata.empty()) { return column_properties_.at(columnPath->ToDotString())->key(); } if (key_retriever_ == NULLPTR) { throw ParquetException("no key retriever is provided for column key metadata"); } return key_retriever_->GetKey(key_metadata); @@ -141,7 +188,7 @@ class PARQUET_EXPORT FileDecryptionProperties { if (footer_key_metadata.empty()) { return footer_key_; } if (key_retriever_ == NULLPTR) { throw ParquetException("no key retriever is provided for footer key metadata"); } return key_retriever_->GetKey(footer_key_metadata); @@ -150,11 +197,9 @@ class PARQUET_EXPORT FileDecryptionProperties { private: std::string footer_key_; std::shared_ptr<DecryptionKeyRetriever> key_retriever_; std::string aad_; std::map<std::string, std::shared_ptr<ColumnDecryptionProperties>> column_properties_; }; class PARQUET_EXPORT ReaderProperties { @@ -271,11 +316,11 @@ class PARQUET_EXPORT ColumnProperties { class PARQUET_EXPORT FileEncryptionProperties { public: class Builder { public: Builder() : algorithm_(DEFAULT_ENCRYPTION_ALGORITHM), uniform_encryption_(true) {} Builder(const std::string& key) : algorithm_(DEFAULT_ENCRYPTION_ALGORITHM), uniform_encryption_(true) { DCHECK(key.length() == 16 || key.length() == 24 || key.length() == 32); footer_key_ = key; } @@ -293,7 +338,8 @@ class PARQUET_EXPORT FileEncryptionProperties { Builder* footer_key_metadata(const std::string& key_metadata) { DCHECK(!footer_key_.empty()); DCHECK(!key_metadata.empty() && key_metadata.length() < MAXIMAL_KEY_METADATA_LENGTH); footer_key_metadata_ = key_metadata; return this; } @@ -306,7 +352,8 @@ class PARQUET_EXPORT FileEncryptionProperties { Builder* aad_metadata(const std::string& aad_metadata) { DCHECK(!aad_.empty()); DCHECK(!aad_metadata.empty() && aad_metadata.length() < MAXIMAL_AAD_METADATA_LENGTH); aad_metadata_ = aad_metadata; return this; } @@ -317,51 +364,54 @@ class PARQUET_EXPORT FileEncryptionProperties { * if encrypt_the_rest = true, other columns will be encrypted with footer key * else, other columns will be unencrypted */ Builder* column_properties( const std::map<std::string, std::shared_ptr<ColumnEncryptionProperties>>& column_properties, bool encrypt_the_rest = DEFAULT_ENCRYPT_THE_REST) { encrypt_the_rest_ = encrypt_the_rest; column_properties_ = column_properties; if (!footer_key_.empty()) { uniform_encryption_ = true; for (const auto& col : column_properties) { if (col.second->key().compare(footer_key_) != 0) { uniform_encryption_ = false; break; } } } else { if (encrypt_the_rest) { throw ParquetException("Encrypt the rest with null footer key"); } bool all_are_unencrypted = true; for (const auto& col : column_properties) { if (col.second->encrypted()) { if (col.second->key().empty()) { throw ParquetException("Encrypt column with null footer key"); } all_are_unencrypted = false; } } if (all_are_unencrypted) { throw ParquetException("Footer and all columns unencrypted"); } } return this; } std::shared_ptr<FileEncryptionProperties> build() { std::shared_ptr<EncryptionProperties> footer_encryption; if (!footer_key_.empty()) { footer_encryption.reset(new EncryptionProperties(algorithm_, footer_key_, aad_)); } return std::make_shared<FileEncryptionProperties>( footer_encryption, footer_key_metadata_, aad_metadata_, uniform_encryption_, column_properties_, encrypt_the_rest_); } private: Encryption::type algorithm_; std::string footer_key_; std::string footer_key_metadata_; @@ -375,17 +425,19 @@ class PARQUET_EXPORT FileEncryptionProperties { bool encrypt_the_rest_; }; FileEncryptionProperties( const std::shared_ptr<EncryptionProperties>& footer_encryption, const std::string& footer_key_metadata, const std::string& aad_metadata, bool uniform_encryption, const std::map<std::string, std::shared_ptr<ColumnEncryptionProperties>>& column_properties, bool encrypt_the_rest) : footer_encryption_(footer_encryption), footer_key_metadata_(footer_key_metadata), aad_metadata_(aad_metadata), uniform_encryption_(uniform_encryption), column_properties_(column_properties), encrypt_the_rest_(encrypt_the_rest) {} std::shared_ptr<EncryptionProperties> GetFooterEncryptionProperties() { return footer_encryption_; @@ -405,7 +457,7 @@ class PARQUET_EXPORT FileEncryptionProperties { // non-uniform encryption std::string path_str = path->ToDotString(); if (column_properties_.find(path_str) != column_properties_.end()) { return column_properties_[path_str]; } // encrypted with footer key @@ -427,15 +479,16 @@ class PARQUET_EXPORT FileEncryptionProperties { // non-uniform encryption std::string path_str = path->ToDotString(); if (column_properties_.find(path_str) != column_properties_.end()) { return std::make_shared<EncryptionProperties>(footer_encryption_->algorithm(), column_properties_[path_str]->key(), footer_encryption_->aad()); } if (encrypt_the_rest_) { return footer_encryption_; } return NULLPTR; } private: @@ -589,7 +642,8 @@ class PARQUET_EXPORT WriterProperties { return this->compression(path->ToDotString(), codec); } Builder* encryption( const std::shared_ptr<FileEncryptionProperties>& file_encryption) { file_encryption_ = file_encryption; return this; } @@ -682,8 +736,8 @@ class PARQUET_EXPORT WriterProperties { } inline std::shared_ptr<EncryptionProperties> footer_encryption() const { if (parquet_file_encryption_ == NULLPTR) { return NULLPTR; } else { return parquet_file_encryption_->GetFooterEncryptionProperties(); } @@ -737,7 +791,7 @@ class PARQUET_EXPORT WriterProperties { if (parquet_file_encryption_) { return parquet_file_encryption_->GetColumnCryptoMetaData(path); } else { return NULLPTR; } } @@ -746,7 +800,7 @@ class PARQUET_EXPORT WriterProperties { if (parquet_file_encryption_) { return parquet_file_encryption_->GetColumnEncryptionProperties(path); } else { return NULLPTR; } } @@ -785,4 +839,4 @@ std::shared_ptr<WriterProperties> PARQUET_EXPORT default_writer_properties(); } // namespace parquet #endif // PARQUET_COLUMN_PROPERTIES_H -
thamht4190 revised this gist
Sep 12, 2018 . 1 changed file with 22 additions and 20 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -79,6 +79,10 @@ class PARQUET_EXPORT ColumnEncryptionProperties { std::string key_metadata_; }; ColumnEncryptionProperties() = default; ColumnEncryptionProperties(const ColumnEncryptionProperties& other) = default; ColumnEncryptionProperties(ColumnEncryptionProperties&& other) = default; ColumnEncryptionProperties(const std::string& path, bool encrypt, bool encrypted_with_footer_key, const std::string& key, const std::string& key_metadata) : path_(path), encrypt_(encrypt), encrypted_with_footer_key_(encrypted_with_footer_key), @@ -313,7 +317,7 @@ class PARQUET_EXPORT FileEncryptionProperties { * if encrypt_the_rest = true, other columns will be encrypted with footer key * else, other columns will be unencrypted */ Builder* column_properties(const std::map<std::string, std::shared_ptr<ColumnEncryptionProperties>>& column_properties, bool encrypt_the_rest = DEFAULT_ENCRYPT_THE_REST) { encrypt_the_rest_ = encrypt_the_rest; column_properties_ = column_properties; @@ -322,7 +326,7 @@ class PARQUET_EXPORT FileEncryptionProperties { uniform_encryption_ = true; for (const auto& col : column_properties) { if (col.second->key().compare(footer_key_) != 0) { uniform_encryption_ = false; break; } @@ -333,8 +337,8 @@ class PARQUET_EXPORT FileEncryptionProperties { } bool all_are_unencrypted = true; for (const auto& col : column_properties) { if (col.second->encrypted()) { if (col.second->key().empty()) { throw ParquetException("Encrypt column with null footer key"); } all_are_unencrypted = false; @@ -353,7 +357,7 @@ class PARQUET_EXPORT FileEncryptionProperties { if (!footer_key_.empty()) { footer_encryption.reset(new EncryptionProperties(algorithm_, footer_key_, aad_)); } return std::make_shared<FileEncryptionProperties>(footer_encryption, footer_key_metadata_, aad_metadata_, uniform_encryption_, column_properties_, encrypt_the_rest_); } @@ -367,27 +371,29 @@ class PARQUET_EXPORT FileEncryptionProperties { bool uniform_encryption_; std::map<std::string, std::shared_ptr<ColumnEncryptionProperties>> column_properties_; bool encrypt_the_rest_; }; FileEncryptionProperties(const std::shared_ptr<EncryptionProperties>& footer_encryption, const std::string& footer_key_metadata, const std::string& aad_metadata, bool uniform_encryption, const std::map<std::string, std::shared_ptr<ColumnEncryptionProperties>>& column_properties, bool encrypt_the_rest) : footer_encryption_(footer_encryption) , footer_key_metadata_(footer_key_metadata) , aad_metadata_(aad_metadata) , uniform_encryption_(uniform_encryption) , column_properties_(column_properties) , encrypt_the_rest_(encrypt_the_rest) {} std::shared_ptr<EncryptionProperties> GetFooterEncryptionProperties() { return footer_encryption_; } const std::string& footer_key_metadata() const { return footer_key_metadata_; } const std::string& aad_metadata() const { return aad_metadata_; } std::shared_ptr<ColumnEncryptionProperties> GetColumnCryptoMetaData( const std::shared_ptr<schema::ColumnPath>& path) { @@ -398,12 +404,10 @@ class PARQUET_EXPORT FileEncryptionProperties { // non-uniform encryption std::string path_str = path->ToDotString(); if (column_properties_.find(path_str) != column_properties_.end()) { return column_properties_[path_str]; } // encrypted with footer key if (encrypt_the_rest_) { return ColumnEncryptionProperties::Builder(path->ToDotString(), true).build(); @@ -422,11 +426,9 @@ class PARQUET_EXPORT FileEncryptionProperties { // non-uniform encryption std::string path_str = path->ToDotString(); if (column_properties_.find(path_str) != column_properties_.end()) { return std::make_shared<EncryptionProperties>( footer_encryption_->algorithm(), column_properties_[path_str]->key(), footer_encryption_->aad()); } if (encrypt_the_rest_) { @@ -443,7 +445,7 @@ class PARQUET_EXPORT FileEncryptionProperties { bool uniform_encryption_; std::map<std::string, std::shared_ptr<ColumnEncryptionProperties>> column_properties_; bool encrypt_the_rest_; }; @@ -783,4 +785,4 @@ std::shared_ptr<WriterProperties> PARQUET_EXPORT default_writer_properties(); } // namespace parquet #endif // PARQUET_COLUMN_PROPERTIES_H -
thamht4190 revised this gist
Sep 11, 2018 . 1 changed file with 56 additions and 71 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -62,7 +62,7 @@ class PARQUET_EXPORT ColumnEncryptionProperties { Builder* key_id(uint32_t key_id) { std::string key_metadata = std::string(reinterpret_cast<char*>(&key_id), 4); this->key_metadata(key_metadata); return this; } @@ -213,9 +213,9 @@ static constexpr ParquetVersion::type DEFAULT_WRITER_VERSION = static const char DEFAULT_CREATED_BY[] = CREATED_BY_VERSION; static constexpr Compression::type DEFAULT_COMPRESSION_TYPE = Compression::UNCOMPRESSED; static constexpr Encryption::type DEFAULT_ENCRYPTION_ALGORITHM = Encryption::AES_GCM_V1; static constexpr int32_t MAXIMAL_KEY_METADATA_LENGTH = 256; static constexpr int32_t MAXIMAL_AAD_METADATA_LENGTH = 256; static constexpr bool DEFAULT_ENCRYPT_THE_REST = true; class PARQUET_EXPORT ColumnProperties { public: @@ -269,7 +269,7 @@ class PARQUET_EXPORT FileEncryptionProperties { class Builder { public: Builder() : algorithm_(DEFAULT_ENCRYPTION_ALGORITHM), uniform_encryption_(true) {} Builder(const std::string& key) : algorithm_(DEFAULT_ENCRYPTION_ALGORITHM), uniform_encryption_(true) { DCHECK(key.length() == 16 || key.length() == 24 || key.length() == 32); @@ -294,79 +294,35 @@ class PARQUET_EXPORT FileEncryptionProperties { return this; } Builder* aad(const std::string& aad) { DCHECK(!aad.empty()); aad_ = aad; return this; } Builder* aad_metadata(const std::string& aad_metadata) { DCHECK(!aad_.empty()); DCHECK(!aad_metadata.empty() && aad_metadata.length() < MAXIMAL_AAD_METADATA_LENGTH); aad_metadata_ = aad_metadata; return this; } /** * encrypt_the_rest will define if other columns (not defined in columns argument) * will be encrypted or not * if encrypt_the_rest = true, other columns will be encrypted with footer key * else, other columns will be unencrypted */ Builder* column_properties(const std::map<std::string, ColumnEncryptionProperties>& column_properties, bool encrypt_the_rest = DEFAULT_ENCRYPT_THE_REST) { encrypt_the_rest_ = encrypt_the_rest; column_properties_ = column_properties; if (!footer_key_.empty()) { uniform_encryption_ = true; for (const auto& col : column_properties) { if (col.second.key().compare(footer_key_) != 0) { uniform_encryption_ = false; break; } @@ -376,9 +332,9 @@ class PARQUET_EXPORT FileEncryptionProperties { throw ParquetException("Encrypt the rest with null footer key"); } bool all_are_unencrypted = true; for (const auto& col : column_properties) { if (col.second.encrypted()) { if (col.second.key().empty()) { throw ParquetException("Encrypt column with null footer key"); } all_are_unencrypted = false; @@ -389,7 +345,41 @@ class PARQUET_EXPORT FileEncryptionProperties { throw ParquetException("Footer and all columns unencrypted"); } } return this; } std::shared_ptr<FileEncryptionProperties> build() { std::shared_ptr<EncryptionProperties> footer_encryption; if (!footer_key_.empty()) { footer_encryption.reset(new EncryptionProperties(algorithm_, footer_key_, aad_)); } return std::make_shared<FileEncryptionProperties>(footer_encryption, footer_key_metadata_ aad_metadata_, uniform_encryption_, column_properties_, encrypt_the_rest_); } private: Encryption::type algorithm_; std::string footer_key_; std::string footer_key_metadata_; std::string aad_; std::string aad_metadata_; bool uniform_encryption_; std::map<std::string, ColumnEncryptionProperties> column_properties_; bool encrypt_the_rest_; }; FileEncryptionProperties(const std::shared_ptr<EncryptionProperties>& footer_encryption, const std::string& footer_key_metadata, const std::string& aad_metadata, const std::map<std::string, ColumnEncryptionProperties>& column_properties, bool encrypt_the_rest) : footer_encryption_(footer_encryption) , footer_key_metadata_(footer_key_metadata) , aad_metadata_(aad_metadata) , column_properties_(column_properties) , encrypt_the_rest_(encrypt_the_rest) {} std::shared_ptr<EncryptionProperties> GetFooterEncryptionProperties() { return footer_encryption_; @@ -446,11 +436,6 @@ class PARQUET_EXPORT FileEncryptionProperties { return nullptr; } private: std::shared_ptr<EncryptionProperties> footer_encryption_; std::string footer_key_metadata_; -
thamht4190 revised this gist
Sep 7, 2018 . 2 changed files with 801 additions and 273 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,273 +0,0 @@ This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,801 @@ // Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #ifndef PARQUET_COLUMN_PROPERTIES_H #define PARQUET_COLUMN_PROPERTIES_H #include <memory> #include <string> #include <unordered_map> #include "parquet/encryption.h" #include "parquet/exception.h" #include "parquet/parquet_version.h" #include "parquet/schema.h" #include "parquet/types.h" #include "parquet/util/logging.h" #include "parquet/util/memory.h" #include "parquet/util/visibility.h" namespace parquet { struct ParquetVersion { enum type { PARQUET_1_0, PARQUET_2_0 }; }; static int64_t DEFAULT_BUFFER_SIZE = 0; static bool DEFAULT_USE_BUFFERED_STREAM = false; class PARQUET_EXPORT ColumnEncryptionProperties { public: class Builder { public: Builder(const std::string& path, bool encrypt) : path_(path), encrypt_(encrypt), encrypted_with_footer_key_(encrypt) {} Builder* key(const std::string& key) { DCHECK(key.length() == 16 || key.length() == 24 || key.length() == 32); DCHECK(encrypt_); key_ = key; return this; } Builder* key_metadata(const std::string& key_id) { DCHECK(!key_id.empty()); key_metadata_ = key_id; return this; } Builder* key_id(uint32_t key_id) { std::string key_metadata = std::string(reinterpret_cast<char*>(&key_id), 4); key_metadata(key_metadata); return this; } std::shared_ptr<ColumnEncryptionProperties> build() { return std::make_shared<ColumnEncryptionProperties>(path_, encrypt_, encrypted_with_footer_key_, key_, key_metadata_); } private: std::string path_; bool encrypt_; bool encrypted_with_footer_key_; std::string key_; std::string key_metadata_; }; ColumnEncryptionProperties(const std::string& path, bool encrypt, bool encrypted_with_footer_key, const std::string& key, const std::string& key_metadata) : path_(path), encrypt_(encrypt), encrypted_with_footer_key_(encrypted_with_footer_key), key_(key), key_metadata_(key_metadata) {} const std::string& path() const { return path_; } bool encrypted() const { return encrypt_; } bool encrypted_with_footer_key() const { return encrypted_with_footer_key_; } const std::string& key() const { return key_; } const std::string& key_metadata() const { return key_metadata_; } private: std::string path_; bool encrypt_; bool encrypted_with_footer_key_; std::string key_; std::string key_metadata_; }; class PARQUET_EXPORT FileDecryptionProperties { public: FileDecryptionProperties(const std::string& footer_key) : footer_key_(footer_key) { DCHECK(footer_key_.length() == 16 || footer_key_.length() == 24 || footer_key_.length() == 32); } FileDecryptionProperties(const std::shared_ptr<DecryptionKeyRetriever>& key_retriever) : key_retriever_(key_retriever) {} void SetAad(const std::string& aad) { aad_ = aad; } void SetColumnKey(const std::string& name, const std::string& key) { SetColumnKey(std::vector<std::string>({name}), key); } void SetColumnKey(const std::vector<std::string>& paths, const std::string& key) { DCHECK(key.length() == 16 || key.length() == 24 || key.length() == 32); schema::ColumnPath columnPath(paths); column_keys_[columnPath.ToDotString()] = key; } const std::string& GetColumnKey(const std::shared_ptr<schema::ColumnPath>& columnPath, const std::string& key_metadata = "") { if (key_metadata.empty()) { return column_keys_.at(columnPath->ToDotString()); } if (key_retriever_ == nullptr) { throw ParquetException("no key retriever is provided for column key metadata"); } return key_retriever_->GetKey(key_metadata); } const std::string& GetFooterKey(const std::string& footer_key_metadata = "") { if (footer_key_metadata.empty()) { return footer_key_; } if (key_retriever_ == nullptr) { throw ParquetException("no key retriever is provided for footer key metadata"); } return key_retriever_->GetKey(footer_key_metadata); } const std::string& GetAad() { return aad_; } private: std::string footer_key_; std::string aad_; std::map<std::string, std::string> column_keys_; std::shared_ptr<DecryptionKeyRetriever> key_retriever_; }; class PARQUET_EXPORT ReaderProperties { public: explicit ReaderProperties(::arrow::MemoryPool* pool = ::arrow::default_memory_pool()) : pool_(pool) { buffered_stream_enabled_ = DEFAULT_USE_BUFFERED_STREAM; buffer_size_ = DEFAULT_BUFFER_SIZE; } ::arrow::MemoryPool* memory_pool() const { return pool_; } std::unique_ptr<InputStream> GetStream(RandomAccessSource* source, int64_t start, int64_t num_bytes) { std::unique_ptr<InputStream> stream; if (buffered_stream_enabled_) { stream.reset( new BufferedInputStream(pool_, buffer_size_, source, start, num_bytes)); } else { stream.reset(new InMemoryInputStream(source, start, num_bytes)); } return stream; } bool is_buffered_stream_enabled() const { return buffered_stream_enabled_; } void enable_buffered_stream() { buffered_stream_enabled_ = true; } void disable_buffered_stream() { buffered_stream_enabled_ = false; } void set_buffer_size(int64_t buf_size) { buffer_size_ = buf_size; } int64_t buffer_size() const { return buffer_size_; } void file_decryption(const std::shared_ptr<FileDecryptionProperties>& decryption) { file_decryption_ = decryption; } FileDecryptionProperties* file_decryption() { return file_decryption_.get(); } private: ::arrow::MemoryPool* pool_; int64_t buffer_size_; bool buffered_stream_enabled_; std::shared_ptr<FileDecryptionProperties> file_decryption_; }; ReaderProperties PARQUET_EXPORT default_reader_properties(); static constexpr int64_t DEFAULT_PAGE_SIZE = 1024 * 1024; static constexpr bool DEFAULT_IS_DICTIONARY_ENABLED = true; static constexpr int64_t DEFAULT_DICTIONARY_PAGE_SIZE_LIMIT = DEFAULT_PAGE_SIZE; static constexpr int64_t DEFAULT_WRITE_BATCH_SIZE = 1024; static constexpr int64_t DEFAULT_MAX_ROW_GROUP_LENGTH = 64 * 1024 * 1024; static constexpr bool DEFAULT_ARE_STATISTICS_ENABLED = true; static constexpr int64_t DEFAULT_MAX_STATISTICS_SIZE = 4096; static constexpr Encoding::type DEFAULT_ENCODING = Encoding::PLAIN; static constexpr ParquetVersion::type DEFAULT_WRITER_VERSION = ParquetVersion::PARQUET_1_0; static const char DEFAULT_CREATED_BY[] = CREATED_BY_VERSION; static constexpr Compression::type DEFAULT_COMPRESSION_TYPE = Compression::UNCOMPRESSED; static constexpr Encryption::type DEFAULT_ENCRYPTION_ALGORITHM = Encryption::AES_GCM_V1; static constexpt int32_t MAXIMAL_KEY_METADATA_LENGTH = 256; static constexpt int32_t MAXIMAL_AAD_METADATA_LENGTH = 256; static constexpt bool ENCRYPT_THE_REST_DEFAULT = true; class PARQUET_EXPORT ColumnProperties { public: ColumnProperties(Encoding::type encoding = DEFAULT_ENCODING, Compression::type codec = DEFAULT_COMPRESSION_TYPE, bool dictionary_enabled = DEFAULT_IS_DICTIONARY_ENABLED, bool statistics_enabled = DEFAULT_ARE_STATISTICS_ENABLED, size_t max_stats_size = DEFAULT_MAX_STATISTICS_SIZE) : encoding_(encoding), codec_(codec), dictionary_enabled_(dictionary_enabled), statistics_enabled_(statistics_enabled), max_stats_size_(max_stats_size) {} void set_encoding(Encoding::type encoding) { encoding_ = encoding; } void set_compression(Compression::type codec) { codec_ = codec; } void set_dictionary_enabled(bool dictionary_enabled) { dictionary_enabled_ = dictionary_enabled; } void set_statistics_enabled(bool statistics_enabled) { statistics_enabled_ = statistics_enabled; } void set_max_statistics_size(size_t max_stats_size) { max_stats_size_ = max_stats_size; } Encoding::type encoding() const { return encoding_; } Compression::type compression() const { return codec_; } bool dictionary_enabled() const { return dictionary_enabled_; } bool statistics_enabled() const { return statistics_enabled_; } size_t max_statistics_size() const { return max_stats_size_; } private: Encoding::type encoding_; Compression::type codec_; bool dictionary_enabled_; bool statistics_enabled_; size_t max_stats_size_; }; class PARQUET_EXPORT FileEncryptionProperties { public: class Builder { public: Builder() : algorithm_(DEFAULT_ENCRYPTION_ALGORITHM), uniform_encryption_(true) {} Builder(const std::string& key) : algorithm_(DEFAULT_ENCRYPTION_ALGORITHM), uniform_encryption_(true) { DCHECK(key.length() == 16 || key.length() == 24 || key.length() == 32); footer_key_ = key; } Builder* algorithm(Encryption::type algorithm) { algorithm_ = algorithm; return this; } Builder* footer_key(const std::string& key) { DCHECK(key.length() == 16 || key.length() == 24 || key.length() == 32); footer_key_ = key; return this; } Builder* footer_key_metadata(const std::string& key_metadata) { DCHECK(!footer_key_.empty()); DCHECK(!key_metadata.empty() && key_metadata.length() < MAXIMAL_KEY_METADATA_LENGTH); footer_key_metadata_ = key_metadata; return this; } Builder* aad(const std::string& aad) { DCHECK(!aad.empty()); aad_ = aad; return this; } Builder* aad_metadata(const std::string& aad_metadata) { DCHECK(!aad_.empty()); DCHECK(!aad_metadata.empty() && aad_metadata.length() < MAXIMAL_AAD_METADATA_LENGTH); aad_metadata_ = aad_metadata; return this; } Builder* column_properties(const std::map<std::string, ColumnEncryptionProperties>& column_properties, bool encrypt_the_rest) { column_properties_ = column_properties; encrypt_the_rest_ = encrypt_the_rest; return this; } std::shared_ptr<FileEncryptionProperties> build() { } private: Encryption::type algorithm_; std::string footer_key_; std::string footer_key_metadata_; std::string aad_; std::string aad_metadata_; bool uniform_encryption_; std::map<std::string, ColumnEncryptionProperties> column_properties_; bool encrypt_the_rest_; }; FileEncryptionProperties(const FileEncryptionProperties&) = default; FileEncryptionProperties(Encryption::type algorithm, const std::string& key, const std::string& key_metadata) { DCHECK(key.length() == 16 || key.length() == 24 || key.length() == 32); if (!key_metadata.empty()) { DCHECK(key_metadata.length() <= 256); } footer_encryption_.reset(new EncryptionProperties(algorithm, key)); footer_key_metadata_ = key_metadata; uniform_encryption_ = !key.empty(); } FileEncryptionProperties(Encryption::type algorithm, const std::string& key, int key_id) : FileEncryptionProperties( algorithm, key, key_id == 0 ? "" : std::string(reinterpret_cast<char*>(&key_id), 4)) {} /** * encrypt_the_rest will define if other columns (not defined in columns argument) * will be encrypted or not * if encrypt_the_rest = true, other columns will be encrypted with footer key * else, other columns will be unencrypted */ void SetupColumns(const std::vector<ColumnEncryptionProperties>& columns, bool encrypt_the_rest) { encrypt_the_rest_ = encrypt_the_rest; columns_ = columns; if (!footer_encryption_->key().empty()) { uniform_encryption_ = true; for (const auto& col : columns) { if (col.key().compare(footer_encryption_->key()) != 0) { uniform_encryption_ = false; break; } } } else { if (encrypt_the_rest) { throw ParquetException("Encrypt the rest with null footer key"); } bool all_are_unencrypted = true; for (const auto& col : columns) { if (col.encrypted()) { if (col.key().empty()) { throw ParquetException("Encrypt column with null footer key"); } all_are_unencrypted = false; } } if (all_are_unencrypted) { throw ParquetException("Footer and all columns unencrypted"); } } } std::shared_ptr<EncryptionProperties> GetFooterEncryptionProperties() { return footer_encryption_; } const std::string& footer_key_metadata() { return footer_key_metadata_; } const std::string& aad_metadata() { return aad_metadata_; } std::shared_ptr<ColumnEncryptionProperties> GetColumnCryptoMetaData( const std::shared_ptr<schema::ColumnPath>& path) { // uniform encryption if (uniform_encryption_) { return ColumnEncryptionProperties::Builder(path->ToDotString(), true).build(); } // non-uniform encryption std::string path_str = path->ToDotString(); for (const auto& col : columns_) { if (col.path() == path_str) { return std::shared_ptr<ColumnEncryptionProperties>( const_cast<ColumnEncryptionProperties*>(&col)); } } // encrypted with footer key if (encrypt_the_rest_) { return ColumnEncryptionProperties::Builder(path->ToDotString(), true).build(); } // unencrypted return ColumnEncryptionProperties::Builder(path->ToDotString(), false).build(); } std::shared_ptr<EncryptionProperties> GetColumnEncryptionProperties( const std::shared_ptr<schema::ColumnPath>& path) { // uniform encryption if (uniform_encryption_) { return footer_encryption_; } // non-uniform encryption std::string path_str = path->ToDotString(); for (const auto& col : columns_) { if (col.path() == path_str) { return std::make_shared<EncryptionProperties>( footer_encryption_->algorithm(), col.key(), footer_encryption_->aad()); } } if (encrypt_the_rest_) { return footer_encryption_; } return nullptr; } void SetupAad(const std::string& aad, const std::string& aad_metadata = "") { footer_encryption_->aad(aad); aad_metadata_ = aad_metadata; } private: std::shared_ptr<EncryptionProperties> footer_encryption_; std::string footer_key_metadata_; std::string aad_metadata_; bool uniform_encryption_; std::vector<ColumnEncryptionProperties> columns_; bool encrypt_the_rest_; }; class PARQUET_EXPORT WriterProperties { public: class Builder { public: Builder() : pool_(::arrow::default_memory_pool()), dictionary_pagesize_limit_(DEFAULT_DICTIONARY_PAGE_SIZE_LIMIT), write_batch_size_(DEFAULT_WRITE_BATCH_SIZE), max_row_group_length_(DEFAULT_MAX_ROW_GROUP_LENGTH), pagesize_(DEFAULT_PAGE_SIZE), version_(DEFAULT_WRITER_VERSION), created_by_(DEFAULT_CREATED_BY) {} virtual ~Builder() {} Builder* memory_pool(::arrow::MemoryPool* pool) { pool_ = pool; return this; } Builder* enable_dictionary() { default_column_properties_.set_dictionary_enabled(true); return this; } Builder* disable_dictionary() { default_column_properties_.set_dictionary_enabled(false); return this; } Builder* enable_dictionary(const std::string& path) { dictionary_enabled_[path] = true; return this; } Builder* enable_dictionary(const std::shared_ptr<schema::ColumnPath>& path) { return this->enable_dictionary(path->ToDotString()); } Builder* disable_dictionary(const std::string& path) { dictionary_enabled_[path] = false; return this; } Builder* disable_dictionary(const std::shared_ptr<schema::ColumnPath>& path) { return this->disable_dictionary(path->ToDotString()); } Builder* dictionary_pagesize_limit(int64_t dictionary_psize_limit) { dictionary_pagesize_limit_ = dictionary_psize_limit; return this; } Builder* write_batch_size(int64_t write_batch_size) { write_batch_size_ = write_batch_size; return this; } Builder* max_row_group_length(int64_t max_row_group_length) { max_row_group_length_ = max_row_group_length; return this; } Builder* data_pagesize(int64_t pg_size) { pagesize_ = pg_size; return this; } Builder* version(ParquetVersion::type version) { version_ = version; return this; } Builder* created_by(const std::string& created_by) { created_by_ = created_by; return this; } /** * Define the encoding that is used when we don't utilise dictionary encoding. * * This either apply if dictionary encoding is disabled or if we fallback * as the dictionary grew too large. */ Builder* encoding(Encoding::type encoding_type) { if (encoding_type == Encoding::PLAIN_DICTIONARY || encoding_type == Encoding::RLE_DICTIONARY) { throw ParquetException("Can't use dictionary encoding as fallback encoding"); } default_column_properties_.set_encoding(encoding_type); return this; } /** * Define the encoding that is used when we don't utilise dictionary encoding. * * This either apply if dictionary encoding is disabled or if we fallback * as the dictionary grew too large. */ Builder* encoding(const std::string& path, Encoding::type encoding_type) { if (encoding_type == Encoding::PLAIN_DICTIONARY || encoding_type == Encoding::RLE_DICTIONARY) { throw ParquetException("Can't use dictionary encoding as fallback encoding"); } encodings_[path] = encoding_type; return this; } /** * Define the encoding that is used when we don't utilise dictionary encoding. * * This either apply if dictionary encoding is disabled or if we fallback * as the dictionary grew too large. */ Builder* encoding(const std::shared_ptr<schema::ColumnPath>& path, Encoding::type encoding_type) { return this->encoding(path->ToDotString(), encoding_type); } Builder* compression(Compression::type codec) { default_column_properties_.set_compression(codec); return this; } Builder* max_statistics_size(size_t max_stats_sz) { default_column_properties_.set_max_statistics_size(max_stats_sz); return this; } Builder* compression(const std::string& path, Compression::type codec) { codecs_[path] = codec; return this; } Builder* compression(const std::shared_ptr<schema::ColumnPath>& path, Compression::type codec) { return this->compression(path->ToDotString(), codec); } Builder* encryption(const std::shared_ptr<FileEncryptionProperties>& file_encryption) { file_encryption_ = file_encryption; return this; } Builder* enable_statistics() { default_column_properties_.set_statistics_enabled(true); return this; } Builder* disable_statistics() { default_column_properties_.set_statistics_enabled(false); return this; } Builder* enable_statistics(const std::string& path) { statistics_enabled_[path] = true; return this; } Builder* enable_statistics(const std::shared_ptr<schema::ColumnPath>& path) { return this->enable_statistics(path->ToDotString()); } Builder* disable_statistics(const std::string& path) { statistics_enabled_[path] = false; return this; } Builder* disable_statistics(const std::shared_ptr<schema::ColumnPath>& path) { return this->disable_statistics(path->ToDotString()); } std::shared_ptr<WriterProperties> build() { std::unordered_map<std::string, ColumnProperties> column_properties; auto get = [&](const std::string& key) -> ColumnProperties& { auto it = column_properties.find(key); if (it == column_properties.end()) return column_properties[key] = default_column_properties_; else return it->second; }; for (const auto& item : encodings_) get(item.first).set_encoding(item.second); for (const auto& item : codecs_) get(item.first).set_compression(item.second); for (const auto& item : dictionary_enabled_) get(item.first).set_dictionary_enabled(item.second); for (const auto& item : statistics_enabled_) get(item.first).set_statistics_enabled(item.second); return std::shared_ptr<WriterProperties>(new WriterProperties( pool_, dictionary_pagesize_limit_, write_batch_size_, max_row_group_length_, pagesize_, version_, created_by_, std::move(file_encryption_), default_column_properties_, column_properties)); } private: ::arrow::MemoryPool* pool_; int64_t dictionary_pagesize_limit_; int64_t write_batch_size_; int64_t max_row_group_length_; int64_t pagesize_; ParquetVersion::type version_; std::string created_by_; std::shared_ptr<FileEncryptionProperties> file_encryption_; // Settings used for each column unless overridden in any of the maps below ColumnProperties default_column_properties_; std::unordered_map<std::string, Encoding::type> encodings_; std::unordered_map<std::string, Compression::type> codecs_; std::unordered_map<std::string, bool> dictionary_enabled_; std::unordered_map<std::string, bool> statistics_enabled_; }; inline ::arrow::MemoryPool* memory_pool() const { return pool_; } inline int64_t dictionary_pagesize_limit() const { return dictionary_pagesize_limit_; } inline int64_t write_batch_size() const { return write_batch_size_; } inline int64_t max_row_group_length() const { return max_row_group_length_; } inline int64_t data_pagesize() const { return pagesize_; } inline ParquetVersion::type version() const { return parquet_version_; } inline std::string created_by() const { return parquet_created_by_; } inline FileEncryptionProperties* file_encryption() const { return parquet_file_encryption_.get(); } inline std::shared_ptr<EncryptionProperties> footer_encryption() const { if (parquet_file_encryption_ == nullptr) { return nullptr; } else { return parquet_file_encryption_->GetFooterEncryptionProperties(); } } inline Encoding::type dictionary_index_encoding() const { if (parquet_version_ == ParquetVersion::PARQUET_1_0) { return Encoding::PLAIN_DICTIONARY; } else { return Encoding::RLE_DICTIONARY; } } inline Encoding::type dictionary_page_encoding() const { if (parquet_version_ == ParquetVersion::PARQUET_1_0) { return Encoding::PLAIN_DICTIONARY; } else { return Encoding::PLAIN; } } const ColumnProperties& column_properties( const std::shared_ptr<schema::ColumnPath>& path) const { auto it = column_properties_.find(path->ToDotString()); if (it != column_properties_.end()) return it->second; return default_column_properties_; } Encoding::type encoding(const std::shared_ptr<schema::ColumnPath>& path) const { return column_properties(path).encoding(); } Compression::type compression(const std::shared_ptr<schema::ColumnPath>& path) const { return column_properties(path).compression(); } bool dictionary_enabled(const std::shared_ptr<schema::ColumnPath>& path) const { return column_properties(path).dictionary_enabled(); } bool statistics_enabled(const std::shared_ptr<schema::ColumnPath>& path) const { return column_properties(path).statistics_enabled(); } size_t max_statistics_size(const std::shared_ptr<schema::ColumnPath>& path) const { return column_properties(path).max_statistics_size(); } std::shared_ptr<ColumnEncryptionProperties> column_encryption_props( const std::shared_ptr<schema::ColumnPath>& path) const { if (parquet_file_encryption_) { return parquet_file_encryption_->GetColumnCryptoMetaData(path); } else { return nullptr; } } std::shared_ptr<EncryptionProperties> encryption( const std::shared_ptr<schema::ColumnPath>& path) const { if (parquet_file_encryption_) { return parquet_file_encryption_->GetColumnEncryptionProperties(path); } else { return nullptr; } } private: explicit WriterProperties( ::arrow::MemoryPool* pool, int64_t dictionary_pagesize_limit, int64_t write_batch_size, int64_t max_row_group_length, int64_t pagesize, ParquetVersion::type version, const std::string& created_by, std::shared_ptr<FileEncryptionProperties> file_encryption, const ColumnProperties& default_column_properties, const std::unordered_map<std::string, ColumnProperties>& column_properties) : pool_(pool), dictionary_pagesize_limit_(dictionary_pagesize_limit), write_batch_size_(write_batch_size), max_row_group_length_(max_row_group_length), pagesize_(pagesize), parquet_version_(version), parquet_created_by_(created_by), parquet_file_encryption_(file_encryption), default_column_properties_(default_column_properties), column_properties_(column_properties) {} ::arrow::MemoryPool* pool_; int64_t dictionary_pagesize_limit_; int64_t write_batch_size_; int64_t max_row_group_length_; int64_t pagesize_; ParquetVersion::type parquet_version_; std::string parquet_created_by_; std::shared_ptr<FileEncryptionProperties> parquet_file_encryption_; ColumnProperties default_column_properties_; std::unordered_map<std::string, ColumnProperties> column_properties_; }; std::shared_ptr<WriterProperties> PARQUET_EXPORT default_writer_properties(); } // namespace parquet #endif // PARQUET_COLUMN_PROPERTIES_H -
thamht4190 created this gist
Aug 13, 2018 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,273 @@ // Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #ifndef PARQUET_ENCRYPTION_H #define PARQUET_ENCRYPTION_H #include <map> #include <string> #include "parquet/util/crypto.h" #include "parquet/util/logging.h" #include "parquet/util/visibility.h" #include "parquet/exception.h" #include "parquet/schema.h" #include "parquet/types.h" namespace parquet { class PARQUET_EXPORT DecryptionKeyRetriever { public: virtual const std::string& GetKey(const std::string& key_metadata) = 0; virtual ~DecryptionKeyRetriever() {} }; // Simple integer key retriever class PARQUET_EXPORT IntegerKeyIdRetriever : public DecryptionKeyRetriever { public: void PutKey(uint32_t key_id, const std::string& key); const std::string& GetKey(const std::string& key_metadata); private: std::map<uint32_t, std::string> key_map_; }; // Simple string key retriever class PARQUET_EXPORT StringKeyIdRetriever : public DecryptionKeyRetriever { public: void PutKey(const std::string& key_id, const std::string& key); const std::string& GetKey(const std::string& key_metadata); private: std::map<std::string, std::string> key_map_; }; class PARQUET_EXPORT ColumnEncryptionProperties { public: ColumnEncryptionProperties() = default; ColumnEncryptionProperties(bool encrypt, std::string path) : encrypt_(encrypt), path_(path), encrypted_with_footer_key_(encrypt) {} bool encrypted() const { return encrypt_; } bool encrypted_with_footer_key() const { return encrypted_with_footer_key_; } const std::string& key() const { return key_; } const std::string& key_metadata() const { return key_metadata_; } void SetEncryptionKey(const std::string& key, uint32_t key_id = 0) { std::string key_metadata = key_id == 0 ? "" : std::string(reinterpret_cast<char*>(&key_id), 4); SetEncryptionKey(key, key_metadata); } void SetEncryptionKey(const std::string& key, const std::string& key_metadata) { if (!encrypt_) throw ParquetException("Setting key on unencrypted column: " + path_); if (key.empty()) throw ParquetException("Null key for " + path_); encrypted_with_footer_key_ = false; key_ = key; key_metadata_ = key_metadata; } const std::string& path() const { return path_; } private: bool encrypt_; std::string path_; bool encrypted_with_footer_key_; std::string key_; std::string key_metadata_; }; class PARQUET_EXPORT FileEncryptionProperties { public: FileEncryptionProperties() = default; FileEncryptionProperties(const FileEncryptionProperties&) = default; FileEncryptionProperties(Encryption::type algorithm, const std::string& key, const std::string& key_metadata) { DCHECK(key.length() == 16 || key.length() == 24 || key.length() == 32); if (!key_metadata.empty()) { DCHECK(key_metadata.length() <= 256); } footer_encryption_.reset(new EncryptionProperties(algorithm, key, key_metadata)); uniform_encryption_ = !key.empty(); } FileEncryptionProperties(Encryption::type algorithm, const std::string& key, int key_id) : FileEncryptionProperties( algorithm, key, key_id == 0 ? "" : std::string(reinterpret_cast<char*>(&key_id), 4)) {} ~FileEncryptionProperties() {} /** * encrypt_the_rest will define if other columns (not defined in columns argument) * will be encrypted or not * if encrypt_the_rest = true, other columns will be encrypted with footer key * else, other columns will be unencrypted */ void SetupColumns(const std::vector<ColumnEncryptionProperties>& columns, bool encrypt_the_rest) { encrypt_the_rest_ = encrypt_the_rest; columns_ = columns; if (!footer_encryption_->key().empty()) { uniform_encryption_ = true; for (const auto& col : columns) { if (col.key().compare(footer_encryption_->key()) != 0) { uniform_encryption_ = false; break; } } } else { if (encrypt_the_rest) { throw ParquetException("Encrypt the rest with null footer key"); } bool all_are_unencrypted = true; for (const auto& col : columns) { if (col.encrypted()) { if (col.key().empty()) { throw ParquetException("Encrypt column with null footer key"); } all_are_unencrypted = false; } } if (all_are_unencrypted) { throw ParquetException("Footer and all columns unencrypted"); } } } std::shared_ptr<EncryptionProperties> GetFooterEncryptionProperties() { return footer_encryption_; } std::shared_ptr<ColumnEncryptionProperties> GetColumnCryptoMetaData( const std::shared_ptr<schema::ColumnPath>& path) { // uniform encryption if (uniform_encryption_) { return std::make_shared<ColumnEncryptionProperties>(true, path->ToDotString()); } // non-uniform encryption std::string path_str = path->ToDotString(); for (const auto& col : columns_) { if (col.path() == path_str) { return std::shared_ptr<ColumnEncryptionProperties>( const_cast<ColumnEncryptionProperties*>(&col)); } } // encrypted with footer key if (encrypt_the_rest_) { return std::make_shared<ColumnEncryptionProperties>(true, path->ToDotString()); } // unencrypted return std::shared_ptr<ColumnEncryptionProperties>( new ColumnEncryptionProperties(false, path->ToDotString())); } std::shared_ptr<EncryptionProperties> GetColumnEncryptionProperties( const std::shared_ptr<schema::ColumnPath>& path) { // uniform encryption if (uniform_encryption_) { return footer_encryption_; } // non-uniform encryption std::string path_str = path->ToDotString(); for (const auto& col : columns_) { if (col.path() == path_str) { return std::make_shared<EncryptionProperties>(footer_encryption_->algorithm(), col.key(), col.key_metadata(), footer_encryption_->aad()); } } if (encrypt_the_rest_) { return footer_encryption_; } return nullptr; } void SetupAad(const std::string& aad) { footer_encryption_->aad(aad); } private: std::shared_ptr<EncryptionProperties> footer_encryption_; bool uniform_encryption_; std::vector<ColumnEncryptionProperties> columns_; bool encrypt_the_rest_; }; class PARQUET_EXPORT Encryptor { public: Encryptor(Encryption::type algorithm, bool metadata, const std::string& key, const std::string& aad) : algorithm_(algorithm), metadata_(metadata), key_(key), aad_(aad) {} int Encrypt(const uint8_t* plaintext, int plaintext_len, uint8_t* ciphertext) { auto key_bytes = reinterpret_cast<uint8_t*>(const_cast<char*>(key_.c_str())); auto aad_bytes = reinterpret_cast<uint8_t*>(const_cast<char*>(aad_.c_str())); int key_length = static_cast<int>(key_.length()); int aad_length = static_cast<int>(aad_.length()); return parquet_encryption::Encrypt(algorithm_, metadata_, plaintext, plaintext_len, key_bytes, key_length, aad_bytes, aad_length, ciphertext); } private: Encryption::type algorithm_; bool metadata_; std::string key_; std::string aad_; }; class PARQUET_EXPORT ParquetFileEncryptor { public: ParquetFileEncryptor(std::unique_ptr<FileEncryptionProperties> encryption_setup); private: Encryption::type algorithm_; std::string footer_key_; std::unique_ptr<FileEncryptionProperties> encryption_setup_; std::string footer_key_metadata_; bool uniform_encryption_; // std::vector<ColumnEncryptionSetup> column_md_list_; std::string aad_; bool encrypt_footer_; }; // class PARQUET_EXPORT ParquetFileDecryptor { // }; // class PARQUET_EXPORT ParquetEncryptionFactory { // public: // static std::unique_ptr<ParquetFileEncryptor> createFileEncryptor(const std::string& key); // static std::unique_ptr<ParquetFileEncryptor> createFileEncryptor(const FileEncryptionProperties& eSetup); // static std::unique_ptr<ParquetFileDecryptor> createFileDecryptor(const std::string& key); // static std::unique_ptr<ParquetFileDecryptor> createFileDecryptor(const FileDecryptionProperties& dSetup); // }; } // namespace parquet #endif // PARQUET_ENCRYPTION_H