Skip to content
This repository was archived by the owner on Jun 20, 2025. It is now read-only.

Commit 2b0c379

Browse files
Ajinkya Ghongefacebook-github-bot
authored andcommitted
Add logic to write output with encoded Ids. (#2286)
Summary: Pull Request resolved: #2286 # Context As per the PC Translator design, we need a runtime library that will be called during a PC run. This library will be called at the beginning of the PC run to encode specified fields in the publisher-side input into encoded breakdown (aggregation) IDs, based on the active PC instruction sets for the run. The library will filter the active PC instruction sets for the run by parsing pcs_features, i.e. the gatekeepers for that particular run. # Product decisions In this stack we focus solely on the functionality required for private lift runs. We focus on the MVP implementation of the library and its integration with the fbpcf ORAM encoder library. # Stack 1. Create the runtime pc_translator library. 2. Add logic to retrieve and parse the PC instruction set, filtered based on the active gatekeepers for the run. 3. Integrate the pc_translator library with the fbpcf ORAM encoder. 4. Add logic to generate the transformed publisher output with the encoded breakdown ID and write the output. 5. Add support for filter constraints in pc_translator. # In this diff Add logic to generate the transformed publisher output with the encoded breakdown ID and write the output. Differential Revision: D44645325 Privacy Context Container: L416713 fbshipit-source-id: 953989f8440827ff528f51c202e498c01fcf31bd
1 parent bc2b134 commit 2b0c379

File tree

4 files changed

+95
-17
lines changed

4 files changed

+95
-17
lines changed

fbpcs/pc_translator/PCTranslator.cpp

Lines changed: 56 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -14,24 +14,27 @@
1414
#include <fbpcf/mpc_std_lib/oram/encoder/IOramEncoder.h>
1515
#include <fbpcf/mpc_std_lib/oram/encoder/OramEncoder.h>
1616
#include <algorithm>
17+
#include <cstdint>
18+
#include <iterator>
1719
#include <set>
1820
#include <stdexcept>
21+
#include <string>
1922
#include "fbpcs/emp_games/common/Csv.h"
2023
#include "folly/String.h"
2124

2225
namespace pc_translator {
2326

24-
std::string PCTranslator::encode(const std::string& inputDataset) {
27+
std::string PCTranslator::encode(const std::string& inputDatasetPath) {
2528
auto validInstructionSetNames =
2629
PCTranslator::retrieveInstructionSetNamesForRun(pcsFeatures_);
2730
auto pcInstructionSets =
2831
PCTranslator::retrieveInstructionSets(validInstructionSetNames);
2932
if (pcInstructionSets.empty()) {
3033
// No instruction set found. return the input dataset path.
31-
return inputDataset;
34+
return inputDatasetPath;
3235
}
3336
return PCTranslator::transformDataset(
34-
inputDataset, pcInstructionSets.front());
37+
inputDatasetPath, pcInstructionSets.front());
3538
}
3639

3740
std::string PCTranslator::decode(
@@ -79,30 +82,43 @@ std::vector<std::string> PCTranslator::retrieveInstructionSetNamesForRun(
7982
}
8083

8184
std::string PCTranslator::transformDataset(
82-
const std::string& inputData,
85+
const std::string& inputDatasetPath,
8386
std::shared_ptr<pc_translator::PCInstructionSet> pcInstructionSet) {
8487
// Parse the input CSV
8588
auto lineNo = 0;
8689
std::vector<std::vector<uint32_t>> inputColums;
90+
std::vector<std::string> outputHeader;
91+
std::vector<std::vector<std::string>> outputContent;
8792
private_measurement::csv::readCsv(
88-
inputData,
93+
inputDatasetPath,
8994
[&](const std::vector<std::string>& header,
9095
const std::vector<std::string>& parts) {
9196
std::vector<uint32_t> inputColumnPerRow;
97+
std::string column;
98+
std::uint32_t value;
99+
bool found = false;
100+
std::vector<std::string> outputContentPerRow;
92101
for (std::vector<std::string>::size_type i = 0; i < header.size();
93102
++i) {
94-
auto& column = header[i];
95-
auto value = std::atoi(parts[i].c_str());
96-
auto iter = std::find(
97-
pcInstructionSet->getGroupByIds().begin(),
98-
pcInstructionSet->getGroupByIds().end(),
99-
column);
100-
if (iter != pcInstructionSet->getGroupByIds().end()) {
103+
column = header[i];
104+
value = std::atoi(parts[i].c_str());
105+
found =
106+
(std::find(
107+
pcInstructionSet->getGroupByIds().begin(),
108+
pcInstructionSet->getGroupByIds().end(),
109+
column) != pcInstructionSet->getGroupByIds().end());
110+
if (found) {
101111
inputColumnPerRow.push_back(value);
112+
} else {
113+
if (lineNo == 0) {
114+
outputHeader.push_back(header[i]);
115+
}
116+
outputContentPerRow.push_back(parts[i]);
102117
}
103118
}
104119

105120
inputColums.push_back(inputColumnPerRow);
121+
outputContent.push_back(outputContentPerRow);
106122
lineNo++;
107123
});
108124

@@ -114,9 +130,34 @@ std::string PCTranslator::transformDataset(
114130

115131
auto encodedIndexes = encoder->generateORAMIndexes(inputColums);
116132

117-
// TODO : Append the enodedIndexes at the end of publisher output and return
118-
// output path.
119-
return "";
133+
auto dir = inputDatasetPath.substr(0, inputDatasetPath.rfind("/") + 1);
134+
auto output_dataset_path = dir + "transformed_publisher_input.csv";
135+
136+
PCTranslator::putOutputData(
137+
output_dataset_path, outputHeader, outputContent, encodedIndexes);
138+
return output_dataset_path;
139+
}
140+
141+
void PCTranslator::putOutputData(
142+
const std::string& output_dataset_path,
143+
std::vector<std::string>& outputHeader,
144+
std::vector<std::vector<std::string>>& outputContent,
145+
const std::vector<uint32_t>& encodedIndexes) {
146+
outputHeader.push_back("breakdown_id");
147+
148+
if (outputContent.size() != encodedIndexes.size()) {
149+
throw std::runtime_error(
150+
"Encoded index vector size should match the input vector size.");
151+
}
152+
153+
for (std::vector<std::string>::size_type i = 0; i < encodedIndexes.size();
154+
++i) {
155+
auto indexVec = std::to_string(encodedIndexes[i]);
156+
outputContent[i].push_back(indexVec);
157+
}
158+
159+
private_measurement::csv::writeCsv(
160+
output_dataset_path, outputHeader, outputContent);
120161
}
121162

122163
std::shared_ptr<PCInstructionSet> PCTranslator::parseInstructionSet(

fbpcs/pc_translator/PCTranslator.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,12 @@ class PCTranslator {
5555
// Reads the CSV at the given path, encodes the columns named by the
// instruction set's group-by ids into one index per row, and writes the
// remaining columns plus a "breakdown_id" column to
// "transformed_publisher_input.csv" in the same directory as the input.
// Returns the path of the written file.
// NOTE(review): the definition names this parameter `inputDatasetPath`; the
// declaration still says `inputData`.
std::string transformDataset(
5656
const std::string& inputData,
5757
std::shared_ptr<pc_translator::PCInstructionSet> pcInstructionSet);
58+
59+
// Appends a "breakdown_id" column (one entry of encodedIndexes per row) to
// outputHeader/outputContent in place and writes them as CSV to
// output_dataset_path. Throws std::runtime_error when the number of rows in
// outputContent differs from the number of encoded indexes.
void putOutputData(
60+
const std::string& output_dataset_path,
61+
std::vector<std::string>& outputHeader,
62+
std::vector<std::vector<std::string>>& outputContent,
63+
const std::vector<uint32_t>& encodedIndexes);
5864
};
5965

6066
} // namespace pc_translator

fbpcs/pc_translator/tests/TestPCTranslator.cpp

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
* LICENSE file in the root directory of this source tree.
66
*/
77

8+
#include <fbpcf/io/api/FileIOWrappers.h>
89
#include <gtest/gtest.h>
910
#include "../../emp_games/common/TestUtil.h"
1011
#include "fbpcs/pc_translator/PCTranslator.h"
@@ -16,21 +17,38 @@ class TestPCTranslator : public ::testing::Test {
1617
std::string pcs_features_;
1718
std::string test_instruction_set_base_path_;
1819
std::string test_publisher_input_path_;
20+
std::string test_transformed_output_path_;
21+
std::string expected_transformed_output_path_;
1922

2023
void SetUp() override {
2124
pcs_features_ =
2225
"'num_mpc_container_mutation', 'private_lift_unified_data_process', 'pc_instr_test_instruction_set'";
2326
std::string baseDir =
2427
private_measurement::test_util::getBaseDirFromPath(__FILE__);
2528
test_instruction_set_base_path_ = baseDir + "input_processing/";
26-
test_publisher_input_path_ = baseDir + "publisher_unittest.csv";
29+
test_publisher_input_path_ = "/tmp/publisher_unittest.csv";
30+
test_transformed_output_path_ = "/tmp/transformed_publisher_input.csv";
31+
expected_transformed_output_path_ =
32+
baseDir + "expected_transformed_publisher_input.csv";
33+
auto contents =
34+
fbpcf::io::FileIOWrappers::readFile(baseDir + "publisher_unittest.csv");
35+
fbpcf::io::FileIOWrappers::writeFile(test_publisher_input_path_, contents);
36+
}
37+
38+
// Deletes the staged input copy and the transformed output from /tmp.
// The results of std::remove are ignored, as a file may not exist if the
// test failed before creating it.
void TearDown() override {
  const char* const stagedInput = test_publisher_input_path_.c_str();
  const char* const generatedOutput = test_transformed_output_path_.c_str();
  std::remove(stagedInput);
  std::remove(generatedOutput);
}
2842
};
2943

3044
// encode() must write the transformed dataset next to the staged input and
// its contents must match the checked-in expected CSV.
TEST_F(TestPCTranslator, TestEncode) {
  auto translator = std::make_shared<PCTranslator>(
      pcs_features_, test_instruction_set_base_path_);

  auto writtenPath = translator->encode(test_publisher_input_path_);

  auto writtenCsv = fbpcf::io::FileIOWrappers::readFile(writtenPath);
  auto expectedCsv =
      fbpcf::io::FileIOWrappers::readFile(expected_transformed_output_path_);

  EXPECT_EQ(writtenPath, test_transformed_output_path_);
  EXPECT_EQ(writtenCsv, expectedCsv);
}
3654
} // namespace pc_translator
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
id_,opportunity,test_flag,opportunity_timestamp,breakdown_id
2+
cfcd208495d565ef66e7dff9f98764da,1,0,1600000430,0
3+
c4ca4238a0b923820dcc509a6f75849b,1,1,1600000401,1
4+
c81e728d9d4c2f636f067f89cc14862c,0,0,0,2
5+
eccbc87e4b5ce2fe28308fd9f2a7baf3,0,0,0,3
6+
a87ff679a2f3e71d9181a67b7542122c,0,0,0,0
7+
e4da3b7fbbce2345d7772b0674a318d5,1,1,1600000461,4
8+
1679091c5a880faf6fb5e6087eb1b2dc,1,0,1600000052,5
9+
8f14e45fceea167a5a36dedd4bea2543,1,0,1600000831,6
10+
c9f0f895fb98ab9159f51fd0297e236d,1,0,1600000530,7
11+
45c48cce2e2d7fbdea1afc51c7c6ad26,1,0,1600000972,5
12+
d3d9446802a44259755d38e6d163e820,0,0,0,0
13+
6512bd43d9caa6e02c990b0a82652dca,0,0,0,0

0 commit comments

Comments
 (0)