diff --git a/java-sdk/connectors/simple-connector/src/main/java/com/lucidworks/connector/plugins/random/config/RandomContentConfig.java b/java-sdk/connectors/simple-connector/src/main/java/com/lucidworks/connector/plugins/random/config/RandomContentConfig.java index 6a9051fc..6b84a7ad 100644 --- a/java-sdk/connectors/simple-connector/src/main/java/com/lucidworks/connector/plugins/random/config/RandomContentConfig.java +++ b/java-sdk/connectors/simple-connector/src/main/java/com/lucidworks/connector/plugins/random/config/RandomContentConfig.java @@ -4,6 +4,7 @@ import com.lucidworks.connector.plugins.random.config.RandomContentConfig.Properties; import com.lucidworks.fusion.connector.plugin.api.config.ConnectorConfig; import com.lucidworks.fusion.connector.plugin.api.config.ConnectorPluginProperties; +import com.lucidworks.fusion.schema.SchemaAnnotations.NumberSchema; import com.lucidworks.fusion.schema.SchemaAnnotations.Property; import com.lucidworks.fusion.schema.SchemaAnnotations.RootSchema; @@ -30,5 +31,15 @@ interface Properties extends ConnectorPluginProperties { description = "Random Content properties" ) RandomContentProperties getRandomContentProperties(); + + @Property( + title = "Number of candidates per item", + description = "The number of candidates to emit per document generated", + order = 2 + ) + @NumberSchema( + defaultValue = 1 + ) + Integer numberOfCandidates(); } } diff --git a/java-sdk/connectors/simple-connector/src/main/java/com/lucidworks/connector/plugins/random/fetcher/RandomContentFetcher.java b/java-sdk/connectors/simple-connector/src/main/java/com/lucidworks/connector/plugins/random/fetcher/RandomContentFetcher.java index 40aede98..86e46a19 100644 --- a/java-sdk/connectors/simple-connector/src/main/java/com/lucidworks/connector/plugins/random/fetcher/RandomContentFetcher.java +++ b/java-sdk/connectors/simple-connector/src/main/java/com/lucidworks/connector/plugins/random/fetcher/RandomContentFetcher.java @@ -8,14 +8,15 @@ import 
com.lucidworks.fusion.connector.plugin.api.fetcher.result.FetchResult; import com.lucidworks.fusion.connector.plugin.api.fetcher.type.content.ContentFetcher; import com.lucidworks.fusion.connector.plugin.api.fetcher.type.content.FetchInput; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import javax.inject.Inject; import java.time.Instant; import java.time.LocalDateTime; import java.util.stream.IntStream; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + public class RandomContentFetcher implements ContentFetcher { private static final Logger logger = LoggerFactory.getLogger(RandomContentFetcher.class); @@ -23,10 +24,12 @@ public class RandomContentFetcher implements ContentFetcher { private static final String CONTENT_ID = "content-example"; private static final String ERROR_ID = "no-number-this-should-fail"; private static final String COUNTER_FIELD = "number"; + private static final String CANDIDATE_NUMBER = "candidate-number"; private final RandomContentGenerator generator; private final RandomContentProperties randomContentProperties; private final String hostname; + private final long numberOfCandidates; @Inject public RandomContentFetcher( @@ -37,6 +40,7 @@ public RandomContentFetcher( this.randomContentProperties = randomContentConfig.properties().getRandomContentProperties(); this.generator = generator; this.hostname = hostnameProvider.get(); + this.numberOfCandidates = randomContentConfig.properties().numberOfCandidates(); } @Override @@ -48,11 +52,9 @@ public FetchResult fetch(FetchContext fetchContext) { int totalNumberOfDocs = randomContentProperties.totalNumDocs(); IntStream.range(0, totalNumberOfDocs) .asLongStream() - .forEach(i -> { - logger.info("Emitting candidate -> number {}", i); - fetchContext.newCandidate(String.valueOf(i)) - .metadata(m -> m.setLong(COUNTER_FIELD, i)) - .emit(); + .forEach(index -> { + logger.info("Emitting candidate for item {}", index); + emitCandidate(fetchContext, index, 1); }); // Simulating an 
error item here... because we're emitting an item without a "number", // the fetch() call will attempt to convert the number into a long and throw an exception. @@ -65,13 +67,32 @@ public FetchResult fetch(FetchContext fetchContext) { if (CONTENT_ID.equals(fetchContext.getFetchInput().getId())) { emitContent(fetchContext, input); + } else if (input.getMetadata().get(CANDIDATE_NUMBER) != null) { + long candidate = Integer.valueOf(input.getMetadata().get(CANDIDATE_NUMBER).toString()) + 1; + if (candidate <= numberOfCandidates) { + long num = (Long) input.getMetadata().get(COUNTER_FIELD); + emitCandidate(fetchContext, num, candidate); + return fetchContext.newResult(); + } + emitDocument(fetchContext, input); } else { emitDocument(fetchContext, input); } - return fetchContext.newResult(); } + private void emitCandidate(FetchContext fetchContext, long num, long candidate) { + boolean latestCandidate = candidate == numberOfCandidates; + fetchContext + .newCandidate(latestCandidate ? String.valueOf(num) : num + "-" + candidate) + .metadata(m -> { + m.setLong(COUNTER_FIELD, num); + m.setLong(CANDIDATE_NUMBER, candidate); + }) + .withTransient(latestCandidate ? 
false : true) // no need to save candidates that will not be indexed + .emit(); + } + private void emitDocument(FetchContext fetchContext, FetchInput input) { try { long num = (Long) input.getMetadata().get(COUNTER_FIELD); @@ -80,7 +101,7 @@ private void emitDocument(FetchContext fetchContext, FetchInput input) { .forEach( (k, v) -> logger.info("Input [{}:{}[{}]]", k, v, v.getClass()) ); - logger.info("Emitting Document -> number {}", num); + logger.info("Emitting Document -> id {} - {}", num, input.getMetadata()); int min = randomContentProperties.minimumNumberSentences(); int max = randomContentProperties.maximumNumberSentences(); @@ -121,7 +142,7 @@ private void emitContent(FetchContext fetchContext, FetchInput input) { f.setLocalDateTime("crawl_date", LocalDateTime.now()); }) .emit(); - } catch(ContentEmitException e) { + } catch (ContentEmitException e) { logger.error("Failed to emit content", e); fetchContext.newError(input.getId(), e.toString()) .emit();