Skip to content

Commit e5be686

Browse files
committed
Try to fix detecting replies to a comment on the previous page
When getting a page which is not the initial page, it is possible that the first comments are replies to a comment from a previous page.
1 parent b6e3015 commit e5be686

File tree

3 files changed

+115
-44
lines changed

3 files changed

+115
-44
lines changed

extractor/src/main/java/org/schabi/newpipe/extractor/Page.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
* <br>
1414
* A page has an {@link #id}, an {@link #url}, as well as information on possible {@link #cookies}.
1515
* In case the data behind the URL has already been retrieved,
16-
* it can be accessed by using @link #getBody()} and {@link #getContent()}.
16+
* it can be accessed by using {@link #getBody()} or {@link #getContent()}.
1717
*/
1818
public class Page implements Serializable {
1919
private final String url;

extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudCommentsExtractor.java

Lines changed: 87 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,24 @@
2121
import org.schabi.newpipe.extractor.services.soundcloud.SoundcloudParsingHelper;
2222

2323
import java.io.IOException;
24+
import java.util.ArrayList;
25+
import java.util.List;
2426

2527
import javax.annotation.Nonnull;
28+
import javax.annotation.Nullable;
2629

2730
public class SoundcloudCommentsExtractor extends CommentsExtractor {
2831
public static final String COLLECTION = "collection";
2932
public static final String NEXT_HREF = "next_href";
3033

34+
/**
35+
* The last comment which was a top level comment.
36+
* Next pages might start with replies to the last top level comment
37+
* and therefore the {@link SoundcloudCommentsInfoItemExtractor#replyCount}
38+
* of the last top level comment cannot be determined with certainty.
39+
*/
40+
@Nullable private JsonObject lastTopLevelComment;
41+
3142
public SoundcloudCommentsExtractor(final StreamingService service,
3243
final ListLinkHandler uiHandler) {
3344
super(service, uiHandler);
@@ -50,14 +61,15 @@ public InfoItemsPage<CommentsInfoItem> getInitialPage() throws ExtractionExcepti
5061
final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(
5162
getServiceId());
5263

53-
collectCommentsFrom(collector, json);
64+
collectCommentsFrom(collector, json, null);
5465

5566
return new InfoItemsPage<>(collector, new Page(json.getString(NEXT_HREF)));
5667
}
5768

5869
@Override
59-
public InfoItemsPage<CommentsInfoItem> getPage(final Page page) throws ExtractionException,
60-
IOException {
70+
public InfoItemsPage<CommentsInfoItem> getPage(final Page page)
71+
throws ExtractionException, IOException {
72+
6173
if (page == null || isNullOrEmpty(page.getUrl())) {
6274
throw new IllegalArgumentException("Page doesn't contain an URL");
6375
}
@@ -88,7 +100,7 @@ public InfoItemsPage<CommentsInfoItem> getPage(final Page page) throws Extractio
88100
} catch (final JsonParserException e) {
89101
throw new ParsingException("Could not parse json", e);
90102
}
91-
collectCommentsFrom(collector, json);
103+
collectCommentsFrom(collector, json, lastTopLevelComment);
92104
}
93105

94106
if (hasNextPage) {
@@ -101,27 +113,86 @@ public InfoItemsPage<CommentsInfoItem> getPage(final Page page) throws Extractio
101113
@Override
102114
public void onFetchPage(@Nonnull final Downloader downloader) { }
103115

104-
private void collectCommentsFrom(final CommentsInfoItemsCollector collector,
105-
final JsonObject json) throws ParsingException {
116+
/**
117+
* Collect top level comments from a SoundCloud API response.
118+
* @param collector the collector which collects the top level comments
119+
* @param json the JsonObject of the API response
120+
* @param lastTopLevelComment the last top level comment from the previous page or {@code null}
121+
* if this method is run for the initial page.
122+
* @throws ParsingException
123+
*/
124+
private void collectCommentsFrom(@Nonnull final CommentsInfoItemsCollector collector,
125+
@Nonnull final JsonObject json,
126+
@Nullable final JsonObject lastTopLevelComment)
127+
throws ParsingException {
128+
final List<SoundcloudCommentsInfoItemExtractor> extractors = new ArrayList<>();
106129
final String url = getUrl();
107130
final JsonArray entries = json.getArray(COLLECTION);
108-
JsonObject lastTopComment = null;
131+
/**
132+
* The current top level comment.
133+
*/
134+
JsonObject currentTopLevelComment = null;
135+
boolean isLastCommentReply = true;
136+
// Check whether the first comment in the list is a reply to the last top level comment
137+
// from the previous page if there was a previous page.
138+
if (lastTopLevelComment != null) {
139+
final JsonObject firstComment = entries.getObject(0);
140+
if (SoundcloudParsingHelper.isReplyTo(lastTopLevelComment, firstComment)) {
141+
currentTopLevelComment = lastTopLevelComment;
142+
} else {
143+
extractors.add(new SoundcloudCommentsInfoItemExtractor(
144+
json, SoundcloudCommentsInfoItemExtractor.PREVIOUS_PAGE_INDEX,
145+
firstComment, url, null));
146+
}
147+
}
148+
109149
for (int i = 0; i < entries.size(); i++) {
110150
final JsonObject entry = entries.getObject(i);
111-
if (i == 0
151+
// extract all top level comments
152+
// The first comment is either a top level comment
153+
// if it is not a reply to the last top level comment
154+
// from the previous page, or otherwise a reply to that comment.
155+
if (i == 0 && currentTopLevelComment == null
112156
|| (!SoundcloudParsingHelper.isReplyTo(entries.getObject(i - 1), entry)
113-
&& !SoundcloudParsingHelper.isReplyTo(lastTopComment, entry))) {
114-
lastTopComment = entry;
115-
collector.commit(new SoundcloudCommentsInfoItemExtractor(
116-
json, i, entry, url));
157+
&& !SoundcloudParsingHelper.isReplyTo(currentTopLevelComment, entry))) {
158+
currentTopLevelComment = entry;
159+
if (i == entries.size() - 1) {
160+
isLastCommentReply = false;
161+
this.lastTopLevelComment = currentTopLevelComment;
162+
// Do not collect the last comment if it is a top level comment
163+
// because it might have replies.
164+
// That is information we cannot get from the comment itself
165+
// (thanks SoundCloud...) but needs to be obtained from the next comment.
166+
// The comment will therefore be collected
167+
// when collecting the items from the next page.
168+
break;
169+
}
170+
extractors.add(new SoundcloudCommentsInfoItemExtractor(
171+
json, i, entry, url, lastTopLevelComment));
117172
}
118173
}
174+
if (isLastCommentReply) {
175+
// Do not collect the last top level comment if it has replies and the retrieved
176+
// comment list ends with a reply. We do not know whether the next page starts
177+
// with more replies to the last top level comment.
178+
this.lastTopLevelComment = extractors.remove(extractors.size() - 1).item;
179+
}
180+
extractors.stream().forEach(collector::commit);
181+
119182
}
120183

121-
private boolean collectRepliesFrom(final CommentsInfoItemsCollector collector,
122-
final JsonObject json,
123-
final int id,
124-
final String url) {
184+
/**
185+
* Collect replies to a top level comment from a SoundCloud API response.
186+
* @param collector the collector which collects the replies
187+
* @param json the SoundCloud API response
188+
* @param id the comment's id for which the replies are collected
189+
* @param url the corresponding page's URL
190+
* @return
191+
*/
192+
private boolean collectRepliesFrom(@Nonnull final CommentsInfoItemsCollector collector,
193+
@Nonnull final JsonObject json,
194+
final int id,
195+
@Nonnull final String url) {
125196
JsonObject originalComment = null;
126197
final JsonArray entries = json.getArray(COLLECTION);
127198
boolean moreReplies = false;

extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudCommentsInfoItemExtractor.java

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -6,43 +6,51 @@
66
import com.grack.nanojson.JsonObject;
77

88
import org.schabi.newpipe.extractor.Page;
9-
import org.schabi.newpipe.extractor.ServiceList;
109
import org.schabi.newpipe.extractor.comments.CommentsInfoItem;
1110
import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
12-
import org.schabi.newpipe.extractor.comments.CommentsInfoItemsCollector;
1311
import org.schabi.newpipe.extractor.exceptions.ParsingException;
1412
import org.schabi.newpipe.extractor.localization.DateWrapper;
1513
import org.schabi.newpipe.extractor.services.soundcloud.SoundcloudParsingHelper;
1614
import org.schabi.newpipe.extractor.stream.Description;
1715

1816
import java.util.Objects;
1917

18+
import javax.annotation.Nonnull;
2019
import javax.annotation.Nullable;
2120

2221
public class SoundcloudCommentsInfoItemExtractor implements CommentsInfoItemExtractor {
22+
public static final int PREVIOUS_PAGE_INDEX = -1;
2323
public static final String BODY = "body";
2424
public static final String USER_PERMALINK = "permalink";
2525
public static final String USER_FULL_NAME = "full_name";
2626
public static final String USER_USERNAME = "username";
2727

28-
private final JsonObject json;
28+
@Nonnull private final JsonObject json;
2929
private final int index;
30-
private final JsonObject item;
30+
@Nonnull public final JsonObject item;
3131
private final String url;
32-
private final JsonObject user;
33-
private final JsonObject superComment;
34-
32+
@Nonnull private final JsonObject user;
33+
/**
34+
* A comment to which this comment is a reply.
35+
* Is {@code null} if this comment is itself a top level comment.
36+
*/
37+
@Nullable private final JsonObject topLevelComment;
38+
39+
/**
40+
* The reply count is not given by the SoundCloud API, but needs to be obtained
41+
* by counting the comments which come directly after this item and have the same timestamp.
42+
*/
3543
private int replyCount = CommentsInfoItem.UNKNOWN_REPLY_COUNT;
3644
private Page repliesPage = null;
3745

38-
public SoundcloudCommentsInfoItemExtractor(final JsonObject json, final int index,
39-
final JsonObject item, final String url,
40-
@Nullable final JsonObject superComment) {
46+
public SoundcloudCommentsInfoItemExtractor(@Nonnull final JsonObject json, final int index,
47+
@Nonnull final JsonObject item, final String url,
48+
@Nullable final JsonObject topLevelComment) {
4149
this.json = json;
4250
this.index = index;
4351
this.item = item;
4452
this.url = url;
45-
this.superComment = superComment;
53+
this.topLevelComment = topLevelComment;
4654
this.user = item.getObject("user");
4755
}
4856

@@ -58,7 +66,7 @@ public String getCommentId() {
5866
@Override
5967
public Description getCommentText() {
6068
String commentContent = item.getString(BODY);
61-
if (superComment == null) {
69+
if (topLevelComment == null) {
6270
return new Description(commentContent, Description.PLAIN_TEXT);
6371
}
6472
// This comment is a reply to another comment.
@@ -78,7 +86,7 @@ public Description getCommentText() {
7886
}
7987
}
8088
if (author == null) {
81-
author = superComment.getObject("user");
89+
author = topLevelComment.getObject("user");
8290
}
8391
final String name = isNullOrEmpty(author.getString(USER_FULL_NAME))
8492
? author.getString(USER_USERNAME) : author.getString(USER_FULL_NAME);
@@ -149,24 +157,17 @@ public String getThumbnailUrl() {
149157
@Override
150158
public Page getReplies() {
151159
if (replyCount == CommentsInfoItem.UNKNOWN_REPLY_COUNT) {
152-
final JsonArray replies = new JsonArray();
153-
final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(
154-
ServiceList.SoundCloud.getServiceId());
160+
replyCount = 0;
155161
// SoundCloud has only comments and top level replies, but not nested replies.
156162
// Therefore, replies cannot have further replies.
157-
if (superComment == null) {
163+
if (topLevelComment == null) {
158164
// Loop through all comments which come after the original comment
159165
// to find its replies.
160166
final JsonArray allItems = json.getArray(SoundcloudCommentsExtractor.COLLECTION);
161-
boolean foundReply = false;
162167
for (int i = index + 1; i < allItems.size(); i++) {
163-
final JsonObject comment = allItems.getObject(i);
164-
if (SoundcloudParsingHelper.isReplyTo(item, comment)) {
165-
replies.add(comment);
166-
collector.commit(new SoundcloudCommentsInfoItemExtractor(
167-
json, i, comment, url, item));
168-
foundReply = true;
169-
} else if (foundReply) {
168+
if (SoundcloudParsingHelper.isReplyTo(item, allItems.getObject(i))) {
169+
replyCount++;
170+
} else {
170171
// Only the comments directly after the original comment
171172
// having the same timestamp are replies to the original comment.
172173
// The first comment not having the same timestamp
@@ -175,8 +176,7 @@ public Page getReplies() {
175176
}
176177
}
177178
}
178-
replyCount = replies.size();
179-
if (collector.getItems().isEmpty()) {
179+
if (replyCount == 0) {
180180
return null;
181181
}
182182
repliesPage = new Page(getUrl(), getCommentId());

0 commit comments

Comments
 (0)