Skip to content

Commit b4b5fd2

Browse files
New API, BufferedSource.indexOf(ByteString, fromIndex, toIndex) (#1626)
* New API, BufferedSource.indexOf(ByteString, fromIndex, toIndex)

  This is surprisingly interesting. To minimize unnecessary reads for toIndex, it is necessary to check whether a prefix of the query matches a suffix of the currently-loaded data.

  This read-avoidance is useful in practice. When doing HTTP multipart decoding, the caller may scan for a boundary separator with a bounded range, and we don't want to block reading when doing so won't impact the result of the call.

* apiDump

* Check both maximum and minimum prefix sizes

---------

Co-authored-by: Jesse Wilson <[email protected]>
1 parent 5ce2d92 commit b4b5fd2

File tree

13 files changed

+319
-11
lines changed

13 files changed

+319
-11
lines changed

okio/api/okio.api

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ public final class okio/Buffer : java/lang/Cloneable, java/nio/channels/ByteChan
9393
public fun indexOf (BJJ)J
9494
public fun indexOf (Lokio/ByteString;)J
9595
public fun indexOf (Lokio/ByteString;J)J
96+
public fun indexOf (Lokio/ByteString;JJ)J
9697
public fun indexOfElement (Lokio/ByteString;)J
9798
public fun indexOfElement (Lokio/ByteString;J)J
9899
public fun inputStream ()Ljava/io/InputStream;
@@ -250,6 +251,7 @@ public abstract interface class okio/BufferedSource : java/nio/channels/Readable
250251
public abstract fun indexOf (BJJ)J
251252
public abstract fun indexOf (Lokio/ByteString;)J
252253
public abstract fun indexOf (Lokio/ByteString;J)J
254+
public abstract fun indexOf (Lokio/ByteString;JJ)J
253255
public abstract fun indexOfElement (Lokio/ByteString;)J
254256
public abstract fun indexOfElement (Lokio/ByteString;J)J
255257
public abstract fun inputStream ()Ljava/io/InputStream;

okio/src/commonMain/kotlin/okio/Buffer.kt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@ expect class Buffer() : BufferedSource, BufferedSink {
118118
override fun indexOf(b: Byte, fromIndex: Long, toIndex: Long): Long
119119
override fun indexOf(bytes: ByteString): Long
120120
override fun indexOf(bytes: ByteString, fromIndex: Long): Long
121+
override fun indexOf(bytes: ByteString, fromIndex: Long, toIndex: Long): Long
121122
override fun indexOfElement(targetBytes: ByteString): Long
122123
override fun indexOfElement(targetBytes: ByteString, fromIndex: Long): Long
123124
override fun peek(): BufferedSource

okio/src/commonMain/kotlin/okio/BufferedSource.kt

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -514,6 +514,28 @@ expect sealed interface BufferedSource : Source {
514514
*/
515515
fun indexOf(bytes: ByteString, fromIndex: Long): Long
516516

517+
/**
518+
* Returns the index of the first match for `bytes` in the buffer that is at or after `fromIndex`,
519+
* and that is also less than `toIndex`. Returns -1 if `bytes` isn't found in that range. If
520+
* `fromIndex == toIndex` then the search range is empty and -1 is returned.
521+
*
522+
* This may attempt to expand the buffer. It won't attempt to expand the buffer if doing so
523+
* couldn't change the result.
524+
*
525+
* ```java
526+
* ByteString MOVE = ByteString.encodeUtf8("move");
527+
*
528+
* Buffer buffer = new Buffer();
529+
* buffer.writeUtf8("Don't move! He can't see us if we don't move.");
530+
*
531+
* assertEquals( 6, buffer.indexOf(MOVE, 0, 40));
532+
* assertEquals( 6, buffer.indexOf(MOVE, 0, 100));
533+
* assertEquals(-1, buffer.indexOf(MOVE, 7, 40));
534+
* assertEquals(40, buffer.indexOf(MOVE, 7, 100));
535+
* ```
536+
*/
537+
fun indexOf(bytes: ByteString, fromIndex: Long, toIndex: Long): Long
538+
517539
/** Equivalent to [indexOfElement(targetBytes, 0)][indexOfElement]. */
518540
fun indexOfElement(targetBytes: ByteString): Long
519541

okio/src/commonMain/kotlin/okio/RealBufferedSource.kt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ internal expect class RealBufferedSource(
3030
override fun indexOf(b: Byte, fromIndex: Long, toIndex: Long): Long
3131
override fun indexOf(bytes: ByteString): Long
3232
override fun indexOf(bytes: ByteString, fromIndex: Long): Long
33+
override fun indexOf(bytes: ByteString, fromIndex: Long, toIndex: Long): Long
3334
override fun indexOfElement(targetBytes: ByteString): Long
3435
override fun indexOfElement(targetBytes: ByteString, fromIndex: Long): Long
3536
override fun peek(): BufferedSource

okio/src/commonMain/kotlin/okio/internal/Buffer.kt

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1285,10 +1285,19 @@ internal inline fun Buffer.commonIndexOf(b: Byte, fromIndex: Long, toIndex: Long
12851285
}
12861286
}
12871287

1288-
internal inline fun Buffer.commonIndexOf(bytes: ByteString, fromIndex: Long): Long {
1289-
var fromIndex = fromIndex
1288+
internal inline fun Buffer.commonIndexOf(
1289+
bytes: ByteString,
1290+
fromIndex: Long,
1291+
toIndex: Long = Long.MAX_VALUE,
1292+
): Long {
12901293
require(bytes.size > 0) { "bytes is empty" }
1291-
require(fromIndex >= 0L) { "fromIndex < 0: $fromIndex" }
1294+
require(fromIndex >= 0) { "fromIndex < 0: $fromIndex" }
1295+
require(fromIndex <= toIndex) { "fromIndex > toIndex: $fromIndex > $toIndex" }
1296+
1297+
var fromIndex = fromIndex
1298+
var toIndex = toIndex
1299+
if (toIndex > size) toIndex = size
1300+
if (fromIndex == toIndex) return -1L
12921301

12931302
seek(fromIndex) { s, offset ->
12941303
var s = s ?: return -1L
@@ -1299,11 +1308,11 @@ internal inline fun Buffer.commonIndexOf(bytes: ByteString, fromIndex: Long): Lo
12991308
val targetByteArray = bytes.internalArray()
13001309
val b0 = targetByteArray[0]
13011310
val bytesSize = bytes.size
1302-
val resultLimit = size - bytesSize + 1L
1311+
val resultLimit = minOf(toIndex, size - bytesSize + 1L)
13031312
while (offset < resultLimit) {
13041313
// Scan through the current segment.
13051314
val data = s.data
1306-
val segmentLimit = okio.minOf(s.limit, s.pos + resultLimit - offset).toInt()
1315+
val segmentLimit = minOf(s.limit, s.pos + resultLimit - offset).toInt()
13071316
for (pos in (s.pos + fromIndex - offset).toInt() until segmentLimit) {
13081317
if (data[pos] == b0 && rangeEquals(s, pos + 1, targetByteArray, 1, bytesSize)) {
13091318
return pos - s.pos + offset
@@ -1624,7 +1633,7 @@ internal inline fun UnsafeCursor.commonResizeBuffer(newSize: Long): Long {
16241633
val tailSize = tail!!.limit - tail.pos
16251634
if (tailSize <= bytesToSubtract) {
16261635
buffer.head = tail.pop()
1627-
okio.SegmentPool.recycle(tail)
1636+
SegmentPool.recycle(tail)
16281637
bytesToSubtract -= tailSize.toLong()
16291638
} else {
16301639
tail.limit -= bytesToSubtract.toInt()

okio/src/commonMain/kotlin/okio/internal/RealBufferedSource.kt

Lines changed: 49 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ import okio.Segment
3333
import okio.Sink
3434
import okio.buffer
3535
import okio.checkOffsetAndCount
36+
import okio.minOf
3637

3738
internal inline fun RealBufferedSource.commonRead(sink: Buffer, byteCount: Long): Long {
3839
require(byteCount >= 0L) { "byteCount < 0: $byteCount" }
@@ -332,22 +333,67 @@ internal inline fun RealBufferedSource.commonIndexOf(b: Byte, fromIndex: Long, t
332333
return -1L
333334
}
334335

335-
internal inline fun RealBufferedSource.commonIndexOf(bytes: ByteString, fromIndex: Long): Long {
336+
internal inline fun RealBufferedSource.commonIndexOf(
337+
bytes: ByteString,
338+
fromIndex: Long,
339+
toIndex: Long = Long.MAX_VALUE,
340+
): Long {
336341
var fromIndex = fromIndex
337342
check(!closed) { "closed" }
338343

339344
while (true) {
340-
val result = buffer.indexOf(bytes, fromIndex)
345+
val result = buffer.indexOf(bytes, fromIndex, toIndex)
341346
if (result != -1L) return result
342347

343348
val lastBufferSize = buffer.size
349+
val nextFromIndex = lastBufferSize - bytes.size + 1
350+
if (nextFromIndex >= toIndex) return -1L
351+
352+
if (!matchPossibleByExpandingBuffer(buffer, bytes, fromIndex, toIndex)) return -1L
344353
if (source.read(buffer, Segment.SIZE.toLong()) == -1L) return -1L
345354

346355
// Keep searching, picking up from where we left off.
347-
fromIndex = maxOf(fromIndex, lastBufferSize - bytes.size + 1)
356+
fromIndex = maxOf(fromIndex, nextFromIndex)
348357
}
349358
}
350359

360+
/**
361+
* Returns true if loading more data could result in an `indexOf` match.
362+
*
363+
* This function's utility is avoiding potentially-slow `read` calls that cannot impact the result
364+
* of an `indexOf` call. For example, consider this situation:
365+
*
366+
* ```
367+
* val source = ...
368+
* source.indexOf("hello", fromIndex = 0, toIndex = 4)
369+
* ```
370+
*
371+
* If the source's loaded content is the string "shell", it is necessary to load more data because
372+
* if the next loaded byte is 'o' then the result will be 1. But if the source's loaded content is
373+
* "look", we know the result is -1 without loading more data.
374+
*/
375+
private fun matchPossibleByExpandingBuffer(
376+
buffer: Buffer,
377+
bytes: ByteString,
378+
fromIndex: Long,
379+
toIndex: Long,
380+
): Boolean {
381+
// Load new data if the match could come entirely in that new data.
382+
if (buffer.size < toIndex) return true
383+
384+
// Load new data if a prefix of 'bytes' matches a suffix of 'buffer'.
385+
val begin = maxOf(1, buffer.size - toIndex + 1).toInt()
386+
val limit = minOf(bytes.size, buffer.size - fromIndex + 1).toInt()
387+
for (i in limit - 1 downTo begin) {
388+
if (buffer.rangeEquals(buffer.size - i, bytes, 0, i)) {
389+
return true
390+
}
391+
}
392+
393+
// No matter what we load, we won't find a match.
394+
return false
395+
}
396+
351397
internal inline fun RealBufferedSource.commonIndexOfElement(targetBytes: ByteString, fromIndex: Long): Long {
352398
var fromIndex = fromIndex
353399
check(!closed) { "closed" }

okio/src/jvmMain/kotlin/okio/Buffer.kt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -485,6 +485,10 @@ actual class Buffer : BufferedSource, BufferedSink, Cloneable, ByteChannel {
485485
@Throws(IOException::class)
486486
actual override fun indexOf(bytes: ByteString, fromIndex: Long): Long = commonIndexOf(bytes, fromIndex)
487487

488+
@Throws(IOException::class)
489+
actual override fun indexOf(bytes: ByteString, fromIndex: Long, toIndex: Long): Long =
490+
commonIndexOf(bytes, fromIndex, toIndex)
491+
488492
actual override fun indexOfElement(targetBytes: ByteString) = indexOfElement(targetBytes, 0L)
489493

490494
actual override fun indexOfElement(targetBytes: ByteString, fromIndex: Long): Long =

okio/src/jvmMain/kotlin/okio/BufferedSource.kt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,9 @@ actual sealed interface BufferedSource : Source, ReadableByteChannel {
147147
@Throws(IOException::class)
148148
actual fun indexOf(bytes: ByteString, fromIndex: Long): Long
149149

150+
@Throws(IOException::class)
151+
actual fun indexOf(bytes: ByteString, fromIndex: Long, toIndex: Long): Long
152+
150153
@Throws(IOException::class)
151154
actual fun indexOfElement(targetBytes: ByteString): Long
152155

okio/src/jvmMain/kotlin/okio/RealBufferedSource.kt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,8 @@ internal actual class RealBufferedSource actual constructor(
127127
actual override fun indexOf(bytes: ByteString): Long = indexOf(bytes, 0L)
128128
actual override fun indexOf(bytes: ByteString, fromIndex: Long): Long =
129129
commonIndexOf(bytes, fromIndex)
130+
actual override fun indexOf(bytes: ByteString, fromIndex: Long, toIndex: Long): Long =
131+
commonIndexOf(bytes, fromIndex, toIndex)
130132
actual override fun indexOfElement(targetBytes: ByteString): Long =
131133
indexOfElement(targetBytes, 0L)
132134
actual override fun indexOfElement(targetBytes: ByteString, fromIndex: Long): Long =

0 commit comments

Comments
 (0)