Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,12 @@ replay_pid*
.idea
*.iml

### Eclipse/JDTLS ###
.settings/
.classpath
.project
.factorypath

### VS Code ###
.vscode/

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@
import java.io.Closeable;
import java.io.DataOutput;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.FloatBuffer;


/**
* Interface for writing index data.
Expand All @@ -30,4 +34,13 @@ public interface IndexWriter extends DataOutput, Closeable {
* @throws IOException if an I/O error occurs
*/
long position() throws IOException;

/**
 * Writes {@code count} floats from {@code floats}, starting at index {@code offset},
 * as consecutive 4-byte big-endian values (the same byte order that
 * {@link java.io.DataOutput#writeFloat(float)} specifies).
 * <p>
 * Exactly {@code count * Float.BYTES} bytes are passed to {@link #write(byte[])},
 * regardless of the length of the source array.
 *
 * @param floats the source array
 * @param offset index of the first float to write
 * @param count  number of floats to write
 * @throws IOException if the underlying {@code write} call fails
 * @throws IndexOutOfBoundsException if {@code offset} and {@code count} do not describe
 *         a valid range of {@code floats} (raised by {@code FloatBuffer.wrap})
 */
default void writeFloats(float[] floats, int offset, int count) throws IOException {
    FloatBuffer fb = FloatBuffer.wrap(floats, offset, count);
    // Size the output from remaining() (== count). capacity() of a wrapped buffer is the
    // full array length, which would append zero padding whenever a sub-range is written.
    ByteBuffer bb = ByteBuffer.allocate(fb.remaining() * Float.BYTES);
    // DataOutput specifies BIG_ENDIAN for float
    bb.order(ByteOrder.BIG_ENDIAN).asFloatBuffer().put(fb);
    // The float view has its own position, so bb's backing array holds exactly the
    // encoded floats and can be written as-is.
    write(bb.array());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import java.io.IOException;
import java.io.RandomAccessFile;
import java.lang.reflect.Field;
import java.nio.ByteOrder;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Path;
Expand Down Expand Up @@ -73,6 +74,7 @@ public Supplier(Path path) throws IOException {
throw new RuntimeException("SimpleMappedReader doesn't support files above 2GB");
}
this.buffer = raf.getChannel().map(FileChannel.MapMode.READ_ONLY, 0, raf.length());
this.buffer.order(ByteOrder.BIG_ENDIAN);
this.buffer.load();
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@

package io.github.jbellis.jvector.graph.disk.feature;

import java.io.DataOutput;
import io.github.jbellis.jvector.disk.IndexWriter;

import java.io.IOException;
import java.util.EnumMap;
import java.util.function.IntFunction;
Expand All @@ -35,9 +36,9 @@ default boolean isFused() {

int featureSize();

void writeHeader(DataOutput out) throws IOException;
void writeHeader(IndexWriter out) throws IOException;

default void writeInline(DataOutput out, State state) throws IOException {
default void writeInline(IndexWriter out, State state) throws IOException {
// default no-op
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@

package io.github.jbellis.jvector.graph.disk.feature;

import io.github.jbellis.jvector.disk.IndexWriter;
import io.github.jbellis.jvector.disk.RandomAccessReader;
import io.github.jbellis.jvector.util.Accountable;

import java.io.DataOutput;
import java.io.IOException;

/**
Expand All @@ -33,7 +33,7 @@ default boolean isFused() {
return true;
}

void writeSourceFeature(DataOutput out, State state) throws IOException;
void writeSourceFeature(IndexWriter out, State state) throws IOException;

interface InlineSource extends Accountable {}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

package io.github.jbellis.jvector.graph.disk.feature;

import io.github.jbellis.jvector.disk.IndexWriter;
import io.github.jbellis.jvector.disk.RandomAccessReader;
import io.github.jbellis.jvector.graph.ImmutableGraphIndex;
import io.github.jbellis.jvector.graph.disk.CommonHeader;
Expand All @@ -31,7 +32,6 @@
import io.github.jbellis.jvector.vector.types.VectorFloat;
import io.github.jbellis.jvector.vector.types.VectorTypeSupport;

import java.io.DataOutput;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.function.IntFunction;
Expand Down Expand Up @@ -97,14 +97,14 @@ public ScoreFunction.ApproximateScoreFunction approximateScoreFunctionFor(Vector
}

@Override
public void writeHeader(DataOutput out) throws IOException {
public void writeHeader(IndexWriter out) throws IOException {
pq.write(out, OnDiskGraphIndex.CURRENT_VERSION);
}

// this is an awkward fit for the Feature.State design since we need to
// generate the fused set based on the neighbors of the node, not just the node itself
@Override
public void writeInline(DataOutput out, Feature.State state_) throws IOException {
public void writeInline(IndexWriter out, Feature.State state_) throws IOException {
var state = (FusedPQ.State) state_;

var neighbors = state.view.getNeighborsIterator(0, state.nodeId);
Expand Down Expand Up @@ -138,7 +138,7 @@ public State(ImmutableGraphIndex.View view, IntFunction<ByteSequence<?>> compres
}

@Override
public void writeSourceFeature(DataOutput out, Feature.State state_) throws IOException {
public void writeSourceFeature(IndexWriter out, Feature.State state_) throws IOException {
var state = (FusedPQ.State) state_;
var compressed = state.compressedVectorFunction.apply(state.nodeId);
var temp = pqCodeScratch.get();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,13 @@

package io.github.jbellis.jvector.graph.disk.feature;

import io.github.jbellis.jvector.disk.IndexWriter;
import io.github.jbellis.jvector.disk.RandomAccessReader;
import io.github.jbellis.jvector.graph.disk.CommonHeader;
import io.github.jbellis.jvector.vector.VectorizationProvider;
import io.github.jbellis.jvector.vector.types.VectorFloat;
import io.github.jbellis.jvector.vector.types.VectorTypeSupport;

import java.io.DataOutput;
import java.io.IOException;

/**
Expand Down Expand Up @@ -59,12 +59,12 @@ static InlineVectors load(CommonHeader header, RandomAccessReader reader) {
}

@Override
public void writeHeader(DataOutput out) {
public void writeHeader(IndexWriter out) {
// common header contains dimension, which is sufficient
}

@Override
public void writeInline(DataOutput out, Feature.State state) throws IOException {
public void writeInline(IndexWriter out, Feature.State state) throws IOException {
vectorTypeSupport.writeFloatVector(out, ((InlineVectors.State) state).vector);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

package io.github.jbellis.jvector.graph.disk.feature;

import io.github.jbellis.jvector.disk.IndexWriter;
import io.github.jbellis.jvector.disk.RandomAccessReader;
import io.github.jbellis.jvector.graph.disk.CommonHeader;
import io.github.jbellis.jvector.graph.disk.OnDiskGraphIndex;
Expand All @@ -26,7 +27,6 @@
import io.github.jbellis.jvector.vector.VectorSimilarityFunction;
import io.github.jbellis.jvector.vector.types.VectorFloat;

import java.io.DataOutput;
import java.io.IOException;
import java.io.UncheckedIOException;

Expand Down Expand Up @@ -70,12 +70,12 @@ static NVQ load(CommonHeader header, RandomAccessReader reader) {
}

@Override
public void writeHeader(DataOutput out) throws IOException {
public void writeHeader(IndexWriter out) throws IOException {
nvq.write(out, OnDiskGraphIndex.CURRENT_VERSION);
}

@Override
public void writeInline(DataOutput out, Feature.State state_) throws IOException {
public void writeInline(IndexWriter out, Feature.State state_) throws IOException {
var state = (NVQ.State) state_;
state.vector.write(out);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,13 @@

package io.github.jbellis.jvector.graph.disk.feature;

import java.io.DataOutput;
import io.github.jbellis.jvector.disk.IndexWriter;

import java.io.IOException;

public interface SeparatedFeature extends Feature {
void setOffset(long offset);
long getOffset();

void writeSeparately(DataOutput out, State state) throws IOException;
void writeSeparately(IndexWriter out, State state) throws IOException;
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

package io.github.jbellis.jvector.graph.disk.feature;

import io.github.jbellis.jvector.disk.IndexWriter;
import io.github.jbellis.jvector.disk.RandomAccessReader;
import io.github.jbellis.jvector.graph.disk.CommonHeader;
import io.github.jbellis.jvector.graph.disk.OnDiskGraphIndex;
Expand All @@ -25,7 +26,6 @@
import io.github.jbellis.jvector.vector.VectorSimilarityFunction;
import io.github.jbellis.jvector.vector.types.VectorFloat;

import java.io.DataOutput;
import java.io.IOException;
import java.io.UncheckedIOException;

Expand Down Expand Up @@ -68,13 +68,13 @@ public int featureSize() {
}

@Override
public void writeHeader(DataOutput out) throws IOException {
public void writeHeader(IndexWriter out) throws IOException {
nvq.write(out, OnDiskGraphIndex.CURRENT_VERSION);
out.writeLong(offset);
}

@Override
public void writeSeparately(DataOutput out, State state_) throws IOException {
public void writeSeparately(IndexWriter out, State state_) throws IOException {
var state = (NVQ.State) state_;
if (state.vector != null) {
state.vector.write(out);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@

package io.github.jbellis.jvector.graph.disk.feature;

import io.github.jbellis.jvector.disk.IndexWriter;
import io.github.jbellis.jvector.disk.RandomAccessReader;
import io.github.jbellis.jvector.graph.disk.CommonHeader;
import io.github.jbellis.jvector.vector.VectorizationProvider;
import io.github.jbellis.jvector.vector.types.VectorTypeSupport;

import java.io.DataOutput;
import java.io.IOException;
import java.io.UncheckedIOException;

Expand Down Expand Up @@ -61,12 +61,12 @@ public int featureSize() {
}

@Override
public void writeHeader(DataOutput out) throws IOException {
public void writeHeader(IndexWriter out) throws IOException {
out.writeLong(offset);
}

@Override
public void writeSeparately(DataOutput out, State state_) throws IOException {
public void writeSeparately(IndexWriter out, State state_) throws IOException {
var state = (InlineVectors.State) state_;
if (state.vector != null) {
vectorTypeSupport.writeFloatVector(out, state.vector);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,14 @@

package io.github.jbellis.jvector.quantization;

import io.github.jbellis.jvector.disk.IndexWriter;
import io.github.jbellis.jvector.disk.RandomAccessReader;
import io.github.jbellis.jvector.graph.similarity.ScoreFunction;
import io.github.jbellis.jvector.util.RamUsageEstimator;
import io.github.jbellis.jvector.vector.VectorSimilarityFunction;
import io.github.jbellis.jvector.vector.VectorUtil;
import io.github.jbellis.jvector.vector.types.VectorFloat;

import java.io.DataOutput;
import java.io.IOException;
import java.util.Arrays;
import java.util.Objects;
Expand All @@ -37,7 +37,7 @@ protected BQVectors(BinaryQuantization bq) {
}

@Override
public void write(DataOutput out, int version) throws IOException {
public void write(IndexWriter out, int version) throws IOException {
// BQ centering data
bq.write(out, version);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,13 @@

package io.github.jbellis.jvector.quantization;

import io.github.jbellis.jvector.disk.IndexWriter;
import io.github.jbellis.jvector.disk.RandomAccessReader;
import io.github.jbellis.jvector.graph.RandomAccessVectorValues;
import io.github.jbellis.jvector.vector.VectorizationProvider;
import io.github.jbellis.jvector.vector.types.VectorFloat;
import io.github.jbellis.jvector.vector.types.VectorTypeSupport;

import java.io.DataOutput;
import java.io.IOException;
import java.util.Objects;
import java.util.concurrent.ForkJoinPool;
Expand Down Expand Up @@ -121,7 +121,7 @@ public int compressedVectorSize() {
}

@Override
public void write(DataOutput out, int version) throws IOException {
public void write(IndexWriter out, int version) throws IOException {
out.writeInt(dimension);
// We used to record the center of the dataset but this actually degrades performance.
// Write a zero vector to maintain compatibility.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,27 +16,27 @@

package io.github.jbellis.jvector.quantization;

import io.github.jbellis.jvector.disk.IndexWriter;
import io.github.jbellis.jvector.graph.disk.OnDiskGraphIndex;
import io.github.jbellis.jvector.graph.similarity.ScoreFunction;
import io.github.jbellis.jvector.util.Accountable;
import io.github.jbellis.jvector.vector.VectorSimilarityFunction;
import io.github.jbellis.jvector.vector.types.VectorFloat;

import java.io.DataOutput;
import java.io.IOException;

public interface CompressedVectors extends Accountable {
/**
* Write the compressed vectors to the given DataOutput
* @param out the DataOutput to write to
* Write the compressed vectors to the given IndexWriter
* @param out the IndexWriter to write to
* @param version the serialization version. versions 2 and 3 are supported
*/
void write(DataOutput out, int version) throws IOException;
void write(IndexWriter out, int version) throws IOException;

/**
* Write the compressed vectors to the given DataOutput at the current serialization version
* Write the compressed vectors to the given IndexWriter at the current serialization version
*/
default void write(DataOutput out) throws IOException {
default void write(IndexWriter out) throws IOException {
write(out, OnDiskGraphIndex.CURRENT_VERSION);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,13 @@

package io.github.jbellis.jvector.quantization;

import io.github.jbellis.jvector.disk.IndexWriter;
import io.github.jbellis.jvector.disk.RandomAccessReader;
import io.github.jbellis.jvector.graph.similarity.ScoreFunction;
import io.github.jbellis.jvector.util.RamUsageEstimator;
import io.github.jbellis.jvector.vector.VectorSimilarityFunction;
import io.github.jbellis.jvector.vector.types.VectorFloat;

import java.io.DataOutput;
import java.io.IOException;
import java.util.Arrays;
import java.util.Objects;
Expand All @@ -48,7 +48,7 @@ public int count() {
}

@Override
public void write(DataOutput out, int version) throws IOException
public void write(IndexWriter out, int version) throws IOException
{
// serializing NVQ at the given version
nvq.write(out, version);
Expand Down
Loading