diff --git a/site/content/arangodb/3.12/develop/http-api/indexes/inverted.md b/site/content/arangodb/3.12/develop/http-api/indexes/inverted.md
index d2c5939c25..97fdd8bef8 100644
--- a/site/content/arangodb/3.12/develop/http-api/indexes/inverted.md
+++ b/site/content/arangodb/3.12/develop/http-api/indexes/inverted.md
@@ -567,6 +567,8 @@ paths:
default: tier
segmentsBytesFloor:
description: |
+ This option is only available up to v3.12.6:
+
Defines the value (in bytes) to treat all smaller segments as equal for
consolidation selection.
type: integer
@@ -578,21 +580,74 @@ paths:
default: 8589934592
segmentsMax:
description: |
+ This option is only available up to v3.12.6:
+
The maximum number of segments that are evaluated as candidates for
consolidation.
type: integer
default: 200
segmentsMin:
description: |
+ This option is only available up to v3.12.6:
+
The minimum number of segments that are evaluated as candidates for
consolidation.
type: integer
default: 50
minScore:
description: |
+ This option is only available up to v3.12.6:
+
Filter out consolidation candidates with a score less than this.
type: integer
default: 0
+ maxSkewThreshold:
+ description: |
+ This option is available from v3.12.7 onward:
+
+ Merge a subset of segments where the ratio of the largest segment size
+ to the combined segment size is within this threshold. Increasing the
+ threshold leads to fewer segment files and thus a potentially higher
+ read performance and fewer file descriptors but at the expense of more
+ frequent consolidations and thus higher write load.
+
+ The skew describes how much segment files vary in size. It is a number
+ between `0.0` and `1.0` and calculated by dividing the largest file size
+ of a set of segment files by the total size.
+
+ Multiple combinations of candidate segments are checked and the one with
+ the lowest skew value is selected for consolidation. This rather selects
+ many than few segments, but the new merged segment will be below the
+ configured `segmentsBytesMax`. The skew threshold prevents unnecessary
+ consolidation of e.g. a big segment file with a very small one, where the
+ cost of writing a merged segment is higher than the gain in read performance.
+ type: number
+ minimum: 0.0
+ maximum: 1.0
+ default: 0.4
+ minDeletionRatio:
+ description: |
+ This option is available from v3.12.7 onward:
+
+ Clean up segments where the ratio of deleted documents is at least
+ this high. Decreasing the minimum ratio leads to earlier consolidation
+ of segments with many deleted documents and thus reclamation of
+ disk space but causes a higher write load.
+
+ The deletion ratio is the percentage of deleted documents across one
+ or more segment files. It is a number between `0.0` and `1.0` and
+ calculated by dividing the number of deleted documents by the total
+ number of documents.
+
+ The segment files with the highest individual deletion ratio are
+ the candidates. As many candidates as possible are selected for
+ consolidation (in order of decreasing ratio), but the overall ratio
+ has to be at least `minDeletionRatio` and the new segment with the
+ active documents needs to be below the configured `segmentsBytesMax`.
+ type: number
+ minimum: 0.0
+ maximum: 1.0
+ default: 0.5
writebufferIdle:
description: |
Maximum number of writers (segments) cached in the pool
diff --git a/site/content/arangodb/3.12/develop/http-api/views/arangosearch-views.md b/site/content/arangodb/3.12/develop/http-api/views/arangosearch-views.md
index 2f33e5c772..825adc789a 100644
--- a/site/content/arangodb/3.12/develop/http-api/views/arangosearch-views.md
+++ b/site/content/arangodb/3.12/develop/http-api/views/arangosearch-views.md
@@ -348,6 +348,8 @@ paths:
maximum: 1.0
segmentsBytesFloor:
description: |
+ This option is only available up to v3.12.6:
+
Defines the value (in bytes) to treat all smaller segments
as equal for consolidation selection.
type: integer
@@ -359,21 +361,74 @@ paths:
default: 8589934592
segmentsMax:
description: |
+ This option is only available up to v3.12.6:
+
The maximum number of segments that are evaluated as
candidates for consolidation.
type: integer
default: 200
segmentsMin:
description: |
+ This option is only available up to v3.12.6:
+
The minimum number of segments that are
evaluated as candidates for consolidation
type: integer
default: 50
minScore:
description: |
+ This option is only available up to v3.12.6:
+
Filter out consolidation candidates with a score less than this.
type: integer
default: 0
+ maxSkewThreshold:
+ description: |
+ This option is available from v3.12.7 onward:
+
+ Merge a subset of segments where the ratio of the largest segment size
+ to the combined segment size is within this threshold. Increasing the
+ threshold leads to fewer segment files and thus a potentially higher
+ read performance and fewer file descriptors but at the expense of more
+ frequent consolidations and thus higher write load.
+
+ The skew describes how much segment files vary in size. It is a number
+ between `0.0` and `1.0` and calculated by dividing the largest file size
+ of a set of segment files by the total size.
+
+ Multiple combinations of candidate segments are checked and the one with
+ the lowest skew value is selected for consolidation. This rather selects
+ many than few segments, but the new merged segment will be below the
+ configured `segmentsBytesMax`. The skew threshold prevents unnecessary
+ consolidation of e.g. a big segment file with a very small one, where the
+ cost of writing a merged segment is higher than the gain in read performance.
+ type: number
+ minimum: 0.0
+ maximum: 1.0
+ default: 0.4
+ minDeletionRatio:
+ description: |
+ This option is available from v3.12.7 onward:
+
+ Clean up segments where the ratio of deleted documents is at least
+ this high. Decreasing the minimum ratio leads to earlier consolidation
+ of segments with many deleted documents and thus reclamation of
+ disk space but causes a higher write load.
+
+ The deletion ratio is the percentage of deleted documents across one
+ or more segment files. It is a number between `0.0` and `1.0` and
+ calculated by dividing the number of deleted documents by the total
+ number of documents.
+
+ The segment files with the highest individual deletion ratio are
+ the candidates. As many candidates as possible are selected for
+ consolidation (in order of decreasing ratio), but the overall ratio
+ has to be at least `minDeletionRatio` and the new segment with the
+ active documents needs to be below the configured `segmentsBytesMax`.
+ type: number
+ minimum: 0.0
+ maximum: 1.0
+ default: 0.5
writebufferIdle:
description: |
Maximum number of writers (segments) cached in the pool
@@ -569,6 +624,8 @@ paths:
maximum: 1.0
segmentsBytesFloor:
description: |
+ This option is only available up to v3.12.6:
+
Defines the value (in bytes) to treat all smaller segments
as equal for consolidation selection.
type: integer
@@ -578,18 +635,69 @@ paths:
type: integer
segmentsMax:
description: |
+ This option is only available up to v3.12.6:
+
The maximum number of segments that are evaluated as
candidates for consolidation.
type: integer
segmentsMin:
description: |
+ This option is only available up to v3.12.6:
+
The minimum number of segments that are
evaluated as candidates for consolidation
type: integer
minScore:
description: |
+ This option is only available up to v3.12.6:
+
Filter out consolidation candidates with a score less than this.
type: integer
+ maxSkewThreshold:
+ description: |
+ This option is available from v3.12.7 onward:
+
+ Merge a subset of segments where the ratio of the largest segment size
+ to the combined segment size is within this threshold. Increasing the
+ threshold leads to fewer segment files and thus a potentially higher
+ read performance and fewer file descriptors but at the expense of more
+ frequent consolidations and thus higher write load.
+
+ The skew describes how much segment files vary in size. It is a number
+ between `0.0` and `1.0` and calculated by dividing the largest file size
+ of a set of segment files by the total size.
+
+ Multiple combinations of candidate segments are checked and the one with
+ the lowest skew value is selected for consolidation. This rather selects
+ many than few segments, but the new merged segment will be below the
+ configured `segmentsBytesMax`. The skew threshold prevents unnecessary
+ consolidation of e.g. a big segment file with a very small one, where the
+ cost of writing a merged segment is higher than the gain in read performance.
+ type: number
+ minimum: 0.0
+ maximum: 1.0
+ minDeletionRatio:
+ description: |
+ This option is available from v3.12.7 onward:
+
+ Clean up segments where the ratio of deleted documents is at least
+ this high. Decreasing the minimum ratio leads to earlier consolidation
+ of segments with many deleted documents and thus reclamation of
+ disk space but causes a higher write load.
+
+ The deletion ratio is the percentage of deleted documents across one
+ or more segment files. It is a number between `0.0` and `1.0` and
+ calculated by dividing the number of deleted documents by the total
+ number of documents.
+
+ The segment files with the highest individual deletion ratio are
+ the candidates. As many candidates as possible are selected for
+ consolidation (in order of decreasing ratio), but the overall ratio
+ has to be at least `minDeletionRatio` and the new segment with the
+ active documents needs to be below the configured `segmentsBytesMax`.
+ type: number
+ minimum: 0.0
+ maximum: 1.0
writebufferIdle:
description: |
Maximum number of writers (segments) cached in the pool (`0` = disabled).
@@ -1041,6 +1149,8 @@ paths:
maximum: 1.0
segmentsBytesFloor:
description: |
+ This option is only available up to v3.12.6:
+
Defines the value (in bytes) to treat all smaller segments
as equal for consolidation selection.
type: integer
@@ -1050,18 +1160,69 @@ paths:
type: integer
segmentsMax:
description: |
+ This option is only available up to v3.12.6:
+
The maximum number of segments that are evaluated as
candidates for consolidation.
type: integer
segmentsMin:
description: |
+ This option is only available up to v3.12.6:
+
The minimum number of segments that are
evaluated as candidates for consolidation
type: integer
minScore:
description: |
+ This option is only available up to v3.12.6:
+
Filter out consolidation candidates with a score less than this.
type: integer
+ maxSkewThreshold:
+ description: |
+ This option is available from v3.12.7 onward:
+
+ Merge a subset of segments where the ratio of the largest segment size
+ to the combined segment size is within this threshold. Increasing the
+ threshold leads to fewer segment files and thus a potentially higher
+ read performance and fewer file descriptors but at the expense of more
+ frequent consolidations and thus higher write load.
+
+ The skew describes how much segment files vary in size. It is a number
+ between `0.0` and `1.0` and calculated by dividing the largest file size
+ of a set of segment files by the total size.
+
+ Multiple combinations of candidate segments are checked and the one with
+ the lowest skew value is selected for consolidation. This rather selects
+ many than few segments, but the new merged segment will be below the
+ configured `segmentsBytesMax`. The skew threshold prevents unnecessary
+ consolidation of e.g. a big segment file with a very small one, where the
+ cost of writing a merged segment is higher than the gain in read performance.
+ type: number
+ minimum: 0.0
+ maximum: 1.0
+ minDeletionRatio:
+ description: |
+ This option is available from v3.12.7 onward:
+
+ Clean up segments where the ratio of deleted documents is at least
+ this high. Decreasing the minimum ratio leads to earlier consolidation
+ of segments with many deleted documents and thus reclamation of
+ disk space but causes a higher write load.
+
+ The deletion ratio is the percentage of deleted documents across one
+ or more segment files. It is a number between `0.0` and `1.0` and
+ calculated by dividing the number of deleted documents by the total
+ number of documents.
+
+ The segment files with the highest individual deletion ratio are
+ the candidates. As many candidates as possible are selected for
+ consolidation (in order of decreasing ratio), but the overall ratio
+ has to be at least `minDeletionRatio` and the new segment with the
+ active documents needs to be below the configured `segmentsBytesMax`.
+ type: number
+ minimum: 0.0
+ maximum: 1.0
writebufferIdle:
description: |
Maximum number of writers (segments) cached in the pool (`0` = disabled).
@@ -1444,6 +1605,8 @@ paths:
maximum: 1.0
segmentsBytesFloor:
description: |
+ This option is only available up to v3.12.6:
+
Defines the value (in bytes) to treat all smaller segments
as equal for consolidation selection.
type: integer
@@ -1455,21 +1618,74 @@ paths:
default: 8589934592
segmentsMax:
description: |
+ This option is only available up to v3.12.6:
+
The maximum number of segments that are evaluated as
candidates for consolidation.
type: integer
default: 200
segmentsMin:
description: |
+ This option is only available up to v3.12.6:
+
The minimum number of segments that are
evaluated as candidates for consolidation
type: integer
default: 50
minScore:
description: |
+ This option is only available up to v3.12.6:
+
Filter out consolidation candidates with a score less than this.
type: integer
default: 0
+ maxSkewThreshold:
+ description: |
+ This option is available from v3.12.7 onward:
+
+ Merge a subset of segments where the ratio of the largest segment size
+ to the combined segment size is within this threshold. Increasing the
+ threshold leads to fewer segment files and thus a potentially higher
+ read performance and fewer file descriptors but at the expense of more
+ frequent consolidations and thus higher write load.
+
+ The skew describes how much segment files vary in size. It is a number
+ between `0.0` and `1.0` and calculated by dividing the largest file size
+ of a set of segment files by the total size.
+
+ Multiple combinations of candidate segments are checked and the one with
+ the lowest skew value is selected for consolidation. This rather selects
+ many than few segments, but the new merged segment will be below the
+ configured `segmentsBytesMax`. The skew threshold prevents unnecessary
+ consolidation of e.g. a big segment file with a very small one, where the
+ cost of writing a merged segment is higher than the gain in read performance.
+ type: number
+ minimum: 0.0
+ maximum: 1.0
+ default: 0.4
+ minDeletionRatio:
+ description: |
+ This option is available from v3.12.7 onward:
+
+ Clean up segments where the ratio of deleted documents is at least
+ this high. Decreasing the minimum ratio leads to earlier consolidation
+ of segments with many deleted documents and thus reclamation of
+ disk space but causes a higher write load.
+
+ The deletion ratio is the percentage of deleted documents across one
+ or more segment files. It is a number between `0.0` and `1.0` and
+ calculated by dividing the number of deleted documents by the total
+ number of documents.
+
+ The segment files with the highest individual deletion ratio are
+ the candidates. As many candidates as possible are selected for
+ consolidation (in order of decreasing ratio), but the overall ratio
+ has to be at least `minDeletionRatio` and the new segment with the
+ active documents needs to be below the configured `segmentsBytesMax`.
+ type: number
+ minimum: 0.0
+ maximum: 1.0
+ default: 0.5
responses:
'200':
description: |
@@ -1643,6 +1859,8 @@ paths:
maximum: 1.0
segmentsBytesFloor:
description: |
+ This option is only available up to v3.12.6:
+
Defines the value (in bytes) to treat all smaller segments
as equal for consolidation selection.
type: integer
@@ -1652,18 +1870,69 @@ paths:
type: integer
segmentsMax:
description: |
+ This option is only available up to v3.12.6:
+
The maximum number of segments that are evaluated as
candidates for consolidation.
type: integer
segmentsMin:
description: |
+ This option is only available up to v3.12.6:
+
The minimum number of segments that are
evaluated as candidates for consolidation
type: integer
minScore:
description: |
+ This option is only available up to v3.12.6:
+
Filter out consolidation candidates with a score less than this.
type: integer
+ maxSkewThreshold:
+ description: |
+ This option is available from v3.12.7 onward:
+
+ Merge a subset of segments where the ratio of the largest segment size
+ to the combined segment size is within this threshold. Increasing the
+ threshold leads to fewer segment files and thus a potentially higher
+ read performance and fewer file descriptors but at the expense of more
+ frequent consolidations and thus higher write load.
+
+ The skew describes how much segment files vary in size. It is a number
+ between `0.0` and `1.0` and calculated by dividing the largest file size
+ of a set of segment files by the total size.
+
+ Multiple combinations of candidate segments are checked and the one with
+ the lowest skew value is selected for consolidation. This rather selects
+ many than few segments, but the new merged segment will be below the
+ configured `segmentsBytesMax`. The skew threshold prevents unnecessary
+ consolidation of e.g. a big segment file with a very small one, where the
+ cost of writing a merged segment is higher than the gain in read performance.
+ type: number
+ minimum: 0.0
+ maximum: 1.0
+ minDeletionRatio:
+ description: |
+ This option is available from v3.12.7 onward:
+
+ Clean up segments where the ratio of deleted documents is at least
+ this high. Decreasing the minimum ratio leads to earlier consolidation
+ of segments with many deleted documents and thus reclamation of
+ disk space but causes a higher write load.
+
+ The deletion ratio is the percentage of deleted documents across one
+ or more segment files. It is a number between `0.0` and `1.0` and
+ calculated by dividing the number of deleted documents by the total
+ number of documents.
+
+ The segment files with the highest individual deletion ratio are
+ the candidates. As many candidates as possible are selected for
+ consolidation (in order of decreasing ratio), but the overall ratio
+ has to be at least `minDeletionRatio` and the new segment with the
+ active documents needs to be below the configured `segmentsBytesMax`.
+ type: number
+ minimum: 0.0
+ maximum: 1.0
writebufferIdle:
description: |
Maximum number of writers (segments) cached in the pool (`0` = disabled).
@@ -1952,6 +2221,8 @@ paths:
maximum: 1.0
segmentsBytesFloor:
description: |
+ This option is only available up to v3.12.6:
+
Defines the value (in bytes) to treat all smaller segments
as equal for consolidation selection.
type: integer
@@ -1963,21 +2234,74 @@ paths:
default: 8589934592
segmentsMax:
description: |
+ This option is only available up to v3.12.6:
+
The maximum number of segments that are evaluated as
candidates for consolidation.
type: integer
default: 200
segmentsMin:
description: |
+ This option is only available up to v3.12.6:
+
The minimum number of segments that are
evaluated as candidates for consolidation
type: integer
default: 50
minScore:
description: |
+ This option is only available up to v3.12.6:
+
Filter out consolidation candidates with a score less than this.
type: integer
default: 0
+ maxSkewThreshold:
+ description: |
+ This option is available from v3.12.7 onward:
+
+ Merge a subset of segments where the ratio of the largest segment size
+ to the combined segment size is within this threshold. Increasing the
+ threshold leads to fewer segment files and thus a potentially higher
+ read performance and fewer file descriptors but at the expense of more
+ frequent consolidations and thus higher write load.
+
+ The skew describes how much segment files vary in size. It is a number
+ between `0.0` and `1.0` and calculated by dividing the largest file size
+ of a set of segment files by the total size.
+
+ Multiple combinations of candidate segments are checked and the one with
+ the lowest skew value is selected for consolidation. This rather selects
+ many than few segments, but the new merged segment will be below the
+ configured `segmentsBytesMax`. The skew threshold prevents unnecessary
+ consolidation of e.g. a big segment file with a very small one, where the
+ cost of writing a merged segment is higher than the gain in read performance.
+ type: number
+ minimum: 0.0
+ maximum: 1.0
+ default: 0.4
+ minDeletionRatio:
+ description: |
+ This option is available from v3.12.7 onward:
+
+ Clean up segments where the ratio of deleted documents is at least
+ this high. Decreasing the minimum ratio leads to earlier consolidation
+ of segments with many deleted documents and thus reclamation of
+ disk space but causes a higher write load.
+
+ The deletion ratio is the percentage of deleted documents across one
+ or more segment files. It is a number between `0.0` and `1.0` and
+ calculated by dividing the number of deleted documents by the total
+ number of documents.
+
+ The segment files with the highest individual deletion ratio are
+ the candidates. As many candidates as possible are selected for
+ consolidation (in order of decreasing ratio), but the overall ratio
+ has to be at least `minDeletionRatio` and the new segment with the
+ active documents needs to be below the configured `segmentsBytesMax`.
+ type: number
+ minimum: 0.0
+ maximum: 1.0
+ default: 0.5
responses:
'200':
description: |
@@ -2151,6 +2475,8 @@ paths:
maximum: 1.0
segmentsBytesFloor:
description: |
+ This option is only available up to v3.12.6:
+
Defines the value (in bytes) to treat all smaller segments
as equal for consolidation selection.
type: integer
@@ -2160,18 +2486,69 @@ paths:
type: integer
segmentsMax:
description: |
+ This option is only available up to v3.12.6:
+
The maximum number of segments that are evaluated as
candidates for consolidation.
type: integer
segmentsMin:
description: |
+ This option is only available up to v3.12.6:
+
The minimum number of segments that are
evaluated as candidates for consolidation
type: integer
minScore:
description: |
+ This option is only available up to v3.12.6:
+
Filter out consolidation candidates with a score less than this.
type: integer
+ maxSkewThreshold:
+ description: |
+ This option is available from v3.12.7 onward:
+
+ Merge a subset of segments where the ratio of the largest segment size
+ to the combined segment size is within this threshold. Increasing the
+ threshold leads to fewer segment files and thus a potentially higher
+ read performance and fewer file descriptors but at the expense of more
+ frequent consolidations and thus higher write load.
+
+ The skew describes how much segment files vary in size. It is a number
+ between `0.0` and `1.0` and calculated by dividing the largest file size
+ of a set of segment files by the total size.
+
+ Multiple combinations of candidate segments are checked and the one with
+ the lowest skew value is selected for consolidation. This rather selects
+ many than few segments, but the new merged segment will be below the
+ configured `segmentsBytesMax`. The skew threshold prevents unnecessary
+ consolidation of e.g. a big segment file with a very small one, where the
+ cost of writing a merged segment is higher than the gain in read performance.
+ type: number
+ minimum: 0.0
+ maximum: 1.0
+ minDeletionRatio:
+ description: |
+ This option is available from v3.12.7 onward:
+
+ Clean up segments where the ratio of deleted documents is at least
+ this high. Decreasing the minimum ratio leads to earlier consolidation
+ of segments with many deleted documents and thus reclamation of
+ disk space but causes a higher write load.
+
+ The deletion ratio is the percentage of deleted documents across one
+ or more segment files. It is a number between `0.0` and `1.0` and
+ calculated by dividing the number of deleted documents by the total
+ number of documents.
+
+ The segment files with the highest individual deletion ratio are
+ the candidates. As many candidates as possible are selected for
+ consolidation (in order of decreasing ratio), but the overall ratio
+ has to be at least `minDeletionRatio` and the new segment with the
+ active documents needs to be below the configured `segmentsBytesMax`.
+ type: number
+ minimum: 0.0
+ maximum: 1.0
writebufferIdle:
description: |
Maximum number of writers (segments) cached in the pool (`0` = disabled).
@@ -2518,6 +2895,8 @@ paths:
maximum: 1.0
segmentsBytesFloor:
description: |
+ This option is only available up to v3.12.6:
+
Defines the value (in bytes) to treat all smaller segments
as equal for consolidation selection.
type: integer
@@ -2527,18 +2906,69 @@ paths:
type: integer
segmentsMax:
description: |
+ This option is only available up to v3.12.6:
+
The maximum number of segments that are evaluated as
candidates for consolidation.
type: integer
segmentsMin:
description: |
+ This option is only available up to v3.12.6:
+
The minimum number of segments that are
evaluated as candidates for consolidation
type: integer
minScore:
description: |
+ This option is only available up to v3.12.6:
+
Filter out consolidation candidates with a score less than this.
type: integer
+ maxSkewThreshold:
+ description: |
+ This option is available from v3.12.7 onward:
+
+ Merge a subset of segments where the ratio of the largest segment size
+ to the combined segment size is within this threshold. Increasing the
+ threshold leads to fewer segment files and thus a potentially higher
+ read performance and fewer file descriptors but at the expense of more
+ frequent consolidations and thus higher write load.
+
+ The skew describes how much segment files vary in size. It is a number
+ between `0.0` and `1.0` and calculated by dividing the largest file size
+ of a set of segment files by the total size.
+
+ Multiple combinations of candidate segments are checked and the one with
+ the lowest skew value is selected for consolidation. This rather selects
+ many than few segments, but the new merged segment will be below the
+ configured `segmentsBytesMax`. The skew threshold prevents unnecessary
+ consolidation of e.g. a big segment file with a very small one, where the
+ cost of writing a merged segment is higher than the gain in read performance.
+ type: number
+ minimum: 0.0
+ maximum: 1.0
+ minDeletionRatio:
+ description: |
+ This option is available from v3.12.7 onward:
+
+ Clean up segments where the ratio of deleted documents is at least
+ this high. Decreasing the minimum ratio leads to earlier consolidation
+ of segments with many deleted documents and thus reclamation of
+ disk space but causes a higher write load.
+
+ The deletion ratio is the percentage of deleted documents across one
+ or more segment files. It is a number between `0.0` and `1.0` and
+ calculated by dividing the number of deleted documents by the total
+ number of documents.
+
+ The segment files with the highest individual deletion ratio are
+ the candidates. As many candidates as possible are selected for
+ consolidation (in order of decreasing ratio), but the overall ratio
+ has to be at least `minDeletionRatio` and the new segment with the
+ active documents needs to be below the configured `segmentsBytesMax`.
+ type: number
+ minimum: 0.0
+ maximum: 1.0
writebufferIdle:
description: |
Maximum number of writers (segments) cached in the pool (`0` = disabled).
diff --git a/site/content/arangodb/3.12/indexes-and-search/arangosearch/arangosearch-views-reference.md b/site/content/arangodb/3.12/indexes-and-search/arangosearch/arangosearch-views-reference.md
index 036758127f..e9d50f7b80 100644
--- a/site/content/arangodb/3.12/indexes-and-search/arangosearch/arangosearch-views-reference.md
+++ b/site/content/arangodb/3.12/indexes-and-search/arangosearch/arangosearch-views-reference.md
@@ -485,10 +485,14 @@ is used by these writers (in terms of "writers pool") one can use
- **segmentsMin** (_optional_; type: `integer`; default: `50`)
+ This option is only available up to v3.12.6:
+
The minimum number of segments that are evaluated as candidates for consolidation.
- **segmentsMax** (_optional_; type: `integer`; default: `200`)
+ This option is only available up to v3.12.6:
+
The maximum number of segments that are evaluated as candidates for consolidation.
- **segmentsBytesMax** (_optional_; type: `integer`; default: `8589934592`)
@@ -497,9 +501,54 @@ is used by these writers (in terms of "writers pool") one can use
- **segmentsBytesFloor** (_optional_; type: `integer`; default: `25165824`)
+ This option is only available up to v3.12.6:
+
Defines the value (in bytes) to treat all smaller segments as equal for consolidation
selection.
- **minScore** (_optional_; type: `integer`; default: `0`)
+ This option is only available up to v3.12.6:
+
Filter out consolidation candidates with a score less than this.
+
+ - **maxSkewThreshold** (_optional_; type: `number`; default: `0.4`)
+
+ This option is available from v3.12.7 onward:
+
+ Merge a subset of segments where the ratio of the largest segment size
+ to the combined segment size is within this threshold. Increasing the
+ threshold leads to fewer segment files and thus a potentially higher
+ read performance and fewer file descriptors but at the expense of more
+ frequent consolidations and thus higher write load.
+
+ The skew describes how much segment files vary in size. It is a number
+ between `0.0` and `1.0` and calculated by dividing the largest file size
+ of a set of segment files by the total size.
+
+ Multiple combinations of candidate segments are checked and the one with
+ the lowest skew value is selected for consolidation. This rather selects
+ many than few segments, but the new merged segment will be below the
+ configured `segmentsBytesMax`. The skew threshold prevents unnecessary
+ consolidation of e.g. a big segment file with a very small one, where the
+ cost of writing a merged segment is higher than the gain in read performance.
+
+ - **minDeletionRatio** (_optional_; type: `number`; default: `0.5`)
+
+ This option is available from v3.12.7 onward:
+
+ Clean up segments where the ratio of deleted documents is at least
+ this high. Decreasing the minimum ratio leads to earlier consolidation
+ of segments with many deleted documents and thus reclamation of
+ disk space but causes a higher write load.
+
+ The deletion ratio is the percentage of deleted documents across one
+ or more segment files. It is a number between `0.0` and `1.0` and
+ calculated by dividing the number of deleted documents by the total
+ number of documents.
+
+ The segment files with the highest individual deletion ratio are
+ the candidates. As many candidates as possible are selected for
+ consolidation (in order of decreasing ratio), but the overall ratio
+ has to be at least `minDeletionRatio` and the new segment with the
+ active documents needs to be below the configured `segmentsBytesMax`.
diff --git a/site/content/arangodb/3.12/release-notes/version-3.12/api-changes-in-3-12.md b/site/content/arangodb/3.12/release-notes/version-3.12/api-changes-in-3-12.md
index 8cc010cc0a..e3a588dbca 100644
--- a/site/content/arangodb/3.12/release-notes/version-3.12/api-changes-in-3-12.md
+++ b/site/content/arangodb/3.12/release-notes/version-3.12/api-changes-in-3-12.md
@@ -363,6 +363,25 @@ By consolidating less often and with more data, less file descriptors are used.
- `segmentsBytesMax` increased from `5368709120` (5 GiB) to `8589934592` (8 GiB)
- `segmentsBytesFloor` increased from `2097152` (2 MiB) to `25165824` (24 MiB)
+##### Added and removed consolidation options for `arangosearch` Views
+
+Introduced in: v3.12.7
+
+The following options for consolidating `arangosearch` Views have been removed
+and are now ignored when specified in a request:
+
+- `consolidationPolicy` (with `type` set to `tier`):
+ - `segmentsMin`
+ - `segmentsMax`
+ - `segmentsBytesFloor`
+ - `minScore`
+
+The following new options have been added:
+
+- `consolidationPolicy` (with `type` set to `tier`):
+ - `maxSkewThreshold` (number in range `[0.0, 1.0]`, default: `0.4`)
+ - `minDeletionRatio` (number in range `[0.0, 1.0]`, default: `0.5`)
+
#### Document API
The following endpoints accept a new `versionAttribute` query parameter that adds
@@ -501,6 +520,25 @@ By consolidating less often and with more data, less file descriptors are used.
- `segmentsBytesMax` increased from `5368709120` (5 GiB) to `8589934592` (8 GiB)
- `segmentsBytesFloor` increased from `2097152` (2 MiB) to `25165824` (24 MiB)
+##### Added and removed consolidation options for inverted indexes
+
+Introduced in: v3.12.7
+
+The following options for consolidating inverted indexes have been removed
+and are now ignored when specified in a request:
+
+- `consolidationPolicy` (with `type` set to `tier`):
+ - `segmentsMin`
+ - `segmentsMax`
+ - `segmentsBytesFloor`
+ - `minScore`
+
+The following new options have been added:
+
+- `consolidationPolicy` (with `type` set to `tier`):
+ - `maxSkewThreshold` (number in range `[0.0, 1.0]`, default: `0.4`)
+ - `minDeletionRatio` (number in range `[0.0, 1.0]`, default: `0.5`)
+
#### Optimizer rule descriptions
Introduced in: v3.10.9, v3.11.2
diff --git a/site/content/arangodb/3.12/release-notes/version-3.12/incompatible-changes-in-3-12.md b/site/content/arangodb/3.12/release-notes/version-3.12/incompatible-changes-in-3-12.md
index 959cea82cf..1fe5a31d18 100644
--- a/site/content/arangodb/3.12/release-notes/version-3.12/incompatible-changes-in-3-12.md
+++ b/site/content/arangodb/3.12/release-notes/version-3.12/incompatible-changes-in-3-12.md
@@ -994,6 +994,29 @@ more data, less file descriptors are used.
- `segmentsBytesMax` increased from `5368709120` (5 GiB) to `8589934592` (8 GiB)
- `segmentsBytesFloor` increased from `2097152` (2 MiB) to `25165824` (24 MiB)
+## Added and removed consolidation options for inverted indexes and `arangosearch` Views
+
+Introduced in: v3.12.7
+
+The following options for consolidating inverted indexes as well as
+`arangosearch` Views have been removed and are now ignored when specified in a request:
+
+- `consolidationPolicy` (with `type` set to `tier`):
+ - `segmentsMin`
+ - `segmentsMax`
+ - `segmentsBytesFloor`
+ - `minScore`
+
+The consolidation works differently now and uses the new `maxSkewThreshold` and
+`minDeletionRatio` options together with the existing `segmentsBytesMax`. If you
+previously used customized settings for the removed options, check if the default
+values of the new options are acceptable or if you need to tune them according to
+your workload.
+
+For details, see:
+- [HTTP interface for inverted indexes](../../develop/http-api/indexes/inverted.md)
+- [`arangosearch` View properties](../../indexes-and-search/arangosearch/arangosearch-views-reference.md#view-properties)
+
## HTTP RESTful API
### JavaScript-based traversal using `/_api/traversal` removed
diff --git a/site/content/arangodb/3.12/release-notes/version-3.12/whats-new-in-3-12.md b/site/content/arangodb/3.12/release-notes/version-3.12/whats-new-in-3-12.md
index d0d0c77010..199242cfb5 100644
--- a/site/content/arangodb/3.12/release-notes/version-3.12/whats-new-in-3-12.md
+++ b/site/content/arangodb/3.12/release-notes/version-3.12/whats-new-in-3-12.md
@@ -2452,6 +2452,38 @@ environment variable `NAME`. If there is an environment variable called `PID` or
`TEMP_BASE_DIR`, then `@PID@` or `@TEMP_BASE_DIR@` is substituted with the
value of the respective environment variable.
+### New consolidation algorithm for inverted indexes and `arangosearch` Views
+
+Introduced in: v3.12.7
+
+The `tier` consolidation policy now uses a different algorithm for merging
+and cleaning up segments. Overall, it avoids consolidating segments where the
+cost of writing the new segment is high and the gain in read performance is low
+(e.g. combining a big segment file with a very small one).
+
+The following options have been removed for inverted indexes as well as
+`arangosearch` Views because the new consolidation algorithm doesn't use them:
+
+- `consolidationPolicy` (with `type` set to `tier`):
+ - `segmentsMin`
+ - `segmentsMax`
+ - `segmentsBytesFloor`
+ - `minScore`
+
+The following new options have been added:
+
+- `consolidationPolicy` (with `type` set to `tier`):
+ - `maxSkewThreshold` (number in range `[0.0, 1.0]`, default: `0.4`)
+ - `minDeletionRatio` (number in range `[0.0, 1.0]`, default: `0.5`)
+
+If you previously used customized settings for the removed options, check if the
+default values of the new options are acceptable or if you need to tune them
+according to your workload.
+
+For details, see:
+- [HTTP interface for inverted indexes](../../develop/http-api/indexes/inverted.md)
+- [`arangosearch` View properties](../../indexes-and-search/arangosearch/arangosearch-views-reference.md#view-properties)
+
## Client tools
### Protocol aliases for endpoints
diff --git a/site/content/arangodb/4.0/develop/http-api/indexes/inverted.md b/site/content/arangodb/4.0/develop/http-api/indexes/inverted.md
index d2c5939c25..c24a636d46 100644
--- a/site/content/arangodb/4.0/develop/http-api/indexes/inverted.md
+++ b/site/content/arangodb/4.0/develop/http-api/indexes/inverted.md
@@ -565,34 +565,58 @@ paths:
document count as dictated by the customization attributes.
type: string
default: tier
- segmentsBytesFloor:
- description: |
- Defines the value (in bytes) to treat all smaller segments as equal for
- consolidation selection.
- type: integer
- default: 25165824
segmentsBytesMax:
description: |
The maximum allowed size of all consolidated segments in bytes.
type: integer
default: 8589934592
- segmentsMax:
+ maxSkewThreshold:
description: |
- The maximum number of segments that are evaluated as candidates for
- consolidation.
- type: integer
- default: 200
- segmentsMin:
- description: |
- The minimum number of segments that are evaluated as candidates for
- consolidation.
- type: integer
- default: 50
- minScore:
+ This option is available from v3.12.7 onward:
+
+ Merge a subset of segments where the ratio of the largest segment size
+ to the combined segment size is within this threshold. Increasing the
+ threshold leads to fewer segment files and thus a potentially higher
+ read performance and less file descriptors but at the expense of more
+ frequent consolidations and thus higher write load.
+
+ The skew describes how much segment files vary in size. It is a number
+ between `0.0` and `1.0` and calculated by dividing the largest file size
+ of a set of segment files by the total size.
+
+ Multiple combinations of candidate segments are checked and the one with
+ the lowest skew value is selected for consolidation. This rather selects
+ many than few segments, but the new merged segment will be below the
+ configured `segmentsBytesMax`. The skew threshold prevents unnecessary
+ consolidation of e.g. a big segment file with a very small one, where the
+ cost of writing a merged segment is higher than the gain in read performance.
+ type: number
+ minimum: 0.0
+ maximum: 1.0
+ default: 0.4
+ minDeletionRatio:
description: |
- Filter out consolidation candidates with a score less than this.
+ This option is available from v3.12.7 onward:
+
+ Clean up segments where the ratio of deleted documents is at least
+ this high. Decreasing the minimum ratio leads to earlier consolidation
+ of segments with many deleted documents and thus reclamation of
+ disk space but causes a higher write load.
+
+ The deletion ratio is the fraction of deleted documents across one
+ or more segment files. It is a number between `0.0` and `1.0` and
+ calculated by dividing the number of deleted documents by the total
+ number of documents.
+
+ The segment files with the highest individual deletion ratio are
+ the candidates. As many candidates as possible are selected for
+ consolidation (in order of decreasing ratio), but the overall ratio
+ has to be at least `minDeletionRatio` and the new segment with the
+ active documents needs to be below the configured `segmentsBytesMax`.
- type: integer
+ type: number
- default: 0
+ minimum: 0.0
+ maximum: 1.0
+ default: 0.5
writebufferIdle:
description: |
Maximum number of writers (segments) cached in the pool
diff --git a/site/content/arangodb/4.0/develop/http-api/views/arangosearch-views.md b/site/content/arangodb/4.0/develop/http-api/views/arangosearch-views.md
index 2f33e5c772..1f35fb9e88 100644
--- a/site/content/arangodb/4.0/develop/http-api/views/arangosearch-views.md
+++ b/site/content/arangodb/4.0/develop/http-api/views/arangosearch-views.md
@@ -346,34 +346,54 @@ paths:
default: 0
minimum: 0.0
maximum: 1.0
- segmentsBytesFloor:
- description: |
- Defines the value (in bytes) to treat all smaller segments
- as equal for consolidation selection.
- type: integer
- default: 25165824
segmentsBytesMax:
description: |
Maximum allowed size of all consolidated segments in bytes.
type: integer
default: 8589934592
- segmentsMax:
- description: |
- The maximum number of segments that are evaluated as
- candidates for consolidation.
- type: integer
- default: 200
- segmentsMin:
+ maxSkewThreshold:
description: |
- The minimum number of segments that are
- evaluated as candidates for consolidation
- type: integer
- default: 50
- minScore:
+ Merge a subset of segments where the ratio of the largest segment size
+ to the combined segment size is within this threshold. Increasing the
+ threshold leads to fewer segment files and thus a potentially higher
+ read performance and less file descriptors but at the expense of more
+ frequent consolidations and thus higher write load.
+
+ The skew describes how much segment files vary in size. It is a number
+ between `0.0` and `1.0` and calculated by dividing the largest file size
+ of a set of segment files by the total size.
+
+ Multiple combinations of candidate segments are checked and the one with
+ the lowest skew value is selected for consolidation. This rather selects
+ many than few segments, but the new merged segment will be below the
+ configured `segmentsBytesMax`. The skew threshold prevents unnecessary
+ consolidation of e.g. a big segment file with a very small one, where the
+ cost of writing a merged segment is higher than the gain in read performance.
+ type: number
+ minimum: 0.0
+ maximum: 1.0
+ default: 0.4
+ minDeletionRatio:
description: |
- Filter out consolidation candidates with a score less than this.
+ Clean up segments where the ratio of deleted documents is at least
+ this high. Decreasing the minimum ratio leads to earlier consolidation
+ of segments with many deleted documents and thus reclamation of
+ disk space but causes a higher write load.
+
+ The deletion ratio is the fraction of deleted documents across one
+ or more segment files. It is a number between `0.0` and `1.0` and
+ calculated by dividing the number of deleted documents by the total
+ number of documents.
+
+ The segment files with the highest individual deletion ratio are
+ the candidates. As many candidates as possible are selected for
+ consolidation (in order of decreasing ratio), but the overall ratio
+ has to be at least `minDeletionRatio` and the new segment with the
+ active documents needs to be below the configured `segmentsBytesMax`.
- type: integer
+ type: number
- default: 0
+ minimum: 0.0
+ maximum: 1.0
+ default: 0.5
writebufferIdle:
description: |
Maximum number of writers (segments) cached in the pool
@@ -567,29 +587,51 @@ paths:
type: number
minimum: 0.0
maximum: 1.0
- segmentsBytesFloor:
- description: |
- Defines the value (in bytes) to treat all smaller segments
- as equal for consolidation selection.
- type: integer
segmentsBytesMax:
description: |
Maximum allowed size of all consolidated segments in bytes.
type: integer
- segmentsMax:
- description: |
- The maximum number of segments that are evaluated as
- candidates for consolidation.
- type: integer
- segmentsMin:
+ maxSkewThreshold:
description: |
- The minimum number of segments that are
- evaluated as candidates for consolidation
- type: integer
- minScore:
+ Merge a subset of segments where the ratio of the largest segment size
+ to the combined segment size is within this threshold. Increasing the
+ threshold leads to fewer segment files and thus a potentially higher
+ read performance and less file descriptors but at the expense of more
+ frequent consolidations and thus higher write load.
+
+ The skew describes how much segment files vary in size. It is a number
+ between `0.0` and `1.0` and calculated by dividing the largest file size
+ of a set of segment files by the total size.
+
+ Multiple combinations of candidate segments are checked and the one with
+ the lowest skew value is selected for consolidation. This rather selects
+ many than few segments, but the new merged segment will be below the
+ configured `segmentsBytesMax`. The skew threshold prevents unnecessary
+ consolidation of e.g. a big segment file with a very small one, where the
+ cost of writing a merged segment is higher than the gain in read performance.
+ type: number
+ minimum: 0.0
+ maximum: 1.0
+ minDeletionRatio:
description: |
- Filter out consolidation candidates with a score less than this.
+ Clean up segments where the ratio of deleted documents is at least
+ this high. Decreasing the minimum ratio leads to earlier consolidation
+ of segments with many deleted documents and thus reclamation of
+ disk space but causes a higher write load.
+
+ The deletion ratio is the fraction of deleted documents across one
+ or more segment files. It is a number between `0.0` and `1.0` and
+ calculated by dividing the number of deleted documents by the total
+ number of documents.
+
+ The segment files with the highest individual deletion ratio are
+ the candidates. As many candidates as possible are selected for
+ consolidation (in order of decreasing ratio), but the overall ratio
+ has to be at least `minDeletionRatio` and the new segment with the
+ active documents needs to be below the configured `segmentsBytesMax`.
- type: integer
+ type: number
+ minimum: 0.0
+ maximum: 1.0
writebufferIdle:
description: |
Maximum number of writers (segments) cached in the pool (`0` = disabled).
@@ -1039,29 +1081,51 @@ paths:
type: number
minimum: 0.0
maximum: 1.0
- segmentsBytesFloor:
- description: |
- Defines the value (in bytes) to treat all smaller segments
- as equal for consolidation selection.
- type: integer
segmentsBytesMax:
description: |
Maximum allowed size of all consolidated segments in bytes.
type: integer
- segmentsMax:
- description: |
- The maximum number of segments that are evaluated as
- candidates for consolidation.
- type: integer
- segmentsMin:
+ maxSkewThreshold:
description: |
- The minimum number of segments that are
- evaluated as candidates for consolidation
- type: integer
- minScore:
+ Merge a subset of segments where the ratio of the largest segment size
+ to the combined segment size is within this threshold. Increasing the
+ threshold leads to fewer segment files and thus a potentially higher
+ read performance and less file descriptors but at the expense of more
+ frequent consolidations and thus higher write load.
+
+ The skew describes how much segment files vary in size. It is a number
+ between `0.0` and `1.0` and calculated by dividing the largest file size
+ of a set of segment files by the total size.
+
+ Multiple combinations of candidate segments are checked and the one with
+ the lowest skew value is selected for consolidation. This rather selects
+ many than few segments, but the new merged segment will be below the
+ configured `segmentsBytesMax`. The skew threshold prevents unnecessary
+ consolidation of e.g. a big segment file with a very small one, where the
+ cost of writing a merged segment is higher than the gain in read performance.
+ type: number
+ minimum: 0.0
+ maximum: 1.0
+ minDeletionRatio:
description: |
- Filter out consolidation candidates with a score less than this.
+ Clean up segments where the ratio of deleted documents is at least
+ this high. Decreasing the minimum ratio leads to earlier consolidation
+ of segments with many deleted documents and thus reclamation of
+ disk space but causes a higher write load.
+
+ The deletion ratio is the fraction of deleted documents across one
+ or more segment files. It is a number between `0.0` and `1.0` and
+ calculated by dividing the number of deleted documents by the total
+ number of documents.
+
+ The segment files with the highest individual deletion ratio are
+ the candidates. As many candidates as possible are selected for
+ consolidation (in order of decreasing ratio), but the overall ratio
+ has to be at least `minDeletionRatio` and the new segment with the
+ active documents needs to be below the configured `segmentsBytesMax`.
- type: integer
+ type: number
+ minimum: 0.0
+ maximum: 1.0
writebufferIdle:
description: |
Maximum number of writers (segments) cached in the pool (`0` = disabled).
@@ -1442,34 +1506,54 @@ paths:
default: 0
minimum: 0.0
maximum: 1.0
- segmentsBytesFloor:
- description: |
- Defines the value (in bytes) to treat all smaller segments
- as equal for consolidation selection.
- type: integer
- default: 25165824
segmentsBytesMax:
description: |
Maximum allowed size of all consolidated segments in bytes.
type: integer
default: 8589934592
- segmentsMax:
- description: |
- The maximum number of segments that are evaluated as
- candidates for consolidation.
- type: integer
- default: 200
- segmentsMin:
+ maxSkewThreshold:
description: |
- The minimum number of segments that are
- evaluated as candidates for consolidation
- type: integer
- default: 50
- minScore:
+ Merge a subset of segments where the ratio of the largest segment size
+ to the combined segment size is within this threshold. Increasing the
+ threshold leads to fewer segment files and thus a potentially higher
+ read performance and less file descriptors but at the expense of more
+ frequent consolidations and thus higher write load.
+
+ The skew describes how much segment files vary in size. It is a number
+ between `0.0` and `1.0` and calculated by dividing the largest file size
+ of a set of segment files by the total size.
+
+ Multiple combinations of candidate segments are checked and the one with
+ the lowest skew value is selected for consolidation. This rather selects
+ many than few segments, but the new merged segment will be below the
+ configured `segmentsBytesMax`. The skew threshold prevents unnecessary
+ consolidation of e.g. a big segment file with a very small one, where the
+ cost of writing a merged segment is higher than the gain in read performance.
+ type: number
+ minimum: 0.0
+ maximum: 1.0
+ default: 0.4
+ minDeletionRatio:
description: |
- Filter out consolidation candidates with a score less than this.
+ Clean up segments where the ratio of deleted documents is at least
+ this high. Decreasing the minimum ratio leads to earlier consolidation
+ of segments with many deleted documents and thus reclamation of
+ disk space but causes a higher write load.
+
+ The deletion ratio is the fraction of deleted documents across one
+ or more segment files. It is a number between `0.0` and `1.0` and
+ calculated by dividing the number of deleted documents by the total
+ number of documents.
+
+ The segment files with the highest individual deletion ratio are
+ the candidates. As many candidates as possible are selected for
+ consolidation (in order of decreasing ratio), but the overall ratio
+ has to be at least `minDeletionRatio` and the new segment with the
+ active documents needs to be below the configured `segmentsBytesMax`.
- type: integer
+ type: number
- default: 0
+ minimum: 0.0
+ maximum: 1.0
+ default: 0.5
responses:
'200':
description: |
@@ -1641,29 +1725,51 @@ paths:
type: number
minimum: 0.0
maximum: 1.0
- segmentsBytesFloor:
- description: |
- Defines the value (in bytes) to treat all smaller segments
- as equal for consolidation selection.
- type: integer
segmentsBytesMax:
description: |
Maximum allowed size of all consolidated segments in bytes.
type: integer
- segmentsMax:
- description: |
- The maximum number of segments that are evaluated as
- candidates for consolidation.
- type: integer
- segmentsMin:
+ maxSkewThreshold:
description: |
- The minimum number of segments that are
- evaluated as candidates for consolidation
- type: integer
- minScore:
+ Merge a subset of segments where the ratio of the largest segment size
+ to the combined segment size is within this threshold. Increasing the
+ threshold leads to fewer segment files and thus a potentially higher
+ read performance and less file descriptors but at the expense of more
+ frequent consolidations and thus higher write load.
+
+ The skew describes how much segment files vary in size. It is a number
+ between `0.0` and `1.0` and calculated by dividing the largest file size
+ of a set of segment files by the total size.
+
+ Multiple combinations of candidate segments are checked and the one with
+ the lowest skew value is selected for consolidation. This rather selects
+ many than few segments, but the new merged segment will be below the
+ configured `segmentsBytesMax`. The skew threshold prevents unnecessary
+ consolidation of e.g. a big segment file with a very small one, where the
+ cost of writing a merged segment is higher than the gain in read performance.
+ type: number
+ minimum: 0.0
+ maximum: 1.0
+ minDeletionRatio:
description: |
- Filter out consolidation candidates with a score less than this.
+ Clean up segments where the ratio of deleted documents is at least
+ this high. Decreasing the minimum ratio leads to earlier consolidation
+ of segments with many deleted documents and thus reclamation of
+ disk space but causes a higher write load.
+
+ The deletion ratio is the fraction of deleted documents across one
+ or more segment files. It is a number between `0.0` and `1.0` and
+ calculated by dividing the number of deleted documents by the total
+ number of documents.
+
+ The segment files with the highest individual deletion ratio are
+ the candidates. As many candidates as possible are selected for
+ consolidation (in order of decreasing ratio), but the overall ratio
+ has to be at least `minDeletionRatio` and the new segment with the
+ active documents needs to be below the configured `segmentsBytesMax`.
- type: integer
+ type: number
+ minimum: 0.0
+ maximum: 1.0
writebufferIdle:
description: |
Maximum number of writers (segments) cached in the pool (`0` = disabled).
@@ -1950,34 +2056,54 @@ paths:
default: 0
minimum: 0.0
maximum: 1.0
- segmentsBytesFloor:
- description: |
- Defines the value (in bytes) to treat all smaller segments
- as equal for consolidation selection.
- type: integer
- default: 25165824
segmentsBytesMax:
description: |
Maximum allowed size of all consolidated segments in bytes.
type: integer
default: 8589934592
- segmentsMax:
- description: |
- The maximum number of segments that are evaluated as
- candidates for consolidation.
- type: integer
- default: 200
- segmentsMin:
+ maxSkewThreshold:
description: |
- The minimum number of segments that are
- evaluated as candidates for consolidation
- type: integer
- default: 50
- minScore:
+ Merge a subset of segments where the ratio of the largest segment size
+ to the combined segment size is within this threshold. Increasing the
+ threshold leads to fewer segment files and thus a potentially higher
+ read performance and less file descriptors but at the expense of more
+ frequent consolidations and thus higher write load.
+
+ The skew describes how much segment files vary in size. It is a number
+ between `0.0` and `1.0` and calculated by dividing the largest file size
+ of a set of segment files by the total size.
+
+ Multiple combinations of candidate segments are checked and the one with
+ the lowest skew value is selected for consolidation. This rather selects
+ many than few segments, but the new merged segment will be below the
+ configured `segmentsBytesMax`. The skew threshold prevents unnecessary
+ consolidation of e.g. a big segment file with a very small one, where the
+ cost of writing a merged segment is higher than the gain in read performance.
+ type: number
+ minimum: 0.0
+ maximum: 1.0
+ default: 0.4
+ minDeletionRatio:
description: |
- Filter out consolidation candidates with a score less than this.
+ Clean up segments where the ratio of deleted documents is at least
+ this high. Decreasing the minimum ratio leads to earlier consolidation
+ of segments with many deleted documents and thus reclamation of
+ disk space but causes a higher write load.
+
+ The deletion ratio is the fraction of deleted documents across one
+ or more segment files. It is a number between `0.0` and `1.0` and
+ calculated by dividing the number of deleted documents by the total
+ number of documents.
+
+ The segment files with the highest individual deletion ratio are
+ the candidates. As many candidates as possible are selected for
+ consolidation (in order of decreasing ratio), but the overall ratio
+ has to be at least `minDeletionRatio` and the new segment with the
+ active documents needs to be below the configured `segmentsBytesMax`.
- type: integer
+ type: number
- default: 0
+ minimum: 0.0
+ maximum: 1.0
+ default: 0.5
responses:
'200':
description: |
@@ -2149,29 +2275,51 @@ paths:
type: number
minimum: 0.0
maximum: 1.0
- segmentsBytesFloor:
- description: |
- Defines the value (in bytes) to treat all smaller segments
- as equal for consolidation selection.
- type: integer
segmentsBytesMax:
description: |
Maximum allowed size of all consolidated segments in bytes.
type: integer
- segmentsMax:
- description: |
- The maximum number of segments that are evaluated as
- candidates for consolidation.
- type: integer
- segmentsMin:
+ maxSkewThreshold:
description: |
- The minimum number of segments that are
- evaluated as candidates for consolidation
- type: integer
- minScore:
+ Merge a subset of segments where the ratio of the largest segment size
+ to the combined segment size is within this threshold. Increasing the
+ threshold leads to fewer segment files and thus a potentially higher
+ read performance and less file descriptors but at the expense of more
+ frequent consolidations and thus higher write load.
+
+ The skew describes how much segment files vary in size. It is a number
+ between `0.0` and `1.0` and calculated by dividing the largest file size
+ of a set of segment files by the total size.
+
+ Multiple combinations of candidate segments are checked and the one with
+ the lowest skew value is selected for consolidation. This rather selects
+ many than few segments, but the new merged segment will be below the
+ configured `segmentsBytesMax`. The skew threshold prevents unnecessary
+ consolidation of e.g. a big segment file with a very small one, where the
+ cost of writing a merged segment is higher than the gain in read performance.
+ type: number
+ minimum: 0.0
+ maximum: 1.0
+ minDeletionRatio:
description: |
- Filter out consolidation candidates with a score less than this.
+ Clean up segments where the ratio of deleted documents is at least
+ this high. Decreasing the minimum ratio leads to earlier consolidation
+ of segments with many deleted documents and thus reclamation of
+ disk space but causes a higher write load.
+
+ The deletion ratio is the fraction of deleted documents across one
+ or more segment files. It is a number between `0.0` and `1.0` and
+ calculated by dividing the number of deleted documents by the total
+ number of documents.
+
+ The segment files with the highest individual deletion ratio are
+ the candidates. As many candidates as possible are selected for
+ consolidation (in order of decreasing ratio), but the overall ratio
+ has to be at least `minDeletionRatio` and the new segment with the
+ active documents needs to be below the configured `segmentsBytesMax`.
- type: integer
+ type: number
+ minimum: 0.0
+ maximum: 1.0
writebufferIdle:
description: |
Maximum number of writers (segments) cached in the pool (`0` = disabled).
@@ -2516,29 +2664,51 @@ paths:
type: number
minimum: 0.0
maximum: 1.0
- segmentsBytesFloor:
- description: |
- Defines the value (in bytes) to treat all smaller segments
- as equal for consolidation selection.
- type: integer
segmentsBytesMax:
description: |
Maximum allowed size of all consolidated segments in bytes.
type: integer
- segmentsMax:
- description: |
- The maximum number of segments that are evaluated as
- candidates for consolidation.
- type: integer
- segmentsMin:
+ maxSkewThreshold:
description: |
- The minimum number of segments that are
- evaluated as candidates for consolidation
- type: integer
- minScore:
+ Merge a subset of segments where the ratio of the largest segment size
+ to the combined segment size is within this threshold. Increasing the
+ threshold leads to fewer segment files and thus a potentially higher
+ read performance and less file descriptors but at the expense of more
+ frequent consolidations and thus higher write load.
+
+ The skew describes how much segment files vary in size. It is a number
+ between `0.0` and `1.0` and calculated by dividing the largest file size
+ of a set of segment files by the total size.
+
+ Multiple combinations of candidate segments are checked and the one with
+ the lowest skew value is selected for consolidation. This rather selects
+ many than few segments, but the new merged segment will be below the
+ configured `segmentsBytesMax`. The skew threshold prevents unnecessary
+ consolidation of e.g. a big segment file with a very small one, where the
+ cost of writing a merged segment is higher than the gain in read performance.
+ type: number
+ minimum: 0.0
+ maximum: 1.0
+ minDeletionRatio:
description: |
- Filter out consolidation candidates with a score less than this.
+ Clean up segments where the ratio of deleted documents is at least
+ this high. Decreasing the minimum ratio leads to earlier consolidation
+ of segments with many deleted documents and thus reclamation of
+ disk space but causes a higher write load.
+
+ The deletion ratio is the fraction of deleted documents across one
+ or more segment files. It is a number between `0.0` and `1.0` and
+ calculated by dividing the number of deleted documents by the total
+ number of documents.
+
+ The segment files with the highest individual deletion ratio are
+ the candidates. As many candidates as possible are selected for
+ consolidation (in order of decreasing ratio), but the overall ratio
+ has to be at least `minDeletionRatio` and the new segment with the
+ active documents needs to be below the configured `segmentsBytesMax`.
- type: integer
+ type: number
+ minimum: 0.0
+ maximum: 1.0
writebufferIdle:
description: |
Maximum number of writers (segments) cached in the pool (`0` = disabled).
diff --git a/site/content/arangodb/4.0/indexes-and-search/arangosearch/arangosearch-views-reference.md b/site/content/arangodb/4.0/indexes-and-search/arangosearch/arangosearch-views-reference.md
index 036758127f..7bcfdf3676 100644
--- a/site/content/arangodb/4.0/indexes-and-search/arangosearch/arangosearch-views-reference.md
+++ b/site/content/arangodb/4.0/indexes-and-search/arangosearch/arangosearch-views-reference.md
@@ -483,23 +483,43 @@ is used by these writers (in terms of "writers pool") one can use
`consolidationPolicy` properties for `"tier"` type:
- - **segmentsMin** (_optional_; type: `integer`; default: `50`)
-
- The minimum number of segments that are evaluated as candidates for consolidation.
-
- - **segmentsMax** (_optional_; type: `integer`; default: `200`)
-
- The maximum number of segments that are evaluated as candidates for consolidation.
-
- **segmentsBytesMax** (_optional_; type: `integer`; default: `8589934592`)
Maximum allowed size of all consolidated segments in bytes.
- - **segmentsBytesFloor** (_optional_; type: `integer`; default: `25165824`)
-
- Defines the value (in bytes) to treat all smaller segments as equal for consolidation
- selection.
-
- - **minScore** (_optional_; type: `integer`; default: `0`)
-
- Filter out consolidation candidates with a score less than this.
+ - **maxSkewThreshold** (_optional_; type: `number`; default: `0.4`)
+
+ Merge a subset of segments where the ratio of the largest segment size
+ to the combined segment size is within this threshold. Increasing the
+ threshold leads to fewer segment files and thus a potentially higher
+ read performance and fewer file descriptors but at the expense of more
+ frequent consolidations and thus higher write load.
+
+ The skew describes how much segment files vary in size. It is a number
+ between `0.0` and `1.0` and calculated by dividing the largest file size
+ of a set of segment files by the total size.
+
+ Multiple combinations of candidate segments are checked and the one with
+ the lowest skew value is selected for consolidation. This tends to select
+ many segments rather than few, but the new merged segment will be below the
+ configured `segmentsBytesMax`. The skew threshold prevents unnecessary
+ consolidation of e.g. a big segment file with a very small one, where the
+ cost of writing a merged segment is higher than the gain in read performance.
+
+ - **minDeletionRatio** (_optional_; type: `number`; default: `0.5`)
+
+ Clean up segments where the ratio of deleted documents is at least
+ this high. Decreasing the minimum ratio leads to earlier consolidation
+ of segments with many deleted documents and thus reclamation of
+ disk space but causes a higher write load.
+
+ The deletion ratio is the fraction of deleted documents across one
+ or more segment files. It is a number between `0.0` and `1.0` and
+ calculated by dividing the number of deleted documents by the total
+ number of documents.
+
+ The segment files with the highest individual deletion ratio are
+ the candidates. As many candidates as possible are selected for
+ consolidation (in order of decreasing ratio), but the overall ratio
+ has to be at least `minDeletionRatio` and the new segment with the
+ active documents needs to be below the configured `segmentsBytesMax`.
diff --git a/site/content/arangodb/4.0/release-notes/version-3.12/api-changes-in-3-12.md b/site/content/arangodb/4.0/release-notes/version-3.12/api-changes-in-3-12.md
index 8cc010cc0a..e3a588dbca 100644
--- a/site/content/arangodb/4.0/release-notes/version-3.12/api-changes-in-3-12.md
+++ b/site/content/arangodb/4.0/release-notes/version-3.12/api-changes-in-3-12.md
@@ -363,6 +363,25 @@ By consolidating less often and with more data, less file descriptors are used.
- `segmentsBytesMax` increased from `5368709120` (5 GiB) to `8589934592` (8 GiB)
- `segmentsBytesFloor` increased from `2097152` (2 MiB) to `25165824` (24 MiB)
+##### Added and removed consolidation options for `arangosearch` Views
+
+Introduced in: v3.12.7
+
+The following options for consolidating `arangosearch` Views have been removed
+and are now ignored when specified in a request:
+
+- `consolidationPolicy` (with `type` set to `tier`):
+ - `segmentsMin`
+ - `segmentsMax`
+ - `segmentsBytesFloor`
+ - `minScore`
+
+The following new options have been added:
+
+- `consolidationPolicy` (with `type` set to `tier`):
+ - `maxSkewThreshold` (number in range `[0.0, 1.0]`, default: `0.4`)
+ - `minDeletionRatio` (number in range `[0.0, 1.0]`, default: `0.5`)
+
#### Document API
The following endpoints accept a new `versionAttribute` query parameter that adds
@@ -501,6 +520,25 @@ By consolidating less often and with more data, less file descriptors are used.
- `segmentsBytesMax` increased from `5368709120` (5 GiB) to `8589934592` (8 GiB)
- `segmentsBytesFloor` increased from `2097152` (2 MiB) to `25165824` (24 MiB)
+##### Added and removed consolidation options for inverted indexes
+
+Introduced in: v3.12.7
+
+The following options for consolidating inverted indexes have been removed
+and are now ignored when specified in a request:
+
+- `consolidationPolicy` (with `type` set to `tier`):
+ - `segmentsMin`
+ - `segmentsMax`
+ - `segmentsBytesFloor`
+ - `minScore`
+
+The following new options have been added:
+
+- `consolidationPolicy` (with `type` set to `tier`):
+ - `maxSkewThreshold` (number in range `[0.0, 1.0]`, default: `0.4`)
+ - `minDeletionRatio` (number in range `[0.0, 1.0]`, default: `0.5`)
+
#### Optimizer rule descriptions
Introduced in: v3.10.9, v3.11.2
diff --git a/site/content/arangodb/4.0/release-notes/version-3.12/incompatible-changes-in-3-12.md b/site/content/arangodb/4.0/release-notes/version-3.12/incompatible-changes-in-3-12.md
index 959cea82cf..1fe5a31d18 100644
--- a/site/content/arangodb/4.0/release-notes/version-3.12/incompatible-changes-in-3-12.md
+++ b/site/content/arangodb/4.0/release-notes/version-3.12/incompatible-changes-in-3-12.md
@@ -994,6 +994,29 @@ more data, less file descriptors are used.
- `segmentsBytesMax` increased from `5368709120` (5 GiB) to `8589934592` (8 GiB)
- `segmentsBytesFloor` increased from `2097152` (2 MiB) to `25165824` (24 MiB)
+## Added and removed consolidation options for inverted indexes and `arangosearch` Views
+
+Introduced in: v3.12.7
+
+The following options for consolidating inverted indexes as well as
+`arangosearch` Views have been removed and are now ignored when specified in a request:
+
+- `consolidationPolicy` (with `type` set to `tier`):
+ - `segmentsMin`
+ - `segmentsMax`
+ - `segmentsBytesFloor`
+ - `minScore`
+
+The consolidation works differently now and uses the new `maxSkewThreshold` and
+`minDeletionRatio` options together with the existing `segmentsBytesMax`. If you
+previously used customized settings for the removed options, check if the default
+values of the new options are acceptable or if you need to tune them according to
+your workload.
+
+For details, see:
+- [HTTP interface for inverted indexes](../../develop/http-api/indexes/inverted.md)
+- [`arangosearch` View properties](../../indexes-and-search/arangosearch/arangosearch-views-reference.md#view-properties)
+
## HTTP RESTful API
### JavaScript-based traversal using `/_api/traversal` removed
diff --git a/site/content/arangodb/4.0/release-notes/version-3.12/whats-new-in-3-12.md b/site/content/arangodb/4.0/release-notes/version-3.12/whats-new-in-3-12.md
index d0d0c77010..199242cfb5 100644
--- a/site/content/arangodb/4.0/release-notes/version-3.12/whats-new-in-3-12.md
+++ b/site/content/arangodb/4.0/release-notes/version-3.12/whats-new-in-3-12.md
@@ -2452,6 +2452,38 @@ environment variable `NAME`. If there is an environment variable called `PID` or
`TEMP_BASE_DIR`, then `@PID@` or `@TEMP_BASE_DIR@` is substituted with the
value of the respective environment variable.
+### New consolidation algorithm for inverted indexes and `arangosearch` Views
+
+Introduced in: v3.12.7
+
+The `tier` consolidation policy now uses a different algorithm for merging
+and cleaning up segments. Overall, it avoids consolidating segments where the
+cost of writing the new segment is high and the gain in read performance is low
+(e.g. combining a big segment file with a very small one).
+
+The following options have been removed for inverted indexes as well as
+`arangosearch` Views because the new consolidation algorithm doesn't use them:
+
+- `consolidationPolicy` (with `type` set to `tier`):
+ - `segmentsMin`
+ - `segmentsMax`
+ - `segmentsBytesFloor`
+ - `minScore`
+
+The following new options have been added:
+
+- `consolidationPolicy` (with `type` set to `tier`):
+ - `maxSkewThreshold` (number in range `[0.0, 1.0]`, default: `0.4`)
+ - `minDeletionRatio` (number in range `[0.0, 1.0]`, default: `0.5`)
+
+If you previously used customized settings for the removed options, check if the
+default values of the new options are acceptable or if you need to tune them
+according to your workload.
+
+For details, see:
+- [HTTP interface for inverted indexes](../../develop/http-api/indexes/inverted.md)
+- [`arangosearch` View properties](../../indexes-and-search/arangosearch/arangosearch-views-reference.md#view-properties)
+
## Client tools
### Protocol aliases for endpoints