Skip to content

Commit 8cdb56b

Browse files
committed
Add support for converting to OCI artifacts
Signed-off-by: Daniel J Walsh <[email protected]>
1 parent 5adc006 commit 8cdb56b

File tree

14 files changed

+1442
-80
lines changed

14 files changed

+1442
-80
lines changed

docs/ramalama-convert.1.md

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -39,14 +39,15 @@ Image to use when converting to GGUF format (when the `--gguf` option has been
3939
executable and available in the `PATH`. The script is available from the `llama.cpp` GitHub repo. Defaults to the current
4040
`quay.io/ramalama/ramalama-rag` image.
4141

42-
#### **--type**=*raw* | *car*
42+
#### **--type**=*artifact* | *raw* | *car*
4343

44-
type of OCI Model Image to convert.
44+
Convert the MODEL to the specified OCI Object
4545

46-
| Type | Description |
47-
| ---- | ------------------------------------------------------------- |
48-
| car | Includes base image with the model stored in a /models subdir |
49-
| raw | Only the model and a link file model.file to it stored at / |
46+
| Type | Description |
47+
| -------- | ------------------------------------------------------------- |
48+
| artifact | Store AI Models as artifacts |
49+
| car | Traditional OCI image including base image with the model stored in a /models subdir |
50+
| raw | Traditional OCI image including only the model and a link file `model.file` pointing to it, stored at / |
5051

5152
## EXAMPLE
5253

docs/ramalama.conf

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,14 @@
3232
#
3333
#carimage = "registry.access.redhat.com/ubi10-micro:latest"
3434

35+
# Convert the MODEL to the specified OCI Object
36+
# Options: artifact, car, raw
37+
#
38+
# artifact: Store AI Models as artifacts
39+
# car: Traditional OCI image including base image with the model stored in a /models subdir
40+
# raw: Traditional OCI image including only the model and a link file `model.file` pointing to it, stored at /
41+
#convert_type = "raw"
42+
3543
# Run RamaLama in the default container.
3644
#
3745
#container = true

docs/ramalama.conf.5.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,18 @@ Min chunk size to attempt reusing from the cache via KV shifting
8484
Run RamaLama in the default container.
8585
RAMALAMA_IN_CONTAINER environment variable overrides this field.
8686

87+
#convert_type = "raw"
88+
89+
Convert the MODEL to the specified OCI Object
90+
Options: artifact, car, raw
91+
92+
| Type | Description |
93+
| -------- | ------------------------------------------------------------- |
94+
| artifact | Store AI Models as artifacts |
95+
| car | Traditional OCI image including base image with the model stored in a /models subdir |
96+
| raw | Traditional OCI image including only the model and a link file `model.file` pointing to it, stored at / |
97+
98+
8799
**ctx_size**=0
88100

89101
Size of the prompt context (0 = loaded from model)

ramalama/cli.py

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -743,11 +743,12 @@ def convert_parser(subparsers):
743743
)
744744
parser.add_argument(
745745
"--type",
746-
default="raw",
747-
choices=["car", "raw"],
746+
default=CONFIG.convert_type,
747+
choices=["artifact", "car", "raw"],
748748
help="""\
749749
type of OCI Model Image to push.
750750
751+
Model "artifact" stores the AI Model as an OCI Artifact.
751752
Model "car" includes base image with the model stored in a /models subdir.
752753
Model "raw" contains the model and a link file model.file to it stored at /.""",
753754
)
@@ -784,11 +785,12 @@ def push_parser(subparsers):
784785
add_network_argument(parser)
785786
parser.add_argument(
786787
"--type",
787-
default="raw",
788-
choices=["car", "raw"],
788+
default=CONFIG.convert_type,
789+
choices=["artifact", "car", "raw"],
789790
help="""\
790791
type of OCI Model Image to push.
791792
793+
Model "artifact" stores the AI Model as an OCI Artifact.
792794
Model "car" includes base image with the model stored in a /models subdir.
793795
Model "raw" contains the model and a link file model.file to it stored at /.""",
794796
)
@@ -803,9 +805,9 @@ def push_parser(subparsers):
803805
parser.set_defaults(func=push_cli)
804806

805807

806-
def _get_source_model(args):
808+
def _get_source_model(args, transport=None):
807809
src = shortnames.resolve(args.SOURCE)
808-
smodel = New(src, args)
810+
smodel = New(src, args, transport=transport)
809811
if smodel.type == "OCI":
810812
raise ValueError(f"converting from an OCI based image {src} is not supported")
811813
if not smodel.exists() and not args.dryrun:
@@ -814,9 +816,15 @@ def _get_source_model(args):
814816

815817

816818
def push_cli(args):
817-
source_model = _get_source_model(args)
818819
target = args.SOURCE
820+
transport = None
821+
if not args.TARGET:
822+
transport = "oci"
823+
source_model = _get_source_model(args, transport=transport)
824+
819825
if args.TARGET:
826+
if source_model.type == "OCI":
827+
raise ValueError(f"converting from an OCI based image {args.SOURCE} is not supported")
820828
target = shortnames.resolve(args.TARGET)
821829
target_model = New(target, args)
822830

@@ -1198,9 +1206,14 @@ def serve_cli(args):
11981206
model.ensure_model_exists(args)
11991207
except KeyError as e:
12001208
try:
1209+
if "://" in args.MODEL:
1210+
raise e
12011211
args.quiet = True
12021212
model = TransportFactory(args.MODEL, args, ignore_stderr=True).create_oci()
12031213
model.ensure_model_exists(args)
1214+
# Since this is a OCI model, prepend oci://
1215+
args.MODEL = f"oci://{args.MODEL}"
1216+
12041217
except Exception:
12051218
raise e
12061219

@@ -1448,7 +1461,7 @@ def _rm_model(models, args):
14481461
try:
14491462
m = New(model, args)
14501463
m.remove(args)
1451-
except KeyError as e:
1464+
except (KeyError, subprocess.CalledProcessError) as e:
14521465
for prefix in MODEL_TYPES:
14531466
if model.startswith(prefix + "://"):
14541467
if not args.ignore:

ramalama/common.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -287,7 +287,7 @@ def verify_checksum(filename: str) -> bool:
287287

288288

289289
def genname():
290-
return "ramalama_" + "".join(random.choices(string.ascii_letters + string.digits, k=10))
290+
return "ramalama-" + "".join(random.choices(string.ascii_letters + string.digits, k=10))
291291

292292

293293
def engine_version(engine: SUPPORTED_ENGINES) -> str:

ramalama/config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,7 @@ class BaseConfig:
206206
carimage: str = "registry.access.redhat.com/ubi10-micro:latest"
207207
container: bool = None # type: ignore
208208
ctx_size: int = 0
209+
convert_type: Literal["artifact", "car", "raw"] = "raw"
209210
default_image: str = DEFAULT_IMAGE
210211
default_rag_image: str = DEFAULT_RAG_IMAGE
211212
dryrun: bool = False

ramalama/kube.py

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import os
22
from typing import Optional, Tuple
33

4-
from ramalama.common import MNT_DIR, RAG_DIR, genname, get_accel_env_vars
4+
from ramalama.common import MNT_DIR, RAG_DIR, get_accel_env_vars
55
from ramalama.file import PlainFile
66
from ramalama.version import version
77

@@ -15,6 +15,7 @@ def __init__(
1515
mmproj_paths: Optional[Tuple[str, str]],
1616
args,
1717
exec_args,
18+
artifact,
1819
):
1920
self.src_model_path, self.dest_model_path = model_paths
2021
self.src_chat_template_path, self.dest_chat_template_path = (
@@ -27,27 +28,30 @@ def __init__(
2728
if getattr(args, "name", None):
2829
self.name = args.name
2930
else:
30-
self.name = genname()
31+
self.name = "ramalama"
3132

3233
self.args = args
3334
self.exec_args = exec_args
3435
self.image = args.image
36+
self.artifact = artifact
3537

3638
def _gen_volumes(self):
3739
mounts = """\
3840
volumeMounts:"""
3941

4042
volumes = """
4143
volumes:"""
42-
4344
if os.path.exists(self.src_model_path):
4445
m, v = self._gen_path_volume()
4546
mounts += m
4647
volumes += v
4748
else:
49+
subPath = ""
50+
if not self.artifact:
51+
subPath = """
52+
subPath: /models"""
4853
mounts += f"""
49-
- mountPath: {MNT_DIR}
50-
subPath: /models
54+
- mountPath: {MNT_DIR}{subPath}
5155
name: model"""
5256
volumes += self._gen_oci_volume()
5357

@@ -98,7 +102,7 @@ def _gen_path_volume(self):
98102
def _gen_oci_volume(self):
99103
return f"""
100104
- image:
101-
reference: {self.ai_image}
105+
reference: {self.src_model_path}
102106
pullPolicy: IfNotPresent
103107
name: model"""
104108

@@ -162,7 +166,7 @@ def __gen_env_vars():
162166
for k, v in env_vars.items():
163167
env_spec += f"""
164168
- name: {k}
165-
value: {v}"""
169+
value: \"{v}\""""
166170

167171
return env_spec
168172

@@ -177,7 +181,7 @@ def generate(self) -> PlainFile:
177181
# it into Kubernetes.
178182
#
179183
# Created with ramalama-{_version}
180-
apiVersion: v1
184+
apiVersion: apps/v1
181185
kind: Deployment
182186
metadata:
183187
name: {self.name}

ramalama/oci_tools.py

Lines changed: 77 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,66 @@
88
ocilabeltype = "org.containers.type"
99

1010

11-
def engine_supports_manifest_attributes(engine):
11+
def convert_from_human_readable_size(input) -> float:
12+
sizes = [("KB", 1024), ("MB", 1024**2), ("GB", 1024**3), ("TB", 1024**4), ("B", 1)]
13+
for unit, size in sizes:
14+
if input.endswith(unit) or input.endswith(unit.lower()):
15+
return float(input[: -len(unit)]) * size
16+
17+
return float(input)
18+
19+
20+
def list_artifacts(args: EngineArgType):
21+
if args.engine == "docker":
22+
return []
23+
24+
conman_args = [
25+
args.engine,
26+
"artifact",
27+
"ls",
28+
"--format",
29+
(
30+
'{"name":"oci://{{ .Repository }}:{{ .Tag }}",\
31+
"created":"{{ .CreatedAt }}", \
32+
"size":"{{ .Size }}", \
33+
"ID":"{{ .Digest }}"},'
34+
),
35+
]
36+
output = run_cmd(conman_args).stdout.decode("utf-8").strip()
37+
if output == "":
38+
return []
39+
40+
artifacts = json.loads(f"[{output[:-1]}]")
41+
models = []
42+
for artifact in artifacts:
43+
conman_args = [
44+
args.engine,
45+
"artifact",
46+
"inspect",
47+
artifact["ID"],
48+
]
49+
output = run_cmd(conman_args).stdout.decode("utf-8").strip()
50+
51+
if output == "":
52+
continue
53+
inspect = json.loads(output)
54+
if "Manifest" not in inspect:
55+
continue
56+
if "artifactType" not in inspect["Manifest"]:
57+
continue
58+
if inspect["Manifest"]['artifactType'] != annotations.ArtifactTypeModelManifest:
59+
continue
60+
models += [
61+
{
62+
"name": artifact["name"],
63+
"modified": artifact["created"],
64+
"size": convert_from_human_readable_size(artifact["size"]),
65+
}
66+
]
67+
return models
68+
69+
70+
def engine_supports_manifest_attributes(engine) -> bool:
1271
if not engine or engine == "" or engine == "docker":
1372
return False
1473
if engine == "podman" and engine_version(engine) < "5":
@@ -91,26 +150,26 @@ def list_models(args: EngineArgType):
91150
"--format",
92151
formatLine,
93152
]
153+
models = []
94154
output = run_cmd(conman_args, env={"TZ": "UTC"}).stdout.decode("utf-8").strip()
95-
if output == "":
96-
return []
97-
98-
models = json.loads(f"[{output[:-1]}]")
99-
# exclude dangling images having no tag (i.e. <none>:<none>)
100-
models = [model for model in models if model["name"] != "oci://<none>:<none>"]
101-
102-
# Grab the size from the inspect command
103-
if conman == "docker":
104-
# grab the size from the inspect command
105-
for model in models:
106-
conman_args = [conman, "image", "inspect", model["id"], "--format", "{{.Size}}"]
107-
output = run_cmd(conman_args).stdout.decode("utf-8").strip()
108-
# convert the number value from the string output
109-
model["size"] = int(output)
110-
# drop the id from the model
111-
del model["id"]
155+
if output != "":
156+
models += json.loads(f"[{output[:-1]}]")
157+
# exclude dangling images having no tag (i.e. <none>:<none>)
158+
models = [model for model in models if model["name"] != "oci://<none>:<none>"]
159+
160+
# Grab the size from the inspect command
161+
if conman == "docker":
162+
# grab the size from the inspect command
163+
for model in models:
164+
conman_args = [conman, "image", "inspect", model["id"], "--format", "{{.Size}}"]
165+
output = run_cmd(conman_args).stdout.decode("utf-8").strip()
166+
# convert the number value from the string output
167+
model["size"] = int(output)
168+
# drop the id from the model
169+
del model["id"]
112170

113171
models += list_manifests(args)
172+
models += list_artifacts(args)
114173
for model in models:
115174
# Convert to ISO 8601 format
116175
parsed_date = datetime.fromisoformat(

ramalama/quadlet.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ def __init__(
1515
mmproj_path: Optional[Tuple[str, str]],
1616
args,
1717
exec_args,
18+
artifact: bool
1819
):
1920
self.src_model_path, self.dest_model_path = model_paths
2021
self.src_chat_template_path, self.dest_chat_template_path = (
@@ -33,6 +34,7 @@ def __init__(
3334
self.name = model_name
3435

3536
self.args = args
37+
self.artifact = artifact
3638
self.exec_args = exec_args
3739
self.image = args.image
3840
self.rag = ""
@@ -147,11 +149,18 @@ def _gen_model_volume(self, quadlet_file: UnitFile):
147149

148150
files.append(self._gen_image(self.name, self.ai_image))
149151

150-
quadlet_file.add(
151-
"Container",
152-
"Mount",
153-
f"type=image,source={self.ai_image},destination={MNT_DIR},subpath=/models,readwrite=false",
154-
)
152+
if self.artifact:
153+
quadlet_file.add(
154+
"Container",
155+
"Mount",
156+
f"type=artifact,source={self.src_model_path},destination={MNT_DIR}",
157+
)
158+
else:
159+
quadlet_file.add(
160+
"Container",
161+
"Mount",
162+
f"type=image,source={self.src_model_path},destination={MNT_DIR},subpath=/models,readwrite=false",
163+
)
155164
return files
156165

157166
def _gen_port(self, quadlet_file: UnitFile):

0 commit comments

Comments
 (0)