Add, replace, cache and delete artifacts

import pytest
import lamindb as ln

# Log in as the test user, then initialize an instance whose storage root is an S3 location.
ln.setup.login("testuser1")
ln.setup.init(storage="s3://lamindb-ci/test-add-replace-stage")
✅ logged in with email testuser1@lamin.ai (uid: DzTjkKse)
💡 go to: https://lamin.ai/testuser1/test-add-replace-stage
❗ updating & unlocking cloud SQLite 's3://lamindb-ci/test-add-replace-stage/0102f680066a569da6f32766b07a9f5d.lndb' of instance 'testuser1/test-add-replace-stage'
❗ locked instance (to unlock and push changes to the cloud SQLite file, call: lamin close)

Save with an auto-managed key (key=None)

# Prefix that is combined with the artifact's uid and suffix below to build its storage path.
AUTO_KEY_PREFIX = ln.core.storage.paths.AUTO_KEY_PREFIX
# Storage root of the current instance; it supports `/` for building paths.
root = ln.settings.storage
# No `key` is passed here, only a description.
artifact = ln.Artifact("./test-files/iris.csv", description="iris.csv")
❗ no run & transform get linked, consider calling ln.track()
artifact.save()
Artifact(updated_at=2024-05-23 10:57:12 UTC, uid='aZbILE7rGRUXap9aNEPH', suffix='.csv', description='iris.csv', size=224, hash='iwc1TmF1TW_l5weDvscSHw', hash_type='md5', visibility=1, key_is_virtual=True, created_by_id=1, storage_id=1)
# Expected storage location: <root>/<AUTO_KEY_PREFIX><uid><suffix>.
key_path = root / f"{AUTO_KEY_PREFIX}{artifact.uid}{artifact.suffix}"
assert key_path.exists()
# cache() returns a local path for the artifact; the bare expression echoes it.
cache_csv_path = artifact.cache()
cache_csv_path
PosixUPath('/home/runner/.cache/lamindb/lamindb-ci/test-add-replace-stage/.lamindb/aZbILE7rGRUXap9aNEPH.csv')
# The cached copy keeps the original .csv suffix.
assert cache_csv_path.suffix == ".csv"
# Replace the artifact's file with one that has a different suffix (.csv -> .data).
artifact.replace("./test-files/iris.data")
❗ no run & transform get linked, consider calling ln.track()
artifact.save()
Artifact(updated_at=2024-05-23 10:57:13 UTC, uid='aZbILE7rGRUXap9aNEPH', suffix='.data', description='iris.csv', size=182, hash='42Br6no9CjB6s5ZbmO-bmw', hash_type='md5', visibility=1, key_is_virtual=True, created_by_id=1, storage_id=1)
# Keep the pre-replace path, then rebuild the expected path from the artifact's current suffix.
old_key_path = key_path
new_key_path = root / f"{AUTO_KEY_PREFIX}{artifact.uid}{artifact.suffix}"

The suffix changed:

old_key_path
S3Path('s3://lamindb-ci/test-add-replace-stage/.lamindb/aZbILE7rGRUXap9aNEPH.csv')
new_key_path
S3Path('s3://lamindb-ci/test-add-replace-stage/.lamindb/aZbILE7rGRUXap9aNEPH.data')
# After replace + save, the object under the old suffix is gone and the new one exists.
assert not old_key_path.exists()
assert new_key_path.exists()
# Fetch the local cache path again, now for the replaced file.
cache_data_path = artifact.cache()
cache_data_path
PosixUPath('/home/runner/.cache/lamindb/lamindb-ci/test-add-replace-stage/.lamindb/aZbILE7rGRUXap9aNEPH.data')
assert cache_data_path.suffix == ".data"
# The cache file of the replacement must be at least as recent as the earlier .csv cache file.
assert cache_data_path.stat().st_mtime >= cache_csv_path.stat().st_mtime
# Clean up: permanently delete the artifact.
artifact.delete(permanent=True)

Save with manually passed real key

# Turn off virtual keys; the saved artifact below reports key_is_virtual=False.
ln.settings.artifact_use_virtual_keys = False
# This time an explicit key is passed.
artifact = ln.Artifact("./test-files/iris.csv", key="iris.csv")
❗ no run & transform get linked, consider calling ln.track()
artifact.save()
Artifact(updated_at=2024-05-23 10:57:14 UTC, uid='57oc8UndYWIT28NXmcy5', key='iris.csv', suffix='.csv', size=224, hash='iwc1TmF1TW_l5weDvscSHw', hash_type='md5', visibility=1, key_is_virtual=False, created_by_id=1, storage_id=1)
# With a real (non-virtual) key, the file is expected at <root>/<key>.
key_path = root / "iris.csv"
assert key_path.exists()
# Replace with a differently named file that has the same .csv suffix.
artifact.replace("./test-files/new_iris.csv")
❗ no run & transform get linked, consider calling ln.track()
artifact.save()
Artifact(updated_at=2024-05-23 10:57:15 UTC, uid='57oc8UndYWIT28NXmcy5', key='iris.csv', suffix='.csv', size=229, hash='lp2-ycXcKcaliUTnR_TqHA', hash_type='md5', visibility=1, key_is_virtual=False, created_by_id=1, storage_id=1)

Check paths: the key path is unchanged because the suffix didn’t change.

old_key_path = key_path
# Path the replacement's filename would map to; asserted below NOT to exist because the key stays 'iris.csv'.
new_key_path = root / "new_iris.csv"
old_key_path
S3Path('s3://lamindb-ci/test-add-replace-stage/iris.csv')
new_key_path
S3Path('s3://lamindb-ci/test-add-replace-stage/new_iris.csv')
# The object under the unchanged key still exists; nothing was created under the new file's name.
assert old_key_path.exists()
assert not new_key_path.exists()
# Now replace with a file whose suffix differs (.csv -> .data).
artifact.replace("./test-files/iris.data")
❗ no run & transform get linked, consider calling ln.track()
❗ replacing the file will replace key 'iris.csv' with 'iris.data' and delete 'iris.csv' upon `save()`
artifact.save()
Artifact(updated_at=2024-05-23 10:57:16 UTC, uid='57oc8UndYWIT28NXmcy5', key='iris.data', suffix='.data', size=182, hash='42Br6no9CjB6s5ZbmO-bmw', hash_type='md5', visibility=1, key_is_virtual=False, created_by_id=1, storage_id=1)
# The key followed the new suffix: 'iris.csv' became 'iris.data'.
new_key_path = root / "iris.data"
old_key_path
S3Path('s3://lamindb-ci/test-add-replace-stage/iris.csv')
new_key_path
S3Path('s3://lamindb-ci/test-add-replace-stage/iris.data')
# The object under the old key was removed and the one under the new key exists.
assert not old_key_path.exists()
assert new_key_path.exists()
# Clean up; storage=True presumably removes the stored file along with the record
# (compare the later storage=False call, which warns about a dangling store).
artifact.delete(permanent=True, storage=True)

Save from memory

import pandas as pd
# Load the CSV into a DataFrame and build the artifact from the in-memory object.
iris = pd.read_csv("./test-files/iris.csv")
# The key carries a .parquet suffix, matching the suffix reported for the saved artifact.
artifact = ln.Artifact.from_df(iris, description="iris_store", key="iris.parquet")
❗ no run & transform get linked, consider calling ln.track()
artifact.save()
Artifact(updated_at=2024-05-23 10:57:17 UTC, uid='eTw5FQsCjktp0rzgtj4r', key='iris.parquet', suffix='.parquet', accessor='DataFrame', description='iris_store', size=4510, hash='4n5aTzTCvYC9TAcmXNW4hQ', hash_type='md5', visibility=1, key_is_virtual=False, created_by_id=1, storage_id=1)
key_path = root / "iris.parquet"
assert key_path.exists()
# Replace with another in-memory DataFrame: the same data minus its last row.
artifact.replace(data=iris[:-1])
❗ no run & transform get linked, consider calling ln.track()
# Replacing in-memory data with in-memory data leaves the key untouched.
assert artifact.key == "iris.parquet"
artifact.save()
Artifact(updated_at=2024-05-23 10:57:17 UTC, uid='eTw5FQsCjktp0rzgtj4r', key='iris.parquet', suffix='.parquet', accessor='DataFrame', description='iris_store', size=4490, hash='07E-IuJuPmhP_WKO2NA9fg', hash_type='md5', visibility=1, key_is_virtual=False, created_by_id=1, storage_id=1)
# The parquet object is still at the same path after the in-memory replace.
assert key_path.exists()
# Replace the DataFrame-backed artifact with a file on disk; the suffix changes (.parquet -> .csv).
artifact.replace("./test-files/new_iris.csv")
❗ no run & transform get linked, consider calling ln.track()
❗ replacing the file will replace key 'iris.parquet' with 'iris.csv' and delete 'iris.parquet' upon `save()`
artifact.save()
Artifact(updated_at=2024-05-23 10:57:18 UTC, uid='eTw5FQsCjktp0rzgtj4r', key='iris.csv', suffix='.csv', accessor='DataFrame', description='iris_store', size=229, hash='lp2-ycXcKcaliUTnR_TqHA', hash_type='md5', visibility=1, key_is_virtual=False, created_by_id=1, storage_id=1)
old_key_path = key_path
# The key keeps its stem and takes the new suffix: 'iris.parquet' -> 'iris.csv'.
new_key_path = root / "iris.csv"
old_key_path
S3Path('s3://lamindb-ci/test-add-replace-stage/iris.parquet')
new_key_path
S3Path('s3://lamindb-ci/test-add-replace-stage/iris.csv')
assert not old_key_path.exists()
assert new_key_path.exists()
# Keep the storage path: the next section passes it to replace() and finally unlinks it.
path_in_storage = artifact.path
# storage=False deletes the record but leaves the file in storage (see the warning that follows).
artifact.delete(permanent=True, storage=False)
❗ you will retain a dangling store here: s3://lamindb-ci/test-add-replace-stage/iris.csv, not referenced via an artifact

Save with manually passed virtual key

# Re-enable virtual keys; the saved artifact below reports key_is_virtual=True.
ln.settings.artifact_use_virtual_keys = True
# Same explicit key as before, but now it is virtual.
artifact = ln.Artifact("./test-files/iris.csv", key="iris.csv")
❗ no run & transform get linked, consider calling ln.track()
artifact.save()
Artifact(updated_at=2024-05-23 10:57:19 UTC, uid='0n2GabtHU8Xock7OX8TO', key='iris.csv', suffix='.csv', size=224, hash='iwc1TmF1TW_l5weDvscSHw', hash_type='md5', visibility=1, key_is_virtual=True, created_by_id=1, storage_id=1)
# Replacing with a path that already lives in the instance's storage is expected to raise ValueError.
with pytest.raises(ValueError):
    artifact.replace(path_in_storage)
❗ no run & transform get linked, consider calling ln.track()
# Replacing with a file whose hash matches the current content returns the existing artifact.
assert artifact == artifact.replace("./test-files/iris.csv")
❗ no run & transform get linked, consider calling ln.track()
❗ returning existing artifact with same hash: Artifact(updated_at=2024-05-23 10:57:19 UTC, uid='0n2GabtHU8Xock7OX8TO', key='iris.csv', suffix='.csv', size=224, hash='iwc1TmF1TW_l5weDvscSHw', hash_type='md5', visibility=1, key_is_virtual=True, created_by_id=1, storage_id=1)
fpath = artifact.path
# With a virtual key, the stored file is named <uid><suffix> rather than after the key.
assert fpath.suffix == ".csv" and fpath.stem == artifact.uid
# Replace with a file whose suffix differs (.csv -> .data).
artifact.replace("./test-files/iris.data")
❗ no run & transform get linked, consider calling ln.track()
artifact.save()
Artifact(updated_at=2024-05-23 10:57:21 UTC, uid='0n2GabtHU8Xock7OX8TO', key='iris.data', suffix='.data', size=182, hash='42Br6no9CjB6s5ZbmO-bmw', hash_type='md5', visibility=1, key_is_virtual=True, created_by_id=1, storage_id=1)
# The virtual key followed the new suffix ...
assert artifact.key == "iris.data"
# ... and the previously stored .csv object is gone.
assert not fpath.exists()
# The new stored file is again named after the uid, now with the .data suffix.
fpath = artifact.path
assert fpath.suffix == ".data" and fpath.stem == artifact.uid
artifact.delete(permanent=True, storage=True)
# Remove the file left behind by the earlier delete(..., storage=False).
path_in_storage.unlink()
# Tear down the test instance.
ln.setup.delete("test-add-replace-stage", force=True)
💡 deleted storage record on hub 8d5230fca3955b48bb7f707e93064ca9
💡 deleted instance record on hub 0102f680066a569da6f32766b07a9f5d