diff --git a/.dockerignore b/.dockerignore index fdcb6c220..0a387f79c 100644 --- a/.dockerignore +++ b/.dockerignore @@ -56,7 +56,7 @@ fossa*.zip # ==================== Miscellaneous folders proto/ -examples/MAG240M/downloads/ +gigl/examples/MAG240M/downloads/ graphlearn_torch/ graphlearn_torch.egg-info/ do_not_open_source diff --git a/Makefile b/Makefile index 8122515de..1d8351f27 100644 --- a/Makefile +++ b/Makefile @@ -21,7 +21,7 @@ DOCKER_IMAGE_MAIN_CUDA_NAME_WITH_TAG?=${DOCKER_IMAGE_MAIN_CUDA_NAME}:${DATE} DOCKER_IMAGE_MAIN_CPU_NAME_WITH_TAG?=${DOCKER_IMAGE_MAIN_CPU_NAME}:${DATE} DOCKER_IMAGE_DEV_WORKBENCH_NAME_WITH_TAG?=${DOCKER_IMAGE_DEV_WORKBENCH_NAME}:${DATE} -PYTHON_DIRS:=.github/scripts examples gigl tests snapchat scripts testing +PYTHON_DIRS:=.github/scripts gigl tests snapchat scripts testing PY_TEST_FILES?="*_test.py" # You can override GIGL_TEST_DEFAULT_RESOURCE_CONFIG by setting it in your environment i.e. # adding `export GIGL_TEST_DEFAULT_RESOURCE_CONFIG=your_resource_config` to your shell config (~/.bashrc, ~/.zshrc, etc.) 
@@ -270,8 +270,8 @@ _skip_build_deps: # make \ job_name="{alias}_run_dev_mag240m_kfp_pipeline" \ start_at="config_populator" \ - task_config_uri="examples/MAG240M/task_config.yaml" \ - resource_config_uri="examples/MAG240M/resource_config.yaml" \ + task_config_uri="gigl/examples/MAG240M/task_config.yaml" \ + resource_config_uri="gigl/examples/MAG240M/resource_config.yaml" \ run_dev_gnn_kubeflow_pipeline # If you have precompiled to some specified poth using `make compile_gigl_kubeflow_pipeline` # You can use it here instead of re-compiling by setting `compiled_pipeline_path` diff --git a/conf.py b/conf.py index b132c183f..ce2f8ef64 100644 --- a/conf.py +++ b/conf.py @@ -68,7 +68,6 @@ "snapchat/**", "snapchat/**", "index.rst", - "examples/**", ] autodoc_default_options = { diff --git a/containers/Dockerfile.src b/containers/Dockerfile.src index 4a92e9b7c..ec94fa1e6 100644 --- a/containers/Dockerfile.src +++ b/containers/Dockerfile.src @@ -17,6 +17,5 @@ COPY deployment deployment COPY gigl gigl COPY snapchat snapchat COPY tests tests -COPY examples examples RUN uv pip install -e . diff --git a/docs/user_guide/config_guides/task_config_guide.md b/docs/user_guide/config_guides/task_config_guide.md index f55e97891..ff5090f8c 100644 --- a/docs/user_guide/config_guides/task_config_guide.md +++ b/docs/user_guide/config_guides/task_config_guide.md @@ -26,7 +26,7 @@ We will use the MAG240M task config to walk you through what a config may look l
Full task config for reference: -```{literalinclude} ../../../examples/MAG240M/task_config.yaml +```{literalinclude} ../../../gigl/examples/MAG240M/task_config.yaml :language: yaml ``` @@ -40,7 +40,7 @@ one edge type: `(paper_or_author, references, paper_or_author)` Note: In this example we have converted the hetrogeneous MAG240M dataset to a homogeneous one with just one edge and one node; which we will be doing self supervised learning on. -```{literalinclude} ../../../examples/MAG240M/task_config.yaml +```{literalinclude} ../../../gigl/examples/MAG240M/task_config.yaml :language: yaml :start-after: GraphMetadata :end-before: ======== @@ -52,7 +52,7 @@ Now we specify what type of learning task we want to do. In this case we want to Prediction to do self supervised learning on the edge: `(paper_or_author, references, paper_or_author)`. Thus, we are using the `NodeAnchorBasedLinkPredictionTaskMetadata` task. -```{literalinclude} ../../../examples/MAG240M/task_config.yaml +```{literalinclude} ../../../gigl/examples/MAG240M/task_config.yaml :language: yaml :start-after: TaskMetadata :end-before: ======== @@ -67,7 +67,7 @@ An example of `NodeBasedTaskMetadata` can be found in `gigl/src/mocking/configs/ Shared config are parameters that are common and may be used across multiple components i.e. Trainer, Inferencer, SubgraphSampler, etc. 
-```{literalinclude} ../../../examples/MAG240M/task_config.yaml +```{literalinclude} ../../../gigl/examples/MAG240M/task_config.yaml :language: yaml :start-after: SharedConfig :end-before: ======== @@ -87,7 +87,7 @@ Once we have the data preprocessed, we will be tabularizing the data with the us Subsequently, we will be creating test/train/val splits based on the %'s specified, using [Split Generator](../overview/components/split_generator.md) -```{literalinclude} ../../../examples/MAG240M/task_config.yaml +```{literalinclude} ../../../gigl/examples/MAG240M/task_config.yaml :language: yaml :start-after: DatasetConfig :end-before: ======== @@ -102,7 +102,7 @@ defined @ {py:class}`gigl.src.training.v1.lib.base_trainer.BaseTrainer`. Some common sense pre-configured trainer implementations can be found in {py:class}`gigl.src.common.modeling_task_specs`. Although, you are recommended to implement your own. -```{literalinclude} ../../../examples/MAG240M/task_config.yaml +```{literalinclude} ../../../gigl/examples/MAG240M/task_config.yaml :language: yaml :start-after: TrainerConfig :end-before: ======== @@ -114,7 +114,7 @@ Similar to Trainer, the class specified by `inferencerClsPath` will be initializ `inferencerArgs` will be directly passed in `**kwargs` to your inferencer class. 
The only requirement is the inferencer class implement the protocol defined @ {py:class}`gigl.src.inference.v1.lib.base_inferencer.BaseInferencer` -```{literalinclude} ../../../examples/MAG240M/task_config.yaml +```{literalinclude} ../../../gigl/examples/MAG240M/task_config.yaml :language: yaml :start-after: InferencerConfig :end-before: ======== diff --git a/docs/user_guide/examples/index.md b/docs/user_guide/examples/index.md index 3f15feb40..623e0dbfa 100644 --- a/docs/user_guide/examples/index.md +++ b/docs/user_guide/examples/index.md @@ -6,7 +6,7 @@ maxdepth: 2 caption: Examples --- -../../../examples/toy_visual_example/toy_example_walkthrough.ipynb -../../../examples/link_prediction/README.md -../../../examples/MAG240M/README.md +../../../gigl/examples/toy_visual_example/toy_example_walkthrough.ipynb +../../../gigl/examples/link_prediction/README.md +../../../gigl/examples/MAG240M/README.md ``` diff --git a/docs/user_guide/getting_started/quick_start.md b/docs/user_guide/getting_started/quick_start.md index fd1d036c1..72c7efed7 100644 --- a/docs/user_guide/getting_started/quick_start.md +++ b/docs/user_guide/getting_started/quick_start.md @@ -103,4 +103,4 @@ data, and general customization: [components page](../overview/architecture.md) - **Examples**: For easy references and make your next steps easier, various example walkthroughs are available on the - examples page. See [here](../examples/index.md) + examples page. 
See [here](../examples/index.md) diff --git a/examples/MAG240M/.gitignore b/gigl/examples/MAG240M/.gitignore similarity index 100% rename from examples/MAG240M/.gitignore rename to gigl/examples/MAG240M/.gitignore diff --git a/examples/MAG240M/README.md b/gigl/examples/MAG240M/README.md similarity index 100% rename from examples/MAG240M/README.md rename to gigl/examples/MAG240M/README.md diff --git a/examples/MAG240M/__init__.py b/gigl/examples/MAG240M/__init__.py similarity index 100% rename from examples/MAG240M/__init__.py rename to gigl/examples/MAG240M/__init__.py diff --git a/examples/MAG240M/common.py b/gigl/examples/MAG240M/common.py similarity index 100% rename from examples/MAG240M/common.py rename to gigl/examples/MAG240M/common.py diff --git a/examples/MAG240M/fetch_data.ipynb b/gigl/examples/MAG240M/fetch_data.ipynb similarity index 99% rename from examples/MAG240M/fetch_data.ipynb rename to gigl/examples/MAG240M/fetch_data.ipynb index c47087cd1..67e7e49a0 100644 --- a/examples/MAG240M/fetch_data.ipynb +++ b/gigl/examples/MAG240M/fetch_data.ipynb @@ -6,7 +6,7 @@ "source": [ "# (Optional) Fetch MAG240M Data into your own project\n", "\n", - "Latest version of this notebook can be found on [github](https://github.com/Snapchat/GiGL/blob/main/examples/MAG240M/fetch_data.ipynb)\n" + "Latest version of this notebook can be found on [github](https://github.com/Snapchat/GiGL/blob/main/gigl/examples/MAG240M/fetch_data.ipynb)\n" ] }, { diff --git a/examples/MAG240M/mag240m.ipynb b/gigl/examples/MAG240M/mag240m.ipynb similarity index 98% rename from examples/MAG240M/mag240m.ipynb rename to gigl/examples/MAG240M/mag240m.ipynb index ddf629db7..8b52c7fa3 100644 --- a/examples/MAG240M/mag240m.ipynb +++ b/gigl/examples/MAG240M/mag240m.ipynb @@ -53,8 +53,8 @@ "\n", "# Firstly, let's give your job a name and ensure that the resource and task configs exist and can be loaded\n", "JOB_NAME = \"test_mag240m\"\n", - "TEMPLATE_TASK_CONFIG_URI = 
LocalUri(\"examples/MAG240M/task_config.yaml\")\n", - "RESOURCE_CONFIG_URI = LocalUri(\"examples/MAG240M/resource_config.yaml\")\n", + "TEMPLATE_TASK_CONFIG_URI = LocalUri(\"gigl/examples/MAG240M/task_config.yaml\")\n", + "RESOURCE_CONFIG_URI = LocalUri(\"gigl/examples/MAG240M/resource_config.yaml\")\n", "\n", "TEMPLATE_TASK_CONFIG: GbmlConfigPbWrapper = GbmlConfigPbWrapper.get_gbml_config_pb_wrapper_from_uri(gbml_config_uri=TEMPLATE_TASK_CONFIG_URI)\n", "RESOURCE_CONFIG: GiglResourceConfigWrapper = get_resource_config(resource_config_uri=RESOURCE_CONFIG_URI)\n", diff --git a/examples/MAG240M/preprocessor_config.py b/gigl/examples/MAG240M/preprocessor_config.py similarity index 99% rename from examples/MAG240M/preprocessor_config.py rename to gigl/examples/MAG240M/preprocessor_config.py index 11da2153c..6072f93cd 100644 --- a/examples/MAG240M/preprocessor_config.py +++ b/gigl/examples/MAG240M/preprocessor_config.py @@ -4,18 +4,18 @@ import tensorflow as tf import tensorflow_transform as tft -from examples.MAG240M.common import NUM_PAPER_FEATURES, TOTAL_NUM_PAPERS -from examples.MAG240M.queries import ( +from google.cloud.bigquery.job import WriteDisposition + +from gigl.common.logger import Logger +from gigl.env.pipelines_config import get_resource_config +from gigl.examples.MAG240M.common import NUM_PAPER_FEATURES, TOTAL_NUM_PAPERS +from gigl.examples.MAG240M.queries import ( query_template_cast_to_homogeneous_edge_table, query_template_cast_to_intermediary_homogeneous_node_table, query_template_computed_node_degree_table, query_template_generate_homogeneous_node_table, query_template_reindex_author_writes_paper_table, ) -from google.cloud.bigquery.job import WriteDisposition - -from gigl.common.logger import Logger -from gigl.env.pipelines_config import get_resource_config from gigl.src.common.types import AppliedTaskIdentifier from gigl.src.common.types.graph_data import EdgeType, EdgeUsageType, NodeType, Relation from 
gigl.src.common.types.pb_wrappers.gigl_resource_config import ( diff --git a/examples/MAG240M/preprocessor_config_heterogeneous.py b/gigl/examples/MAG240M/preprocessor_config_heterogeneous.py similarity index 98% rename from examples/MAG240M/preprocessor_config_heterogeneous.py rename to gigl/examples/MAG240M/preprocessor_config_heterogeneous.py index e524e6e2e..0d12d233d 100644 --- a/examples/MAG240M/preprocessor_config_heterogeneous.py +++ b/gigl/examples/MAG240M/preprocessor_config_heterogeneous.py @@ -1,11 +1,11 @@ from __future__ import annotations -from examples.MAG240M.common import NUM_PAPER_FEATURES -from examples.MAG240M.queries import query_template_compute_average_features from google.cloud.bigquery.job import WriteDisposition from gigl.common.logger import Logger from gigl.env.pipelines_config import get_resource_config +from gigl.examples.MAG240M.common import NUM_PAPER_FEATURES +from gigl.examples.MAG240M.queries import query_template_compute_average_features from gigl.src.common.types import AppliedTaskIdentifier from gigl.src.common.types.graph_data import EdgeType, EdgeUsageType, NodeType, Relation from gigl.src.common.types.pb_wrappers.gigl_resource_config import ( diff --git a/examples/MAG240M/queries.py b/gigl/examples/MAG240M/queries.py similarity index 97% rename from examples/MAG240M/queries.py rename to gigl/examples/MAG240M/queries.py index a9b30ef21..1a254488d 100644 --- a/examples/MAG240M/queries.py +++ b/gigl/examples/MAG240M/queries.py @@ -1,4 +1,4 @@ -from examples.MAG240M.common import NUM_PAPER_FEATURES +from gigl.examples.MAG240M.common import NUM_PAPER_FEATURES query_template_reindex_author_writes_paper_table = """ -- Firstly, we reindex the author to the same node space as papers diff --git a/examples/MAG240M/resource_config.yaml b/gigl/examples/MAG240M/resource_config.yaml similarity index 100% rename from examples/MAG240M/resource_config.yaml rename to gigl/examples/MAG240M/resource_config.yaml diff --git 
a/examples/MAG240M/resource_config_heterogeneous.yaml b/gigl/examples/MAG240M/resource_config_heterogeneous.yaml similarity index 100% rename from examples/MAG240M/resource_config_heterogeneous.yaml rename to gigl/examples/MAG240M/resource_config_heterogeneous.yaml diff --git a/examples/MAG240M/task_config.yaml b/gigl/examples/MAG240M/task_config.yaml similarity index 100% rename from examples/MAG240M/task_config.yaml rename to gigl/examples/MAG240M/task_config.yaml diff --git a/examples/MAG240M/task_config_heterogeneous.yaml b/gigl/examples/MAG240M/task_config_heterogeneous.yaml similarity index 95% rename from examples/MAG240M/task_config_heterogeneous.yaml rename to gigl/examples/MAG240M/task_config_heterogeneous.yaml index 8fcbb699c..b317cc3fd 100644 --- a/examples/MAG240M/task_config_heterogeneous.yaml +++ b/gigl/examples/MAG240M/task_config_heterogeneous.yaml @@ -59,7 +59,7 @@ trainerConfig: out_dim: "256" val_every_n_batch: "1000" # Decrease this number to do more frequent validation learning_rate: "0.005" - command: python -m examples.link_prediction.heterogeneous_training + command: python -m gigl.examples.link_prediction.heterogeneous_training # ======== # InferencerConfig: # specifies the inference configuration. This includes the command and the arguments to pass to it @@ -71,7 +71,7 @@ inferencerConfig: hid_dim: "256" out_dim: "256" inferenceBatchSize: 256 # Reduce batch size if Cuda OOM - command: python -m examples.link_prediction.heterogeneous_inference + command: python -m gigl.examples.link_prediction.heterogeneous_inference # ======== # FeatureFlags: # any additional flags which we should specify for the training + inference job. 
We currently use this to diff --git a/examples/__init__.py b/gigl/examples/__init__.py similarity index 100% rename from examples/__init__.py rename to gigl/examples/__init__.py diff --git a/examples/id_embeddings/gowalla_data_preprocessor_config.py b/gigl/examples/id_embeddings/gowalla_data_preprocessor_config.py similarity index 100% rename from examples/id_embeddings/gowalla_data_preprocessor_config.py rename to gigl/examples/id_embeddings/gowalla_data_preprocessor_config.py diff --git a/examples/link_prediction/README.md b/gigl/examples/link_prediction/README.md similarity index 100% rename from examples/link_prediction/README.md rename to gigl/examples/link_prediction/README.md diff --git a/examples/link_prediction/__init__.py b/gigl/examples/link_prediction/__init__.py similarity index 100% rename from examples/link_prediction/__init__.py rename to gigl/examples/link_prediction/__init__.py diff --git a/examples/link_prediction/configs/e2e_het_dblp_sup_task_config.yaml b/gigl/examples/link_prediction/configs/e2e_het_dblp_sup_task_config.yaml similarity index 95% rename from examples/link_prediction/configs/e2e_het_dblp_sup_task_config.yaml rename to gigl/examples/link_prediction/configs/e2e_het_dblp_sup_task_config.yaml index 8531fd081..fe62c7af8 100644 --- a/examples/link_prediction/configs/e2e_het_dblp_sup_task_config.yaml +++ b/gigl/examples/link_prediction/configs/e2e_het_dblp_sup_task_config.yaml @@ -48,7 +48,7 @@ trainerConfig: ("paper", "to", "author"): [15, 15], ("author", "to", "paper"): [20, 20] } - command: python -m examples.link_prediction.heterogeneous_training + command: python -m gigl.examples.link_prediction.heterogeneous_training inferencerConfig: inferencerArgs: # Example argument to inferencer @@ -64,7 +64,7 @@ inferencerConfig: ("author", "to", "paper"): [20, 20] } inferenceBatchSize: 512 - command: python -m examples.link_prediction.heterogeneous_inference + command: python -m gigl.examples.link_prediction.heterogeneous_inference 
sharedConfig: shouldSkipAutomaticTempAssetCleanup: false shouldSkipInference: false diff --git a/examples/link_prediction/configs/e2e_hom_cora_sup_task_config.yaml b/gigl/examples/link_prediction/configs/e2e_hom_cora_sup_task_config.yaml similarity index 92% rename from examples/link_prediction/configs/e2e_hom_cora_sup_task_config.yaml rename to gigl/examples/link_prediction/configs/e2e_hom_cora_sup_task_config.yaml index 606f13c29..e0a0627cd 100644 --- a/examples/link_prediction/configs/e2e_hom_cora_sup_task_config.yaml +++ b/gigl/examples/link_prediction/configs/e2e_hom_cora_sup_task_config.yaml @@ -17,14 +17,14 @@ trainerConfig: # Example argument to trainer log_every_n_batch: "50" # Frequency in which we log batch information num_neighbors: "[10, 10]" # Fanout per hop, specified as a string representation of a list for the homogeneous use case - command: python -m examples.link_prediction.homogeneous_training + command: python -m gigl.examples.link_prediction.homogeneous_training inferencerConfig: inferencerArgs: # Example argument to inferencer log_every_n_batch: "50" # Frequency in which we log batch information num_neighbors: "[10, 10]" # Fanout per hop, specified as a string representation of a list for the homogeneous use case inferenceBatchSize: 512 - command: python -m examples.link_prediction.homogeneous_inference + command: python -m gigl.examples.link_prediction.homogeneous_inference sharedConfig: shouldSkipAutomaticTempAssetCleanup: false shouldSkipInference: false diff --git a/examples/link_prediction/configs/example_resource_config.yaml b/gigl/examples/link_prediction/configs/example_resource_config.yaml similarity index 100% rename from examples/link_prediction/configs/example_resource_config.yaml rename to gigl/examples/link_prediction/configs/example_resource_config.yaml diff --git a/examples/link_prediction/cora.ipynb b/gigl/examples/link_prediction/cora.ipynb similarity index 98% rename from examples/link_prediction/cora.ipynb rename to 
gigl/examples/link_prediction/cora.ipynb index e558a3b2b..03f1f694a 100644 --- a/examples/link_prediction/cora.ipynb +++ b/gigl/examples/link_prediction/cora.ipynb @@ -79,11 +79,11 @@ "\n", "# Firstly, let's give your job a name and ensure that the resource and task configs exist and can be loaded\n", "JOB_NAME = f\"{getpass.getuser()}_gigl_cora_{curr_datetime}\"\n", - "TEMPLATE_TASK_CONFIG_URI = LocalUri(\"examples/link_prediction/configs/e2e_hom_cora_sup_task_config.yaml\")\n", + "TEMPLATE_TASK_CONFIG_URI = LocalUri(\"gigl/examples/link_prediction/configs/e2e_hom_cora_sup_task_config.yaml\")\n", "\n", "# Respect the environment variable for resource config URI\n", "# if not, set it to some default value.\n", - "RESOURCE_CONFIG_URI = LocalUri(os.environ.get(\"GIGL_TEST_DEFAULT_RESOURCE_CONFIG\", \"examples/link_prediction/configs/example_resource_config.yaml\"))\n", + "RESOURCE_CONFIG_URI = LocalUri(os.environ.get(\"GIGL_TEST_DEFAULT_RESOURCE_CONFIG\", \"gigl/examples/link_prediction/configs/example_resource_config.yaml\"))\n", "print(f\"Using resource config URI: {RESOURCE_CONFIG_URI}\")\n", "\n", "TEMPLATE_TASK_CONFIG: GbmlConfigPbWrapper = GbmlConfigPbWrapper.get_gbml_config_pb_wrapper_from_uri(gbml_config_uri=TEMPLATE_TASK_CONFIG_URI)\n", @@ -363,7 +363,7 @@ "import torch\n", "from torch_geometric.data import Data\n", "\n", - "from examples.link_prediction.models import init_example_gigl_homogeneous_model\n", + "from gigl.examples.link_prediction.models import init_example_gigl_homogeneous_model\n", "from gigl.common import UriFactory\n", "from gigl.src.common.utils.model import load_state_dict_from_uri\n", "\n", diff --git a/examples/link_prediction/dblp.ipynb b/gigl/examples/link_prediction/dblp.ipynb similarity index 98% rename from examples/link_prediction/dblp.ipynb rename to gigl/examples/link_prediction/dblp.ipynb index 464fdea3b..88218e335 100644 --- a/examples/link_prediction/dblp.ipynb +++ b/gigl/examples/link_prediction/dblp.ipynb @@ -79,10 +79,10 
@@ "\n", "# Firstly, let's give your job a name and ensure that the resource and task configs exist and can be loaded\n", "JOB_NAME = f\"{getpass.getuser()}_gigl_dblp_{curr_datetime}\"\n", - "TEMPLATE_TASK_CONFIG_URI = LocalUri(\"examples/link_prediction/configs/e2e_het_dblp_sup_task_config.yaml\")\n", + "TEMPLATE_TASK_CONFIG_URI = LocalUri(\"gigl/examples/link_prediction/configs/e2e_het_dblp_sup_task_config.yaml\")\n", "# Respect the environment variable for resource config URI\n", "# if not, set it to some default value.\n", - "RESOURCE_CONFIG_URI = LocalUri(os.environ.get(\"GIGL_TEST_DEFAULT_RESOURCE_CONFIG\", \"examples/link_prediction/configs/example_resource_config.yaml\"))\n", + "RESOURCE_CONFIG_URI = LocalUri(os.environ.get(\"GIGL_TEST_DEFAULT_RESOURCE_CONFIG\", \"gigl/examples/link_prediction/configs/example_resource_config.yaml\"))\n", "print(f\"Using resource config URI: {RESOURCE_CONFIG_URI}\")\n", "\n", "TEMPLATE_TASK_CONFIG: GbmlConfigPbWrapper = GbmlConfigPbWrapper.get_gbml_config_pb_wrapper_from_uri(gbml_config_uri=TEMPLATE_TASK_CONFIG_URI)\n", @@ -362,7 +362,7 @@ "import torch\n", "from torch_geometric.data import HeteroData\n", "\n", - "from examples.link_prediction.models import init_example_gigl_heterogeneous_model\n", + "from gigl.examples.link_prediction.models import init_example_gigl_heterogeneous_model\n", "from gigl.common import UriFactory\n", "from gigl.src.common.utils.model import load_state_dict_from_uri\n", "from gigl.src.common.types.graph_data import EdgeType, NodeType\n", diff --git a/examples/link_prediction/graph_store/__init__.py b/gigl/examples/link_prediction/graph_store/__init__.py similarity index 100% rename from examples/link_prediction/graph_store/__init__.py rename to gigl/examples/link_prediction/graph_store/__init__.py diff --git a/examples/link_prediction/graph_store/configs/e2e_hom_cora_sup_gs_task_config.yaml b/gigl/examples/link_prediction/graph_store/configs/e2e_hom_cora_sup_gs_task_config.yaml similarity index 
92% rename from examples/link_prediction/graph_store/configs/e2e_hom_cora_sup_gs_task_config.yaml rename to gigl/examples/link_prediction/graph_store/configs/e2e_hom_cora_sup_gs_task_config.yaml index 6cf4bdeea..1a0e52a08 100644 --- a/examples/link_prediction/graph_store/configs/e2e_hom_cora_sup_gs_task_config.yaml +++ b/gigl/examples/link_prediction/graph_store/configs/e2e_hom_cora_sup_gs_task_config.yaml @@ -21,7 +21,7 @@ trainerConfig: # Example argument to trainer log_every_n_batch: "50" # Frequency in which we log batch information num_neighbors: "[10, 10]" # Fanout per hop, specified as a string representation of a list for the homogeneous use case - command: python -m examples.link_prediction.homogeneous_training + command: python -m gigl.examples.link_prediction.homogeneous_training # TODO(kmonte): Move to user-defined server code inferencerConfig: inferencerArgs: @@ -29,7 +29,7 @@ inferencerConfig: log_every_n_batch: "50" # Frequency in which we log batch information num_neighbors: "[10, 10]" # Fanout per hop, specified as a string representation of a list for the homogeneous use case inferenceBatchSize: 512 - command: python -m examples.link_prediction.graph_store.homogeneous_inference + command: python -m gigl.examples.link_prediction.graph_store.homogeneous_inference sharedConfig: shouldSkipInference: false # Model Evaluation is currently only supported for tabularized SGS GiGL pipelines. This will soon be added for in-mem SGS GiGL pipelines. 
diff --git a/examples/link_prediction/graph_store/configs/example_resource_config.yaml b/gigl/examples/link_prediction/graph_store/configs/example_resource_config.yaml similarity index 100% rename from examples/link_prediction/graph_store/configs/example_resource_config.yaml rename to gigl/examples/link_prediction/graph_store/configs/example_resource_config.yaml diff --git a/examples/link_prediction/graph_store/homogeneous_inference.py b/gigl/examples/link_prediction/graph_store/homogeneous_inference.py similarity index 98% rename from examples/link_prediction/graph_store/homogeneous_inference.py rename to gigl/examples/link_prediction/graph_store/homogeneous_inference.py index 08d59e758..b93d6c938 100644 --- a/examples/link_prediction/graph_store/homogeneous_inference.py +++ b/gigl/examples/link_prediction/graph_store/homogeneous_inference.py @@ -14,7 +14,7 @@ - Better memory utilization (graph data stays on storage nodes) - Cost optimization by using appropriate hardware for each role -In contrast, the standard inference mode (see `examples/link_prediction/homogeneous_inference.py`) +In contrast, the standard inference mode (see `gigl/examples/link_prediction/homogeneous_inference.py`) uses a homogeneous cluster where each machine handles both graph storage and computation. 
Key Implementation Differences: @@ -67,7 +67,7 @@ # Example argument to inferencer log_every_n_batch: "50" inferenceBatchSize: 512 - command: python -m examples.link_prediction.graph_store.homogeneous_inference + command: python -m gigl.examples.link_prediction.graph_store.homogeneous_inference featureFlags: should_run_glt_backend: 'True' @@ -85,7 +85,6 @@ import torch import torch.multiprocessing as mp -from examples.link_prediction.models import init_example_gigl_homogeneous_model import gigl.distributed import gigl.distributed.utils @@ -97,6 +96,7 @@ from gigl.distributed.graph_store.remote_dist_dataset import RemoteDistDataset from gigl.distributed.utils import get_graph_store_info from gigl.env.distributed import GraphStoreInfo +from gigl.examples.link_prediction.models import init_example_gigl_homogeneous_model from gigl.nn import LinkPredictionGNN from gigl.src.common.types import AppliedTaskIdentifier from gigl.src.common.types.graph_data import NodeType diff --git a/examples/link_prediction/heterogeneous_inference.py b/gigl/examples/link_prediction/heterogeneous_inference.py similarity index 98% rename from examples/link_prediction/heterogeneous_inference.py rename to gigl/examples/link_prediction/heterogeneous_inference.py index e75f715da..bd15cde35 100644 --- a/examples/link_prediction/heterogeneous_inference.py +++ b/gigl/examples/link_prediction/heterogeneous_inference.py @@ -12,7 +12,7 @@ # Example argument to inferencer log_every_n_batch: "50" inferenceBatchSize: 512 - command: python -m examples.link_prediction.heterogeneous_inference + command: python -m gigl.examples.link_prediction.heterogeneous_inference featureFlags: should_run_glt_backend: 'True' @@ -27,7 +27,6 @@ import torch import torch.distributed import torch.multiprocessing as mp -from examples.link_prediction.models import init_example_gigl_heterogeneous_model import gigl.distributed import gigl.distributed.utils @@ -36,6 +35,7 @@ from gigl.common.logger import Logger from 
gigl.common.utils.gcs import GcsUtils from gigl.distributed import DistDataset, build_dataset_from_task_config_uri +from gigl.examples.link_prediction.models import init_example_gigl_heterogeneous_model from gigl.nn import LinkPredictionGNN from gigl.src.common.types import AppliedTaskIdentifier from gigl.src.common.types.graph_data import EdgeType, NodeType @@ -100,7 +100,7 @@ def _inference_process( # to each edge type in the graph, or as string of format dict[(tuple[str, str, str])), list[int]] which will specify fanouts per edge type. # In the case of the latter, the keys should be specified with format (SRC_NODE_TYPE, RELATION, DST_NODE_TYPE). # For the default example, we make a decision to keep the fanouts for all edge types the same, specifying the `fanout` with a `list[int]`. - # To see an example of a 'fanout' with different behaviors per edge type, refer to `examples/link_prediction.configs/e2e_het_dblp_sup_task_config.yaml`. + # To see an example of a 'fanout' with different behaviors per edge type, refer to `gigl/examples/link_prediction/configs/e2e_het_dblp_sup_task_config.yaml`. 
fanout = inferencer_args.get("num_neighbors", "[10, 10]") num_neighbors = parse_fanout(fanout) diff --git a/examples/link_prediction/heterogeneous_training.py b/gigl/examples/link_prediction/heterogeneous_training.py similarity index 99% rename from examples/link_prediction/heterogeneous_training.py rename to gigl/examples/link_prediction/heterogeneous_training.py index d29bd4330..c4452a395 100644 --- a/examples/link_prediction/heterogeneous_training.py +++ b/gigl/examples/link_prediction/heterogeneous_training.py @@ -9,14 +9,14 @@ trainerArgs: log_every_n_batch: "50" ssl_positive_label_percentage: "0.05" - command: python -m examples.link_prediction.heterogeneous_training + command: python -m gigl.examples.link_prediction.heterogeneous_training featureFlags: should_run_glt_backend: 'True' You can run this example in a full pipeline with `make run_het_dblp_sup_test` from GiGL root. Given a frozen task config with some already populated data preprocessor output, the following training script can be run locally with training using: -WORLD_SIZE=1 RANK=0 MASTER_ADDR="localhost" MASTER_PORT=20000 python -m examples.link_prediction.heterogeneous_training --task_config_uri= +WORLD_SIZE=1 RANK=0 MASTER_ADDR="localhost" MASTER_PORT=20000 python -m gigl.examples.link_prediction.heterogeneous_training --task_config_uri= A frozen task config with data preprocessor outputs can be generated by running an e2e pipeline with `stop_after=data_preprocessor` and using the frozen config generated from the `config_populator` component after the run has completed. 
@@ -36,7 +36,6 @@ import torch import torch.distributed import torch.multiprocessing as mp -from examples.link_prediction.models import init_example_gigl_heterogeneous_model from torch_geometric.data import HeteroData import gigl.distributed.utils @@ -50,6 +49,7 @@ ) from gigl.distributed.distributed_neighborloader import DistNeighborLoader from gigl.distributed.utils import get_available_device +from gigl.examples.link_prediction.models import init_example_gigl_heterogeneous_model from gigl.nn import LinkPredictionGNN, RetrievalLoss from gigl.src.common.types.graph_data import EdgeType, NodeType from gigl.src.common.types.pb_wrappers.gbml_config import GbmlConfigPbWrapper @@ -710,7 +710,7 @@ def _run_example_training( # to each edge type in the graph, or as string of format dict[(tuple[str, str, str])), list[int]] which will specify fanouts per edge type. # In the case of the latter, the keys should be specified with format (SRC_NODE_TYPE, RELATION, DST_NODE_TYPE). # For the default example, we make a decision to keep the fanouts for all edge types the same, specifying the `fanout` with a `list[int]`. - # To see an example of a 'fanout' with different behaviors per edge type, refer to `examples/link_prediction.configs/e2e_het_dblp_sup_task_config.yaml`. + # To see an example of a 'fanout' with different behaviors per edge type, refer to `gigl/examples/link_prediction/configs/e2e_het_dblp_sup_task_config.yaml`. 
fanout = trainer_args.get("num_neighbors", "[10, 10]") num_neighbors = parse_fanout(fanout) diff --git a/examples/link_prediction/homogeneous_inference.py b/gigl/examples/link_prediction/homogeneous_inference.py similarity index 99% rename from examples/link_prediction/homogeneous_inference.py rename to gigl/examples/link_prediction/homogeneous_inference.py index 2e9f30a52..15b144c23 100644 --- a/examples/link_prediction/homogeneous_inference.py +++ b/gigl/examples/link_prediction/homogeneous_inference.py @@ -12,7 +12,7 @@ # Example argument to inferencer log_every_n_batch: "50" inferenceBatchSize: 512 - command: python -m examples.link_prediction.homogeneous_inference + command: python -m gigl.examples.link_prediction.homogeneous_inference featureFlags: should_run_glt_backend: 'True' @@ -25,7 +25,6 @@ import torch import torch.multiprocessing as mp -from examples.link_prediction.models import init_example_gigl_homogeneous_model import gigl.distributed import gigl.distributed.utils @@ -34,6 +33,7 @@ from gigl.common.logger import Logger from gigl.common.utils.gcs import GcsUtils from gigl.distributed import DistDataset, build_dataset_from_task_config_uri +from gigl.examples.link_prediction.models import init_example_gigl_homogeneous_model from gigl.nn import LinkPredictionGNN from gigl.src.common.types import AppliedTaskIdentifier from gigl.src.common.types.graph_data import NodeType diff --git a/examples/link_prediction/homogeneous_training.py b/gigl/examples/link_prediction/homogeneous_training.py similarity index 99% rename from examples/link_prediction/homogeneous_training.py rename to gigl/examples/link_prediction/homogeneous_training.py index 85c8c4e48..79a262b20 100644 --- a/examples/link_prediction/homogeneous_training.py +++ b/gigl/examples/link_prediction/homogeneous_training.py @@ -9,14 +9,14 @@ trainerArgs: # Example argument to trainer log_every_n_batch: "50" - command: python -m examples.link_prediction.homogeneous_training + command: python -m 
gigl.examples.link_prediction.homogeneous_training featureFlags: should_run_glt_backend: 'True' You can run this example in a full pipeline with `make run_hom_cora_sup_test` from GiGL root. Given a frozen task config with some already populated data preprocessor output, the following training script can be run locally using: -WORLD_SIZE=1 RANK=0 MASTER_ADDR="localhost" MASTER_PORT=20000 python -m examples.link_prediction.homogeneous_training --task_config_uri= +WORLD_SIZE=1 RANK=0 MASTER_ADDR="localhost" MASTER_PORT=20000 python -m gigl.examples.link_prediction.homogeneous_training --task_config_uri= A frozen task config with data preprocessor outputs can be generated by running an e2e pipeline with `stop_after=data_preprocessor` and using the frozen config generated from the `config_populator` component after the run has completed. @@ -31,7 +31,6 @@ import torch import torch.distributed import torch.multiprocessing as mp -from examples.link_prediction.models import init_example_gigl_homogeneous_model from torch_geometric.data import Data import gigl.distributed.utils @@ -45,6 +44,7 @@ ) from gigl.distributed.distributed_neighborloader import DistNeighborLoader from gigl.distributed.utils import get_available_device +from gigl.examples.link_prediction.models import init_example_gigl_homogeneous_model from gigl.nn import LinkPredictionGNN, RetrievalLoss from gigl.src.common.types.pb_wrappers.gbml_config import GbmlConfigPbWrapper from gigl.src.common.utils.model import load_state_dict_from_uri, save_state_dict diff --git a/examples/link_prediction/models.py b/gigl/examples/link_prediction/models.py similarity index 100% rename from examples/link_prediction/models.py rename to gigl/examples/link_prediction/models.py diff --git a/examples/toy_visual_example/README.md b/gigl/examples/toy_visual_example/README.md similarity index 100% rename from examples/toy_visual_example/README.md rename to gigl/examples/toy_visual_example/README.md diff --git 
a/examples/toy_visual_example/assets/config_boss.png b/gigl/examples/toy_visual_example/assets/config_boss.png similarity index 100% rename from examples/toy_visual_example/assets/config_boss.png rename to gigl/examples/toy_visual_example/assets/config_boss.png diff --git a/examples/toy_visual_example/assets/link_pred_sample_node_9.png b/gigl/examples/toy_visual_example/assets/link_pred_sample_node_9.png similarity index 100% rename from examples/toy_visual_example/assets/link_pred_sample_node_9.png rename to gigl/examples/toy_visual_example/assets/link_pred_sample_node_9.png diff --git a/examples/toy_visual_example/assets/rooted_neighborhood_node_1.png b/gigl/examples/toy_visual_example/assets/rooted_neighborhood_node_1.png similarity index 100% rename from examples/toy_visual_example/assets/rooted_neighborhood_node_1.png rename to gigl/examples/toy_visual_example/assets/rooted_neighborhood_node_1.png diff --git a/examples/toy_visual_example/assets/tft_example.png b/gigl/examples/toy_visual_example/assets/tft_example.png similarity index 100% rename from examples/toy_visual_example/assets/tft_example.png rename to gigl/examples/toy_visual_example/assets/tft_example.png diff --git a/examples/toy_visual_example/assets/transductive.png b/gigl/examples/toy_visual_example/assets/transductive.png similarity index 100% rename from examples/toy_visual_example/assets/transductive.png rename to gigl/examples/toy_visual_example/assets/transductive.png diff --git a/examples/toy_visual_example/graph_config.yaml b/gigl/examples/toy_visual_example/graph_config.yaml similarity index 100% rename from examples/toy_visual_example/graph_config.yaml rename to gigl/examples/toy_visual_example/graph_config.yaml diff --git a/examples/toy_visual_example/resource_config.yaml b/gigl/examples/toy_visual_example/resource_config.yaml similarity index 100% rename from examples/toy_visual_example/resource_config.yaml rename to gigl/examples/toy_visual_example/resource_config.yaml diff --git 
a/examples/toy_visual_example/template_task_config.yaml b/gigl/examples/toy_visual_example/template_task_config.yaml similarity index 100% rename from examples/toy_visual_example/template_task_config.yaml rename to gigl/examples/toy_visual_example/template_task_config.yaml diff --git a/examples/toy_visual_example/toy_data_preprocessor_config.py b/gigl/examples/toy_visual_example/toy_data_preprocessor_config.py similarity index 100% rename from examples/toy_visual_example/toy_data_preprocessor_config.py rename to gigl/examples/toy_visual_example/toy_data_preprocessor_config.py diff --git a/examples/toy_visual_example/toy_example_walkthrough.ipynb b/gigl/examples/toy_visual_example/toy_example_walkthrough.ipynb similarity index 99% rename from examples/toy_visual_example/toy_example_walkthrough.ipynb rename to gigl/examples/toy_visual_example/toy_example_walkthrough.ipynb index f658f77ac..5e2ebab86 100644 --- a/examples/toy_visual_example/toy_example_walkthrough.ipynb +++ b/gigl/examples/toy_visual_example/toy_example_walkthrough.ipynb @@ -6,7 +6,7 @@ "source": [ "# Toy Example - Tabularized GiGL\n", "\n", - "Latest version of this notebook can be found on [GiGL/examples/toy_visual_example/toy_example_walkthrough.ipynb](https://github.com/Snapchat/GiGL/blob/main/examples/toy_visual_example/toy_example_walkthrough.ipynb)\n", + "Latest version of this notebook can be found on [GiGL/gigl/examples/toy_visual_example/toy_example_walkthrough.ipynb](https://github.com/Snapchat/GiGL/blob/main/gigl/examples/toy_visual_example/toy_example_walkthrough.ipynb)\n", "\n", "\n", "This notebook provides a walkthrough of preprocessing, subgraph sampling, and split generation components with a small toy graph for GiGL's Tabularized setting for training/inference. 
It will help you understand how each of these components prepare tabularized subgraphs.\n", @@ -97,7 +97,7 @@ "from gigl.src.mocking.toy_asset_mocker import load_toy_graph\n", "\n", "original_graph_heterodata: HeteroData = load_toy_graph(\n", - " graph_config_path=\"examples/toy_visual_example/graph_config.yaml\"\n", + " graph_config_path=\"gigl/examples/toy_visual_example/graph_config.yaml\"\n", ") # If you want to update the graph, you will need to re-mock - See README.md\n", "\n", "\n", @@ -164,7 +164,7 @@ "PERM_ASSETS_BUCKET = f\"gs://gigl_perm_assets_{PROJECT}\"\n", "\n", "# Use the template resource config as a starting point\n", - "TEMPLATE_RESOURCE_CONFIG_PATH = \"examples/toy_visual_example/resource_config.yaml\"\n", + "TEMPLATE_RESOURCE_CONFIG_PATH = \"gigl/examples/toy_visual_example/resource_config.yaml\"\n", "# This is the output path where we will store your unique resource config\n", "RESOURCE_CONFIG_PATH = GcsUri(f\"gs://gigl_perm_assets_{PROJECT}/tabularized_resource_config.yaml\")\n", "\n", diff --git a/examples/tutorial/KDD_2025/.gitignore b/gigl/examples/tutorial/KDD_2025/.gitignore similarity index 100% rename from examples/tutorial/KDD_2025/.gitignore rename to gigl/examples/tutorial/KDD_2025/.gitignore diff --git a/examples/tutorial/KDD_2025/README.md b/gigl/examples/tutorial/KDD_2025/README.md similarity index 100% rename from examples/tutorial/KDD_2025/README.md rename to gigl/examples/tutorial/KDD_2025/README.md diff --git a/examples/tutorial/KDD_2025/graph_config.yaml b/gigl/examples/tutorial/KDD_2025/graph_config.yaml similarity index 100% rename from examples/tutorial/KDD_2025/graph_config.yaml rename to gigl/examples/tutorial/KDD_2025/graph_config.yaml diff --git a/examples/tutorial/KDD_2025/heterogeneous_inference.py b/gigl/examples/tutorial/KDD_2025/heterogeneous_inference.py similarity index 96% rename from examples/tutorial/KDD_2025/heterogeneous_inference.py rename to gigl/examples/tutorial/KDD_2025/heterogeneous_inference.py 
index d07f220a7..06a2ea97b 100644 --- a/examples/tutorial/KDD_2025/heterogeneous_inference.py +++ b/gigl/examples/tutorial/KDD_2025/heterogeneous_inference.py @@ -5,9 +5,9 @@ It also exports the embeddings to a specified output URI, which can be a GCS bucket or a local directory. Example usage: - python -m examples.tutorial.KDD_2025.heterogeneous_inference --task_config_uri + python -m gigl.examples.tutorial.KDD_2025.heterogeneous_inference --task_config_uri -To generate a frozen config from a template task config, see instructions at top of `examples/tutorial/KDD_2025/task_config.yaml`. +To generate a frozen config from a template task config, see instructions at top of `gigl/examples/tutorial/KDD_2025/task_config.yaml`. Args: --task_config_uri: Path to the task config URI. @@ -32,7 +32,6 @@ import pandas as pd import torch import torch.multiprocessing.spawn -from examples.tutorial.KDD_2025.utils import LOCAL_SAVED_MODEL_URI, init_model from gigl.common import Uri, UriFactory from gigl.common.data.export import EmbeddingExporter @@ -43,6 +42,7 @@ build_dataset_from_task_config_uri, ) from gigl.distributed.utils import get_free_port +from gigl.examples.tutorial.KDD_2025.utils import LOCAL_SAVED_MODEL_URI, init_model from gigl.src.common.types.pb_wrappers.gbml_config import GbmlConfigPbWrapper from gigl.src.common.utils.model import load_state_dict_from_uri diff --git a/examples/tutorial/KDD_2025/heterogeneous_training.py b/gigl/examples/tutorial/KDD_2025/heterogeneous_training.py similarity index 97% rename from examples/tutorial/KDD_2025/heterogeneous_training.py rename to gigl/examples/tutorial/KDD_2025/heterogeneous_training.py index a6550b78a..67766736e 100644 --- a/examples/tutorial/KDD_2025/heterogeneous_training.py +++ b/gigl/examples/tutorial/KDD_2025/heterogeneous_training.py @@ -5,9 +5,9 @@ Does not support GPU training. 
Run with: - python -m examples.tutorial.KDD_2025.heterogeneous_training --task_config_uri + python -m gigl.examples.tutorial.KDD_2025.heterogeneous_training --task_config_uri -To generate a frozen config from a template task config, see instructions at top of `examples/tutorial/KDD_2025/task_config.yaml`. +To generate a frozen config from a template task config, see instructions at top of `gigl/examples/tutorial/KDD_2025/task_config.yaml`. This example is meant to be run on the "toy graph" dataset, @@ -40,7 +40,6 @@ from typing import Literal import torch -from examples.tutorial.KDD_2025.utils import LOCAL_SAVED_MODEL_URI, init_model from torch.nn.parallel import DistributedDataParallel from torch_geometric.data import HeteroData @@ -52,6 +51,7 @@ build_dataset_from_task_config_uri, ) from gigl.distributed.utils import get_free_port +from gigl.examples.tutorial.KDD_2025.utils import LOCAL_SAVED_MODEL_URI, init_model from gigl.src.common.types.graph_data import EdgeType, NodeType, Relation from gigl.src.common.types.pb_wrappers.gbml_config import GbmlConfigPbWrapper from gigl.src.common.utils.model import save_state_dict diff --git a/examples/tutorial/KDD_2025/heterogeneous_walkthrough.ipynb b/gigl/examples/tutorial/KDD_2025/heterogeneous_walkthrough.ipynb similarity index 99% rename from examples/tutorial/KDD_2025/heterogeneous_walkthrough.ipynb rename to gigl/examples/tutorial/KDD_2025/heterogeneous_walkthrough.ipynb index 443e5f1bb..cbb48d730 100644 --- a/examples/tutorial/KDD_2025/heterogeneous_walkthrough.ipynb +++ b/gigl/examples/tutorial/KDD_2025/heterogeneous_walkthrough.ipynb @@ -7,7 +7,7 @@ "source": [ "# In-Memory GiGL - Heterogeneous Graph Example\n", "\n", - "Latest version of this notebook can be found on [github](https://github.com/Snapchat/GiGL/blob/main/examples/tutorial/KDD_2025/heterogeneous_walkthrough.ipynb)\n", + "Latest version of this notebook can be found on 
[github](https://github.com/Snapchat/GiGL/blob/main/gigl/examples/tutorial/KDD_2025/heterogeneous_walkthrough.ipynb)\n", "\n", "\n", "This notebook provides a walkthrough of preprocessing components with a small toy graph for GiGL's in-memory setting for training/inference. It will help you understand how each of these components perform in-memory training and inference.\n", @@ -120,7 +120,7 @@ "from gigl.src.mocking.toy_asset_mocker import load_toy_graph\n", "\n", "\n", - "original_graph_heterodata: HeteroData = load_toy_graph(graph_config_path=\"examples/tutorial/KDD_2025/graph_config.yaml\")\n", + "original_graph_heterodata: HeteroData = load_toy_graph(graph_config_path=\"gigl/examples/tutorial/KDD_2025/graph_config.yaml\")\n", "# Visualize the graph\n", "GraphVisualizer.visualize_graph(original_graph_heterodata)\n", "\n", @@ -174,7 +174,7 @@ " --embedding_bq_dataset_name=\"gigl_embeddings\" \\\n", " --temp_assets_bucket=\"gs://gigl_temp_assets_$PROJECT\" \\\n", " --perm_assets_bucket=\"gs://gigl_perm_assets_$PROJECT\" \\\n", - " --template_resource_config_uri=\"examples/tutorial/KDD_2025/resource_config.yaml\" \\\n", + " --template_resource_config_uri=\"gigl/examples/tutorial/KDD_2025/resource_config.yaml\" \\\n", " --output_resource_config_path=\"$RESOURCE_CONFIG_PATH\" \\\n", " --force_shell_config_update=True\n", "\n", @@ -1259,7 +1259,7 @@ "# Use a local directory for exporting embeddings.\n", "# You can also use a GCS URI if you want to export to GCS.\n", "# For example, use \"gs://your-bucket-name/path/to/embeddings\".\n", - "embedding_dir = UriFactory.create_uri(\"examples/tutorial/KDD_2025/.embeddings\")\n", + "embedding_dir = UriFactory.create_uri(\"gigl/examples/tutorial/KDD_2025/.embeddings\")\n", "\n", "exporter = EmbeddingExporter(\n", " export_dir=embedding_dir,\n", @@ -1298,7 +1298,7 @@ "import fastavro\n", "\n", "avro_records = []\n", - "for file in Path(\"examples/tutorial/KDD_2025/.embeddings\").glob(\"*.avro\"):\n", + "for file in 
Path(\"gigl/examples/tutorial/KDD_2025/.embeddings\").glob(\"*.avro\"):\n", " with open(file, \"rb\") as f:\n", " reader = fastavro.reader(f)\n", " for record in reader:\n", diff --git a/examples/tutorial/KDD_2025/lab_instructions.md b/gigl/examples/tutorial/KDD_2025/lab_instructions.md similarity index 90% rename from examples/tutorial/KDD_2025/lab_instructions.md rename to gigl/examples/tutorial/KDD_2025/lab_instructions.md index af09d157a..184872acd 100644 --- a/examples/tutorial/KDD_2025/lab_instructions.md +++ b/gigl/examples/tutorial/KDD_2025/lab_instructions.md @@ -38,10 +38,10 @@ Hands-On experience with the GiGL library to train industry-scale Graph Neural N Screenshot 2025-07-31 at 10 48 37 AM -6. Open the `gigl` folder in the left sidebar. Navigate to the `examples/tutorial/KDD_2025` folder. +6. Open the `gigl` folder in the left sidebar. Navigate to the `gigl/examples/tutorial/KDD_2025` folder. 7. In the hands-on portion of the tutorial, we will be running notebooks - `examples/toy_visual_example/toy_example_walkthrough.ipynb` for tabularization subgraph sampling, and - `examples/tutorial/KDD_2025/heterogeneous_walkthrough.ipynb` for in-memory subgraph sampling. + `gigl/examples/toy_visual_example/toy_example_walkthrough.ipynb` for tabularization subgraph sampling, and + `gigl/examples/tutorial/KDD_2025/heterogeneous_walkthrough.ipynb` for in-memory subgraph sampling. 8. Open the notebooks with `gigl` kernel. The select kernel will show on the top right corner of the notebook page. 
Screenshot 2025-07-31 at 10 53 57 AM @@ -53,7 +53,7 @@ Hands-On experience with the GiGL library to train industry-scale Graph Neural N ## Additional Resources - GiGL KDD '25 tutorial page: - [Homepage](https://github.com/Snapchat/GiGL/blob/main/examples/tutorial/KDD_2025/README.md) + [Homepage](https://github.com/Snapchat/GiGL/blob/main/gigl/examples/tutorial/KDD_2025/README.md) - GiGL KDD ADS track paper: [Paper link](https://arxiv.org/abs/2502.15054) - GiGL library source code: [GitHub](https://github.com/Snapchat/GiGL/tree/main) - GiGL documentation: [Read the Docs](https://snapchat.github.io/GiGL/index.html) diff --git a/examples/tutorial/KDD_2025/preprocessor_config.py b/gigl/examples/tutorial/KDD_2025/preprocessor_config.py similarity index 100% rename from examples/tutorial/KDD_2025/preprocessor_config.py rename to gigl/examples/tutorial/KDD_2025/preprocessor_config.py diff --git a/examples/tutorial/KDD_2025/resource_config.yaml b/gigl/examples/tutorial/KDD_2025/resource_config.yaml similarity index 100% rename from examples/tutorial/KDD_2025/resource_config.yaml rename to gigl/examples/tutorial/KDD_2025/resource_config.yaml diff --git a/examples/tutorial/KDD_2025/slides_tutorial_KDD_25.pdf b/gigl/examples/tutorial/KDD_2025/slides_tutorial_KDD_25.pdf similarity index 100% rename from examples/tutorial/KDD_2025/slides_tutorial_KDD_25.pdf rename to gigl/examples/tutorial/KDD_2025/slides_tutorial_KDD_25.pdf diff --git a/examples/tutorial/KDD_2025/task_config.yaml b/gigl/examples/tutorial/KDD_2025/task_config.yaml similarity index 89% rename from examples/tutorial/KDD_2025/task_config.yaml rename to gigl/examples/tutorial/KDD_2025/task_config.yaml index fd6e588cf..fdf6728e2 100644 --- a/examples/tutorial/KDD_2025/task_config.yaml +++ b/gigl/examples/tutorial/KDD_2025/task_config.yaml @@ -6,9 +6,9 @@ # python -m \ # gigl.src.config_populator.config_populator \ # --job_name="example_job_name" \ -# --template_uri="examples/tutorial/KDD_2025/task_config.yaml" \ -# 
--resource_config_uri="examples/tutorial/KDD_2025/resource_config.yaml" \ -# --output_file_path_frozen_gbml_config_uri="examples/tutorial/KDD_2025/example_output_path.yaml" +# --template_uri="gigl/examples/tutorial/KDD_2025/task_config.yaml" \ +# --resource_config_uri="gigl/examples/tutorial/KDD_2025/resource_config.yaml" \ +# --output_file_path_frozen_gbml_config_uri="gigl/examples/tutorial/KDD_2025/example_output_path.yaml" # ======== # TaskMetadata: @@ -64,7 +64,7 @@ trainerConfig: ssl_positive_label_percentage: "0.7" num_val: "0.3" num_test: "0.3" - command: python -m examples.tutorial.KDD_2025.heterogeneous_training + command: python -m gigl.examples.tutorial.KDD_2025.heterogeneous_training # ======== # InferencerConfig: # specifies the inference configuration. This includes the inferencer command and the arguments to pass to it @@ -73,7 +73,7 @@ inferencerConfig: # Example argument to inferencer log_every_n_batch: "50" inferenceBatchSize: 4 - command: python -m examples.tutorial.KDD_2025.heterogeneous_inference + command: python -m gigl.examples.tutorial.KDD_2025.heterogeneous_inference # ======== # FeatureFlags: # any additional flags which we should specify for the training + inference job. We currently use this to diff --git a/examples/tutorial/KDD_2025/utils.py b/gigl/examples/tutorial/KDD_2025/utils.py similarity index 100% rename from examples/tutorial/KDD_2025/utils.py rename to gigl/examples/tutorial/KDD_2025/utils.py diff --git a/gigl/src/common/models/pyg/nn/conv/hgt_conv.py b/gigl/src/common/models/pyg/nn/conv/hgt_conv.py index 673df3b28..e199902c8 100644 --- a/gigl/src/common/models/pyg/nn/conv/hgt_conv.py +++ b/gigl/src/common/models/pyg/nn/conv/hgt_conv.py @@ -27,7 +27,7 @@ class HGTConv(MessagePassing): .. note:: - For an example of using HGT, see `examples/hetero/hgt_dblp.py + For an example of using HGT, see `examples/hetero/hgt_dblp.py `_. 
diff --git a/gigl/src/mocking/dataset_asset_mocking_suite.py b/gigl/src/mocking/dataset_asset_mocking_suite.py index e37ee472d..ae9da0d76 100644 --- a/gigl/src/mocking/dataset_asset_mocking_suite.py +++ b/gigl/src/mocking/dataset_asset_mocking_suite.py @@ -26,7 +26,7 @@ logger = Logger() _HOMOGENEOUS_TOY_GRAPH_CONFIG = str( - GIGL_ROOT_DIR / "examples/toy_visual_example/graph_config.yaml" + GIGL_ROOT_DIR / "gigl/examples/toy_visual_example/graph_config.yaml" ) _BIPARTITE_TOY_GRAPH_CONFIG = ( GIGL_ROOT_DIR / "gigl/src/mocking/mocking_assets/bipartite_toy_graph_data.yaml" diff --git a/testing/api_test/api_test_inference.py b/testing/api_test/api_test_inference.py index 267347167..5e119280c 100644 --- a/testing/api_test/api_test_inference.py +++ b/testing/api_test/api_test_inference.py @@ -26,7 +26,6 @@ import torch import torch.multiprocessing as mp -from examples.link_prediction.models import init_example_gigl_homogeneous_model from graphlearn_torch.distributed import barrier, shutdown_rpc import gigl.distributed @@ -36,6 +35,7 @@ from gigl.common.logger import Logger from gigl.common.utils.gcs import GcsUtils from gigl.distributed import DistDataset, build_dataset_from_task_config_uri +from gigl.examples.link_prediction.models import init_example_gigl_homogeneous_model from gigl.nn.models import LinkPredictionGNN from gigl.src.common.types import AppliedTaskIdentifier from gigl.src.common.types.graph_data import NodeType diff --git a/testing/api_test/api_test_training.py b/testing/api_test/api_test_training.py index 2fbe50932..3139574c9 100644 --- a/testing/api_test/api_test_training.py +++ b/testing/api_test/api_test_training.py @@ -32,7 +32,6 @@ import torch import torch.distributed import torch.multiprocessing as mp -from examples.link_prediction.models import init_example_gigl_homogeneous_model from torch_geometric.data import Data import gigl.distributed.utils @@ -46,6 +45,7 @@ ) from gigl.distributed.distributed_neighborloader import DistNeighborLoader from 
gigl.distributed.utils import get_available_device +from gigl.examples.link_prediction.models import init_example_gigl_homogeneous_model from gigl.nn.loss import RetrievalLoss from gigl.nn.models import LinkPredictionGNN from gigl.src.common.types.pb_wrappers.gbml_config import GbmlConfigPbWrapper diff --git a/testing/e2e_tests/e2e_tests.yaml b/testing/e2e_tests/e2e_tests.yaml index b7c591049..609cc5826 100644 --- a/testing/e2e_tests/e2e_tests.yaml +++ b/testing/e2e_tests/e2e_tests.yaml @@ -14,11 +14,11 @@ tests: task_config_uri: "gigl/src/mocking/configs/dblp_node_anchor_based_link_prediction_template_gbml_config.yaml" resource_config_uri: "${oc.env:GIGL_TEST_DEFAULT_RESOURCE_CONFIG,deployment/configs/e2e_cicd_resource_config.yaml}" hom_cora_sup_test: - task_config_uri: "examples/link_prediction/configs/e2e_hom_cora_sup_task_config.yaml" + task_config_uri: "gigl/examples/link_prediction/configs/e2e_hom_cora_sup_task_config.yaml" resource_config_uri: "${oc.env:GIGL_TEST_IN_MEMORY_DEFAULT_RESOURCE_CONFIG,deployment/configs/e2e_glt_resource_config.yaml}" het_dblp_sup_test: - task_config_uri: "examples/link_prediction/configs/e2e_het_dblp_sup_task_config.yaml" + task_config_uri: "gigl/examples/link_prediction/configs/e2e_het_dblp_sup_task_config.yaml" resource_config_uri: "${oc.env:GIGL_TEST_IN_MEMORY_DEFAULT_RESOURCE_CONFIG,deployment/configs/e2e_glt_resource_config.yaml}" hom_cora_sup_gs_test: - task_config_uri: "examples/link_prediction/graph_store/configs/e2e_hom_cora_sup_gs_task_config.yaml" + task_config_uri: "gigl/examples/link_prediction/graph_store/configs/e2e_hom_cora_sup_gs_task_config.yaml" resource_config_uri: "${oc.env:GIGL_TEST_IN_MEMORY_DEFAULT_GRAPH_STORE_RESOURCE_CONFIG,deployment/configs/e2e_glt_gs_resource_config.yaml}" diff --git a/testing/notebooks_test.py b/testing/notebooks_test.py index d3118b10e..8a6001624 100644 --- a/testing/notebooks_test.py +++ b/testing/notebooks_test.py @@ -67,7 +67,7 @@ def setUp(self): _NoteBookTestConfig( 
name="cora", notebook_path=str( - GIGL_ROOT_DIR / "examples/link_prediction/cora.ipynb" + GIGL_ROOT_DIR / "gigl/examples/link_prediction/cora.ipynb" ), env_overrides={"GIGL_TEST_DEFAULT_RESOURCE_CONFIG": gcs_uri.uri}, ), @@ -75,7 +75,7 @@ def setUp(self): # _NoteBookTestConfig( # name="dblp", # notebook_path=str( - # GIGL_ROOT_DIR / "examples/link_prediction/dblp.ipynb" + # GIGL_ROOT_DIR / "gigl/examples/link_prediction/dblp.ipynb" # ), # env_overrides={ # "GIGL_TEST_DEFAULT_RESOURCE_CONFIG": gcs_uri.uri, @@ -85,14 +85,14 @@ name="toy_example", notebook_path=str( GIGL_ROOT_DIR - / "examples/toy_visual_example/toy_example_walkthrough.ipynb" + / "gigl/examples/toy_visual_example/toy_example_walkthrough.ipynb" ), ), _NoteBookTestConfig( "kdd_2025_heterogeneous", notebook_path=str( GIGL_ROOT_DIR - / "examples/tutorial/KDD_2025/heteregeneous_walkthrough.ipynb" + / "gigl/examples/tutorial/KDD_2025/heterogeneous_walkthrough.ipynb" ), ), ] diff --git a/tests/unit/src/validation/config_validator_test.py b/tests/unit/src/validation/config_validator_test.py index 3d9cce896..5ed2a394b 100644 --- a/tests/unit/src/validation/config_validator_test.py +++ b/tests/unit/src/validation/config_validator_test.py @@ -169,12 +169,12 @@ def _create_valid_live_subgraph_sampling_task_config() -> gbml_config_pb2.GbmlCo ) # Trainer config trainer_config = gbml_config_pb2.GbmlConfig.TrainerConfig( - command="python -m examples.link_prediction.homogeneous_training", + command="python -m gigl.examples.link_prediction.homogeneous_training", ) # Inferencer config inferencer_config = gbml_config_pb2.GbmlConfig.InferencerConfig( - command="python -m examples.link_prediction.homogeneous_inference", + command="python -m gigl.examples.link_prediction.homogeneous_inference", ) return gbml_config_pb2.GbmlConfig(