4 changes: 2 additions & 2 deletions .github/workflows/glt-ci.yml
@@ -23,8 +23,8 @@ env:

jobs:
run-glt-unittests:
-runs-on: self-hosted
-if: ${{ github.repository == 'alibaba/graphlearn-for-pytorch' }}
+runs-on: glt-gpu-instances
+if: ${{ github.repository == 'snapchat/graphlearn-for-pytorch' }}
steps:
- name: Checkout Code
uses: actions/checkout@v3
4 changes: 2 additions & 2 deletions .github/workflows/glt-v6d-ci.yml
@@ -23,8 +23,8 @@ env:

jobs:
run-glt-v6d-unittests:
-runs-on: self-hosted
-if: ${{ github.repository == 'alibaba/graphlearn-for-pytorch' }}
+runs-on: glt-gpu-instances
+if: ${{ github.repository == 'snapchat/graphlearn-for-pytorch' }}
steps:
- name: Checkout Code
uses: actions/checkout@v3
4 changes: 2 additions & 2 deletions .github/workflows/manylinux-cd.yml
@@ -12,8 +12,8 @@ env:

jobs:
build:
-runs-on: self-hosted
-if: ${{ github.repository == 'alibaba/graphlearn-for-pytorch' }}
+runs-on: ubuntu-latest
+if: ${{ github.repository == 'snapchat/graphlearn-for-pytorch' }}
steps:
- name: Checkout Code
uses: actions/checkout@v3
8 changes: 3 additions & 5 deletions test/python/dist_test_utils.py
@@ -35,7 +35,7 @@

# fixed sampling options
sampling_nprocs = 2
-device_num = 2
+device_num = 1


def _prepare_dataset(rank: int,
@@ -110,8 +110,7 @@ def _prepare_dataset(rank: int,
weighted_graph = glt.data.Graph(weighted_csr_topo, 'CPU')

# feature
-device_group_list = [glt.data.DeviceGroup(0, [0]),
-glt.data.DeviceGroup(1, [1])]
+device_group_list = [glt.data.DeviceGroup(0, [0])]
split_ratio = 0.2

nfeat = torch.tensor(nodes, dtype=torch.float32).unsqueeze(1).repeat(1, 512)
@@ -229,8 +228,7 @@ def _prepare_hetero_dataset(
}

# feature
-device_group_list = [glt.data.DeviceGroup(0, [0]),
-glt.data.DeviceGroup(1, [1])]
+device_group_list = [glt.data.DeviceGroup(0, [0])]
split_ratio = 0.2

user_nfeat = rank + torch.zeros(len(user_nodes), 512, dtype=torch.float32)
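Note on the change above: dist_test_utils.py now builds its fixtures for a single GPU (device_num = 1, one DeviceGroup). A minimal sketch of the resulting layout, assuming the tests import graphlearn_torch under the alias glt; the node ids below are illustrative placeholders, not the test data:

import torch
import graphlearn_torch as glt  # assumed import alias behind the `glt` used in these tests

# Single-GPU layout now assumed by the fixtures: one device group holding GPU 0,
# instead of DeviceGroup(0, [0]) plus DeviceGroup(1, [1]).
device_num = 1
device_group_list = [glt.data.DeviceGroup(0, [0])]
split_ratio = 0.2  # unchanged value from the diff

# Node features are built the same way as before; only their placement changes.
nodes = list(range(10))  # illustrative stand-in for the test's node ids
nfeat = torch.tensor(nodes, dtype=torch.float32).unsqueeze(1).repeat(1, 512)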
10 changes: 8 additions & 2 deletions test/python/test_dist_feature.py
@@ -27,7 +27,7 @@ def run_dist_feature_test(world_size: int, rank: int, feature: glt.data.Feature,
partition2workers = glt.distributed.rpc_sync_data_partitions(world_size, rank)
rpc_router = glt.distributed.RpcDataPartitionRouter(partition2workers)

-current_device = torch.device('cuda', rank % 2)
+current_device = torch.device('cuda', 0)

dist_feature = glt.distributed.DistFeature(
world_size, rank, feature, partition_book,
@@ -74,9 +74,15 @@ def test_dist_feature_lookup(self):
])
partition_book.share_memory_()

+# device_group_list = [
+# glt.data.DeviceGroup(0, [0]),
+# glt.data.DeviceGroup(1, [1])
+# ]
+# TODO(kmonte): Swap back to using real device_group_list when we have
+# a way to run tests on multiple GPUs.
+device_group_list = None
-device_group_list = [
-glt.data.DeviceGroup(0, [0]),
-glt.data.DeviceGroup(1, [1])
-]

split_ratio = 0.8
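The commented-out device_group_list and the TODO above point at the underlying constraint: the CI runner now exposes a single GPU, so the test falls back to device_group_list = None. A hypothetical helper (not part of this PR) showing one way the test could later pick the layout from the visible device count:

import torch
import graphlearn_torch as glt  # assumed import alias

def pick_device_group_list():
    # Hypothetical: one DeviceGroup per visible GPU, or None when no GPU is
    # available, mirroring the DeviceGroup(i, [i]) pattern used in this test.
    gpu_count = torch.cuda.device_count()
    if gpu_count == 0:
        return None
    return [glt.data.DeviceGroup(i, [i]) for i in range(gpu_count)]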
11 changes: 7 additions & 4 deletions test/python/test_dist_link_loader.py
@@ -23,6 +23,9 @@
from dist_test_utils import _prepare_dataset, _prepare_hetero_dataset
from parameterized import parameterized


+device_num = 1

def _check_sample_result(data, edge_dir='out'):
tc = unittest.TestCase()

@@ -221,7 +224,7 @@ def run_test_as_worker(world_size: int, rank: int,
else:
worker_options = glt.distributed.MpDistSamplingWorkerOptions(
num_workers=sampling_nprocs,
-worker_devices=[torch.device('cuda', i % device_num)
+worker_devices=[torch.device('cuda', 0)
for i in range(sampling_nprocs)],
worker_concurrency=2,
master_addr='localhost',
@@ -241,7 +244,7 @@
with_edge=True,
edge_dir=edge_dir,
collect_features=True,
-to_device=torch.device('cuda', rank % device_num),
+to_device=torch.device('cuda', 0),
worker_options=worker_options
)

@@ -297,7 +300,7 @@ def run_test_as_client(num_servers: int, num_clients: int, client_rank: int,
options = glt.distributed.RemoteDistSamplingWorkerOptions(
server_rank=target_server_rank,
num_workers=sampling_nprocs,
-worker_devices=[torch.device('cuda', i % device_num)
+worker_devices=[torch.device('cuda', 0)
for i in range(sampling_nprocs)],
worker_concurrency=2,
master_addr='localhost',
@@ -317,7 +320,7 @@
with_edge=True,
edge_dir='out',
collect_features=True,
-to_device=torch.device('cuda', client_rank % device_num),
+to_device=torch.device('cuda', 0),
worker_options=options
)

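The loader tests all repeat one pattern: every sampling worker and every output tensor is pinned to cuda:0 instead of being spread with i % device_num or rank % device_num. Since device_num is now 1, the modulo already resolved to 0; the rewrite just makes the single-GPU assumption explicit. A small self-contained check of that equivalence (plain PyTorch, no GLT required):

import torch

sampling_nprocs = 2  # unchanged in this PR
device_num = 1       # new value used by these tests

# Before: round-robin over device_num GPUs.
worker_devices_old = [torch.device('cuda', i % device_num) for i in range(sampling_nprocs)]
# After: every sampling worker is pinned to GPU 0 explicitly.
worker_devices_new = [torch.device('cuda', 0) for _ in range(sampling_nprocs)]

# With device_num == 1 the two lists are identical, so the change documents
# the single-GPU setup rather than altering behavior.
assert worker_devices_old == worker_devices_new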
8 changes: 4 additions & 4 deletions test/python/test_dist_neighbor_loader.py
@@ -176,7 +176,7 @@ def run_test_as_worker(world_size: int, rank: int,
else:
worker_options = glt.distributed.MpDistSamplingWorkerOptions(
num_workers=sampling_nprocs,
-worker_devices=[torch.device('cuda', i % device_num)
+worker_devices=[torch.device('cuda', 0)
for i in range(sampling_nprocs)],
worker_concurrency=2,
master_addr='localhost',
@@ -195,7 +195,7 @@
with_edge=True,
edge_dir=edge_dir,
collect_features=True,
-to_device=torch.device('cuda', rank % device_num),
+to_device=torch.device('cuda', 0),
worker_options=worker_options
)

@@ -254,7 +254,7 @@ def run_test_as_client(num_servers: int, num_clients: int, client_rank: int, ser
# Automatically assign server_rank (server_rank_list) if server_rank (server_rank_list) is None
server_rank=server_rank,
num_workers=sampling_nprocs,
-worker_devices=[torch.device('cuda', i % device_num)
+worker_devices=[torch.device('cuda', 0)
for i in range(sampling_nprocs)],
worker_concurrency=2,
master_addr='localhost',
@@ -274,7 +274,7 @@
with_edge=True,
edge_dir=edge_dir,
collect_features=True,
-to_device=torch.device('cuda', client_rank % device_num),
+to_device=torch.device('cuda', 0),
worker_options=options
)

18 changes: 9 additions & 9 deletions test/python/test_dist_subgraph_loader.py
@@ -21,7 +21,7 @@

# sampling options
sampling_nprocs = 2
-device_num = 2
+device_num = 1

def _prepare_dataset(rank: int):
"""
@@ -87,12 +87,12 @@ def _check_sample_result(data, rank):
true_edge_id = torch.tensor([0, 1, 2, 3, 4, 5, 9, 12, 13, 14, 16], device='cuda:0')
true_mapping = torch.tensor([0, 2, 5], device='cuda:0')
else:
-true_node = torch.tensor([0, 1, 3, 5, 6, 7], device='cuda:1')
+true_node = torch.tensor([0, 1, 3, 5, 6, 7], device='cuda:0')
true_edge_index = torch.tensor([[0, 3, 0, 5, 0, 1, 5, 1, 2, 4, 3],
[3, 3, 4, 5, 0, 0, 0, 1, 1, 1, 2]],
-device='cuda:1')
-true_edge_id = torch.tensor([12, 13, 14, 16, 0, 1, 2, 3, 4, 5, 9], device='cuda:1')
-true_mapping = torch.tensor([0, 2, 5], device='cuda:1')
+device='cuda:0')
+true_edge_id = torch.tensor([12, 13, 14, 16, 0, 1, 2, 3, 4, 5, 9], device='cuda:0')
+true_mapping = torch.tensor([0, 2, 5], device='cuda:0')
tc.assertTrue(glt.utils.tensor_equal_with_device(data.node, true_node))
tc.assertTrue(glt.utils.tensor_equal_with_device(data.edge_index, true_edge_index))
tc.assertTrue(glt.utils.tensor_equal_with_device(data.edge, true_edge_id))
@@ -141,7 +141,7 @@ def run_test_as_worker(world_size: int, rank: int,
else:
worker_options = glt.distributed.MpDistSamplingWorkerOptions(
num_workers=sampling_nprocs,
-worker_devices=[torch.device('cuda', i % device_num)
+worker_devices=[torch.device('cuda', 0)
for i in range(sampling_nprocs)],
worker_concurrency=2,
master_addr='localhost',
@@ -159,7 +159,7 @@
drop_last=False,
with_edge=True,
collect_features=True,
-to_device=torch.device('cuda', rank % device_num),
+to_device=torch.device('cuda', 0),
worker_options=worker_options
)

@@ -214,7 +214,7 @@ def run_test_as_client(num_servers: int, num_clients: int, client_rank: int,
options = glt.distributed.RemoteDistSamplingWorkerOptions(
server_rank=target_server_rank,
num_workers=sampling_nprocs,
-worker_devices=[torch.device('cuda', i % device_num)
+worker_devices=[torch.device('cuda', 0)
for i in range(sampling_nprocs)],
worker_concurrency=2,
master_addr='localhost',
@@ -232,7 +232,7 @@
drop_last=False,
with_edge=True,
collect_features=True,
-to_device=torch.device('cuda', client_rank % device_num),
+to_device=torch.device('cuda', 0),
worker_options=options
)

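The expected tensors in _check_sample_result moved from cuda:1 to cuda:0 for the same reason: the assertions go through glt.utils.tensor_equal_with_device, which, as its name suggests, appears to compare placement as well as values. A short illustration with placeholder tensors (assumes a visible CUDA device and the graphlearn_torch import alias; not taken from the test data):

import torch
import graphlearn_torch as glt  # assumed import alias

expected = torch.tensor([0, 2, 5], device='cuda:0')
actual = torch.tensor([0, 2, 5], device='cuda:0')

# Under a device-sensitive comparison, expectations built on cuda:1 would no
# longer match results produced on cuda:0, hence the updated constants above.
assert glt.utils.tensor_equal_with_device(actual, expected)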