diff --git a/cornac/datasets/gowalla.py b/cornac/datasets/gowalla.py index c623fe194..9a78789dc 100644 --- a/cornac/datasets/gowalla.py +++ b/cornac/datasets/gowalla.py @@ -13,9 +13,7 @@ # limitations under the License. # ============================================================================ """ -This data is built based on the Ta Feng Grocery Dataset that contains -a Chinese grocery store transaction data from November 2000 to February 2001. -Accessed at https://www.kaggle.com/datasets/chiranjivdas09/ta-feng-grocery-dataset +Source: https://snap.stanford.edu/data/loc-gowalla.html """ from ..utils import cache @@ -38,10 +36,9 @@ def load_checkins(fmt="USITJson", reader: Reader = None) -> List: Location information is stored in `json` format """ fpath = cache( - url="https://static.preferred.ai/datasets/gowalla/check-ins.zip", + url="https://static.preferred.ai/cornac/datasets/gowalla/check-ins.zip", unzip=True, relative_path="gowalla/check-ins.txt", ) reader = Reader() if reader is None else reader return reader.read(fpath, fmt=fmt, sep="\t") - diff --git a/cornac/datasets/tafeng.py b/cornac/datasets/tafeng.py index ac3c5faf6..9e99ec714 100644 --- a/cornac/datasets/tafeng.py +++ b/cornac/datasets/tafeng.py @@ -13,7 +13,7 @@ # limitations under the License. # ============================================================================ """ -This data is built based on the Ta Feng Grocery Dataset that contains +This data is built based on the Ta Feng Grocery Dataset that contains a Chinese grocery store transaction data from November 2000 to February 2001. Accessed at https://www.kaggle.com/datasets/chiranjivdas09/ta-feng-grocery-dataset """ @@ -37,7 +37,7 @@ def load_basket(fmt="UBITJson", reader: Reader = None) -> List: Data in the form of a list of tuples (user, basket, item, timestamp, json). """ fpath = cache( - url="https://static.preferred.ai/hieudo/basket.zip", + url="https://static.preferred.ai/cornac/datasets/tafeng/basket.zip", unzip=True, relative_path="tafeng/basket.txt", ) diff --git a/cornac/datasets/yoochoose.py b/cornac/datasets/yoochoose.py index 726feff1d..a22384799 100644 --- a/cornac/datasets/yoochoose.py +++ b/cornac/datasets/yoochoose.py @@ -13,9 +13,7 @@ # limitations under the License. # ============================================================================ """ -This data is built based on the Ta Feng Grocery Dataset that contains -a Chinese grocery store transaction data from November 2000 to February 2001. -Accessed at https://www.kaggle.com/datasets/chiranjivdas09/ta-feng-grocery-dataset +Yoochoose Dataset is originally from the RecSys Challenge 2015. """ from typing import List @@ -39,7 +37,7 @@ def load_buy(fmt="SITJson", reader: Reader = None) -> List: Location information is stored in `json` format """ fpath = cache( - url="https://static.preferred.ai/datasets/yoochoose/buy.zip", + url="https://static.preferred.ai/cornac/datasets/yoochoose/buy.zip", unzip=True, relative_path="yoochoose/buy.txt", ) @@ -62,7 +60,7 @@ def load_click(fmt="SITJson", reader: Reader = None) -> List: Location information is stored in `json` format """ fpath = cache( - url="https://static.preferred.ai/datasets/yoochoose/click.zip", + url="https://static.preferred.ai/cornac/datasets/yoochoose/click.zip", unzip=True, relative_path="yoochoose/click.txt", ) @@ -85,7 +83,7 @@ def load_test(fmt="SITJson", reader: Reader = None) -> List: Location information is stored in `json` format """ fpath = cache( - url="https://static.preferred.ai/datasets/yoochoose/test.zip", + url="https://static.preferred.ai/cornac/datasets/yoochoose/test.zip", unzip=True, relative_path="yoochoose/test.txt", ) diff --git a/tests/cornac/datasets/test_gowalla.py b/tests/cornac/datasets/test_gowalla.py new file mode 100644 index 000000000..fb187aec7 --- /dev/null +++ b/tests/cornac/datasets/test_gowalla.py @@ -0,0 +1,33 @@ +# Copyright 2023 The Cornac Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +import random +import time +import unittest + +from cornac.datasets import gowalla + + +class TestGowalla(unittest.TestCase): + + def test_load_checkins(self): + random.seed(time.time()) + if random.random() > 0.8: + checkins = gowalla.load_checkins() + self.assertEqual(len(checkins), 6442892) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/cornac/datasets/test_tafeng.py b/tests/cornac/datasets/test_tafeng.py new file mode 100644 index 000000000..7b7e9827a --- /dev/null +++ b/tests/cornac/datasets/test_tafeng.py @@ -0,0 +1,33 @@ +# Copyright 2023 The Cornac Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +import random +import time +import unittest + +from cornac.datasets import tafeng + + +class TestTafeng(unittest.TestCase): + + def test_load_basket(self): + random.seed(time.time()) + if random.random() > 0.8: + baskets = tafeng.load_basket() + self.assertEqual(len(baskets), 817741) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/cornac/datasets/test_yoochoose.py b/tests/cornac/datasets/test_yoochoose.py new file mode 100644 index 000000000..d7d67738a --- /dev/null +++ b/tests/cornac/datasets/test_yoochoose.py @@ -0,0 +1,38 @@ +# Copyright 2023 The Cornac Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +import random +import time +import unittest + +from cornac.datasets import yoochoose + + +class TestYooChoose(unittest.TestCase): + + def test_load_buy_click_test(self): + random.seed(time.time()) + if random.random() > 0.8: + buy = yoochoose.load_buy() + click = yoochoose.load_click() + test = yoochoose.load_test() + + self.assertEqual(len(buy), 1150753) + self.assertEqual(len(click), 33003944) + self.assertEqual(len(test), 8251791) + + +if __name__ == "__main__": + unittest.main()