-
-
Notifications
You must be signed in to change notification settings - Fork 68
Add Meson WrapDB mining pipeline #803 #823
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,68 @@ | ||
| # SPDX-License-Identifier: Apache-2.0 | ||
| # | ||
| # http://nexb.com and https://github.com/aboutcode-org/scancode.io | ||
| # The ScanCode.io software is licensed under the Apache License version 2.0. | ||
| # Data generated with ScanCode.io is provided as-is without warranties. | ||
| # ScanCode is a trademark of nexB Inc. | ||
| # | ||
| # You may not use this software except in compliance with the License. | ||
| # You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0 | ||
| # Unless required by applicable law or agreed to in writing, software distributed | ||
| # under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR | ||
| # CONDITIONS OF ANY KIND, either express or implied. See the License for the | ||
| # specific language governing permissions and limitations under the License. | ||
| # | ||
| # Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES | ||
| # OR CONDITIONS OF ANY KIND, either express or implied. No content created from | ||
| # ScanCode.io should be considered or used as legal advice. Consult an Attorney | ||
| # for any legal advice. | ||
| # | ||
| # ScanCode.io is a free software code scanning tool from nexB Inc. and others. | ||
| # Visit https://github.com/aboutcode-org/scancode.io for support and download. | ||
|
|
||
| import json | ||
| from pathlib import Path | ||
|
|
||
| from minecode_pipelines.pipes import meson | ||
| from minecode_pipelines.pipelines import MineCodeBasePipeline | ||
| from scanpipe.pipes import federatedcode | ||
|
|
||
|
|
||
| class MineMeson(MineCodeBasePipeline): | ||
| """Pipeline to mine Meson WrapDB packages and publish them to FederatedCode repo.""" | ||
|
|
||
| MESON_WRAPDB_REPO = "https://github.com/mesonbuild/wrapdb" | ||
|
|
||
| @classmethod | ||
| def steps(cls): | ||
| return ( | ||
| cls.check_federatedcode_eligibility, | ||
| cls.create_federatedcode_working_dir, | ||
| cls.clone_wrapdb_index, | ||
| cls.fetch_federation_config, | ||
| cls.mine_and_publish_packageurls, | ||
| cls.delete_working_dir, | ||
| ) | ||
|
|
||
| def clone_wrapdb_index(self): | ||
| """Clone the Meson WrapDB repository.""" | ||
| self.wrapdb_repo = federatedcode.clone_repository( | ||
| repo_url=self.MESON_WRAPDB_REPO, | ||
| clone_path=self.working_path / "wrapdb", | ||
| logger=self.log, | ||
| ) | ||
|
|
||
| def packages_count(self): | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What is this method for? |
||
| releases_path = Path(self.wrapdb_repo.working_dir) / "releases.json" | ||
| if not releases_path.exists(): | ||
| return 0 | ||
| with open(releases_path, encoding="utf-8") as f: | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Are you really loading the whole file just to get a count?
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Switched to an HTTP fetch: removed the repo cloning entirely. The pipeline now uses requests.get() to fetch just releases.json. |
||
| releases = json.load(f) | ||
| return len(releases) | ||
|
|
||
| def mine_packageurls(self): | ||
| """Yield PackageURLs from Meson WrapDB releases.json.""" | ||
| return meson.mine_meson_packageurls( | ||
| wrapdb_repo=self.wrapdb_repo, | ||
| logger=self.log, | ||
| ) | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,81 @@ | ||
| # SPDX-License-Identifier: Apache-2.0 | ||
| # | ||
| # http://nexb.com and https://github.com/aboutcode-org/scancode.io | ||
| # The ScanCode.io software is licensed under the Apache License version 2.0. | ||
| # Data generated with ScanCode.io is provided as-is without warranties. | ||
| # ScanCode is a trademark of nexB Inc. | ||
| # | ||
| # You may not use this software except in compliance with the License. | ||
| # You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0 | ||
| # Unless required by applicable law or agreed to in writing, software distributed | ||
| # under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR | ||
| # CONDITIONS OF ANY KIND, either express or implied. See the License for the | ||
| # specific language governing permissions and limitations under the License. | ||
| # | ||
| # Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES | ||
| # OR CONDITIONS OF ANY KIND, either express or implied. No content created from | ||
| # ScanCode.io should be considered or used as legal advice. Consult an Attorney | ||
| # for any legal advice. | ||
| # | ||
| # ScanCode.io is a free software code scanning tool from nexB Inc. and others. | ||
| # Visit https://github.com/aboutcode-org/scancode.io for support and download. | ||
|
|
||
| import json | ||
| from pathlib import Path | ||
|
|
||
| from packageurl import PackageURL | ||
|
|
||
|
|
||
| MESON_WRAPDB_RELEASES_URL = ( | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Where is this URL used? Why not fetch from it rather than cloning the repo? |
||
| "https://raw.githubusercontent.com/mesonbuild/wrapdb/master/releases.json" | ||
| ) | ||
|
|
||
|
|
||
| def get_meson_packages(package_name, package_data): | ||
| """ | ||
| Return a tuple of (base_purl, [versioned_purl_strings]) for a single | ||
| Meson WrapDB package entry from ``releases.json``. | ||
|
|
||
| The ``package_data`` dict has the structure:: | ||
|
|
||
| { | ||
| "dependency_names": ["dep1", "dep2"], | ||
| "versions": ["1.0.0-1", "1.0.0-2", ...] | ||
| } | ||
|
|
||
| WrapDB versions use a ``-N`` suffix to denote build recipe revisions that | ||
| are specific to the WrapDB and do not exist upstream. | ||
| """ | ||
| base_purl = PackageURL(type="meson", name=package_name) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is `meson` a registered PURL type in the spec repo? If not, we need to register it there first.
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Removed from pipes/meson.py and mine_meson.py. |
||
| versions = package_data.get("versions") or [] | ||
| versioned_purls = [ | ||
| PackageURL( | ||
| type="meson", | ||
| name=package_name, | ||
| version=str(version), | ||
| ).to_string() | ||
| for version in versions | ||
| ] | ||
| return base_purl, versioned_purls | ||
|
|
||
|
|
||
| def mine_meson_packageurls(wrapdb_repo, logger): | ||
| """ | ||
| Yield ``(base_purl, [versioned_purl_strings])`` tuples from a cloned | ||
| Meson WrapDB repository by parsing its ``releases.json``. | ||
| """ | ||
| releases_path = Path(wrapdb_repo.working_dir) / "releases.json" | ||
| if not releases_path.exists(): | ||
| logger(f"releases.json not found at {releases_path}") | ||
| return | ||
|
|
||
| with open(releases_path, encoding="utf-8") as f: | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You already opened that file earlier. |
||
| releases = json.load(f) | ||
|
|
||
| for package_name, package_data in releases.items(): | ||
| if not package_data: | ||
| continue | ||
| yield get_meson_packages( | ||
| package_name=package_name, | ||
| package_data=package_data, | ||
| ) | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,88 @@ | ||
| # | ||
| # Copyright (c) nexB Inc. and others. All rights reserved. | ||
| # purldb is a trademark of nexB Inc. | ||
| # SPDX-License-Identifier: Apache-2.0 | ||
| # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. | ||
| # See https://github.com/aboutcode-org/purldb for support or download. | ||
| # See https://aboutcode.org for more information about nexB OSS projects. | ||
| # | ||
|
|
||
| import json | ||
| from pathlib import Path | ||
|
|
||
| from django.test import TestCase | ||
|
|
||
| from minecode_pipelines.pipes.meson import get_meson_packages | ||
|
|
||
| DATA_DIR = Path(__file__).parent.parent / "test_data" / "meson" | ||
|
|
||
|
|
||
| class MesonPipeTests(TestCase): | ||
| def test_get_meson_packages_basic(self): | ||
| """Test that get_meson_packages correctly parses a single package entry.""" | ||
| package_data = { | ||
| "dependency_names": ["ogg"], | ||
| "versions": ["1.3.6-1", "1.3.5-3", "1.3.5-2", "1.3.5-1"], | ||
| } | ||
| base_purl, versioned_purls = get_meson_packages("ogg", package_data) | ||
|
|
||
| self.assertEqual(str(base_purl), "pkg:meson/ogg") | ||
| self.assertEqual(len(versioned_purls), 4) | ||
| self.assertIn("pkg:meson/ogg@1.3.6-1", versioned_purls) | ||
| self.assertIn("pkg:meson/ogg@1.3.5-1", versioned_purls) | ||
|
|
||
| def test_get_meson_packages_empty_versions(self): | ||
| """Test that get_meson_packages handles empty version lists.""" | ||
| package_data = { | ||
| "dependency_names": ["empty-pkg"], | ||
| "versions": [], | ||
| } | ||
| base_purl, versioned_purls = get_meson_packages("empty-pkg", package_data) | ||
|
|
||
| self.assertEqual(str(base_purl), "pkg:meson/empty-pkg") | ||
| self.assertEqual(versioned_purls, []) | ||
|
|
||
| def test_get_meson_packages_no_versions_key(self): | ||
| """Test that get_meson_packages handles missing versions key.""" | ||
| package_data = { | ||
| "dependency_names": ["no-ver"], | ||
| } | ||
| base_purl, versioned_purls = get_meson_packages("no-ver", package_data) | ||
|
|
||
| self.assertEqual(str(base_purl), "pkg:meson/no-ver") | ||
| self.assertEqual(versioned_purls, []) | ||
|
|
||
| def test_get_meson_packages_from_releases_json(self): | ||
| """Test parsing packages from the test releases.json fixture.""" | ||
| releases_path = DATA_DIR / "releases.json" | ||
| with open(releases_path, encoding="utf-8") as f: | ||
| releases = json.load(f) | ||
|
|
||
| all_results = [] | ||
| for package_name, package_data in releases.items(): | ||
| if not package_data: | ||
| continue | ||
| all_results.append( | ||
| get_meson_packages( | ||
| package_name=package_name, | ||
| package_data=package_data, | ||
| ) | ||
| ) | ||
|
|
||
| self.assertEqual(len(all_results), 3) # ogg, zlib, catch2 | ||
|
|
||
| # Check ogg | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do not check these one by one. Reuse the data-driven approach with a JSON file of expected results.
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Switched to a fully data-driven approach that compares against expected_purls.json. |
||
| ogg_base, ogg_purls = all_results[0] | ||
| self.assertEqual(str(ogg_base), "pkg:meson/ogg") | ||
| self.assertEqual(len(ogg_purls), 4) | ||
|
|
||
| # Check zlib | ||
| zlib_base, zlib_purls = all_results[1] | ||
| self.assertEqual(str(zlib_base), "pkg:meson/zlib") | ||
| self.assertEqual(len(zlib_purls), 3) | ||
|
|
||
| # Check catch2 | ||
| catch2_base, catch2_purls = all_results[2] | ||
| self.assertEqual(str(catch2_base), "pkg:meson/catch2") | ||
| self.assertEqual(len(catch2_purls), 2) | ||
| self.assertIn("pkg:meson/catch2@3.5.2-1", catch2_purls) | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,33 @@ | ||
| { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please use a real, untruncated extract from the actual JSON.
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Test data: replaced with real, complete entries copied verbatim from the WrapDB releases.json. |
||
| "ogg": { | ||
| "dependency_names": [ | ||
| "ogg" | ||
| ], | ||
| "versions": [ | ||
| "1.3.6-1", | ||
| "1.3.5-3", | ||
| "1.3.5-2", | ||
| "1.3.5-1" | ||
| ] | ||
| }, | ||
| "zlib": { | ||
| "dependency_names": [ | ||
| "zlib" | ||
| ], | ||
| "versions": [ | ||
| "1.3.1-1", | ||
| "1.2.13-2", | ||
| "1.2.13-1" | ||
| ] | ||
| }, | ||
| "catch2": { | ||
| "dependency_names": [ | ||
| "catch2", | ||
| "catch2-with-main" | ||
| ], | ||
| "versions": [ | ||
| "3.5.2-1", | ||
| "3.4.0-1" | ||
| ] | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why do you clone the whole repo when you only use a single releases.json file? And why do you also define a releases.json URL elsewhere?