Extractors in Mapping Suite SDK
Overview
The Mapping Suite SDK provides two primary extractors for handling mapping packages:
- ArchivePackageExtractor: For working with ZIP archives
- GithubPackageExtractor: For extracting packages from GitHub repositories
Archive Package Extractor
Basic Usage
from pathlib import Path
from mapping_suite_sdk import ArchivePackageExtractor
# One extractor instance serves both calls below.
extractor = ArchivePackageExtractor()

archive_file = Path("package.zip")
target_dir = Path("output_directory")

# Extract to a specific destination
output_path = extractor.extract(
    source_path=archive_file,
    destination_path=target_dir,
)

# Extract to a temporary location (automatically cleaned up)
with extractor.extract_temporary(archive_file) as temp_path:
    # Work with the extracted files inside this block
    print(f"Extracted to temporary path: {temp_path}")
# Files are automatically cleaned up once the block exits
GitHub Package Extractor
Basic Repository Extraction
from mapping_suite_sdk import GithubPackageExtractor
extractor = GithubPackageExtractor()

# Local directory to extract into, and the package's location inside the repository.
local_target = Path("/local/path")
package_in_repo = Path("mappings/package_v1")

# Extract a specific package from a repository
package_path = extractor.extract(
    repository_url="https://github.com/org/repo",
    destination_path=local_target,
    package_path=package_in_repo,
    branch_or_tag_name="main",
)
Multiple Package Extraction
# Extract multiple packages matching a pattern
matching_packages = extractor.extract_temporary(
    repository_url="https://github.com/org/repo",
    packages_path_pattern="mappings/package*",
    branch_or_tag_name="v1.0.0",
)
with matching_packages as package_paths:
    for package_dir in package_paths:
        print(f"Found package at: {package_dir}")
        # Process each package as needed
Custom Extractor Implementation
You can create custom extractors by implementing the MappingPackageExtractorABC abstract base class:
import tempfile
from contextlib import contextmanager
from pathlib import Path
from typing import Generator, List

from mapping_suite_sdk.adapters.extractor import MappingPackageExtractorABC
class CustomPackageExtractor(MappingPackageExtractorABC):
    """Skeleton of a custom extractor built on MappingPackageExtractorABC.

    Both methods are templates: the placeholder comments mark where the
    real extraction logic goes.
    """

    def extract(
        self,
        source: Path,
        destination: Path,
        **kwargs
    ) -> Path:
        """Extract ``source`` into ``destination`` and return ``destination``.

        Args:
            source: Location of the package to extract (not used by this
                skeleton until extraction code is added).
            destination: Directory to extract into; created together with
                any missing parents if it does not exist yet.
            **kwargs: Extra options, accepted and ignored by this skeleton.

        Returns:
            The ``destination`` path.
        """
        # Implement custom extraction logic
        destination.mkdir(parents=True, exist_ok=True)
        # Your extraction code here
        return destination

    @contextmanager
    def extract_temporary(
        self,
        source: Path,
        **kwargs
    ) -> Generator[List[Path], None, None]:
        """Yield a one-element list holding a temporary extraction directory.

        The directory comes from ``tempfile.TemporaryDirectory`` (this needs
        ``import tempfile`` at the top of the module) and is removed when the
        caller's ``with`` block exits.

        Args:
            source: Location of the package to extract (not used by this
                skeleton until extraction code is added).
            **kwargs: Extra options, accepted and ignored by this skeleton.

        Yields:
            A list containing the temporary directory path.
        """
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_path = Path(temp_dir)
            # Your temporary extraction logic
            yield [temp_path]
# Usage example
custom_extractor = CustomPackageExtractor()
package_source = Path("source_package")

with custom_extractor.extract_temporary(package_source) as paths:
    for extracted_dir in paths:
        print(f"Extracted to: {extracted_dir}")
Best Practices
- Always use context managers (`with` statement) for temporary extractions
- Handle exceptions gracefully
- Be mindful of system resources when working with large packages
- Consider network and storage limitations when extracting from remote sources
Error Handling
from pathlib import Path
from mapping_suite_sdk import ArchivePackageExtractor, GithubPackageExtractor
def safe_extract(extractor, source):
    """Run a temporary extraction and report common failures instead of raising.

    Args:
        extractor: Object whose ``extract_temporary(source)`` returns a
            context manager.
        source: Value passed straight through to ``extract_temporary``.

    Returns:
        True if the extraction block completed; False if a
        ``FileNotFoundError`` or ``ValueError`` was caught and printed.
        Any other exception propagates to the caller.
    """
    try:
        # Bind the value yielded by the context manager so the processing
        # step below has something to work with.
        with extractor.extract_temporary(source) as extracted_paths:
            # Process extracted_paths here
            pass
    except FileNotFoundError:
        print(f"Source not found: {source}")
        return False
    except ValueError as e:
        print(f"Extraction error: {e}")
        return False
    return True
# Example usage
archive_extractor = ArchivePackageExtractor()
github_extractor = GithubPackageExtractor()

# Each pair is an extractor and the source handed to it.
attempts = (
    (archive_extractor, Path("non_existent.zip")),
    (github_extractor, "https://github.com/non-existent-repo/mapping-packages"),
)
for candidate, location in attempts:
    safe_extract(candidate, location)