#!/usr/bin/env python3
"""
Test script to reproduce the metadata detection issue with GCC.

The script downloads a GCC source tarball, unpacks it into a temporary
directory, runs ``MetadataExtractor`` on the unpacked tree and reports
whether the detected package name contains "gcc".
"""

import os
import sys
import tempfile
import shutil
import urllib.request
import tarfile
from pathlib import Path

# Add src to path (relative to the current working directory) so the
# in-tree ``autusm`` package can be imported.
sys.path.insert(0, 'src')

from autusm.metadata import MetadataExtractor
from autusm.models import PackageInfo


def test_gcc_metadata_extraction():
    """Test metadata extraction with GCC source code.

    Downloads the GCC archive, extracts it, runs the metadata extractor on
    the first extracted directory and prints the extracted fields.

    Returns:
        True if the extracted package name contains "gcc"
        (case-insensitive); False if it does not, if no directory was
        extracted, or if any step raised an exception.
    """
    # GCC URL
    gcc_url = "https://mirrors.middlendian.com/gnu//gcc/gcc-15.1.0/gcc-15.1.0.tar.xz"

    # Everything lives in a temporary directory that is removed on exit.
    with tempfile.TemporaryDirectory() as temp_dir:
        temp_path = Path(temp_dir)
        archive_path = temp_path / "gcc.tar.xz"
        extract_path = temp_path / "extracted"

        print(f"Downloading GCC from {gcc_url}...")
        try:
            # Download the archive
            urllib.request.urlretrieve(gcc_url, archive_path)
            print("Download completed.")

            # Extract the archive
            print("Extracting archive...")
            extract_path.mkdir(exist_ok=True)
            with tarfile.open(archive_path, "r:xz") as tar:
                # The archive comes from the network, so use the "data"
                # extraction filter where the interpreter supports it: it
                # refuses members that would land outside extract_path.
                # Older interpreters without extraction filters fall back
                # to the plain call.
                if hasattr(tarfile, "data_filter"):
                    tar.extractall(extract_path, filter="data")
                else:
                    tar.extractall(extract_path)
            print("Extraction completed.")

            # Find the extracted directory. iterdir() yields entries in
            # arbitrary order, so sort to make the choice deterministic.
            extracted_dirs = sorted(
                d for d in extract_path.iterdir() if d.is_dir()
            )
            if not extracted_dirs:
                print("Error: No extracted directory found")
                return False

            source_dir = extracted_dirs[0]
            print(f"Source directory: {source_dir}")

            # Test metadata extraction
            print("\nTesting metadata extraction...")
            extractor = MetadataExtractor()
            package_info = extractor.extract(source_dir, gcc_url)

            # Print the results
            print(f"\nExtracted package name: {package_info.name}")
            print(f"Version: {package_info.version}")
            print(f"Summary: {package_info.summary}")
            print(f"URL: {package_info.url}")
            print(f"Metadata files found: {package_info.metadata_files}")

            # Check if it correctly identified GCC
            if package_info.name and "gcc" in package_info.name.lower():
                print("\n✅ SUCCESS: Correctly identified GCC as the main package")
                return True

            print(f"\n❌ FAILURE: Identified '{package_info.name}' instead of GCC")
            return False

        except Exception as e:
            # Top-level boundary of the script: report the failure with a
            # full traceback and signal it through the return value.
            print(f"Error during test: {e}")
            import traceback
            traceback.print_exc()
            return False


if __name__ == "__main__":
    success = test_gcc_metadata_extraction()
    sys.exit(0 if success else 1)