test_metadata_detection.py 2.8 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182
  1. #!/usr/bin/env python3
  2. """
  3. Test script to reproduce the metadata detection issue with GCC.
  4. """
  5. import os
  6. import sys
  7. import tempfile
  8. import shutil
  9. import urllib.request
  10. import tarfile
  11. from pathlib import Path
  12. # Add src to path
  13. sys.path.insert(0, 'src')
  14. from autusm.metadata import MetadataExtractor
  15. from autusm.models import PackageInfo
  16. def test_gcc_metadata_extraction():
  17. """Test metadata extraction with GCC source code."""
  18. # GCC URL
  19. gcc_url = "https://mirrors.middlendian.com/gnu//gcc/gcc-15.1.0/gcc-15.1.0.tar.xz"
  20. # Create a temporary directory for extraction
  21. with tempfile.TemporaryDirectory() as temp_dir:
  22. temp_path = Path(temp_dir)
  23. archive_path = temp_path / "gcc.tar.xz"
  24. extract_path = temp_path / "extracted"
  25. print(f"Downloading GCC from {gcc_url}...")
  26. try:
  27. # Download the archive
  28. urllib.request.urlretrieve(gcc_url, archive_path)
  29. print("Download completed.")
  30. # Extract the archive
  31. print("Extracting archive...")
  32. extract_path.mkdir(exist_ok=True)
  33. with tarfile.open(archive_path, "r:xz") as tar:
  34. tar.extractall(extract_path)
  35. print("Extraction completed.")
  36. # Find the extracted directory
  37. extracted_dirs = [d for d in extract_path.iterdir() if d.is_dir()]
  38. if not extracted_dirs:
  39. print("Error: No extracted directory found")
  40. return False
  41. source_dir = extracted_dirs[0]
  42. print(f"Source directory: {source_dir}")
  43. # Test metadata extraction
  44. print("\nTesting metadata extraction...")
  45. extractor = MetadataExtractor()
  46. package_info = extractor.extract(source_dir, gcc_url)
  47. # Print the results
  48. print(f"\nExtracted package name: {package_info.name}")
  49. print(f"Version: {package_info.version}")
  50. print(f"Summary: {package_info.summary}")
  51. print(f"URL: {package_info.url}")
  52. print(f"Metadata files found: {package_info.metadata_files}")
  53. # Check if it correctly identified GCC
  54. if package_info.name and "gcc" in package_info.name.lower():
  55. print("\n✅ SUCCESS: Correctly identified GCC as the main package")
  56. return True
  57. else:
  58. print(f"\n❌ FAILURE: Identified '{package_info.name}' instead of GCC")
  59. return False
  60. except Exception as e:
  61. print(f"Error during test: {e}")
  62. import traceback
  63. traceback.print_exc()
  64. return False
  65. if __name__ == "__main__":
  66. success = test_gcc_metadata_extraction()
  67. sys.exit(0 if success else 1)