test_license_detection.py 2.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384
  1. #!/usr/bin/env python3
  2. """
  3. Test script to reproduce the license detection issue with valdo package.
  4. """
  5. import os
  6. import sys
  7. import tempfile
  8. import shutil
  9. import urllib.request
  10. import tarfile
  11. from pathlib import Path
  12. # Add src to path
  13. sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
  14. from autusm.metadata import MetadataExtractor
  15. def download_and_extract(url, extract_dir):
  16. """Download and extract a tar.xz file."""
  17. print(f"Downloading {url}...")
  18. # Download the file
  19. with urllib.request.urlopen(url) as response:
  20. temp_file = extract_dir / "temp.tar.xz"
  21. with open(temp_file, 'wb') as f:
  22. shutil.copyfileobj(response, f)
  23. # Extract the file
  24. print("Extracting...")
  25. with tarfile.open(temp_file, 'r:xz') as tar:
  26. tar.extractall(extract_dir)
  27. # Find the extracted directory
  28. for item in extract_dir.iterdir():
  29. if item.is_dir() and item.name != "temp.tar.xz":
  30. return item
  31. return None
  32. def test_license_detection():
  33. """Test license detection with the valdo package."""
  34. url = "https://github.com/vala-lang/valdo/releases/download/2022.04.14/valdo-2022.04.14.tar.xz"
  35. with tempfile.TemporaryDirectory() as temp_dir:
  36. extract_dir = Path(temp_dir)
  37. # Download and extract
  38. source_dir = download_and_extract(url, extract_dir)
  39. if not source_dir:
  40. print("Failed to extract package")
  41. return
  42. print(f"Extracted to: {source_dir}")
  43. # Check for license files
  44. license_files = []
  45. for root, dirs, files in os.walk(source_dir):
  46. for file in files:
  47. if file.lower().startswith(('license', 'copying')):
  48. license_files.append(Path(root) / file)
  49. print(f"Found license files: {license_files}")
  50. # Read license file content
  51. for license_file in license_files:
  52. print(f"\n--- Content of {license_file} ---")
  53. with open(license_file, 'r', encoding='utf-8', errors='replace') as f:
  54. content = f.read()
  55. print(content[:1000] + "..." if len(content) > 1000 else content)
  56. # Test metadata extraction
  57. print("\n--- Testing Metadata Extraction ---")
  58. extractor = MetadataExtractor()
  59. package_info = extractor.extract(source_dir, url)
  60. print(f"Package name: {package_info.name}")
  61. print(f"Package version: {package_info.version}")
  62. print(f"Detected licenses:")
  63. for license in package_info.licenses:
  64. print(f" - Name: {license.name}, Category: {license.category}")
  65. if __name__ == "__main__":
  66. test_license_detection()