#!/usr/bin/env python3
# clanker/autusm
"""
Test script to reproduce the license detection issue with valdo package.
"""

import os
import sys
import tempfile
import shutil
import urllib.request
import tarfile
from pathlib import Path

# Put the ``src`` directory that sits next to this script at the front of
# sys.path so the ``autusm`` import below is looked up there first.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))

from autusm.metadata import MetadataExtractor

def download_and_extract(url, extract_dir):
    """Download a ``.tar.xz`` archive and unpack it into *extract_dir*.

    The archive is saved as ``temp.tar.xz`` inside *extract_dir* and is left
    there after extraction.

    Args:
        url: Location of the archive; anything ``urllib.request.urlopen``
            accepts.
        extract_dir: Existing directory to download into and extract under.
            May be a ``str`` or any ``os.PathLike``.

    Returns:
        A ``Path`` to a directory located directly under *extract_dir* after
        extraction (the first one in sorted order), or ``None`` if there is
        no such directory.

    Raises:
        urllib.error.URLError: If the download fails.
        tarfile.TarError: If the archive cannot be read, or if a member is
            rejected by the extraction filter.
    """
    # Accept plain strings as well as Path objects.
    extract_dir = Path(extract_dir)
    archive_path = extract_dir / "temp.tar.xz"

    print(f"Downloading {url}...")
    with urllib.request.urlopen(url) as response, open(archive_path, 'wb') as f:
        shutil.copyfileobj(response, f)

    print("Extracting...")
    with tarfile.open(archive_path, 'r:xz') as tar:
        # The archive comes from the network: use the 'data' filter so members
        # cannot write outside extract_dir. Interpreters that predate
        # extraction filters fall back to the legacy behaviour.
        if hasattr(tarfile, 'data_filter'):
            tar.extractall(extract_dir, filter='data')
        else:
            tar.extractall(extract_dir)

    # The downloaded archive is a regular file, so is_dir() alone skips it.
    # Sorting makes the choice deterministic if several directories exist.
    for item in sorted(extract_dir.iterdir()):
        if item.is_dir():
            return item

    return None

def test_license_detection():
    """Reproduce license detection against the valdo 2022.04.14 tarball.

    Downloads and unpacks the release archive into a temporary directory,
    prints every file whose name starts with "license" or "copying"
    (case-insensitive) together with at most its first 1000 characters, and
    then prints the name, version and licenses reported by
    ``MetadataExtractor.extract``.
    """
    url = "https://github.com/vala-lang/valdo/releases/download/2022.04.14/valdo-2022.04.14.tar.xz"

    with tempfile.TemporaryDirectory() as workspace:
        extract_dir = Path(workspace)

        # Fetch and unpack the release; bail out if nothing was extracted.
        source_dir = download_and_extract(url, extract_dir)
        if not source_dir:
            print("Failed to extract package")
            return

        print(f"Extracted to: {source_dir}")

        # Collect candidate license files anywhere in the source tree.
        license_files = [
            Path(folder) / filename
            for folder, _subdirs, filenames in os.walk(source_dir)
            for filename in filenames
            if filename.lower().startswith(('license', 'copying'))
        ]
        print(f"Found license files: {license_files}")

        # Show a bounded preview of each candidate file.
        for path in license_files:
            print(f"\n--- Content of {path} ---")
            with open(path, 'r', encoding='utf-8', errors='replace') as handle:
                text = handle.read()
            preview = text[:1000] + "..." if len(text) > 1000 else text
            print(preview)

        # Run the extractor under test and report what it detected.
        print("\n--- Testing Metadata Extraction ---")
        package_info = MetadataExtractor().extract(source_dir, url)

        print(f"Package name: {package_info.name}")
        print(f"Package version: {package_info.version}")
        print("Detected licenses:")
        for detected in package_info.licenses:
            print(f"  - Name: {detected.name}, Category: {detected.category}")

# Run the reproduction only when this file is executed as a script.
if __name__ == "__main__":
    test_license_detection()