clanker 1 miesiąc temu
rodzic
commit
d1a3f326e9
3 zmienionych plików z 167 dodań i 7 usunięć
  1. 9 7
      src/autusm/metadata.py
  2. 74 0
      test_license_comprehensive.py
  3. 84 0
      test_license_detection.py

+ 9 - 7
src/autusm/metadata.py

@@ -711,13 +711,7 @@ class MetadataExtractor:
             return "MIT"
         elif "apache license" in content_lower or "apache-2.0" in content_lower:
             return "Apache-2.0"
-        elif "gnu general public license" in content_lower or "gpl" in content_lower:
-            if "version 3" in content_lower:
-                return "GPL-3.0"
-            elif "version 2" in content_lower:
-                return "GPL-2.0"
-            else:
-                return "GPL"
+        # Check for LGPL first since it's more specific and contains GPL text
         elif "gnu lesser general public license" in content_lower or "lgpl" in content_lower:
             if "version 3" in content_lower:
                 return "LGPL-3.0"
@@ -725,6 +719,14 @@ class MetadataExtractor:
                 return "LGPL-2.1"
             else:
                 return "LGPL"
+        # Check for GPL after LGPL to avoid false positives
+        elif "gnu general public license" in content_lower or "gpl" in content_lower:
+            if "version 3" in content_lower:
+                return "GPL-3.0"
+            elif "version 2" in content_lower:
+                return "GPL-2.0"
+            else:
+                return "GPL"
         elif "bsd license" in content_lower:
             if "3-clause" in content_lower:
                 return "BSD-3-Clause"

+ 74 - 0
test_license_comprehensive.py

@@ -0,0 +1,74 @@
+#!/usr/bin/env python3
+"""
+Comprehensive test for license detection to ensure our fix doesn't break other cases.
+"""
+
+import sys
+import os
+
+# Add src to path
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
+
+from autusm.metadata import MetadataExtractor
+
+def test_license_detection():
+    """Check the license identifier against a set of sample license headers.

+    Each sample is passed to ``MetadataExtractor._identify_license_type`` and
+    one result line is printed per sample. If any sample is classified
+    differently from its expected identifier, a failure summary is printed
+    and the process exits with status 1.
+    """
+    # Each entry is (label, license text, expected identifier).
+    samples = (
+        (
+            "LGPL-2.1",
+            """                  GNU LESSER GENERAL PUBLIC LICENSE
+                       Version 2.1, February 1999

+ Copyright (C) 1991, 1999 Free Software Foundation, Inc.
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.""",
+            "LGPL-2.1",
+        ),
+        (
+            "GPL-3.0",
+            """                    GNU GENERAL PUBLIC LICENSE
+                       Version 3, 29 June 2007

+ Copyright (C) 2007 Free Software Foundation, Inc.
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.""",
+            "GPL-3.0",
+        ),
+        (
+            "GPL-2.0",
+            """                    GNU GENERAL PUBLIC LICENSE
+                       Version 2, June 1991

+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.""",
+            "GPL-2.0",
+        ),
+        (
+            "MIT",
+            """MIT License

+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:""",
+            "MIT",
+        ),
+        (
+            "Apache-2.0",
+            """Apache License
+Version 2.0, January 2004
+http://www.apache.org/licenses/

+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION""",
+            "Apache-2.0",
+        ),
+    )
+    identifier = MetadataExtractor()
+    print("Testing license detection...")
+    # Count mismatches instead of tracking a boolean flag.
+    mismatches = 0
+    for label, text, wanted in samples:
+        detected = identifier._identify_license_type(text)
+        if detected == wanted:
+            mark = "✓"
+        else:
+            mark = "✗"
+            mismatches += 1
+        print(f"{mark} {label}: Expected {wanted}, Got {detected}")
+    # Report failure first; sys.exit raises, so the success line is skipped.
+    if mismatches:
+        print("\nSome tests failed!")
+        sys.exit(1)
+    print("\nAll tests passed!")
+
+# Run the license checks when this file is executed directly as a script.
+if __name__ == "__main__":
+    test_license_detection()

+ 84 - 0
test_license_detection.py

@@ -0,0 +1,84 @@
+#!/usr/bin/env python3
+"""
+Test script to reproduce the license detection issue with the valdo package.
+"""
+
+import os
+import sys
+import tempfile
+import shutil
+import urllib.request
+import tarfile
+from pathlib import Path
+
+# Add src to path
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
+
+from autusm.metadata import MetadataExtractor
+
+def download_and_extract(url, extract_dir):
+    """Download a ``.tar.xz`` archive and unpack it into *extract_dir*.

+    Args:
+        url: Location of the archive; opened with ``urllib.request.urlopen``.
+        extract_dir: Directory to work in, given as a ``pathlib.Path``. The
+            archive is saved there as ``temp.tar.xz`` and unpacked next to it.

+    Returns:
+        The first directory (in sorted order) present in *extract_dir* after
+        unpacking, or ``None`` when the archive produced no directory.
+    """
+    print(f"Downloading {url}...")
+    archive_path = extract_dir / "temp.tar.xz"
+    # A timeout keeps an unresponsive server from hanging the script forever.
+    with urllib.request.urlopen(url, timeout=60) as response, \
+            open(archive_path, 'wb') as archive_file:
+        shutil.copyfileobj(response, archive_file)

+    print("Extracting...")
+    with tarfile.open(archive_path, 'r:xz') as tar:
+        # The archive comes from the network, so prefer the "data" extraction
+        # filter where the running Python provides it: it rejects members
+        # that would land outside extract_dir (absolute paths, "..", links).
+        if hasattr(tarfile, "data_filter"):
+            tar.extractall(extract_dir, filter="data")
+        else:
+            tar.extractall(extract_dir)

+    # The downloaded archive is a regular file, so is_dir() already excludes
+    # it; sorting makes the choice deterministic if several directories exist.
+    for item in sorted(extract_dir.iterdir()):
+        if item.is_dir():
+            return item
+    return None
+
+def test_license_detection():
+    """Print what the metadata extractor reports for the valdo release tarball.

+    Downloads and unpacks the valdo 2022.04.14 archive into a temporary
+    directory, lists files whose names start with "license" or "copying"
+    (case-insensitive), prints up to the first 1000 characters of each, and
+    then prints the name, version and licenses returned by
+    ``MetadataExtractor.extract``. It makes no assertions; if no directory
+    was extracted it prints a message and returns.
+    """
+    archive_url = "https://github.com/vala-lang/valdo/releases/download/2022.04.14/valdo-2022.04.14.tar.xz"
+    with tempfile.TemporaryDirectory() as workdir:
+        unpacked = download_and_extract(archive_url, Path(workdir))
+        if not unpacked:
+            print("Failed to extract package")
+            return
+        print(f"Extracted to: {unpacked}")

+        # Collect candidate license files anywhere below the unpacked tree.
+        name_prefixes = ('license', 'copying')
+        candidates = [
+            Path(folder) / filename
+            for folder, _subdirs, filenames in os.walk(unpacked)
+            for filename in filenames
+            if filename.lower().startswith(name_prefixes)
+        ]
+        print(f"Found license files: {candidates}")

+        # Show a preview of each file, truncated to 1000 characters.
+        for candidate in candidates:
+            print(f"\n--- Content of {candidate} ---")
+            with open(candidate, 'r', encoding='utf-8', errors='replace') as handle:
+                text = handle.read()
+            if len(text) > 1000:
+                preview = text[:1000] + "..."
+            else:
+                preview = text
+            print(preview)

+        print("\n--- Testing Metadata Extraction ---")
+        info = MetadataExtractor().extract(unpacked, archive_url)
+        print(f"Package name: {info.name}")
+        print(f"Package version: {info.version}")
+        print("Detected licenses:")
+        for entry in info.licenses:
+            print(f"  - Name: {entry.name}, Category: {entry.category}")
+
+# Run the valdo reproduction when this file is executed directly as a script.
+if __name__ == "__main__":
+    test_license_detection()