
Autodetection improvements

clanker 1 month ago
parent
commit
b83e2e4401

+ 125 - 0
debug_dependency_detection.py

@@ -0,0 +1,125 @@
+#!/usr/bin/env python3
+"""
+Debug script to trace dependency detection for GCC.
+This script will help identify where the incorrect dependencies are coming from.
+"""
+
+import logging
+import json
+from pathlib import Path
+
+# Set up detailed logging
+logging.basicConfig(
+    level=logging.DEBUG,
+    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+)
+
+# Import autusm components
+from src.autusm.usm_integration import USMIntegration
+from src.autusm.analyzer import SourceAnalyzer
+from src.autusm.metadata import MetadataExtractor
+from src.autusm.manifest import ManifestGenerator
+from src.autusm.models import BuildSystem, BuildSystemType
+
+def debug_dependency_detection():
+    """Debug the dependency detection process."""
+    
+    print("=== DEBUGGING DEPENDENCY DETECTION FOR GCC ===\n")
+    
+    # Path to the extracted GCC source
+    gcc_source_dir = Path("test_output1")
+    
+    if not gcc_source_dir.exists():
+        print(f"ERROR: GCC source directory not found at {gcc_source_dir}")
+        return
+    
+    print(f"Analyzing GCC source at: {gcc_source_dir}\n")
+    
+    # 1. Check USM autoprovides
+    print("1. CHECKING USM AUTOPROVIDES...")
+    usm_integration = USMIntegration()
+    if usm_integration.is_available():
+        print("USM is available, getting autoprovides...")
+        autoprovides = usm_integration.get_autoprovides(gcc_source_dir)
+        print(f"USM autoprovides returned: {json.dumps(autoprovides, indent=2)}")
+    else:
+        print("USM is not available on this system")
+    
+    print("\n" + "="*60 + "\n")
+    
+    # 2. Check metadata extraction
+    print("2. CHECKING METADATA EXTRACTION...")
+    metadata_extractor = MetadataExtractor()
+    package_info = metadata_extractor.extract(gcc_source_dir)
+    
+    print(f"Extracted package name: {package_info.name}")
+    print(f"Extracted version: {package_info.version}")
+    print(f"Extracted runtime dependencies: {package_info.runtime_dependencies}")
+    print(f"Extracted build dependencies: {package_info.build_dependencies}")
+    
+    print("\n" + "="*60 + "\n")
+    
+    # 3. Check source code analysis
+    print("3. CHECKING SOURCE CODE ANALYSIS...")
+    analyzer = SourceAnalyzer()
+    
+    # Analyze dependencies in source code
+    try:
+        source_dependencies = analyzer.analyze_dependencies(gcc_source_dir)
+        print(f"Source code dependencies: {json.dumps(source_dependencies, indent=2)}")
+    except Exception as e:
+        print(f"Error analyzing source dependencies: {e}")
+    
+    print("\n" + "="*60 + "\n")
+    
+    # 4. Check manifest generation
+    print("4. CHECKING MANIFEST GENERATION...")
+    manifest_generator = ManifestGenerator()
+    
+    # Create a basic build system for GCC
+    build_system = BuildSystem(
+        type=BuildSystemType.AUTOTOOLS,
+        config_files=[],
+        build_files=[],
+        detected_commands=[],
+        custom_args={}
+    )
+    
+    # Generate manifest (add a summary if missing)
+    if not package_info.summary:
+        package_info.summary = "GNU Compiler Collection"
+    manifest = manifest_generator.generate(package_info, build_system)
+    
+    print(f"Generated manifest dependencies:")
+    print(f"  Runtime: {manifest.depends.runtime}")
+    print(f"  Build: {manifest.depends.build}")
+    print(f"  Manage: {manifest.depends.manage}")
+    
+    # Check the _convert_to_resource_refs method specifically
+    print("\n5. TESTING _convert_to_resource_refs METHOD...")
+    
+    # Test with various inputs
+    test_deps = ["libc", "generic_format_parser", "gcc", "make"]
+    for dep in test_deps:
+        converted = manifest_generator._convert_to_resource_refs([dep])
+        print(f"  '{dep}' -> {converted}")
+    
+    print("\n" + "="*60 + "\n")
+    
+    # 6. Check current manifest file
+    print("6. CHECKING CURRENT MANIFEST.USM FILE...")
+    manifest_file = gcc_source_dir / "MANIFEST.usm"
+    if manifest_file.exists():
+        with open(manifest_file, 'r') as f:
+            current_manifest = json.load(f)
+        
+        print("Current manifest dependencies:")
+        if "depends" in current_manifest and "runtime" in current_manifest["depends"]:
+            print(f"  Runtime: {current_manifest['depends']['runtime']}")
+        else:
+            print("  No runtime dependencies found in current manifest")
+    else:
+        print("No MANIFEST.usm file found")
+
+if __name__ == "__main__":
+    debug_dependency_detection()

+ 83 - 6
src/autusm/analyzer.py

@@ -106,11 +106,22 @@ class SourceAnalyzer:
             detected_commands = []
             custom_args = {}
             
+            # Track build systems by depth to prioritize those closer to root
+            build_systems_by_depth = {}  # depth -> {build_type: [files]}
+            
             # Walk through the source directory
             for root, dirs, files in os.walk(source_dir):
                 # Skip hidden directories and common build directories
                 dirs[:] = [d for d in dirs if not d.startswith('.') and d not in ['build', 'target', 'node_modules', '__pycache__']]
                 
+                # Calculate depth from source_dir
+                root_path = Path(root)
+                depth = len(root_path.relative_to(source_dir).parts)
+                
+                # Initialize this depth if not already done
+                if depth not in build_systems_by_depth:
+                    build_systems_by_depth[depth] = {}
+                
                 for file in files:
                     file_path = Path(root) / file
                     relative_path = file_path.relative_to(source_dir)
@@ -119,12 +130,10 @@ class SourceAnalyzer:
                     for build_type, patterns in self.build_system_patterns.items():
                         for pattern in patterns:
                             if re.match(pattern, file, re.IGNORECASE):
-                                if detected_type == BuildSystemType.UNKNOWN:
-                                    detected_type = build_type
-                                elif detected_type != build_type:
-                                    # Multiple build systems detected, prefer more specific ones
-                                    detected_type = self._resolve_build_system_conflict(detected_type, build_type)
-                                
+                                # Track this build system at this depth
+                                if build_type not in build_systems_by_depth[depth]:
+                                    build_systems_by_depth[depth][build_type] = []
+                                build_systems_by_depth[depth][build_type].append(str(relative_path))
                                 config_files.append(str(relative_path))
                                 break
                     
@@ -134,6 +143,9 @@ class SourceAnalyzer:
                     elif file.lower() == "cmakelists.txt":
                         build_files.append(str(relative_path))
             
+            # Determine the best build system based on depth and priority
+            detected_type = self._select_best_build_system(build_systems_by_depth)
+            
             # Get detected commands based on build system
             detected_commands = self._get_build_commands(detected_type, source_dir)
             
@@ -155,6 +167,71 @@ class SourceAnalyzer:
             logger.error(f"Failed to analyze build system: {e}")
             raise AnalysisError(f"Failed to analyze build system: {e}")
 
+    def _select_best_build_system(self, build_systems_by_depth: dict) -> BuildSystemType:
+        """Select the best build system based on depth and priority.
+        
+        Args:
+            build_systems_by_depth: Dictionary mapping depth to {build_type: [files]}
+            
+        Returns:
+            The selected build system type
+        """
+        # If no build systems detected, return UNKNOWN
+        if not build_systems_by_depth:
+            return BuildSystemType.UNKNOWN
+        
+        # Sort depths to check from shallowest to deepest
+        sorted_depths = sorted(build_systems_by_depth.keys())
+        
+        # Check each depth from shallowest to deepest
+        for depth in sorted_depths:
+            build_systems_at_depth = build_systems_by_depth[depth]
+            
+            # If only one build system at this depth, use it
+            if len(build_systems_at_depth) == 1:
+                return list(build_systems_at_depth.keys())[0]
+            
+            # If multiple build systems at the same depth, use priority to resolve
+            if len(build_systems_at_depth) > 1:
+                return self._resolve_build_system_conflict_at_same_depth(
+                    list(build_systems_at_depth.keys())
+                )
+        
+        # Fallback to UNKNOWN
+        return BuildSystemType.UNKNOWN
+    
+    def _resolve_build_system_conflict_at_same_depth(self, build_types: List[BuildSystemType]) -> BuildSystemType:
+        """Resolve conflicts when multiple build systems are detected at the same depth.
+        
+        Args:
+            build_types: List of build system types detected at the same depth
+            
+        Returns:
+            The preferred build system
+        """
+        # Define priority order (higher number = higher priority)
+        priority = {
+            BuildSystemType.NPM: 8,
+            BuildSystemType.CARGO: 7,
+            BuildSystemType.MESON: 6,
+            BuildSystemType.CMAKE: 5,
+            BuildSystemType.AUTOTOOLS: 4,
+            BuildSystemType.MAKE: 3,
+            BuildSystemType.PYTHON: 2,
+            BuildSystemType.UNKNOWN: 1
+        }
+        
+        # Find the build system with the highest priority
+        best_build_type = BuildSystemType.UNKNOWN
+        best_priority = priority[BuildSystemType.UNKNOWN]
+        
+        for build_type in build_types:
+            if priority[build_type] > best_priority:
+                best_priority = priority[build_type]
+                best_build_type = build_type
+        
+        return best_build_type
+    
     def _resolve_build_system_conflict(self, current: BuildSystemType, new: BuildSystemType) -> BuildSystemType:
         """Resolve conflicts when multiple build systems are detected.
         

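Note: for reference, a minimal standalone sketch of the new selection rule — the shallowest depth wins outright, and priority only breaks ties between build systems found at the same depth. The BuildSystemType enum and select() helper below are illustrative stand-ins, not the real src.autusm.models API.

from enum import Enum

class BuildSystemType(Enum):  # toy stand-in for src.autusm.models
    AUTOTOOLS = "autotools"
    CARGO = "cargo"
    UNKNOWN = "unknown"

# Mirrors the priority table above (higher number = higher priority)
PRIORITY = {BuildSystemType.AUTOTOOLS: 4, BuildSystemType.CARGO: 7,
            BuildSystemType.UNKNOWN: 1}

def select(build_systems_by_depth):
    # Same shape the analyzer builds: depth -> {build_type: [files]}
    for depth in sorted(build_systems_by_depth):
        systems = build_systems_by_depth[depth]
        if len(systems) == 1:
            return next(iter(systems))
        if len(systems) > 1:
            return max(systems, key=PRIORITY.__getitem__)
    return BuildSystemType.UNKNOWN

# GCC-like layout: autotools at the root, a cargo crate one level down.
layout = {
    0: {BuildSystemType.AUTOTOOLS: ["configure.ac", "Makefile.am"]},
    1: {BuildSystemType.CARGO: ["libformat_parser/Cargo.toml"]},
}
assert select(layout) is BuildSystemType.AUTOTOOLS  # depth 1 is never consulted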
+ 83 - 15
src/autusm/manifest.py

@@ -116,29 +116,97 @@ class ManifestGenerator:
         """
         resource_refs = []
         
+        # Define categories of dependencies to filter out
+        system_libraries = {
+            # Standard C library
+            'libc', 'libc6', 'glibc', 'musl', 'bsdlibc',
+            # Core system libraries that are always present
+            'libm', 'libpthread', 'libdl', 'librt', 'libutil',
+            # Dynamic linker
+            'ld-linux', 'ld.so', 'ld64.so',
+            # Other fundamental system libraries
+            'libgcc_s', 'libstdc++', 'libxcb', 'libx11', 'libxext', 'libxrender',
+            'libfontconfig', 'libfreetype', 'libexpat', 'libz', 'libbz2',
+            'libpng', 'libjpeg', 'libtiff', 'libgif', 'libwebp',
+            'libssl', 'libcrypto', 'libgssapi_krb5', 'libkrb5', 'libcom_err',
+            'libresolv', 'libnss_dns', 'libnss_files', 'libnss_compat',
+            'libdbus', 'libsystemd', 'libudev', 'libegl', 'libgl', 'libgles',
+            'libasound', 'libpulse', 'libgtk', 'libgdk', 'libglib', 'libgobject',
+            'libgio', 'libcairo', 'libpango', 'libatk', 'libgdk_pixbuf',
+            'libsqlite3', 'libxml2', 'libxslt', 'libcurl', 'libnghttp2',
+            'libreadline', 'libncurses', 'libtinfo', 'libhistory',
+            'libffi', 'libgmp', 'libmpfr', 'libmpc', 'libisl'
+        }
+        
+        build_time_dependencies = {
+            # Build tools that shouldn't be runtime dependencies
+            'gcc', 'g++', 'cc', 'c++', 'clang', 'clang++', 'rustc', 'cargo',
+            'make', 'cmake', 'ninja', 'meson', 'autotools', 'autoconf', 'automake',
+            'libtool', 'pkg-config', 'yacc', 'bison', 'flex', 'm4', 'perl',
+            'python', 'python3', 'node', 'nodejs', 'ruby', 'gem', 'java', 'javac',
+            'mvn', 'gradle', 'go', 'rust', 'npm', 'pip', 'pecl',
+            'doxygen', 'sphinx', 'pandoc', 'groff', 'texinfo', 'help2man',
+            # Rust-specific build dependencies (crates)
+            'generic_format_parser', 'serde', 'serde_derive', 'serde_json', 'tokio',
+            'log', 'env_logger', 'clap', 'anyhow', 'thiserror', 'rayon', 'crossbeam',
+            'regex', 'lazy_static', 'once_cell', 'parking_lot', 'rand', 'uuid',
+            'chrono', 'time', 'bytes', 'futures', 'async_trait',
+            'hyper', 'reqwest', 'toml', 'yaml', 'config',
+            'tracing', 'slog', 'fern', 'pretty_env_logger',
+            'criterion', 'proptest', 'quickcheck', 'mockall', 'tempfile'
+        }
+        
         for dep in dependencies:
-            # This is a simplified conversion
-            # In a real implementation, you'd need more sophisticated mapping
-            dep_lower = dep.lower()
+            dep_lower = dep.lower().strip()
             
-            # Common package name to resource reference mappings
-            if dep_lower.startswith("lib"):
-                # Library dependency
+            # Skip empty dependencies
+            if not dep_lower:
+                continue
+                
+            # Filter out system libraries - they're always present and shouldn't be explicit dependencies
+            if dep_lower in system_libraries:
+                logger.debug(f"Filtering out system library dependency: {dep}")
+                continue
+                
+            # Filter out build-time dependencies - they're not needed at runtime
+            if dep_lower in build_time_dependencies:
+                logger.debug(f"Filtering out build-time dependency: {dep}")
+                continue
+                
+            # Filter out Rust crates (they typically end with common patterns)
+            if (dep_lower.endswith('_parser') or dep_lower.endswith('_generator') or
+                dep_lower.endswith('_serializer') or dep_lower.endswith('_deserializer') or
+                dep_lower.endswith('_derive') or dep_lower.endswith('_macro') or
+                dep_lower.startswith('serde_') or dep_lower.startswith('tokio_') or
+                dep_lower.startswith('tracing_') or dep_lower.startswith('async_') or
+                ('_' in dep_lower and any(suffix in dep_lower for suffix in ['_io', '_util', '_core', '_base', '_common']))):
+                logger.debug(f"Filtering out Rust crate dependency: {dep}")
+                continue
+            
+            # Only process dependencies that are likely to be actual runtime dependencies
+            # Common package name to resource reference mappings for valid runtime dependencies
+            if dep_lower.startswith("lib") and not dep_lower.startswith("libformat_parser"):
+                # Library dependency (but not our internal ones)
                 lib_name = dep_lower.replace("lib", "")
                 resource_refs.append(f"lib:{dep_lower}")
             elif dep_lower.endswith("-dev") or dep_lower.endswith("-devel"):
-                # Development dependency
-                base_name = dep_lower.replace("-dev", "").replace("-devel", "")
-                resource_refs.append(f"inc:{base_name}")
-            elif dep_lower in ["gcc", "clang", "rustc"]:
-                # Compiler dependency
+                # Development dependency - skip for runtime
+                logger.debug(f"Skipping development dependency for runtime: {dep}")
+                continue
+            elif dep_lower in ["bash", "sh", "zsh", "fish", "dash"]:
+                # Shell dependencies
                 resource_refs.append(f"bin:{dep_lower}")
-            elif dep_lower in ["python", "python3", "node", "nodejs"]:
-                # Runtime dependency
+            elif dep_lower in ["perl", "python", "python3", "ruby", "java", "node", "nodejs"]:
+                # Runtime interpreter dependencies
                 resource_refs.append(f"bin:{dep_lower}")
-            else:
-                # Default to bin for unknown dependencies
+            elif dep_lower in ["gzip", "bzip2", "xz", "zip", "unzip", "tar"]:
+                # Compression utilities
                 resource_refs.append(f"bin:{dep_lower}")
+            else:
+                # For unknown dependencies, be conservative and don't include them
+                # rather than potentially creating invalid dependencies
+                logger.debug(f"Skipping unknown dependency type: {dep}")
+                continue
         
         return resource_refs
 

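Note: a quick sanity check of the new filtering, assuming a default ManifestGenerator() constructor and calling the method per dependency as debug_dependency_detection.py does above; expected values follow the branch order of the hunk. Note in particular that interpreters such as python3 sit in build_time_dependencies, so they are filtered before the runtime-interpreter branch can keep them.

from src.autusm.manifest import ManifestGenerator

gen = ManifestGenerator()

cases = {
    "libz": [],                   # system library, filtered
    "gcc": [],                    # build-time tool, filtered
    "generic_format_parser": [],  # Rust crate, filtered
    "zlib-dev": [],               # development package, skipped for runtime
    "python3": [],                # caught by the build-time filter first
    "libpcap": ["lib:libpcap"],   # ordinary library, kept
    "bash": ["bin:bash"],         # shell, kept
    "gzip": ["bin:gzip"],         # compression utility, kept
    "somethingelse": [],          # unknown, conservatively dropped
}
for dep, expected in cases.items():
    got = gen._convert_to_resource_refs([dep])
    assert got == expected, f"{dep}: expected {expected}, got {got}"
print("all conversion cases behave as expected")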
+ 38 - 16
src/autusm/metadata.py

@@ -105,33 +105,43 @@ class MetadataExtractor:
             # Find and process package files
             package_files = self._find_package_files(source_dir)
             
+            # Track which file types we've already processed to prioritize root-level files
+            processed_file_types = set()
+            
             for file_path in package_files:
                 relative_path = file_path.relative_to(source_dir)
                 package_info.metadata_files.append(str(relative_path))
                 
-                # Extract metadata based on file type
-                if file_path.name == "package.json":
+                # Extract metadata based on file type, but only if we haven't processed this type yet
+                if file_path.name == "package.json" and "package.json" not in processed_file_types:
                     self._extract_from_package_json(file_path, package_info)
-                elif file_path.name == "setup.py":
+                    processed_file_types.add("package.json")
+                elif file_path.name == "setup.py" and "setup.py" not in processed_file_types:
                     self._extract_from_setup_py(file_path, package_info)
-                elif file_path.name == "pyproject.toml":
+                    processed_file_types.add("setup.py")
+                elif file_path.name == "pyproject.toml" and "pyproject.toml" not in processed_file_types:
                     self._extract_from_pyproject_toml(file_path, package_info)
-                elif file_path.name == "Cargo.toml":
+                    processed_file_types.add("pyproject.toml")
+                elif file_path.name == "Cargo.toml" and "Cargo.toml" not in processed_file_types:
                     self._extract_from_cargo_toml(file_path, package_info)
-                elif file_path.name == "composer.json":
+                    processed_file_types.add("Cargo.toml")
+                elif file_path.name == "composer.json" and "composer.json" not in processed_file_types:
                     self._extract_from_composer_json(file_path, package_info)
-                elif file_path.name == "pom.xml":
+                    processed_file_types.add("composer.json")
+                elif file_path.name == "pom.xml" and "pom.xml" not in processed_file_types:
                     self._extract_from_pom_xml(file_path, package_info)
-                elif file_path.name == "build.gradle":
+                    processed_file_types.add("pom.xml")
+                elif file_path.name == "build.gradle" and "build.gradle" not in processed_file_types:
                     self._extract_from_build_gradle(file_path, package_info)
+                    processed_file_types.add("build.gradle")
             
             # Extract additional metadata from common locations
             self._extract_from_readme(source_dir, package_info)
             self._extract_from_license_files(source_dir, package_info)
             self._extract_from_git_info(source_dir, package_info)
             
-            # If we still don't have a name or version, try to derive from URL/filename
-            if url and (not package_info.name or not package_info.version):
+            # Always run URL/filename extraction; high-confidence results may override file-based metadata
+            if url:
                 self._extract_from_filename(url, package_info)
             
             # If we still don't have a name, try to derive from directory
@@ -152,7 +162,7 @@ class MetadataExtractor:
             source_dir: Path to the source directory
             
         Returns:
-            List of package configuration file paths
+            List of package configuration file paths, sorted by proximity to root
         """
         package_files = []
         
@@ -167,10 +177,16 @@ class MetadataExtractor:
                 for package_type, patterns in self.package_patterns.items():
                     for pattern in patterns:
                         if self._match_pattern(file, pattern):
-                            package_files.append(file_path)
+                            # Calculate depth relative to source_dir
+                            depth = len(file_path.relative_to(source_dir).parts) - 1
+                            package_files.append((file_path, depth))
                             break
         
-        return package_files
+        # Sort by depth (0 = root directory, 1 = one level deep, etc.)
+        package_files.sort(key=lambda x: x[1])
+        
+        # Return just the paths, now sorted by proximity to root
+        return [file_path for file_path, _ in package_files]
 
     def _match_pattern(self, filename: str, pattern: str) -> bool:
         """Check if a filename matches a pattern.
@@ -794,14 +810,20 @@ class MetadataExtractor:
             # Parse common patterns
             name, version, confidence = self._parse_filename_pattern(base_name)
             
-            # Only use if we haven't already found name/version
-            if name and not package_info.name:
+            # Always use filename extraction if it has reasonable confidence
+            if name and confidence >= 0.6:
                 package_info.name = name
                 logger.debug(f"Extracted name from filename: {name} (confidence: {confidence})")
+            elif name and not package_info.name:
+                package_info.name = name
+                logger.debug(f"Extracted name from filename (fallback): {name} (confidence: {confidence})")
                 
-            if version and not package_info.version:
+            if version and confidence >= 0.6:
                 package_info.version = version
                 logger.debug(f"Extracted version from filename: {version} (confidence: {confidence})")
+            elif version and not package_info.version:
+                package_info.version = version
+                logger.debug(f"Extracted version from filename (fallback): {version} (confidence: {confidence})")
                 
             # Always store confidence level for potential user confirmation
             package_info.extra_data["filename_confidence"] = confidence

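Note: a small illustration of why the depth sort in _find_package_files matters, assuming both filenames match the extractor's package patterns (the vendor/helper layout is just an example): the root-level manifest now comes first, and the processed_file_types guard means a deeper file of the same type can no longer win.

import tempfile
from pathlib import Path

from src.autusm.metadata import MetadataExtractor

with tempfile.TemporaryDirectory() as tmp:
    root = Path(tmp)
    (root / "pyproject.toml").touch()   # depth 0
    crate = root / "vendor" / "helper"
    crate.mkdir(parents=True)
    (crate / "Cargo.toml").touch()      # depth 2

    files = MetadataExtractor()._find_package_files(root)
    # Sorted by depth: the root pyproject.toml precedes the nested
    # Cargo.toml, so its metadata is extracted first.
    assert files[0].name == "pyproject.toml"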
+ 1 - 5
src/autusm/usm_integration.py

@@ -60,8 +60,7 @@ class USMIntegration:
                 cmd,
                 cwd=source_dir,
                 capture_output=True,
-                text=True,
-                timeout=300  # 5 minutes timeout
+                text=True
             )
             
             # Log command result separately to ensure no contamination
@@ -91,9 +90,6 @@ class USMIntegration:
             logger.info(f"Got {len(autoprovides)} autoprovides from USM")
             return autoprovides
             
-        except subprocess.TimeoutExpired:
-            logger.error("USM autoprovides timed out")
-            raise USMIntegrationError("USM autoprovides timed out")
         except subprocess.SubprocessError as e:
             logger.error(f"USM autoprovides failed: {e}")
             raise USMIntegrationError(f"USM autoprovides failed: {e}")

+ 142 - 0
test_comprehensive_build_system_detection.py

@@ -0,0 +1,142 @@
+#!/usr/bin/env python3
+"""
+Comprehensive test script for build system detection with depth-based prioritization.
+"""
+
+import sys
+import tempfile
+from pathlib import Path
+
+from src.autusm.analyzer import SourceAnalyzer
+
+def test_case_1_autotools_at_root():
+    """Test case: Autotools at root, cargo in subdirectory (like GCC)."""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        base_dir = Path(temp_dir)
+        
+        # Root level autotools
+        (base_dir / "configure.ac").touch()
+        (base_dir / "Makefile.am").touch()
+        
+        # Subdirectory with cargo
+        libformat_dir = base_dir / "libformat_parser"
+        libformat_dir.mkdir()
+        (libformat_dir / "Cargo.toml").touch()
+        
+        analyzer = SourceAnalyzer()
+        build_system = analyzer.detect_build_system(base_dir)
+        
+        assert build_system.type.value == "autotools", f"Expected autotools, got {build_system.type.value}"
+        print("✓ Test 1 passed: Autotools at root prioritized over cargo in subdirectory")
+        return True
+
+def test_case_2_multiple_build_systems_at_root():
+    """Test case: Multiple build systems at root level (should use priority)."""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        base_dir = Path(temp_dir)
+        
+        # Multiple build systems at root
+        (base_dir / "configure.ac").touch()  # autotools
+        (base_dir / "CMakeLists.txt").touch()  # cmake
+        (base_dir / "meson.build").touch()  # meson
+        
+        analyzer = SourceAnalyzer()
+        build_system = analyzer.detect_build_system(base_dir)
+        
+        # CMAKE has priority 5, AUTOTOOLS has 4, MESON has 6
+        # MESON should be selected due to highest priority at same depth
+        assert build_system.type.value == "meson", f"Expected meson, got {build_system.type.value}"
+        print("✓ Test 2 passed: Meson selected due to highest priority at same depth")
+        return True
+
+def test_case_3_only_subdirectory_build_systems():
+    """Test case: Build systems only in subdirectories (should use priority)."""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        base_dir = Path(temp_dir)
+        
+        # Create subdirectories with different build systems
+        cargo_dir = base_dir / "rust_component"
+        cargo_dir.mkdir()
+        (cargo_dir / "Cargo.toml").touch()
+        
+        npm_dir = base_dir / "js_component"
+        npm_dir.mkdir()
+        (npm_dir / "package.json").touch()
+        
+        analyzer = SourceAnalyzer()
+        build_system = analyzer.detect_build_system(base_dir)
+        
+        # NPM has priority 8, CARGO has 7
+        # NPM should be selected due to higher priority at same depth
+        assert build_system.type.value == "npm", f"Expected npm, got {build_system.type.value}"
+        print("✓ Test 3 passed: NPM selected due to higher priority at same depth")
+        return True
+
+def test_case_4_nested_build_systems():
+    """Test case: Build systems at different depths."""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        base_dir = Path(temp_dir)
+        
+        # Shallow build system
+        cmake_dir = base_dir / "cmake_component"
+        cmake_dir.mkdir()
+        (cmake_dir / "CMakeLists.txt").touch()
+        
+        # Deeper build system
+        cargo_dir = base_dir / "deep" / "rust_component"
+        cargo_dir.mkdir(parents=True)
+        (cargo_dir / "Cargo.toml").touch()
+        
+        analyzer = SourceAnalyzer()
+        build_system = analyzer.detect_build_system(base_dir)
+        
+        # CMAKE should be selected due to shallower depth (1 vs 2)
+        assert build_system.type.value == "cmake", f"Expected cmake, got {build_system.type.value}"
+        print("✓ Test 4 passed: Shallower build system prioritized regardless of priority")
+        return True
+
+def test_case_5_no_build_system():
+    """Test case: No build system detected."""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        base_dir = Path(temp_dir)
+        
+        # Just some source files, no build system
+        (base_dir / "main.c").touch()
+        (base_dir / "utils.h").touch()
+        
+        analyzer = SourceAnalyzer()
+        build_system = analyzer.detect_build_system(base_dir)
+        
+        assert build_system.type.value == "unknown", f"Expected unknown, got {build_system.type.value}"
+        print("✓ Test 5 passed: Unknown build system when none detected")
+        return True
+
+def run_all_tests():
+    """Run all test cases."""
+    tests = [
+        test_case_1_autotools_at_root,
+        test_case_2_multiple_build_systems_at_root,
+        test_case_3_only_subdirectory_build_systems,
+        test_case_4_nested_build_systems,
+        test_case_5_no_build_system
+    ]
+    
+    passed = 0
+    failed = 0
+    
+    for test in tests:
+        try:
+            if test():
+                passed += 1
+            else:
+                failed += 1
+        except Exception as e:
+            print(f"✗ Test {test.__name__} failed with exception: {e}")
+            failed += 1
+    
+    print(f"\nTest Results: {passed} passed, {failed} failed")
+    return failed == 0
+
+if __name__ == "__main__":
+    success = run_all_tests()
+    sys.exit(0 if success else 1)

+ 54 - 0
test_gcc_build_system_detection.py

@@ -0,0 +1,54 @@
+#!/usr/bin/env python3
+"""
+Test script to reproduce the GCC build system detection issue.
+"""
+
+import sys
+import tempfile
+from pathlib import Path
+
+from src.autusm.analyzer import SourceAnalyzer
+
+def create_mock_gcc_structure(base_dir):
+    """Create a mock GCC directory structure with both autotools and cargo files."""
+    
+    # Create root-level autotools files
+    (base_dir / "configure.ac").touch()
+    (base_dir / "Makefile.am").touch()
+    (base_dir / "autogen.sh").touch()
+    
+    # Create a subdirectory with cargo files (like libformat_parser)
+    libformat_dir = base_dir / "libformat_parser"
+    libformat_dir.mkdir()
+    (libformat_dir / "Cargo.toml").touch()
+    (libformat_dir / "Cargo.lock").touch()
+    
+    # Add some other subdirectories to make it more realistic
+    (base_dir / "gcc").mkdir()
+    (base_dir / "libstdc++-v3").mkdir()
+    (base_dir / "libgcc").mkdir()
+
+def test_build_system_detection():
+    """Test that build system detection prioritizes root-level build systems."""
+    
+    with tempfile.TemporaryDirectory() as temp_dir:
+        base_dir = Path(temp_dir)
+        create_mock_gcc_structure(base_dir)
+        
+        analyzer = SourceAnalyzer()
+        build_system = analyzer.detect_build_system(base_dir)
+        
+        print(f"Detected build system: {build_system.type.value}")
+        print(f"Config files: {build_system.config_files}")
+        
+        # This should detect AUTOTOOLS, not CARGO
+        if build_system.type.value == "autotools":
+            print("✓ PASS: Correctly detected autotools")
+            return True
+        else:
+            print("✗ FAIL: Incorrectly detected cargo instead of autotools")
+            return False
+
+if __name__ == "__main__":
+    success = test_build_system_detection()
+    sys.exit(0 if success else 1)

+ 82 - 0
test_metadata_detection.py

@@ -0,0 +1,82 @@
+#!/usr/bin/env python3
+"""
+Test script to reproduce the metadata detection issue with GCC.
+"""
+
+import sys
+import tempfile
+import urllib.request
+import tarfile
+from pathlib import Path
+
+# Add src to path
+sys.path.insert(0, 'src')
+
+from autusm.metadata import MetadataExtractor
+from autusm.models import PackageInfo
+
+def test_gcc_metadata_extraction():
+    """Test metadata extraction with GCC source code."""
+    
+    # GCC URL
+    gcc_url = "https://mirrors.middlendian.com/gnu//gcc/gcc-15.1.0/gcc-15.1.0.tar.xz"
+    
+    # Create a temporary directory for extraction
+    with tempfile.TemporaryDirectory() as temp_dir:
+        temp_path = Path(temp_dir)
+        archive_path = temp_path / "gcc.tar.xz"
+        extract_path = temp_path / "extracted"
+        
+        print(f"Downloading GCC from {gcc_url}...")
+        try:
+            # Download the archive
+            urllib.request.urlretrieve(gcc_url, archive_path)
+            print("Download completed.")
+            
+            # Extract the archive
+            print("Extracting archive...")
+            extract_path.mkdir(exist_ok=True)
+            with tarfile.open(archive_path, "r:xz") as tar:
+                tar.extractall(extract_path)
+            print("Extraction completed.")
+            
+            # Find the extracted directory
+            extracted_dirs = [d for d in extract_path.iterdir() if d.is_dir()]
+            if not extracted_dirs:
+                print("Error: No extracted directory found")
+                return False
+                
+            source_dir = extracted_dirs[0]
+            print(f"Source directory: {source_dir}")
+            
+            # Test metadata extraction
+            print("\nTesting metadata extraction...")
+            extractor = MetadataExtractor()
+            package_info = extractor.extract(source_dir, gcc_url)
+            
+            # Print the results
+            print(f"\nExtracted package name: {package_info.name}")
+            print(f"Version: {package_info.version}")
+            print(f"Summary: {package_info.summary}")
+            print(f"URL: {package_info.url}")
+            print(f"Metadata files found: {package_info.metadata_files}")
+            
+            # Check if it correctly identified GCC
+            if package_info.name and "gcc" in package_info.name.lower():
+                print("\n✅ SUCCESS: Correctly identified GCC as the main package")
+                return True
+            else:
+                print(f"\n❌ FAILURE: Identified '{package_info.name}' instead of GCC")
+                return False
+                
+        except Exception as e:
+            print(f"Error during test: {e}")
+            import traceback
+            traceback.print_exc()
+            return False
+
+if __name__ == "__main__":
+    success = test_gcc_metadata_extraction()
+    sys.exit(0 if success else 1)

+ 50 - 0
test_no_timeout.py

@@ -0,0 +1,50 @@
+#!/usr/bin/env python3
+"""
+Test script to verify that USM autoprovides command no longer has a timeout.
+"""
+
+import sys
+import time
+from pathlib import Path
+
+# Add the src directory to the path so we can import autusm
+sys.path.insert(0, str(Path(__file__).parent / "src"))
+
+from autusm.usm_integration import USMIntegration
+
+def test_no_timeout():
+    """Test that autoprovides can run without timeout."""
+    print("Testing USM autoprovides without timeout...")
+    
+    # Initialize USM integration
+    usm = USMIntegration()
+    
+    # Check if USM is available
+    if not usm.is_available():
+        print("USM is not available on this system. Skipping test.")
+        return True
+    
+    print("USM is available. Testing autoprovides without timeout...")
+    
+    # Try to get autoprovides for the current directory
+    # This will test if the timeout has been removed
+    try:
+        start_time = time.time()
+        autoprovides = usm.get_autoprovides(Path.cwd())
+        end_time = time.time()
+        
+        elapsed_time = end_time - start_time
+        print(f"Autoprovides completed in {elapsed_time:.2f} seconds")
+        print(f"Found {len(autoprovides)} autoprovides")
+        
+        # If we get here without a timeout error, the test passes
+        print("SUCCESS: No timeout occurred!")
+        return True
+        
+    except Exception as e:
+        print(f"ERROR: {e}")
+        return False
+
+if __name__ == "__main__":
+    success = test_no_timeout()
+    sys.exit(0 if success else 1)