
Autodetection improvements

clanker 1 month ago
parent
commit
b83e2e4401

+ 125 - 0
debug_dependency_detection.py

@@ -0,0 +1,125 @@
+#!/usr/bin/env python3
+"""
+Debug script to trace dependency detection for GCC.
+This script will help identify where the incorrect dependencies are coming from.
+"""
+
+import logging
+import json
+from pathlib import Path
+
+# Set up detailed logging
+logging.basicConfig(
+    level=logging.DEBUG,
+    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+)
+
+# Import autusm components
+from src.autusm.usm_integration import USMIntegration
+from src.autusm.analyzer import SourceAnalyzer
+from src.autusm.metadata import MetadataExtractor
+from src.autusm.manifest import ManifestGenerator
+from src.autusm.models import BuildSystem, BuildSystemType
+
+def debug_dependency_detection():
+    """Debug the dependency detection process."""
+    
+    print("=== DEBUGGING DEPENDENCY DETECTION FOR GCC ===\n")
+    
+    # Path to the extracted GCC source
+    gcc_source_dir = Path("test_output1")
+    
+    if not gcc_source_dir.exists():
+        print(f"ERROR: GCC source directory not found at {gcc_source_dir}")
+        return
+    
+    print(f"Analyzing GCC source at: {gcc_source_dir}\n")
+    
+    # 1. Check USM autoprovides
+    print("1. CHECKING USM AUTOPROVIDES...")
+    usm_integration = USMIntegration()
+    if usm_integration.is_available():
+        print("USM is available, getting autoprovides...")
+        autoprovides = usm_integration.get_autoprovides(gcc_source_dir)
+        print(f"USM autoprovides returned: {json.dumps(autoprovides, indent=2)}")
+    else:
+        print("USM is not available on this system")
+    
+    print("\n" + "="*60 + "\n")
+    
+    # 2. Check metadata extraction
+    print("2. CHECKING METADATA EXTRACTION...")
+    metadata_extractor = MetadataExtractor()
+    package_info = metadata_extractor.extract(gcc_source_dir)
+    
+    print(f"Extracted package name: {package_info.name}")
+    print(f"Extracted version: {package_info.version}")
+    print(f"Extracted runtime dependencies: {package_info.runtime_dependencies}")
+    print(f"Extracted build dependencies: {package_info.build_dependencies}")
+    
+    print("\n" + "="*60 + "\n")
+    
+    # 3. Check source code analysis
+    print("3. CHECKING SOURCE CODE ANALYSIS...")
+    analyzer = SourceAnalyzer()
+    
+    # Analyze dependencies in source code
+    try:
+        source_dependencies = analyzer.analyze_dependencies(gcc_source_dir)
+        print(f"Source code dependencies: {json.dumps(source_dependencies, indent=2)}")
+    except Exception as e:
+        print(f"Error analyzing source dependencies: {e}")
+    
+    print("\n" + "="*60 + "\n")
+    
+    # 4. Check manifest generation
+    print("4. CHECKING MANIFEST GENERATION...")
+    manifest_generator = ManifestGenerator()
+    
+    # Create a basic build system for GCC
+    build_system = BuildSystem(
+        type=BuildSystemType.AUTOTOOLS,
+        config_files=[],
+        build_files=[],
+        detected_commands=[],
+        custom_args={}
+    )
+    
+    # Generate manifest (add a summary if missing)
+    if not package_info.summary:
+        package_info.summary = "GNU Compiler Collection"
+    manifest = manifest_generator.generate(package_info, build_system)
+    
+    print(f"Generated manifest dependencies:")
+    print(f"  Runtime: {manifest.depends.runtime}")
+    print(f"  Build: {manifest.depends.build}")
+    print(f"  Manage: {manifest.depends.manage}")
+    
+    # Check the _convert_to_resource_refs method specifically
+    print("\n5. TESTING _convert_to_resource_refs METHOD...")
+    
+    # Test with various inputs
+    test_deps = ["libc", "generic_format_parser", "gcc", "make"]
+    for dep in test_deps:
+        converted = manifest_generator._convert_to_resource_refs([dep])
+        print(f"  '{dep}' -> {converted}")
+    
+    print("\n" + "="*60 + "\n")
+    
+    # 6. Check current manifest file
+    print("6. CHECKING CURRENT MANIFEST.USM FILE...")
+    manifest_file = gcc_source_dir / "MANIFEST.usm"
+    if manifest_file.exists():
+        with open(manifest_file, 'r') as f:
+            current_manifest = json.load(f)
+        
+        print("Current manifest dependencies:")
+        if "depends" in current_manifest and "runtime" in current_manifest["depends"]:
+            print(f"  Runtime: {current_manifest['depends']['runtime']}")
+        else:
+            print("  No runtime dependencies found in current manifest")
+    else:
+        print("No MANIFEST.usm file found")
+
+if __name__ == "__main__":
+    debug_dependency_detection()

+ 83 - 6
src/autusm/analyzer.py

@@ -106,11 +106,22 @@ class SourceAnalyzer:
             detected_commands = []
             custom_args = {}
             
+            # Track build systems by depth to prioritize those closer to root
+            build_systems_by_depth = {}  # depth -> {build_type: [files]}
+            
             # Walk through the source directory
             for root, dirs, files in os.walk(source_dir):
                 # Skip hidden directories and common build directories
                 dirs[:] = [d for d in dirs if not d.startswith('.') and d not in ['build', 'target', 'node_modules', '__pycache__']]
                 
+                # Calculate depth from source_dir
+                root_path = Path(root)
+                depth = len(root_path.relative_to(source_dir).parts)
+                
+                # Initialize this depth if not already done
+                if depth not in build_systems_by_depth:
+                    build_systems_by_depth[depth] = {}
+                
                 for file in files:
                     file_path = Path(root) / file
                     relative_path = file_path.relative_to(source_dir)
@@ -119,12 +130,10 @@ class SourceAnalyzer:
                     for build_type, patterns in self.build_system_patterns.items():
                         for pattern in patterns:
                             if re.match(pattern, file, re.IGNORECASE):
-                                if detected_type == BuildSystemType.UNKNOWN:
-                                    detected_type = build_type
-                                elif detected_type != build_type:
-                                    # Multiple build systems detected, prefer more specific ones
-                                    detected_type = self._resolve_build_system_conflict(detected_type, build_type)
-                                
+                                # Track this build system at this depth
+                                if build_type not in build_systems_by_depth[depth]:
+                                    build_systems_by_depth[depth][build_type] = []
+                                build_systems_by_depth[depth][build_type].append(str(relative_path))
                                 config_files.append(str(relative_path))
                                 break
                     
@@ -134,6 +143,9 @@ class SourceAnalyzer:
                     elif file.lower() == "cmakelists.txt":
                         build_files.append(str(relative_path))
             
+            # Determine the best build system based on depth and priority
+            detected_type = self._select_best_build_system(build_systems_by_depth)
+            
             # Get detected commands based on build system
             detected_commands = self._get_build_commands(detected_type, source_dir)
             
@@ -155,6 +167,71 @@ class SourceAnalyzer:
             logger.error(f"Failed to analyze build system: {e}")
             raise AnalysisError(f"Failed to analyze build system: {e}")
 
+    def _select_best_build_system(self, build_systems_by_depth: dict) -> BuildSystemType:
+        """Select the best build system based on depth and priority.
+        
+        Args:
+            build_systems_by_depth: Dictionary mapping depth to {build_type: [files]}
+            
+        Returns:
+            The selected build system type
+        """
+        # If no build systems detected, return UNKNOWN
+        if not build_systems_by_depth:
+            return BuildSystemType.UNKNOWN
+        
+        # Sort depths to check from shallowest to deepest
+        sorted_depths = sorted(build_systems_by_depth.keys())
+        
+        # Check each depth from shallowest to deepest
+        for depth in sorted_depths:
+            build_systems_at_depth = build_systems_by_depth[depth]
+            
+            # If only one build system at this depth, use it
+            if len(build_systems_at_depth) == 1:
+                return list(build_systems_at_depth.keys())[0]
+            
+            # If multiple build systems at the same depth, use priority to resolve
+            if len(build_systems_at_depth) > 1:
+                return self._resolve_build_system_conflict_at_same_depth(
+                    list(build_systems_at_depth.keys())
+                )
+        
+        # Fallback to UNKNOWN
+        return BuildSystemType.UNKNOWN
+    
+    def _resolve_build_system_conflict_at_same_depth(self, build_types: List[BuildSystemType]) -> BuildSystemType:
+        """Resolve conflicts when multiple build systems are detected at the same depth.
+        
+        Args:
+            build_types: List of build system types detected at the same depth
+            
+        Returns:
+            The preferred build system
+        """
+        # Define priority order (higher number = higher priority)
+        priority = {
+            BuildSystemType.NPM: 8,
+            BuildSystemType.CARGO: 7,
+            BuildSystemType.MESON: 6,
+            BuildSystemType.CMAKE: 5,
+            BuildSystemType.AUTOTOOLS: 4,
+            BuildSystemType.MAKE: 3,
+            BuildSystemType.PYTHON: 2,
+            BuildSystemType.UNKNOWN: 1
+        }
+        
+        # Find the build system with the highest priority
+        best_build_type = BuildSystemType.UNKNOWN
+        best_priority = priority[BuildSystemType.UNKNOWN]
+        
+        for build_type in build_types:
+            if priority[build_type] > best_priority:
+                best_priority = priority[build_type]
+                best_build_type = build_type
+        
+        return best_build_type
+    
     def _resolve_build_system_conflict(self, current: BuildSystemType, new: BuildSystemType) -> BuildSystemType:
         """Resolve conflicts when multiple build systems are detected.
         

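Note: for reference, a minimal standalone sketch of the new selection rule — the shallowest depth wins outright, and priority only breaks ties between build systems found at the same depth. The BuildSystemType enum and select() helper below are illustrative stand-ins, not the real src.autusm.models API.

from enum import Enum

class BuildSystemType(Enum):  # toy stand-in for src.autusm.models
    AUTOTOOLS = "autotools"
    CARGO = "cargo"
    UNKNOWN = "unknown"

# Mirrors the priority table above (higher number = higher priority)
PRIORITY = {BuildSystemType.AUTOTOOLS: 4, BuildSystemType.CARGO: 7,
            BuildSystemType.UNKNOWN: 1}

def select(build_systems_by_depth):
    # Same shape the analyzer builds: depth -> {build_type: [files]}
    for depth in sorted(build_systems_by_depth):
        systems = build_systems_by_depth[depth]
        if len(systems) == 1:
            return next(iter(systems))
        if len(systems) > 1:
            return max(systems, key=PRIORITY.__getitem__)
    return BuildSystemType.UNKNOWN

# GCC-like layout: autotools at the root, a cargo crate one level down.
layout = {
    0: {BuildSystemType.AUTOTOOLS: ["configure.ac", "Makefile.am"]},
    1: {BuildSystemType.CARGO: ["libformat_parser/Cargo.toml"]},
}
assert select(layout) is BuildSystemType.AUTOTOOLS  # depth 1 is never consulted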
+ 83 - 15
src/autusm/manifest.py

@@ -116,29 +116,97 @@ class ManifestGenerator:
         """
         resource_refs = []
         
+        # Define categories of dependencies to filter out
+        system_libraries = {
+            # Standard C library
+            'libc', 'libc6', 'glibc', 'musl', 'bsdlibc',
+            # Core system libraries that are always present
+            'libm', 'libpthread', 'libdl', 'librt', 'libutil',
+            # Dynamic linker
+            'ld-linux', 'ld.so', 'ld64.so',
+            # Other fundamental system libraries
+            'libgcc_s', 'libstdc++', 'libxcb', 'libx11', 'libxext', 'libxrender',
+            'libfontconfig', 'libfreetype', 'libexpat', 'libz', 'libbz2',
+            'libpng', 'libjpeg', 'libtiff', 'libgif', 'libwebp',
+            'libssl', 'libcrypto', 'libgssapi_krb5', 'libkrb5', 'libcom_err',
+            'libresolv', 'libnss_dns', 'libnss_files', 'libnss_compat',
+            'libdbus', 'libsystemd', 'libudev', 'libegl', 'libgl', 'libgles',
+            'libasound', 'libpulse', 'libgtk', 'libgdk', 'libglib', 'libgobject',
+            'libgio', 'libcairo', 'libpango', 'libatk', 'libgdk_pixbuf',
+            'libsqlite3', 'libxml2', 'libxslt', 'libcurl', 'libnghttp2',
+            'libreadline', 'libncurses', 'libtinfo', 'libhistory',
+            'libffi', 'libgmp', 'libmpfr', 'libmpc', 'libisl'
+        }
+        
+        build_time_dependencies = {
+            # Build tools that shouldn't be runtime dependencies
+            'gcc', 'g++', 'cc', 'c++', 'clang', 'clang++', 'rustc', 'cargo',
+            'make', 'cmake', 'ninja', 'meson', 'autotools', 'autoconf', 'automake',
+            'libtool', 'pkg-config', 'yacc', 'bison', 'flex', 'm4', 'perl',
+            'python', 'python3', 'node', 'nodejs', 'ruby', 'gem', 'java', 'javac',
+            'mvn', 'gradle', 'go', 'rust', 'npm', 'pip', 'pecl',
+            'doxygen', 'sphinx', 'pandoc', 'groff', 'texinfo', 'help2man',
+            # Rust-specific build dependencies (crates)
+            'generic_format_parser', 'serde', 'serde_derive', 'serde_json', 'tokio',
+            'log', 'env_logger', 'clap', 'anyhow', 'thiserror', 'rayon', 'crossbeam',
+            'regex', 'lazy_static', 'once_cell', 'parking_lot', 'rand', 'uuid',
+            'chrono', 'time', 'bytes', 'futures', 'async_trait',
+            'hyper', 'reqwest', 'toml', 'yaml', 'config',
+            'tracing', 'slog', 'fern', 'pretty_env_logger',
+            'criterion', 'proptest', 'quickcheck', 'mockall', 'tempfile'
+        }
+        
         for dep in dependencies:
-            # This is a simplified conversion
-            # In a real implementation, you'd need more sophisticated mapping
-            dep_lower = dep.lower()
+            dep_lower = dep.lower().strip()
             
-            # Common package name to resource reference mappings
-            if dep_lower.startswith("lib"):
-                # Library dependency
+            # Skip empty dependencies
+            if not dep_lower:
+                continue
+                
+            # Filter out system libraries - they're always present and shouldn't be explicit dependencies
+            if dep_lower in system_libraries:
+                logger.debug(f"Filtering out system library dependency: {dep}")
+                continue
+                
+            # Filter out build-time dependencies - they're not needed at runtime
+            if dep_lower in build_time_dependencies:
+                logger.debug(f"Filtering out build-time dependency: {dep}")
+                continue
+                
+            # Filter out Rust crates (they typically end with common patterns)
+            if (dep_lower.endswith('_parser') or dep_lower.endswith('_generator') or
+                dep_lower.endswith('_serializer') or dep_lower.endswith('_deserializer') or
+                dep_lower.endswith('_derive') or dep_lower.endswith('_macro') or
+                dep_lower.startswith('serde_') or dep_lower.startswith('tokio_') or
+                dep_lower.startswith('tracing_') or dep_lower.startswith('async_') or
+                ('_' in dep_lower and any(suffix in dep_lower for suffix in ['_io', '_util', '_core', '_base', '_common']))):
+                logger.debug(f"Filtering out Rust crate dependency: {dep}")
+                continue
+            
+            # Only process dependencies that are likely to be actual runtime dependencies
+            # Common package name to resource reference mappings for valid runtime dependencies
+            if dep_lower.startswith("lib") and not dep_lower.startswith("libformat_parser"):
+                # Library dependency (but not our internal ones)
                 lib_name = dep_lower.replace("lib", "")
                 resource_refs.append(f"lib:{dep_lower}")
             elif dep_lower.endswith("-dev") or dep_lower.endswith("-devel"):
-                # Development dependency
-                base_name = dep_lower.replace("-dev", "").replace("-devel", "")
-                resource_refs.append(f"inc:{base_name}")
-            elif dep_lower in ["gcc", "clang", "rustc"]:
-                # Compiler dependency
+                # Development dependency - skip for runtime
+                logger.debug(f"Skipping development dependency for runtime: {dep}")
+                continue
+            elif dep_lower in ["bash", "sh", "zsh", "fish", "dash"]:
+                # Shell dependencies
                 resource_refs.append(f"bin:{dep_lower}")
-            elif dep_lower in ["python", "python3", "node", "nodejs"]:
-                # Runtime dependency
+            elif dep_lower in ["perl", "python", "python3", "ruby", "java", "node", "nodejs"]:
+                # Runtime interpreter dependencies
                 resource_refs.append(f"bin:{dep_lower}")
-            else:
-                # Default to bin for unknown dependencies
+            elif dep_lower in ["gzip", "bzip2", "xz", "zip", "unzip", "tar"]:
+                # Compression utilities
                 resource_refs.append(f"bin:{dep_lower}")
+            else:
+                # For unknown dependencies, be conservative and don't include them
+                # rather than potentially creating invalid dependencies
+                logger.debug(f"Skipping unknown dependency type: {dep}")
+                continue
         
         return resource_refs
 

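Note: a quick sanity check of the new filtering, assuming a default ManifestGenerator() constructor and calling the method per dependency as debug_dependency_detection.py does above; expected values follow the branch order of the hunk. Note in particular that interpreters such as python3 sit in build_time_dependencies, so they are filtered before the runtime-interpreter branch can keep them.

from src.autusm.manifest import ManifestGenerator

gen = ManifestGenerator()

cases = {
    "libz": [],                   # system library, filtered
    "gcc": [],                    # build-time tool, filtered
    "generic_format_parser": [],  # Rust crate, filtered
    "zlib-dev": [],               # development package, skipped for runtime
    "python3": [],                # caught by the build-time filter first
    "libpcap": ["lib:libpcap"],   # ordinary library, kept
    "bash": ["bin:bash"],         # shell, kept
    "gzip": ["bin:gzip"],         # compression utility, kept
    "somethingelse": [],          # unknown, conservatively dropped
}
for dep, expected in cases.items():
    got = gen._convert_to_resource_refs([dep])
    assert got == expected, f"{dep}: expected {expected}, got {got}"
print("all conversion cases behave as expected")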
+ 38 - 16
src/autusm/metadata.py

@@ -105,33 +105,43 @@ class MetadataExtractor:
             # Find and process package files
             package_files = self._find_package_files(source_dir)
             
+            # Track which file types we've already processed to prioritize root-level files
+            processed_file_types = set()
+            
             for file_path in package_files:
                 relative_path = file_path.relative_to(source_dir)
                 package_info.metadata_files.append(str(relative_path))
                 
-                # Extract metadata based on file type
-                if file_path.name == "package.json":
+                # Extract metadata based on file type, but only if we haven't processed this type yet
+                if file_path.name == "package.json" and "package.json" not in processed_file_types:
                     self._extract_from_package_json(file_path, package_info)
-                elif file_path.name == "setup.py":
+                    processed_file_types.add("package.json")
+                elif file_path.name == "setup.py" and "setup.py" not in processed_file_types:
                     self._extract_from_setup_py(file_path, package_info)
-                elif file_path.name == "pyproject.toml":
+                    processed_file_types.add("setup.py")
+                elif file_path.name == "pyproject.toml" and "pyproject.toml" not in processed_file_types:
                     self._extract_from_pyproject_toml(file_path, package_info)
-                elif file_path.name == "Cargo.toml":
+                    processed_file_types.add("pyproject.toml")
+                elif file_path.name == "Cargo.toml" and "Cargo.toml" not in processed_file_types:
                     self._extract_from_cargo_toml(file_path, package_info)
-                elif file_path.name == "composer.json":
+                    processed_file_types.add("Cargo.toml")
+                elif file_path.name == "composer.json" and "composer.json" not in processed_file_types:
                     self._extract_from_composer_json(file_path, package_info)
-                elif file_path.name == "pom.xml":
+                    processed_file_types.add("composer.json")
+                elif file_path.name == "pom.xml" and "pom.xml" not in processed_file_types:
                     self._extract_from_pom_xml(file_path, package_info)
-                elif file_path.name == "build.gradle":
+                    processed_file_types.add("pom.xml")
+                elif file_path.name == "build.gradle" and "build.gradle" not in processed_file_types:
                     self._extract_from_build_gradle(file_path, package_info)
+                    processed_file_types.add("build.gradle")
             
             # Extract additional metadata from common locations
             self._extract_from_readme(source_dir, package_info)
             self._extract_from_license_files(source_dir, package_info)
             self._extract_from_git_info(source_dir, package_info)
             
-            # If we still don't have a name or version, try to derive from URL/filename
-            if url and (not package_info.name or not package_info.version):
+            # Always run URL/filename extraction; high-confidence results may override file-based metadata
+            if url:
                 self._extract_from_filename(url, package_info)
             
             # If we still don't have a name, try to derive from directory
@@ -152,7 +162,7 @@ class MetadataExtractor:
             source_dir: Path to the source directory
             
         Returns:
-            List of package configuration file paths
+            List of package configuration file paths, sorted by proximity to root
         """
         package_files = []
         
@@ -167,10 +177,16 @@ class MetadataExtractor:
                 for package_type, patterns in self.package_patterns.items():
                     for pattern in patterns:
                         if self._match_pattern(file, pattern):
-                            package_files.append(file_path)
+                            # Calculate depth relative to source_dir
+                            depth = len(file_path.relative_to(source_dir).parts) - 1
+                            package_files.append((file_path, depth))
                             break
         
-        return package_files
+        # Sort by depth (0 = root directory, 1 = one level deep, etc.)
+        package_files.sort(key=lambda x: x[1])
+        
+        # Return just the paths, now sorted by proximity to root
+        return [file_path for file_path, _ in package_files]
 
     def _match_pattern(self, filename: str, pattern: str) -> bool:
         """Check if a filename matches a pattern.
@@ -794,14 +810,20 @@ class MetadataExtractor:
             # Parse common patterns
             name, version, confidence = self._parse_filename_pattern(base_name)
             
-            # Only use if we haven't already found name/version
-            if name and not package_info.name:
+            # Always use filename extraction if it has reasonable confidence
+            if name and confidence >= 0.6:
                 package_info.name = name
                 logger.debug(f"Extracted name from filename: {name} (confidence: {confidence})")
+            elif name and not package_info.name:
+                package_info.name = name
+                logger.debug(f"Extracted name from filename (fallback): {name} (confidence: {confidence})")
                 
-            if version and not package_info.version:
+            if version and confidence >= 0.6:
                 package_info.version = version
                 logger.debug(f"Extracted version from filename: {version} (confidence: {confidence})")
+            elif version and not package_info.version:
+                package_info.version = version
+                logger.debug(f"Extracted version from filename (fallback): {version} (confidence: {confidence})")
                 
             # Always store confidence level for potential user confirmation
             package_info.extra_data["filename_confidence"] = confidence

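Note: a small illustration of why the depth sort in _find_package_files matters, assuming both filenames match the extractor's package patterns (the vendor/helper layout is just an example): the root-level manifest now comes first, and the processed_file_types guard means a deeper file of the same type can no longer win.

import tempfile
from pathlib import Path

from src.autusm.metadata import MetadataExtractor

with tempfile.TemporaryDirectory() as tmp:
    root = Path(tmp)
    (root / "pyproject.toml").touch()   # depth 0
    crate = root / "vendor" / "helper"
    crate.mkdir(parents=True)
    (crate / "Cargo.toml").touch()      # depth 2

    files = MetadataExtractor()._find_package_files(root)
    # Sorted by depth: the root pyproject.toml precedes the nested
    # Cargo.toml, so its metadata is extracted first.
    assert files[0].name == "pyproject.toml"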
+ 1 - 5
src/autusm/usm_integration.py

@@ -60,8 +60,7 @@ class USMIntegration:
                 cmd,
                 cwd=source_dir,
                 capture_output=True,
-                text=True,
-                timeout=300  # 5 minutes timeout
+                text=True
             )
             
             # Log command result separately to ensure no contamination
@@ -91,9 +90,6 @@ class USMIntegration:
             logger.info(f"Got {len(autoprovides)} autoprovides from USM")
             return autoprovides
             
-        except subprocess.TimeoutExpired:
-            logger.error("USM autoprovides timed out")
-            raise USMIntegrationError("USM autoprovides timed out")
         except subprocess.SubprocessError as e:
             logger.error(f"USM autoprovides failed: {e}")
             raise USMIntegrationError(f"USM autoprovides failed: {e}")

+ 142 - 0
test_comprehensive_build_system_detection.py

@@ -0,0 +1,142 @@
+#!/usr/bin/env python3
+"""
+Comprehensive test script for build system detection with depth-based prioritization.
+"""
+
+import sys
+import tempfile
+from pathlib import Path
+
+from src.autusm.analyzer import SourceAnalyzer
+
+def test_case_1_autotools_at_root():
+    """Test case: Autotools at root, cargo in subdirectory (like GCC)."""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        base_dir = Path(temp_dir)
+        
+        # Root level autotools
+        (base_dir / "configure.ac").touch()
+        (base_dir / "Makefile.am").touch()
+        
+        # Subdirectory with cargo
+        libformat_dir = base_dir / "libformat_parser"
+        libformat_dir.mkdir()
+        (libformat_dir / "Cargo.toml").touch()
+        
+        analyzer = SourceAnalyzer()
+        build_system = analyzer.detect_build_system(base_dir)
+        
+        assert build_system.type.value == "autotools", f"Expected autotools, got {build_system.type.value}"
+        print("✓ Test 1 passed: Autotools at root prioritized over cargo in subdirectory")
+        return True
+
+def test_case_2_multiple_build_systems_at_root():
+    """Test case: Multiple build systems at root level (should use priority)."""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        base_dir = Path(temp_dir)
+        
+        # Multiple build systems at root
+        (base_dir / "configure.ac").touch()  # autotools
+        (base_dir / "CMakeLists.txt").touch()  # cmake
+        (base_dir / "meson.build").touch()  # meson
+        
+        analyzer = SourceAnalyzer()
+        build_system = analyzer.detect_build_system(base_dir)
+        
+        # CMAKE has priority 5, AUTOTOOLS has 4, MESON has 6
+        # MESON should be selected due to highest priority at same depth
+        assert build_system.type.value == "meson", f"Expected meson, got {build_system.type.value}"
+        print("✓ Test 2 passed: Meson selected due to highest priority at same depth")
+        return True
+
+def test_case_3_only_subdirectory_build_systems():
+    """Test case: Build systems only in subdirectories (should use priority)."""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        base_dir = Path(temp_dir)
+        
+        # Create subdirectories with different build systems
+        cargo_dir = base_dir / "rust_component"
+        cargo_dir.mkdir()
+        (cargo_dir / "Cargo.toml").touch()
+        
+        npm_dir = base_dir / "js_component"
+        npm_dir.mkdir()
+        (npm_dir / "package.json").touch()
+        
+        analyzer = SourceAnalyzer()
+        build_system = analyzer.detect_build_system(base_dir)
+        
+        # NPM has priority 8, CARGO has 7
+        # NPM should be selected due to higher priority at same depth
+        assert build_system.type.value == "npm", f"Expected npm, got {build_system.type.value}"
+        print("✓ Test 3 passed: NPM selected due to higher priority at same depth")
+        return True
+
+def test_case_4_nested_build_systems():
+    """Test case: Build systems at different depths."""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        base_dir = Path(temp_dir)
+        
+        # Shallow build system
+        cmake_dir = base_dir / "cmake_component"
+        cmake_dir.mkdir()
+        (cmake_dir / "CMakeLists.txt").touch()
+        
+        # Deeper build system
+        cargo_dir = base_dir / "deep" / "rust_component"
+        cargo_dir.mkdir(parents=True)
+        (cargo_dir / "Cargo.toml").touch()
+        
+        analyzer = SourceAnalyzer()
+        build_system = analyzer.detect_build_system(base_dir)
+        
+        # CMAKE should be selected due to shallower depth (1 vs 2)
+        assert build_system.type.value == "cmake", f"Expected cmake, got {build_system.type.value}"
+        print("✓ Test 4 passed: Shallower build system prioritized regardless of priority")
+        return True
+
+def test_case_5_no_build_system():
+    """Test case: No build system detected."""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        base_dir = Path(temp_dir)
+        
+        # Just some source files, no build system
+        (base_dir / "main.c").touch()
+        (base_dir / "utils.h").touch()
+        
+        analyzer = SourceAnalyzer()
+        build_system = analyzer.detect_build_system(base_dir)
+        
+        assert build_system.type.value == "unknown", f"Expected unknown, got {build_system.type.value}"
+        print("✓ Test 5 passed: Unknown build system when none detected")
+        return True
+
+def run_all_tests():
+    """Run all test cases."""
+    tests = [
+        test_case_1_autotools_at_root,
+        test_case_2_multiple_build_systems_at_root,
+        test_case_3_only_subdirectory_build_systems,
+        test_case_4_nested_build_systems,
+        test_case_5_no_build_system
+    ]
+    
+    passed = 0
+    failed = 0
+    
+    for test in tests:
+        try:
+            if test():
+                passed += 1
+            else:
+                failed += 1
+        except Exception as e:
+            print(f"✗ Test {test.__name__} failed with exception: {e}")
+            failed += 1
+    
+    print(f"\nTest Results: {passed} passed, {failed} failed")
+    return failed == 0
+
+if __name__ == "__main__":
+    success = run_all_tests()
+    sys.exit(0 if success else 1)

+ 54 - 0
test_gcc_build_system_detection.py

@@ -0,0 +1,54 @@
+#!/usr/bin/env python3
+"""
+Test script to reproduce the GCC build system detection issue.
+"""
+
+import sys
+import tempfile
+from pathlib import Path
+
+from src.autusm.analyzer import SourceAnalyzer
+
+def create_mock_gcc_structure(base_dir):
+    """Create a mock GCC directory structure with both autotools and cargo files."""
+    
+    # Create root-level autotools files
+    (base_dir / "configure.ac").touch()
+    (base_dir / "Makefile.am").touch()
+    (base_dir / "autogen.sh").touch()
+    
+    # Create a subdirectory with cargo files (like libformat_parser)
+    libformat_dir = base_dir / "libformat_parser"
+    libformat_dir.mkdir()
+    (libformat_dir / "Cargo.toml").touch()
+    (libformat_dir / "Cargo.lock").touch()
+    
+    # Add some other subdirectories to make it more realistic
+    (base_dir / "gcc").mkdir()
+    (base_dir / "libstdc++-v3").mkdir()
+    (base_dir / "libgcc").mkdir()
+
+def test_build_system_detection():
+    """Test that build system detection prioritizes root-level build systems."""
+    
+    with tempfile.TemporaryDirectory() as temp_dir:
+        base_dir = Path(temp_dir)
+        create_mock_gcc_structure(base_dir)
+        
+        analyzer = SourceAnalyzer()
+        build_system = analyzer.detect_build_system(base_dir)
+        
+        print(f"Detected build system: {build_system.type.value}")
+        print(f"Config files: {build_system.config_files}")
+        
+        # This should detect AUTOTOOLS, not CARGO
+        if build_system.type.value == "autotools":
+            print("✓ PASS: Correctly detected autotools")
+            return True
+        else:
+            print("✗ FAIL: Incorrectly detected cargo instead of autotools")
+            return False
+
+if __name__ == "__main__":
+    success = test_build_system_detection()
+    sys.exit(0 if success else 1)

+ 82 - 0
test_metadata_detection.py

@@ -0,0 +1,82 @@
+#!/usr/bin/env python3
+"""
+Test script to reproduce the metadata detection issue with GCC.
+"""
+
+import sys
+import tempfile
+import urllib.request
+import tarfile
+from pathlib import Path
+
+# Add src to path
+sys.path.insert(0, 'src')
+
+from autusm.metadata import MetadataExtractor
+from autusm.models import PackageInfo
+
+def test_gcc_metadata_extraction():
+    """Test metadata extraction with GCC source code."""
+    
+    # GCC URL
+    gcc_url = "https://mirrors.middlendian.com/gnu//gcc/gcc-15.1.0/gcc-15.1.0.tar.xz"
+    
+    # Create a temporary directory for extraction
+    with tempfile.TemporaryDirectory() as temp_dir:
+        temp_path = Path(temp_dir)
+        archive_path = temp_path / "gcc.tar.xz"
+        extract_path = temp_path / "extracted"
+        
+        print(f"Downloading GCC from {gcc_url}...")
+        try:
+            # Download the archive
+            urllib.request.urlretrieve(gcc_url, archive_path)
+            print("Download completed.")
+            
+            # Extract the archive
+            print("Extracting archive...")
+            extract_path.mkdir(exist_ok=True)
+            with tarfile.open(archive_path, "r:xz") as tar:
+                tar.extractall(extract_path)
+            print("Extraction completed.")
+            
+            # Find the extracted directory
+            extracted_dirs = [d for d in extract_path.iterdir() if d.is_dir()]
+            if not extracted_dirs:
+                print("Error: No extracted directory found")
+                return False
+                
+            source_dir = extracted_dirs[0]
+            print(f"Source directory: {source_dir}")
+            
+            # Test metadata extraction
+            print("\nTesting metadata extraction...")
+            extractor = MetadataExtractor()
+            package_info = extractor.extract(source_dir, gcc_url)
+            
+            # Print the results
+            print(f"\nExtracted package name: {package_info.name}")
+            print(f"Version: {package_info.version}")
+            print(f"Summary: {package_info.summary}")
+            print(f"URL: {package_info.url}")
+            print(f"Metadata files found: {package_info.metadata_files}")
+            
+            # Check if it correctly identified GCC
+            if package_info.name and "gcc" in package_info.name.lower():
+                print("\n✅ SUCCESS: Correctly identified GCC as the main package")
+                return True
+            else:
+                print(f"\n❌ FAILURE: Identified '{package_info.name}' instead of GCC")
+                return False
+                
+        except Exception as e:
+            print(f"Error during test: {e}")
+            import traceback
+            traceback.print_exc()
+            return False
+
+if __name__ == "__main__":
+    success = test_gcc_metadata_extraction()
+    sys.exit(0 if success else 1)

+ 50 - 0
test_no_timeout.py

@@ -0,0 +1,50 @@
+#!/usr/bin/env python3
+"""
+Test script to verify that USM autoprovides command no longer has a timeout.
+"""
+
+import sys
+import time
+from pathlib import Path
+
+# Add the src directory to the path so we can import autusm
+sys.path.insert(0, str(Path(__file__).parent / "src"))
+
+from autusm.usm_integration import USMIntegration
+
+def test_no_timeout():
+    """Test that autoprovides can run without timeout."""
+    print("Testing USM autoprovides without timeout...")
+    
+    # Initialize USM integration
+    usm = USMIntegration()
+    
+    # Check if USM is available
+    if not usm.is_available():
+        print("USM is not available on this system. Skipping test.")
+        return True
+    
+    print("USM is available. Testing autoprovides without timeout...")
+    
+    # Try to get autoprovides for the current directory
+    # This will test if the timeout has been removed
+    try:
+        start_time = time.time()
+        autoprovides = usm.get_autoprovides(Path.cwd())
+        end_time = time.time()
+        
+        elapsed_time = end_time - start_time
+        print(f"Autoprovides completed in {elapsed_time:.2f} seconds")
+        print(f"Found {len(autoprovides)} autoprovides")
+        
+        # If we get here without a timeout error, the test passes
+        print("SUCCESS: No timeout occurred!")
+        return True
+        
+    except Exception as e:
+        print(f"ERROR: {e}")
+        return False
+
+if __name__ == "__main__":
+    success = test_no_timeout()
+    sys.exit(0 if success else 1)