Răsfoiți Sursa

Basic working implementation

clanker 1 lună în urmă
părinte
comite
84b8c5728e

+ 159 - 0
debug_autoprovides.py

@@ -0,0 +1,159 @@
+#!/usr/bin/env python3
+"""
+Debug script to test USM autoprovides functionality.
+"""
+
+import sys
+import tempfile
+from pathlib import Path
+
+# Add the src directory to the path
+sys.path.insert(0, 'src')
+
+from autusm.usm_integration import USMIntegration
+from autusm.manifest import ManifestGenerator
+from autusm.models import PackageInfo, BuildSystem, BuildSystemType, License, LicenseCategory
+
+def test_usm_integration():
+    """Test USM integration directly."""
+    print("Testing USM integration...")
+    
+    # Create USM integration instance
+    usm_integration = USMIntegration()
+    
+    # Check if USM is available
+    if not usm_integration.is_available():
+        print("USM is not available on this system")
+        return False
+    
+    # Create a temporary directory with a simple manifest
+    with tempfile.TemporaryDirectory() as temp_dir:
+        temp_path = Path(temp_dir)
+        
+        # Create a simple manifest file
+        manifest_content = """{
+  "name": "test",
+  "version": "1.0.0",
+  "summary": "Test package",
+  "licences": [
+    {
+      "name": "MIT",
+      "text": "LICENSE",
+      "category": "open-source"
+    }
+  ],
+  "provides": {
+    "bin:test": "as-expected"
+  },
+  "depends": {
+    "runtime": [],
+    "build": [],
+    "manage": []
+  },
+  "flags": [],
+  "execs": {
+    "build": "scripts/build"
+  }
+}"""
+        
+        manifest_path = temp_path / "MANIFEST.usm"
+        with open(manifest_path, "w") as f:
+            f.write(manifest_content)
+        
+        print(f"Created test manifest at: {manifest_path}")
+        
+        # Try to get autoprovides
+        try:
+            autoprovides = usm_integration.get_autoprovides(temp_path)
+            print(f"Got autoprovides: {autoprovides}")
+            return True
+        except Exception as e:
+            print(f"Error getting autoprovides: {e}")
+            import traceback
+            traceback.print_exc()
+            return False
+
+def test_parsing_with_garbage():
+    """Test parsing with garbage output similar to what we're seeing."""
+    print("\nTesting parsing with garbage output...")
+    
+    usm_integration = USMIntegration()
+    
+    # Test with the garbage pattern we're seeing
+    garbage_output = '"provides": {\n    "type": "reg",\n    "path": "{",\n    "pathBase": "source"\n  }'
+    
+    print(f"Testing with garbage output: {repr(garbage_output)}")
+    
+    try:
+        parsed = usm_integration._parse_autoprovides(garbage_output)
+        print(f"Parsed result: {parsed}")
+        return True
+    except Exception as e:
+        print(f"Error parsing garbage: {e}")
+        import traceback
+        traceback.print_exc()
+        return False
+
+def test_manifest_update():
+    """Test manifest update with problematic data."""
+    print("\nTesting manifest update...")
+    
+    # Create test package info
+    package_info = PackageInfo(
+        name="test-package",
+        version="1.0.0",
+        summary="A test package",
+        licenses=[License(name="MIT", text="LICENSE", category=LicenseCategory.OPEN_SOURCE)]
+    )
+    
+    # Create test build system
+    build_system = BuildSystem(type=BuildSystemType.MAKE)
+    
+    # Create manifest generator
+    manifest_generator = ManifestGenerator()
+    
+    # Generate initial manifest
+    manifest = manifest_generator.generate(package_info, build_system)
+    
+    print(f"Initial manifest provides: {manifest.provides}")
+    
+    # Try to update with garbage data
+    garbage_autoprovides = {
+        '"provides":': {
+            "type": "reg",
+            "path": "{",
+            "pathBase": "source"
+        }
+    }
+    
+    try:
+        updated_manifest = manifest_generator.update_with_autoprovides(manifest, garbage_autoprovides)
+        print(f"Updated manifest provides: {updated_manifest.provides}")
+        
+        # Test JSON serialization
+        json_str = updated_manifest.to_json()
+        print("JSON serialization result:")
+        print(json_str)
+        
+        return True
+    except Exception as e:
+        print(f"Error updating manifest: {e}")
+        import traceback
+        traceback.print_exc()
+        return False
+
+if __name__ == "__main__":
+    print("Running debug tests...\n")
+    
+    try:
+        test_usm_integration()
+        test_parsing_with_garbage()
+        test_manifest_update()
+        
+        print("\n✅ All debug tests completed")
+        
+    except Exception as e:
+        print(f"\n❌ Debug test failed: {e}")
+        import traceback
+        traceback.print_exc()
+        sys.exit(1)

+ 106 - 17
src/autusm/cli.py

@@ -8,6 +8,7 @@ handling argument parsing and coordinating the various components.
 import os
 import sys
 import logging
+import shutil
 from pathlib import Path
 from typing import Optional
 
@@ -82,6 +83,12 @@ logger = logging.getLogger(__name__)
     default=False,
     help="Skip USM availability check and autoprovides"
 )
+@click.option(
+    "--clean-source",
+    is_flag=True,
+    default=False,
+    help="Clean up extracted source files after package creation"
+)
 def main(
     url: str,
     output_dir: str,
@@ -92,7 +99,8 @@ def main(
     non_interactive: bool,
     verbose: bool,
     quiet: bool,
-    skip_usm_check: bool
+    skip_usm_check: bool,
+    clean_source: bool
 ) -> None:
     """
     Generate USM manifest from source archive URL.
@@ -126,14 +134,19 @@ def main(
 
         logger.info(f"Using working directory: {work_path}")
 
+        # Set up output directory
+        output_path = Path(output_dir)
+        output_path.mkdir(parents=True, exist_ok=True)
+        logger.info(f"Using output directory: {output_path}")
+
         # Step 1: Download the source archive
         logger.info(f"Downloading source archive from: {url}")
         archive_path = download_manager.download(url, work_path)
         logger.info(f"Downloaded to: {archive_path}")
 
-        # Step 2: Extract the archive
-        logger.info("Extracting archive...")
-        source_dir = extractor.extract(archive_path, work_path)
+        # Step 2: Extract the archive directly to the output directory
+        logger.info("Extracting archive to output directory...")
+        source_dir = extractor.extract(archive_path, output_path)
         logger.info(f"Extracted to: {source_dir}")
 
         # Step 3: Analyze the source code
@@ -152,6 +165,10 @@ def main(
             package_info.version = version
         if summary:
             package_info.summary = summary
+        
+        # Always preserve the original URL from command line
+        # This takes precedence over any URLs found in metadata files
+        package_info.url = url
 
         # Set the source directory
         package_info.source_dir = str(source_dir)
@@ -161,7 +178,7 @@ def main(
 
         # Step 6: Generate USM scripts
         logger.info("Generating USM scripts...")
-        scripts_dir = Path(output_dir) / "scripts"
+        scripts_dir = output_path / "scripts"
         scripts_dir.mkdir(parents=True, exist_ok=True)
         
         script_generator.generate_scripts(
@@ -174,21 +191,40 @@ def main(
         logger.info("Generating USM manifest...")
         manifest = manifest_generator.generate(package_info, build_system)
 
-        # Step 8: Get autoprovides from USM if available
+        # Step 8: Write the initial manifest file
+        manifest_file_path = output_path / "MANIFEST.usm"
+        with open(manifest_file_path, "w") as f:
+            f.write(manifest.to_json())
+        
+        logger.info(f"Initial USM manifest written to: {manifest_file_path}")
+
+        # Step 9: Get autoprovides from USM if available
         if not skip_usm_check and usm_integration.is_available():
             logger.info("Getting autoprovides from USM...")
-            autoprovides = usm_integration.get_autoprovides(source_dir)
+            # Run autoprovides in the output directory where both MANIFEST.usm and source are located
+            autoprovides = usm_integration.get_autoprovides(output_path)
             if autoprovides:
-                # Merge autoprovides into manifest
-                for resource_ref, resource in autoprovides.items():
-                    manifest.provides[resource_ref] = resource
-
-        # Step 9: Write the manifest file
-        output_path = Path(output_dir) / "MANIFEST.usm"
-        with open(output_path, "w") as f:
-            f.write(manifest.to_json())
+                logger.info(f"Updating manifest with {len(autoprovides)} autoprovides")
+                # Update manifest with autoprovides
+                manifest = manifest_generator.update_with_autoprovides(manifest, autoprovides)
+                
+                # Rewrite the manifest file with updated provides section
+                manifest_file_path = output_path / "MANIFEST.usm"
+                with open(manifest_file_path, "w") as f:
+                    f.write(manifest.to_json())
+                
+                logger.info(f"Updated USM manifest with autoprovides: {manifest_file_path}")
+            else:
+                logger.warning("No autoprovides returned from USM")
+        else:
+            if skip_usm_check:
+                logger.info("Skipping USM autoprovides as requested")
+            else:
+                logger.warning("USM is not available, skipping autoprovides")
         
-        logger.info(f"USM manifest written to: {output_path}")
+        # Step 10: Clean up source files if requested
+        if clean_source:
+            _cleanup_source_files(source_dir, output_path, logger)
         
         # Print summary
         if not quiet:
@@ -197,7 +233,7 @@ def main(
             click.echo(f"Summary: {package_info.summary}")
             click.echo(f"Build System: {build_system.type.value}")
             click.echo(f"Output files:")
-            click.echo(f"  - {output_path}")
+            click.echo(f"  - {manifest_file_path}")
             click.echo(f"  - {scripts_dir}/ (USM scripts)")
 
     except AutusmError as e:
@@ -214,5 +250,58 @@ def main(
         sys.exit(1)
 
 
+def _cleanup_source_files(source_dir: Path, output_path: Path, logger) -> None:
+    """
+    Clean up extracted source files while preserving USM package files.
+    
+    Args:
+        source_dir: Path to the extracted source directory
+        output_path: Output directory containing both source and USM files
+        logger: Logger instance for reporting cleanup actions
+    """
+    try:
+        # Files and directories to preserve (USM package files)
+        preserve_names = {"MANIFEST.usm", "scripts"}
+        
+        # Count files before cleanup for reporting
+        total_files = 0
+        removed_files = 0
+        removed_dirs = 0
+        
+        # Count total files in source directory
+        for item in source_dir.rglob("*"):
+            total_files += 1
+        
+        logger.info(f"Starting cleanup of source files in {source_dir}")
+        
+        # Remove all files and directories except those we want to preserve
+        for item in source_dir.iterdir():
+            if item.name not in preserve_names:
+                if item.is_file():
+                    item.unlink()
+                    removed_files += 1
+                    logger.debug(f"Removed file: {item}")
+                elif item.is_dir():
+                    shutil.rmtree(item)
+                    removed_dirs += 1
+                    logger.debug(f"Removed directory: {item}")
+        
+        # Count remaining files
+        remaining_files = 0
+        for item in source_dir.rglob("*"):
+            remaining_files += 1
+        
+        logger.info(f"Cleanup completed: removed {removed_files} files and {removed_dirs} directories")
+        logger.info(f"Preserved {remaining_files} USM package files")
+        
+        # If the source directory is now empty or only contains USM files,
+        # we can consider removing it entirely if it's a subdirectory of output_path
+        # but we'll keep it to maintain the expected structure
+        
+    except Exception as e:
+        logger.error(f"Failed to clean up source files: {e}")
+        # Don't raise the exception - cleanup failure shouldn't break the whole process
+
+
 if __name__ == "__main__":
     main()

+ 30 - 12
src/autusm/extractor.py

@@ -33,14 +33,14 @@ class ArchiveExtractor:
         }
 
     def extract(self, archive_path: Path, destination: Path) -> Path:
-        """Extract an archive to the destination directory.
+        """Extract an archive to the destination directory with flattened structure.
         
         Args:
             archive_path: Path to the archive file
             destination: Directory to extract to
             
         Returns:
-            Path to the extracted source directory
+            Path to the extracted source directory (now always the destination)
             
         Raises:
             ExtractionError: If extraction fails
@@ -55,17 +55,35 @@ class ArchiveExtractor:
             # Create destination directory if it doesn't exist
             destination.mkdir(parents=True, exist_ok=True)
             
-            logger.info(f"Extracting {archive_path} (format: {archive_format}) to {destination}")
-            
-            # Extract the archive
-            extract_func = self.supported_formats[archive_format]
-            extract_func(archive_path, destination)
-            
-            # Find the extracted source directory
-            source_dir = self._find_source_directory(destination)
+            # Create a temporary directory for extraction
+            import tempfile
+            with tempfile.TemporaryDirectory() as temp_dir:
+                temp_path = Path(temp_dir)
+                
+                logger.info(f"Extracting {archive_path} (format: {archive_format}) to temporary directory")
+                
+                # Extract to temporary directory first
+                extract_func = self.supported_formats[archive_format]
+                extract_func(archive_path, temp_path)
+                
+                # Find the extracted source directory
+                source_dir = self._find_source_directory(temp_path)
+                
+                # If source is in a subdirectory, move its contents to destination
+                if source_dir != temp_path:
+                    logger.info(f"Moving contents from {source_dir} to {destination}")
+                    for item in source_dir.iterdir():
+                        import shutil
+                        shutil.move(str(item), str(destination))
+                else:
+                    # If source is directly in temp directory, move its contents
+                    logger.info(f"Moving contents from {temp_path} to {destination}")
+                    for item in temp_path.iterdir():
+                        import shutil
+                        shutil.move(str(item), str(destination))
             
-            logger.info(f"Successfully extracted to {source_dir}")
-            return source_dir
+            logger.info(f"Successfully extracted to {destination}")
+            return destination
             
         except Exception as e:
             logger.error(f"Failed to extract {archive_path}: {e}")

+ 601 - 130
src/autusm/generator.py

@@ -66,6 +66,152 @@ class ScriptGenerator:
             }
         }
 
+    def _common_acquire_template(self, package_info: PackageInfo, build_system: BuildSystem) -> str:
+        """Generate a common acquire script with actual download and extraction functionality."""
+        url = package_info.url or ""
+        
+        return f"""#!/bin/sh
+# Acquire script for {package_info.name}
+
+# This script downloads and extracts the source code for {package_info.name}
+
+# Configuration
+SOURCE_URL="{url}"
+TEMP_DIR=$(mktemp -d)
+ARCHIVE_NAME=""
+EXTRACT_DIR="$PWD"
+
+# Error handling
+set -e
+trap 'rm -rf "$TEMP_DIR"' EXIT
+
+# Function to detect archive type
+detect_archive_type() {{
+    local filename="$1"
+    case "$filename" in
+        *.tar.gz|*.tgz) echo "tar.gz" ;;
+        *.tar.bz2|*.tbz2) echo "tar.bz2" ;;
+        *.tar) echo "tar" ;;
+        *.zip) echo "zip" ;;
+        *) echo "unknown" ;;
+    esac
+}}
+
+# Function to download using wget or curl
+download_file() {{
+    local url="$1"
+    local output="$2"
+    
+    echo "Downloading from $url..."
+    
+    if command -v wget >/dev/null 2>&1; then
+        wget -O "$output" "$url"
+    elif command -v curl >/dev/null 2>&1; then
+        curl -L -o "$output" "$url"
+    else
+        echo "Error: Neither wget nor curl is available for downloading"
+        exit 1
+    fi
+}}
+
+# Function to extract archive with strip-components to flatten directory structure
+extract_archive() {{
+    local archive="$1"
+    local extract_to="$2"
+    local archive_type="$3"
+    
+    echo "Extracting $archive (type: $archive_type) to $extract_to..."
+    
+    case "$archive_type" in
+        tar.gz|tgz)
+            # Use --strip-components=1 to remove the top-level directory
+            tar -xzf "$archive" -C "$extract_to" --strip-components=1
+            ;;
+        tar.bz2|tbz2)
+            # Use --strip-components=1 to remove the top-level directory
+            tar -xjf "$archive" -C "$extract_to" --strip-components=1
+            ;;
+        tar)
+            # Use --strip-components=1 to remove the top-level directory
+            tar -xf "$archive" -C "$extract_to" --strip-components=1
+            ;;
+        zip)
+            # For zip, we need to extract to temp and then move contents
+            local zip_temp_dir="$extract_temp_dir/zip_extract"
+            mkdir -p "$zip_temp_dir"
+            unzip -q "$archive" -d "$zip_temp_dir"
+            
+            # Find the top-level directory and move its contents up
+            for item in "$zip_temp_dir"/*; do
+                if [ -d "$item" ]; then
+                    # Move contents of the subdirectory to extract_to
+                    (cd "$item" && find . -mindepth 1 -maxdepth 1 -exec cp -r {{}} "$extract_to/" \\;)
+                    break
+                fi
+            done
+            rm -rf "$zip_temp_dir"
+            ;;
+        *)
+            echo "Error: Unsupported archive type: $archive_type"
+            exit 1
+            ;;
+    esac
+}}
+
+# Function to find source directory (now works with flattened extraction)
+find_source_dir() {{
+    local search_dir="$1"
+    
+    # With flattened extraction, we should check the current directory first
+    if [ -f "$search_dir/configure" ] || [ -f "$search_dir/CMakeLists.txt" ] || \
+       [ -f "$search_dir/meson.build" ] || [ -f "$search_dir/Makefile" ] || \
+       [ -f "$search_dir/setup.py" ] || [ -f "$search_dir/Cargo.toml" ] || \
+       [ -f "$search_dir/package.json" ]; then
+        echo "$search_dir"
+        return 0
+    fi
+    
+    # If no build system files found in current directory, return the search directory
+    echo "$search_dir"
+}}
+
+# Main execution
+if [ -z "$SOURCE_URL" ]; then
+    echo "Error: No source URL provided for {package_info.name}"
+    exit 1
+fi
+
+# Extract filename from URL
+ARCHIVE_NAME=$(basename "$SOURCE_URL")
+
+if [ -z "$ARCHIVE_NAME" ] || [ "$ARCHIVE_NAME" = "/" ]; then
+    # Generate a filename if none can be extracted
+    ARCHIVE_NAME="{package_info.name}_source.tar.gz"
+fi
+
+# Download the archive
+ARCHIVE_PATH="$TEMP_DIR/$ARCHIVE_NAME"
+download_file "$SOURCE_URL" "$ARCHIVE_PATH"
+
+# Detect archive type
+ARCHIVE_TYPE=$(detect_archive_type "$ARCHIVE_NAME")
+
+if [ "$ARCHIVE_TYPE" = "unknown" ]; then
+    echo "Error: Cannot determine archive type from filename: $ARCHIVE_NAME"
+    exit 1
+fi
+
+# Extract directly to current directory with strip-components to flatten structure
+echo "Extracting archive contents to current directory..."
+extract_archive "$ARCHIVE_PATH" "$EXTRACT_DIR" "$ARCHIVE_TYPE"
+
+# Verify the extraction was successful
+SOURCE_DIR=$(find_source_dir "$EXTRACT_DIR")
+echo "Source directory verified: $SOURCE_DIR"
+
+echo "Source acquired for {package_info.name}"
+"""
+
     def generate_scripts(self, package_info: PackageInfo, build_system: BuildSystem, output_dir: Path) -> None:
         """Generate USM scripts for a package.
         
@@ -125,21 +271,7 @@ class ScriptGenerator:
 
     def _autotools_acquire_template(self, package_info: PackageInfo, build_system: BuildSystem) -> str:
         """Generate acquire script for autotools-based packages."""
-        return f"""#!/bin/sh
-# Acquire script for {package_info.name}
-
-# This script is used to download the source code
-# It's typically used when creating USM packages from upstream sources
-
-# For autotools packages, the source is usually already available
-# If you need to download from a specific URL, uncomment and modify:
-# wget -O - {package_info.url or 'https://example.com/source.tar.gz'} | tar -xz
-
-# Or if using git:
-# git clone {package_info.url or 'https://github.com/example/repo.git'} .
-
-echo "Source acquired for {package_info.name}"
-"""
+        return self._common_acquire_template(package_info, build_system)
 
     def _autotools_build_template(self, package_info: PackageInfo, build_system: BuildSystem) -> str:
         """Generate build script for autotools-based packages."""
@@ -161,6 +293,38 @@ fi
 
 echo "Building {package_info.name} in $BUILD_DIR"
 
+# Function to find the source directory (updated for flattened extraction)
+find_source_dir() {{
+    local search_dir="$1"
+    
+    # With flattened extraction, check current directory first
+    if [ -f "$search_dir/configure" ] || [ -f "$search_dir/configure.ac" ] || [ -f "$search_dir/Makefile.am" ]; then
+        echo "$search_dir"
+        return 0
+    fi
+    
+    # Look for common indicators of an autotools source directory
+    for item in "$search_dir"/*; do
+        if [ -d "$item" ]; then
+            # Check for autotools files
+            if [ -f "$item/configure" ] || [ -f "$item/configure.ac" ] || [ -f "$item/Makefile.am" ]; then
+                echo "$item"
+                return 0
+            fi
+        fi
+    done
+    
+    # If still not found, return the search directory
+    echo "$search_dir"
+}}
+
+# Find the source directory
+SOURCE_DIR=$(find_source_dir ".")
+echo "Using source directory: $SOURCE_DIR"
+
+# Change to the source directory
+cd "$SOURCE_DIR"
+
 # Prepare the build
 if [ -f "autogen.sh" ]; then
     echo "Running autogen.sh..."
@@ -201,8 +365,37 @@ fi
 
 echo "Installing {package_info.name} to $INSTALL_DIR (type: $INSTALL_TYPE)"
 
+# Function to find the source directory (updated for flattened extraction)
+find_source_dir() {{
+    local search_dir="$1"
+    
+    # With flattened extraction, check current directory first
+    if [ -f "$search_dir/configure" ] || [ -f "$search_dir/configure.ac" ] || [ -f "$search_dir/Makefile.am" ]; then
+        echo "$search_dir"
+        return 0
+    fi
+    
+    # Look for common indicators of an autotools source directory
+    for item in "$search_dir"/*; do
+        if [ -d "$item" ]; then
+            # Check for autotools files
+            if [ -f "$item/configure" ] || [ -f "$item/configure.ac" ] || [ -f "$item/Makefile.am" ]; then
+                echo "$item"
+                return 0
+            fi
+        fi
+    done
+    
+    # If still not found, return the search directory
+    echo "$search_dir"
+}}
+
+# Find the source directory
+SOURCE_DIR=$(find_source_dir ".")
+echo "Using source directory: $SOURCE_DIR"
+
 # Change to the source directory
-cd "$BUILD_DIR"
+cd "$SOURCE_DIR"
 
 # Install the package
 echo "Installing..."
@@ -213,21 +406,7 @@ echo "Installation completed for {package_info.name}"
 
     def _cmake_acquire_template(self, package_info: PackageInfo, build_system: BuildSystem) -> str:
         """Generate acquire script for CMake-based packages."""
-        return f"""#!/bin/sh
-# Acquire script for {package_info.name}
-
-# This script is used to download the source code
-# It's typically used when creating USM packages from upstream sources
-
-# For CMake packages, the source is usually already available
-# If you need to download from a specific URL, uncomment and modify:
-# wget -O - {package_info.url or 'https://example.com/source.tar.gz'} | tar -xz
-
-# Or if using git:
-# git clone {package_info.url or 'https://github.com/example/repo.git'} .
-
-echo "Source acquired for {package_info.name}"
-"""
+        return self._common_acquire_template(package_info, build_system)
 
     def _cmake_build_template(self, package_info: PackageInfo, build_system: BuildSystem) -> str:
         """Generate build script for CMake-based packages."""
@@ -249,13 +428,42 @@ fi
 
 echo "Building {package_info.name} in $BUILD_DIR"
 
+# Function to find the source directory (updated for flattened extraction)
+find_source_dir() {{
+    local search_dir="$1"
+    
+    # With flattened extraction, check current directory first
+    if [ -f "$search_dir/CMakeLists.txt" ]; then
+        echo "$search_dir"
+        return 0
+    fi
+    
+    # Look for common indicators of a CMake source directory
+    for item in "$search_dir"/*; do
+        if [ -d "$item" ]; then
+            # Check for CMake files
+            if [ -f "$item/CMakeLists.txt" ]; then
+                echo "$item"
+                return 0
+            fi
+        fi
+    done
+    
+    # If still not found, return the search directory
+    echo "$search_dir"
+}}
+
+# Find the source directory
+SOURCE_DIR=$(find_source_dir ".")
+echo "Using source directory: $SOURCE_DIR"
+
 # Create build directory
 mkdir -p "$BUILD_DIR"
 cd "$BUILD_DIR"
 
 # Configure the build
 echo "Configuring with CMake..."
-cmake -DCMAKE_INSTALL_PREFIX=/usr{custom_args} "$PWD"/..
+cmake -DCMAKE_INSTALL_PREFIX=/usr{custom_args} "$SOURCE_DIR"
 
 # Build the package
 echo "Building..."
@@ -296,21 +504,7 @@ echo "Installation completed for {package_info.name}"
 
     def _meson_acquire_template(self, package_info: PackageInfo, build_system: BuildSystem) -> str:
         """Generate acquire script for Meson-based packages."""
-        return f"""#!/bin/sh
-# Acquire script for {package_info.name}
-
-# This script is used to download the source code
-# It's typically used when creating USM packages from upstream sources
-
-# For Meson packages, the source is usually already available
-# If you need to download from a specific URL, uncomment and modify:
-# wget -O - {package_info.url or 'https://example.com/source.tar.gz'} | tar -xz
-
-# Or if using git:
-# git clone {package_info.url or 'https://github.com/example/repo.git'} .
-
-echo "Source acquired for {package_info.name}"
-"""
+        return self._common_acquire_template(package_info, build_system)
 
     def _meson_build_template(self, package_info: PackageInfo, build_system: BuildSystem) -> str:
         """Generate build script for Meson-based packages."""
@@ -332,9 +526,38 @@ fi
 
 echo "Building {package_info.name} in $BUILD_DIR"
 
+# Function to find the source directory (updated for flattened extraction)
+find_source_dir() {{
+    local search_dir="$1"
+    
+    # With flattened extraction, check current directory first
+    if [ -f "$search_dir/meson.build" ]; then
+        echo "$search_dir"
+        return 0
+    fi
+    
+    # Look for common indicators of a Meson source directory
+    for item in "$search_dir"/*; do
+        if [ -d "$item" ]; then
+            # Check for Meson files
+            if [ -f "$item/meson.build" ]; then
+                echo "$item"
+                return 0
+            fi
+        fi
+    done
+    
+    # If still not found, return the search directory
+    echo "$search_dir"
+}}
+
+# Find the source directory
+SOURCE_DIR=$(find_source_dir ".")
+echo "Using source directory: $SOURCE_DIR"
+
 # Configure the build
 echo "Configuring with Meson..."
-meson setup "$BUILD_DIR" --prefix=/usr{custom_args}
+meson setup "$BUILD_DIR" --prefix=/usr{custom_args} "$SOURCE_DIR"
 
 # Build the package
 echo "Building..."
@@ -372,21 +595,7 @@ echo "Installation completed for {package_info.name}"
 
     def _make_acquire_template(self, package_info: PackageInfo, build_system: BuildSystem) -> str:
         """Generate acquire script for Make-based packages."""
-        return f"""#!/bin/sh
-# Acquire script for {package_info.name}
-
-# This script is used to download the source code
-# It's typically used when creating USM packages from upstream sources
-
-# For Make packages, the source is usually already available
-# If you need to download from a specific URL, uncomment and modify:
-# wget -O - {package_info.url or 'https://example.com/source.tar.gz'} | tar -xz
-
-# Or if using git:
-# git clone {package_info.url or 'https://github.com/example/repo.git'} .
-
-echo "Source acquired for {package_info.name}"
-"""
+        return self._common_acquire_template(package_info, build_system)
 
     def _make_build_template(self, package_info: PackageInfo, build_system: BuildSystem) -> str:
         """Generate build script for Make-based packages."""
@@ -403,6 +612,38 @@ fi
 
 echo "Building {package_info.name} in $BUILD_DIR"
 
+# Function to find the source directory (updated for flattened extraction)
+find_source_dir() {{
+    local search_dir="$1"
+    
+    # With flattened extraction, check current directory first
+    if [ -f "$search_dir/Makefile" ] || [ -f "$search_dir/makefile" ]; then
+        echo "$search_dir"
+        return 0
+    fi
+    
+    # Look for common indicators of a Make-based source directory
+    for item in "$search_dir"/*; do
+        if [ -d "$item" ]; then
+            # Check for Make files
+            if [ -f "$item/Makefile" ] || [ -f "$item/makefile" ]; then
+                echo "$item"
+                return 0
+            fi
+        fi
+    done
+    
+    # If still not found, return the search directory
+    echo "$search_dir"
+}}
+
+# Find the source directory
+SOURCE_DIR=$(find_source_dir ".")
+echo "Using source directory: $SOURCE_DIR"
+
+# Change to the source directory
+cd "$SOURCE_DIR"
+
 # Build the package
 echo "Building..."
 make -j$(nproc)
@@ -430,8 +671,37 @@ fi
 
 echo "Installing {package_info.name} to $INSTALL_DIR (type: $INSTALL_TYPE)"
 
+# Function to find the source directory (updated for flattened extraction)
+find_source_dir() {{
+    local search_dir="$1"
+    
+    # With flattened extraction, check current directory first
+    if [ -f "$search_dir/Makefile" ] || [ -f "$search_dir/makefile" ]; then
+        echo "$search_dir"
+        return 0
+    fi
+    
+    # Look for common indicators of a Make-based source directory
+    for item in "$search_dir"/*; do
+        if [ -d "$item" ]; then
+            # Check for Make files
+            if [ -f "$item/Makefile" ] || [ -f "$item/makefile" ]; then
+                echo "$item"
+                return 0
+            fi
+        fi
+    done
+    
+    # If still not found, return the search directory
+    echo "$search_dir"
+}}
+
+# Find the source directory
+SOURCE_DIR=$(find_source_dir ".")
+echo "Using source directory: $SOURCE_DIR"
+
 # Change to the source directory
-cd "$BUILD_DIR"
+cd "$SOURCE_DIR"
 
 # Install the package
 echo "Installing..."
@@ -442,21 +712,7 @@ echo "Installation completed for {package_info.name}"
 
     def _python_acquire_template(self, package_info: PackageInfo, build_system: BuildSystem) -> str:
         """Generate acquire script for Python-based packages."""
-        return f"""#!/bin/sh
-# Acquire script for {package_info.name}
-
-# This script is used to download the source code
-# It's typically used when creating USM packages from upstream sources
-
-# For Python packages, the source is usually already available
-# If you need to download from a specific URL, uncomment and modify:
-# wget -O - {package_info.url or 'https://example.com/source.tar.gz'} | tar -xz
-
-# Or if using git:
-# git clone {package_info.url or 'https://github.com/example/repo.git'} .
-
-echo "Source acquired for {package_info.name}"
-"""
+        return self._common_acquire_template(package_info, build_system)
 
     def _python_build_template(self, package_info: PackageInfo, build_system: BuildSystem) -> str:
         """Generate build script for Python-based packages."""
@@ -473,6 +729,38 @@ fi
 
 echo "Building {package_info.name} in $BUILD_DIR"
 
+# Function to find the source directory (updated for flattened extraction)
+find_source_dir() {{
+    local search_dir="$1"
+    
+    # With flattened extraction, check current directory first
+    if [ -f "$search_dir/setup.py" ] || [ -f "$search_dir/pyproject.toml" ] || [ -f "$search_dir/requirements.txt" ]; then
+        echo "$search_dir"
+        return 0
+    fi
+    
+    # Look for common indicators of a Python source directory
+    for item in "$search_dir"/*; do
+        if [ -d "$item" ]; then
+            # Check for Python files
+            if [ -f "$item/setup.py" ] || [ -f "$item/pyproject.toml" ] || [ -f "$item/requirements.txt" ]; then
+                echo "$item"
+                return 0
+            fi
+        fi
+    done
+    
+    # If still not found, return the search directory
+    echo "$search_dir"
+}}
+
+# Find the source directory
+SOURCE_DIR=$(find_source_dir ".")
+echo "Using source directory: $SOURCE_DIR"
+
+# Change to the source directory
+cd "$SOURCE_DIR"
+
 # Build the package
 echo "Building..."
 python setup.py build
@@ -500,8 +788,37 @@ fi
 
 echo "Installing {package_info.name} to $INSTALL_DIR (type: $INSTALL_TYPE)"
 
+# Function to find the source directory (updated for flattened extraction)
+find_source_dir() {{
+    local search_dir="$1"
+    
+    # With flattened extraction, check current directory first
+    if [ -f "$search_dir/setup.py" ] || [ -f "$search_dir/pyproject.toml" ] || [ -f "$search_dir/requirements.txt" ]; then
+        echo "$search_dir"
+        return 0
+    fi
+    
+    # Look for common indicators of a Python source directory
+    for item in "$search_dir"/*; do
+        if [ -d "$item" ]; then
+            # Check for Python files
+            if [ -f "$item/setup.py" ] || [ -f "$item/pyproject.toml" ] || [ -f "$item/requirements.txt" ]; then
+                echo "$item"
+                return 0
+            fi
+        fi
+    done
+    
+    # If still not found, return the search directory
+    echo "$search_dir"
+}}
+
+# Find the source directory
+SOURCE_DIR=$(find_source_dir ".")
+echo "Using source directory: $SOURCE_DIR"
+
 # Change to the source directory
-cd "$BUILD_DIR"
+cd "$SOURCE_DIR"
 
 # Install the package
 echo "Installing..."
@@ -512,21 +829,7 @@ echo "Installation completed for {package_info.name}"
 
     def _cargo_acquire_template(self, package_info: PackageInfo, build_system: BuildSystem) -> str:
         """Generate acquire script for Cargo-based packages."""
-        return f"""#!/bin/sh
-# Acquire script for {package_info.name}
-
-# This script is used to download the source code
-# It's typically used when creating USM packages from upstream sources
-
-# For Cargo packages, the source is usually already available
-# If you need to download from a specific URL, uncomment and modify:
-# wget -O - {package_info.url or 'https://example.com/source.tar.gz'} | tar -xz
-
-# Or if using git:
-# git clone {package_info.url or 'https://github.com/example/repo.git'} .
-
-echo "Source acquired for {package_info.name}"
-"""
+        return self._common_acquire_template(package_info, build_system)
 
     def _cargo_build_template(self, package_info: PackageInfo, build_system: BuildSystem) -> str:
         """Generate build script for Cargo-based packages."""
@@ -543,6 +846,38 @@ fi
 
 echo "Building {package_info.name} in $BUILD_DIR"
 
+# Function to find the source directory (updated for flattened extraction)
+find_source_dir() {{
+    local search_dir="$1"
+    
+    # With flattened extraction, check current directory first
+    if [ -f "$search_dir/Cargo.toml" ]; then
+        echo "$search_dir"
+        return 0
+    fi
+    
+    # Look for common indicators of a Cargo source directory
+    for item in "$search_dir"/*; do
+        if [ -d "$item" ]; then
+            # Check for Cargo files
+            if [ -f "$item/Cargo.toml" ]; then
+                echo "$item"
+                return 0
+            fi
+        fi
+    done
+    
+    # If still not found, return the search directory
+    echo "$search_dir"
+}}
+
+# Find the source directory
+SOURCE_DIR=$(find_source_dir ".")
+echo "Using source directory: $SOURCE_DIR"
+
+# Change to the source directory
+cd "$SOURCE_DIR"
+
 # Build the package
 echo "Building..."
 cargo build --release
@@ -570,8 +905,37 @@ fi
 
 echo "Installing {package_info.name} to $INSTALL_DIR (type: $INSTALL_TYPE)"
 
+# Function to find the source directory (updated for flattened extraction)
+find_source_dir() {{
+    local search_dir="$1"
+    
+    # With flattened extraction, check current directory first
+    if [ -f "$search_dir/Cargo.toml" ]; then
+        echo "$search_dir"
+        return 0
+    fi
+    
+    # Look for common indicators of a Cargo source directory
+    for item in "$search_dir"/*; do
+        if [ -d "$item" ]; then
+            # Check for Cargo files
+            if [ -f "$item/Cargo.toml" ]; then
+                echo "$item"
+                return 0
+            fi
+        fi
+    done
+    
+    # If still not found, return the search directory
+    echo "$search_dir"
+}}
+
+# Find the source directory
+SOURCE_DIR=$(find_source_dir ".")
+echo "Using source directory: $SOURCE_DIR"
+
 # Change to the source directory
-cd "$BUILD_DIR"
+cd "$SOURCE_DIR"
 
 # Install the package
 echo "Installing..."
@@ -582,21 +946,7 @@ echo "Installation completed for {package_info.name}"
 
     def _npm_acquire_template(self, package_info: PackageInfo, build_system: BuildSystem) -> str:
         """Generate acquire script for NPM-based packages."""
-        return f"""#!/bin/sh
-# Acquire script for {package_info.name}
-
-# This script is used to download the source code
-# It's typically used when creating USM packages from upstream sources
-
-# For NPM packages, the source is usually already available
-# If you need to download from a specific URL, uncomment and modify:
-# wget -O - {package_info.url or 'https://example.com/source.tar.gz'} | tar -xz
-
-# Or if using git:
-# git clone {package_info.url or 'https://github.com/example/repo.git'} .
-
-echo "Source acquired for {package_info.name}"
-"""
+        return self._common_acquire_template(package_info, build_system)
 
     def _npm_build_template(self, package_info: PackageInfo, build_system: BuildSystem) -> str:
         """Generate build script for NPM-based packages."""
@@ -613,6 +963,38 @@ fi
 
 echo "Building {package_info.name} in $BUILD_DIR"
 
+# Function to find the source directory (updated for flattened extraction)
+find_source_dir() {{
+    local search_dir="$1"
+    
+    # With flattened extraction, check current directory first
+    if [ -f "$search_dir/package.json" ]; then
+        echo "$search_dir"
+        return 0
+    fi
+    
+    # Look for common indicators of an NPM source directory
+    for item in "$search_dir"/*; do
+        if [ -d "$item" ]; then
+            # Check for NPM files
+            if [ -f "$item/package.json" ]; then
+                echo "$item"
+                return 0
+            fi
+        fi
+    done
+    
+    # If still not found, return the search directory
+    echo "$search_dir"
+}}
+
+# Find the source directory
+SOURCE_DIR=$(find_source_dir ".")
+echo "Using source directory: $SOURCE_DIR"
+
+# Change to the source directory
+cd "$SOURCE_DIR"
+
 # Install dependencies
 echo "Installing dependencies..."
 npm ci
@@ -644,8 +1026,37 @@ fi
 
 echo "Installing {package_info.name} to $INSTALL_DIR (type: $INSTALL_TYPE)"
 
+# Function to find the source directory (updated for flattened extraction)
+find_source_dir() {{
+    local search_dir="$1"
+    
+    # With flattened extraction, check current directory first
+    if [ -f "$search_dir/package.json" ]; then
+        echo "$search_dir"
+        return 0
+    fi
+    
+    # Look for common indicators of an NPM source directory
+    for item in "$search_dir"/*; do
+        if [ -d "$item" ]; then
+            # Check for NPM files
+            if [ -f "$item/package.json" ]; then
+                echo "$item"
+                return 0
+            fi
+        fi
+    done
+    
+    # If still not found, return the search directory
+    echo "$search_dir"
+}}
+
+# Find the source directory
+SOURCE_DIR=$(find_source_dir ".")
+echo "Using source directory: $SOURCE_DIR"
+
 # Change to the source directory
-cd "$BUILD_DIR"
+cd "$SOURCE_DIR"
 
 # Install the package globally
 echo "Installing..."
@@ -656,21 +1067,7 @@ echo "Installation completed for {package_info.name}"
 
     def _generic_acquire_template(self, package_info: PackageInfo, build_system: BuildSystem) -> str:
         """Generic acquire script for unknown build systems."""
-        return f"""#!/bin/sh
-# Acquire script for {package_info.name}
-
-# This script is used to download the source code
-# It's typically used when creating USM packages from upstream sources
-
-# For unknown build systems, the source is usually already available
-# If you need to download from a specific URL, uncomment and modify:
-# wget -O - {package_info.url or 'https://example.com/source.tar.gz'} | tar -xz
-
-# Or if using git:
-# git clone {package_info.url or 'https://github.com/example/repo.git'} .
-
-echo "Source acquired for {package_info.name}"
-"""
+        return self._common_acquire_template(package_info, build_system)
 
     def _generic_build_template(self, package_info: PackageInfo, build_system: BuildSystem) -> str:
         """Generic build script for unknown build systems."""
@@ -687,6 +1084,44 @@ fi
 
 echo "Building {package_info.name} in $BUILD_DIR"
 
+# Function to find the source directory (updated for flattened extraction)
+find_source_dir() {{
+    local search_dir="$1"
+    
+    # With flattened extraction, check current directory first
+    if [ -f "$search_dir/configure" ] || [ -f "$search_dir/CMakeLists.txt" ] || \
+       [ -f "$search_dir/meson.build" ] || [ -f "$search_dir/Makefile" ] || \
+       [ -f "$search_dir/setup.py" ] || [ -f "$search_dir/Cargo.toml" ] || \
+       [ -f "$search_dir/package.json" ]; then
+        echo "$search_dir"
+        return 0
+    fi
+    
+    # Look for common indicators of a source directory
+    for item in "$search_dir"/*; do
+        if [ -d "$item" ]; then
+            # Check for common build system files
+            if [ -f "$item/configure" ] || [ -f "$item/CMakeLists.txt" ] || \
+               [ -f "$item/meson.build" ] || [ -f "$item/Makefile" ] || \
+               [ -f "$item/setup.py" ] || [ -f "$item/Cargo.toml" ] || \
+               [ -f "$item/package.json" ]; then
+                echo "$item"
+                return 0
+            fi
+        fi
+    done
+    
+    # If no clear source directory found, return the search directory
+    echo "$search_dir"
+}}
+
+# Find the source directory
+SOURCE_DIR=$(find_source_dir ".")
+echo "Using source directory: $SOURCE_DIR"
+
+# Change to the source directory
+cd "$SOURCE_DIR"
+
 # This is a generic build script for unknown build systems
 # You may need to customize this based on the actual build system
 
@@ -701,7 +1136,7 @@ elif [ -f "CMakeLists.txt" ]; then
     echo "Found CMakeLists.txt, using CMake..."
     mkdir -p "$BUILD_DIR"
     cd "$BUILD_DIR"
-    cmake -DCMAKE_INSTALL_PREFIX=/usr ..
+    cmake -DCMAKE_INSTALL_PREFIX=/usr "$SOURCE_DIR"
     make -j$(nproc)
 else
     echo "No known build system found. Please customize this script."
@@ -731,17 +1166,53 @@ fi
 
 echo "Installing {package_info.name} to $INSTALL_DIR (type: $INSTALL_TYPE)"
 
+# Function to find the source directory (updated for flattened extraction)
+find_source_dir() {{
+    local search_dir="$1"
+    
+    # With flattened extraction, check current directory first
+    if [ -f "$search_dir/configure" ] || [ -f "$search_dir/CMakeLists.txt" ] || \
+       [ -f "$search_dir/meson.build" ] || [ -f "$search_dir/Makefile" ] || \
+       [ -f "$search_dir/setup.py" ] || [ -f "$search_dir/Cargo.toml" ] || \
+       [ -f "$search_dir/package.json" ]; then
+        echo "$search_dir"
+        return 0
+    fi
+    
+    # Look for common indicators of a source directory
+    for item in "$search_dir"/*; do
+        if [ -d "$item" ]; then
+            # Check for common build system files
+            if [ -f "$item/configure" ] || [ -f "$item/CMakeLists.txt" ] || \
+               [ -f "$item/meson.build" ] || [ -f "$item/Makefile" ] || \
+               [ -f "$item/setup.py" ] || [ -f "$item/Cargo.toml" ] || \
+               [ -f "$item/package.json" ]; then
+                echo "$item"
+                return 0
+            fi
+        fi
+    done
+    
+    # If no clear source directory found, return the search directory
+    echo "$search_dir"
+}}
+
+# Find the source directory
+SOURCE_DIR=$(find_source_dir ".")
+echo "Using source directory: $SOURCE_DIR"
+
+# Change to the source directory
+cd "$SOURCE_DIR"
+
 # This is a generic install script for unknown build systems
 # You may need to customize this based on the actual build system
 
 # Try common install commands
 if [ -f "Makefile" ] || [ -f "makefile" ]; then
     echo "Found Makefile, using make install..."
-    cd "$BUILD_DIR"
     make DESTDIR="$INSTALL_DIR" install
 elif [ -f "install.sh" ]; then
     echo "Found install.sh, running it..."
-    cd "$BUILD_DIR"
     ./install.sh --prefix="$INSTALL_DIR/usr"
 else
     echo "No known install method found. Please customize this script."

+ 61 - 4
src/autusm/manifest.py

@@ -349,6 +349,10 @@ class ManifestGenerator:
     def update_with_autoprovides(self, manifest: USMManifest, autoprovides: Dict[str, Any]) -> USMManifest:
         """Update a manifest with autoprovides from USM.
         
+        This method replaces the template-based provides section with the actual
+        autoprovides output from USM, ensuring that the manifest contains the
+        real provides data rather than template placeholders.
+        
         Args:
             manifest: Existing USM manifest
             autoprovides: Autoprovides dictionary from USM
@@ -356,10 +360,63 @@ class ManifestGenerator:
         Returns:
             Updated USM manifest
         """
-        # Merge autoprovides into the provides section
-        for resource_ref, resource in autoprovides.items():
-            if resource_ref not in manifest.provides:
-                manifest.provides[resource_ref] = resource
+        # Validate autoprovides before applying them
+        if autoprovides:
+            # Check if autoprovides looks valid (not empty or garbage)
+            if not autoprovides or len(autoprovides) == 0:
+                logger.warning("Empty autoprovides provided, keeping original provides section")
+                return manifest
+            
+            # Valid USM resource types based on the specification
+            valid_resource_types = {
+                'rootpath', 'path', 'opt', 'res', 'cfg', 'bin', 'sbin', 'lib',
+                'libexec', 'libres', 'info', 'man', 'locale', 'app', 'inc',
+                'pc', 'vapi', 'gir', 'typelib', 'tag'
+            }
+            
+            # Check for truly suspicious keys that might indicate parsing errors
+            # These are indicators of JSON parsing problems, not valid USM resource references
+            truly_suspicious_patterns = [
+                '"provides":',  # JSON fragment indicating incomplete parsing
+                '{', '}',        # Unbalanced braces
+                '[', ']',        # Unbalanced brackets
+                '\\',           # Escape characters that shouldn't be in final keys
+                'type:',        # JSON field indicators
+                'path:',        # JSON field indicators
+                'pathBase:',    # JSON field indicators
+            ]
+            
+            has_suspicious_keys = False
+            for key in autoprovides.keys():
+                key_str = str(key)
+                
+                # Check if it's a valid USM resource reference (format: type:name)
+                if ':' in key_str:
+                    resource_type = key_str.split(':', 1)[0]
+                    if resource_type in valid_resource_types:
+                        # This is a valid USM resource reference, skip further checks
+                        continue
+                
+                # Check for truly suspicious patterns
+                for pattern in truly_suspicious_patterns:
+                    if pattern in key_str:
+                        has_suspicious_keys = True
+                        logger.warning(f"Suspicious key detected: {key_str} (contains pattern: {pattern})")
+                        break
+                
+                if has_suspicious_keys:
+                    break
+            
+            if has_suspicious_keys:
+                logger.warning(f"Autoprovides contains suspicious keys that may indicate parsing errors: {list(autoprovides.keys())}")
+                logger.warning("Keeping original provides section instead of potentially corrupted autoprovides")
+                return manifest
+            
+            # Autoprovides look valid, apply them
+            manifest.provides = autoprovides.copy()
+            logger.info(f"Replaced template provides with {len(autoprovides)} actual autoprovides")
+        else:
+            logger.warning("No autoprovides provided, keeping original provides section")
         
         return manifest
 

+ 8 - 2
src/autusm/metadata.py

@@ -203,6 +203,7 @@ class MetadataExtractor:
             if not package_info.summary and "description" in data:
                 package_info.summary = data["description"]
             
+            # Only set URL if not already set (preserve original URL)
             if not package_info.url and "homepage" in data:
                 package_info.url = data["homepage"]
             elif not package_info.url and "repository" in data:
@@ -398,7 +399,8 @@ class MetadataExtractor:
                         License(name=license_name, text="LICENSE", category=category)
                     )
                 
-                if "repository" in package:
+                # Only set URL if not already set (preserve original URL)
+                if not package_info.url and "repository" in package:
                     package_info.url = package["repository"]
             
             # Extract dependencies
@@ -432,6 +434,7 @@ class MetadataExtractor:
             if not package_info.summary and "description" in data:
                 package_info.summary = data["description"]
             
+            # Only set URL if not already set (preserve original URL)
             if not package_info.url and "homepage" in data:
                 package_info.url = data["homepage"]
             
@@ -496,6 +499,7 @@ class MetadataExtractor:
                 if desc_match:
                     package_info.summary = desc_match.group(1)
             
+            # Only set URL if not already set (preserve original URL)
             if not package_info.url:
                 url_match = re.search(r'<url>(.*?)</url>', content)
                 if url_match:
@@ -571,10 +575,12 @@ class MetadataExtractor:
                     # Extract project URL if not already set
                     if not package_info.url:
                         # Look for common URL patterns
+                        # Be more specific to avoid picking up redirection URLs
                         url_patterns = [
                             r'github\.com/([^\s/]+/[^\s/]+)',
                             r'gitlab\.com/([^\s/]+/[^\s/]+)',
-                            r'https?://([^\s]+)'
+                            r'Home page:\s*(https?://[^\s]+)',
+                            r'Developer page:\s*(https?://[^\s]+)'
                         ]
                         
                         for pattern in url_patterns:

+ 4 - 1
src/autusm/models.py

@@ -220,9 +220,12 @@ class USMManifest:
                 if value.skip_for:
                     resource_dict["skipFor"] = value.skip_for
                 result["provides"][key] = resource_dict
-            else:
+            elif isinstance(value, str):
                 # Use shorthand string directly
                 result["provides"][key] = value
+            else:
+                # Handle any other type by converting to string
+                result["provides"][key] = str(value)
 
         # Add optional fields
         if self.md:

+ 281 - 68
src/autusm/usm_integration.py

@@ -8,6 +8,7 @@ including running "usm manifest autoprovides" and parsing the output.
 import json
 import logging
 import subprocess
+import os
 from pathlib import Path
 from typing import Dict, List, Optional, Any
 
@@ -32,14 +33,7 @@ class USMIntegration:
             True if USM is available, False otherwise
         """
         try:
-            # Try to run usm --version
-            result = subprocess.run(
-                [self.usm_command, "--version"],
-                capture_output=True,
-                text=True,
-                timeout=10
-            )
-            return result.returncode == 0
+            return os.path.exists("/usr/bin/usm")
         except (subprocess.TimeoutExpired, subprocess.SubprocessError, FileNotFoundError):
             return False
 
@@ -60,6 +54,8 @@ class USMIntegration:
             
             # Run usm manifest autoprovides
             cmd = [self.usm_command, "manifest", "autoprovides"]
+            logger.debug(f"Running USM command: {' '.join(cmd)} in directory: {source_dir}")
+            
             result = subprocess.run(
                 cmd,
                 cwd=source_dir,
@@ -68,11 +64,31 @@ class USMIntegration:
                 timeout=300  # 5 minutes timeout
             )
             
+            # Log stdout and stderr separately to ensure no contamination
+            logger.debug(f"USM command exit code: {result.returncode}")
+            logger.debug(f"USM stdout (for parsing): {repr(result.stdout)}")
+            logger.debug(f"USM stderr (debug info): {repr(result.stderr)}")
+            
+            # Always log stderr separately for debugging but don't mix with stdout
+            if result.stderr:
+                logger.debug(f"USM stderr output: {result.stderr}")
+            
+            # Check if USM command succeeded
             if result.returncode != 0:
-                logger.warning(f"USM autoprovides failed: {result.stderr}")
+                error_msg = result.stderr.strip() if result.stderr else "Unknown error"
+                logger.warning(f"USM autoprovides failed with exit code {result.returncode}: {error_msg}")
+                # Note: We do NOT log stdout here to avoid contamination
+                logger.debug(f"USM stderr details: {result.stderr}")
+                return {}
+            
+            # Check if there's any stdout output to parse (only use stdout for parsing)
+            if not result.stdout or not result.stdout.strip():
+                logger.warning("USM autoprovides returned empty stdout output")
+                # Note: stderr might contain error messages but we don't parse it
                 return {}
             
-            # Parse the output
+            # Parse only the stdout output
+            logger.debug(f"Parsing USM stdout output: {repr(result.stdout)}")
             autoprovides = self._parse_autoprovides(result.stdout)
             
             logger.info(f"Got {len(autoprovides)} autoprovides from USM")
@@ -88,67 +104,240 @@ class USMIntegration:
             logger.error(f"Unexpected error in USM integration: {e}")
             raise USMIntegrationError(f"Unexpected error in USM integration: {e}")
 
-    def _parse_autoprovides(self, output: str) -> Dict[str, Any]:
-        """Parse the output of "usm manifest autoprovides".
+    def _parse_autoprovides(self, stdout_output: str) -> Dict[str, Any]:
+        """Parse the stdout output of "usm manifest autoprovides".
+        
+        This method exclusively processes stdout output to avoid contamination
+        from stderr messages.
         
         Args:
-            output: Output from USM command
+            stdout_output: Stdout output from USM command (stderr should be handled separately)
             
         Returns:
             Dictionary of autoprovides resources
         """
         try:
-            # Try to parse as JSON first
-            if output.strip().startswith('{'):
-                return json.loads(output)
+            # Check if stdout output looks like valid JSON
+            stdout_output = stdout_output.strip()
+            logger.debug(f"Trimmed stdout output for parsing: {repr(stdout_output)}")
             
-            # If not JSON, try to parse line by line
+            if not stdout_output:
+                logger.warning("Empty autoprovides stdout output")
+                return {}
+            
+            # Check for the new USM format: "provides": { ... }
+            if stdout_output.startswith('"provides": {') and stdout_output.endswith('}'):
+                logger.debug("Detected USM 'provides' format, extracting JSON content")
+                try:
+                    # Extract the JSON content between the braces
+                    # Everything after the opening brace and before the closing brace
+                    json_content = stdout_output[len('"provides": {'):-1].strip()
+                    logger.debug(f"Extracted JSON content: {repr(json_content)}")
+                    
+                    # Parse the extracted content as JSON
+                    json_data = json.loads('{' + json_content + '}')
+                    logger.debug(f"Successfully parsed extracted JSON: {json_data}")
+                    
+                    # Validate that we have a proper JSON structure
+                    if not isinstance(json_data, dict):
+                        logger.warning(f"USM autoprovides JSON is not a dictionary: {type(json_data)}")
+                        return {}
+                    
+                    # The parsed data is the provides data
+                    provides_data = json_data
+                    
+                    autoprovides = {}
+                    
+                    # Convert JSON data to proper Resource objects
+                    for resource_ref, resource_data in provides_data.items():
+                        if isinstance(resource_data, str):
+                            # Shorthand string like "as-expected"
+                            autoprovides[resource_ref] = resource_data
+                        elif isinstance(resource_data, dict):
+                            # Resource object in JSON format
+                            path = resource_data.get("path")
+                            path_base_str = resource_data.get("pathBase", "source")
+                            file_type_str = resource_data.get("type", "reg")
+                            
+                            # Map path base strings to enum
+                            base_mapping = {
+                                "source": PathBase.SOURCE,
+                                "build": PathBase.BUILD,
+                                "install": PathBase.INSTALL,
+                                "as-expected": PathBase.AS_EXPECTED
+                            }
+                            
+                            # Map file type strings to enum
+                            type_mapping = {
+                                "reg": FileType.REG,
+                                "dir": FileType.DIR,
+                                "lnk": FileType.LNK
+                            }
+                            
+                            path_base = base_mapping.get(path_base_str, PathBase.SOURCE)
+                            file_type = type_mapping.get(file_type_str, FileType.REG)
+                            
+                            autoprovides[resource_ref] = Resource(
+                                path=path,
+                                path_base=path_base,
+                                type=file_type,
+                                dest=resource_data.get("dest"),
+                                keep_on=resource_data.get("keepOn", []),
+                                skip_for=resource_data.get("skipFor", [])
+                            )
+                    
+                    return autoprovides
+                    
+                except json.JSONDecodeError as e:
+                    logger.warning(f"Failed to parse USM autoprovides 'provides' format: {e}")
+                    logger.debug(f"USM stdout output that failed to parse: {stdout_output}")
+                    return {}
+            
+            # Try to parse as JSON first (original logic)
+            if stdout_output.startswith('{') and stdout_output.endswith('}'):
+                logger.debug("Stdout output looks like JSON, attempting to parse")
+                try:
+                    json_data = json.loads(stdout_output)
+                    logger.debug(f"Successfully parsed JSON: {json_data}")
+                    
+                    # Validate that we have a proper JSON structure
+                    if not isinstance(json_data, dict):
+                        logger.warning(f"USM autoprovides JSON is not a dictionary: {type(json_data)}")
+                        return {}
+                    
+                    # Check if the output has a "provides" property (nested structure)
+                    if "provides" in json_data:
+                        provides_data = json_data["provides"]
+                        if not isinstance(provides_data, dict):
+                            logger.warning(f"USM autoprovides 'provides' field is not a dictionary: {type(provides_data)}")
+                            return {}
+                    else:
+                        # The output itself is the provides data
+                        provides_data = json_data
+                    
+                    autoprovides = {}
+                    
+                    # Convert JSON data to proper Resource objects
+                    for resource_ref, resource_data in provides_data.items():
+                        if isinstance(resource_data, str):
+                            # Shorthand string like "as-expected"
+                            autoprovides[resource_ref] = resource_data
+                        elif isinstance(resource_data, dict):
+                            # Resource object in JSON format
+                            path = resource_data.get("path")
+                            path_base_str = resource_data.get("pathBase", "source")
+                            file_type_str = resource_data.get("type", "reg")
+                            
+                            # Map path base strings to enum
+                            base_mapping = {
+                                "source": PathBase.SOURCE,
+                                "build": PathBase.BUILD,
+                                "install": PathBase.INSTALL,
+                                "as-expected": PathBase.AS_EXPECTED
+                            }
+                            
+                            # Map file type strings to enum
+                            type_mapping = {
+                                "reg": FileType.REG,
+                                "dir": FileType.DIR,
+                                "lnk": FileType.LNK
+                            }
+                            
+                            path_base = base_mapping.get(path_base_str, PathBase.SOURCE)
+                            file_type = type_mapping.get(file_type_str, FileType.REG)
+                            
+                            autoprovides[resource_ref] = Resource(
+                                path=path,
+                                path_base=path_base,
+                                type=file_type,
+                                dest=resource_data.get("dest"),
+                                keep_on=resource_data.get("keepOn", []),
+                                skip_for=resource_data.get("skipFor", [])
+                            )
+                    
+                    return autoprovides
+                    
+                except json.JSONDecodeError as e:
+                    logger.warning(f"Failed to parse USM autoprovides stdout as JSON: {e}")
+                    logger.debug(f"USM stdout output that failed to parse: {stdout_output}")
+                    return {}
+            else:
+                # Stdout doesn't look like JSON, might be an error message
+                if any(error_indicator in stdout_output.lower() for error_indicator in ["error", "failed", "exception", "traceback"]):
+                    logger.warning(f"USM autoprovides stdout appears to contain an error: {stdout_output}")
+                    return {}
+                
+                # Check if stdout looks like JSON fragments or error messages
+                # If it contains quotes, braces, or other JSON-like fragments but isn't valid JSON,
+                # it's likely an error message or malformed output
+                if any(char in stdout_output for char in ['"', '{', '}', '\\', '[', ']']):
+                    logger.warning(f"USM autoprovides stdout contains JSON-like fragments but is not valid JSON: {stdout_output}")
+                    return {}
+            
+            # If not JSON, try to parse line by line (only for valid line-by-line format)
             autoprovides = {}
             
-            for line in output.strip().split('\n'):
+            # First check if this looks like valid line-by-line format
+            # Valid format should be like: "bin:package-name path" or "lib:libname.so install:/usr/lib"
+            lines = stdout_output.strip().split('\n')
+            valid_line_count = 0
+            
+            for line in lines:
                 line = line.strip()
                 if not line or line.startswith('#'):
                     continue
                 
-                # Parse lines in format "resource-type:resource-name path"
-                if ':' in line:
+                # Check if this looks like a valid resource line
+                # Valid lines should have resource reference with colon and reasonable content
+                if ':' in line and not any(char in line for char in ['"', '{', '}', '\\', '[', ']']):
                     parts = line.split(maxsplit=1)
                     if len(parts) >= 2:
-                        resource_ref = parts[0]
+                        resource_ref = parts[0].strip()
                         path_info = parts[1].strip()
                         
-                        # Convert to resource object or shorthand
-                        if path_info == "as-expected":
-                            autoprovides[resource_ref] = "as-expected"
-                        else:
-                            # Try to parse as path:base format
-                            if ':' in path_info:
-                                path_parts = path_info.split(':', 1)
-                                if len(path_parts) == 2:
-                                    path_base = path_parts[0]
-                                    path = path_parts[1]
-                                    
-                                    # Map path base strings to enum
-                                    base_mapping = {
-                                        "source": PathBase.SOURCE,
-                                        "build": PathBase.BUILD,
-                                        "install": PathBase.INSTALL,
-                                        "as-expected": PathBase.AS_EXPECTED
-                                    }
-                                    
-                                    if path_base in base_mapping:
-                                        autoprovides[resource_ref] = Resource(
-                                            path=path,
-                                            path_base=base_mapping[path_base],
-                                            type=FileType.REG
-                                        )
+                        # Additional validation - resource ref should look reasonable
+                        if (resource_ref and ':' in resource_ref and
+                            len(resource_ref.split(':')) == 2 and
+                            resource_ref.split(':')[0] in ['bin', 'lib', 'inc', 'res', 'man', 'app', 'sbin', 'cfg', 'doc']):
+                            
+                            valid_line_count += 1
+                            # Convert to resource object or shorthand
+                            if path_info == "as-expected":
+                                autoprovides[resource_ref] = "as-expected"
                             else:
-                                # Just a path
-                                autoprovides[resource_ref] = Resource(
-                                    path=path_info,
-                                    path_base=PathBase.SOURCE,
-                                    type=FileType.REG
-                                )
+                                # Try to parse as path:base format
+                                if ':' in path_info:
+                                    path_parts = path_info.split(':', 1)
+                                    if len(path_parts) == 2:
+                                        path_base = path_parts[0]
+                                        path = path_parts[1]
+                                        
+                                        # Map path base strings to enum
+                                        base_mapping = {
+                                            "source": PathBase.SOURCE,
+                                            "build": PathBase.BUILD,
+                                            "install": PathBase.INSTALL,
+                                            "as-expected": PathBase.AS_EXPECTED
+                                        }
+                                        
+                                        if path_base in base_mapping:
+                                            autoprovides[resource_ref] = Resource(
+                                                path=path,
+                                                path_base=base_mapping[path_base],
+                                                type=FileType.REG
+                                            )
+                                else:
+                                    # Just a path
+                                    autoprovides[resource_ref] = Resource(
+                                        path=path_info,
+                                        path_base=PathBase.SOURCE,
+                                        type=FileType.REG
+                                    )
+            
+            # If we didn't find any valid lines, the stdout is likely malformed
+            if valid_line_count == 0:
+                logger.warning(f"USM autoprovides stdout doesn't appear to be in valid format: {stdout_output}")
+                return {}
             
             return autoprovides
             
@@ -177,10 +366,16 @@ class USMIntegration:
                 timeout=60
             )
             
+            # Log stdout and stderr separately
+            if result.stdout:
+                logger.debug(f"USM validate stdout: {result.stdout}")
+            if result.stderr:
+                logger.debug(f"USM validate stderr: {result.stderr}")
+            
             if result.returncode == 0:
                 return []
             
-            # Parse errors from output
+            # Parse errors from stderr only (not stdout)
             errors = []
             for line in result.stderr.split('\n'):
                 line = line.strip()
@@ -218,10 +413,16 @@ class USMIntegration:
                 timeout=30
             )
             
+            # Log stdout and stderr separately
+            if result.stdout:
+                logger.debug(f"USM info stdout: {result.stdout}")
+            if result.stderr:
+                logger.debug(f"USM info stderr: {result.stderr}")
+            
             if result.returncode != 0:
                 return None
             
-            # Parse the output
+            # Parse only the stdout output
             return self._parse_package_info(result.stdout)
             
         except subprocess.TimeoutExpired:
@@ -234,18 +435,18 @@ class USMIntegration:
             logger.error(f"Unexpected error getting package info: {e}")
             return None
 
-    def _parse_package_info(self, output: str) -> Dict[str, Any]:
-        """Parse the output of "usm info".
+    def _parse_package_info(self, stdout_output: str) -> Dict[str, Any]:
+        """Parse the stdout output of "usm info".
         
         Args:
-            output: Output from USM command
+            stdout_output: Stdout output from USM command
             
         Returns:
             Package information dictionary
         """
         info = {}
         
-        for line in output.strip().split('\n'):
+        for line in stdout_output.strip().split('\n'):
             line = line.strip()
             if not line or line.startswith('#'):
                 continue
@@ -276,10 +477,16 @@ class USMIntegration:
                 timeout=60
             )
             
+            # Log stdout and stderr separately
+            if result.stdout:
+                logger.debug(f"USM list stdout: {result.stdout}")
+            if result.stderr:
+                logger.debug(f"USM list stderr: {result.stderr}")
+            
             if result.returncode != 0:
                 return []
             
-            # Parse the output
+            # Parse only the stdout output
             return self._parse_package_list(result.stdout)
             
         except subprocess.TimeoutExpired:
@@ -292,18 +499,18 @@ class USMIntegration:
             logger.error(f"Unexpected error listing packages: {e}")
             return []
 
-    def _parse_package_list(self, output: str) -> List[Dict[str, Any]]:
-        """Parse the output of "usm list".
+    def _parse_package_list(self, stdout_output: str) -> List[Dict[str, Any]]:
+        """Parse the stdout output of "usm list".
         
         Args:
-            output: Output from USM command
+            stdout_output: Stdout output from USM command
             
         Returns:
             List of package information dictionaries
         """
         packages = []
         
-        for line in output.strip().split('\n'):
+        for line in stdout_output.strip().split('\n'):
             line = line.strip()
             if not line or line.startswith('#'):
                 continue
@@ -346,10 +553,16 @@ class USMIntegration:
                 timeout=60
             )
             
+            # Log stdout and stderr separately
+            if result.stdout:
+                logger.debug(f"USM deps stdout: {result.stdout}")
+            if result.stderr:
+                logger.debug(f"USM deps stderr: {result.stderr}")
+            
             if result.returncode != 0:
                 return {name: [] for name in package_names}
             
-            # Parse the output
+            # Parse only the stdout output
             return self._parse_dependencies(result.stdout, package_names)
             
         except subprocess.TimeoutExpired:
@@ -362,11 +575,11 @@ class USMIntegration:
             logger.error(f"Unexpected error checking dependencies: {e}")
             return {name: [] for name in package_names}
 
-    def _parse_dependencies(self, output: str, package_names: List[str]) -> Dict[str, List[str]]:
-        """Parse the output of "usm deps".
+    def _parse_dependencies(self, stdout_output: str, package_names: List[str]) -> Dict[str, List[str]]:
+        """Parse the stdout output of "usm deps".
         
         Args:
-            output: Output from USM command
+            stdout_output: Stdout output from USM command
             package_names: List of package names
             
         Returns:
@@ -376,7 +589,7 @@ class USMIntegration:
         
         current_package = None
         
-        for line in output.strip().split('\n'):
+        for line in stdout_output.strip().split('\n'):
             line = line.strip()
             if not line or line.startswith('#'):
                 continue

+ 213 - 0
test_autoprovides_fix.py

@@ -0,0 +1,213 @@
+#!/usr/bin/env python3
+"""
+Test script to verify the autoprovides fix in autusm.
+
+This script tests that the autoprovides functionality correctly
+replaces template-based provides with actual USM output.
+"""
+
+import json
+import tempfile
+import os
+from pathlib import Path
+from unittest.mock import Mock, patch
+
+# Add the src directory to the path
+import sys
+sys.path.insert(0, 'src')
+
+from autusm.manifest import ManifestGenerator
+from autusm.models import PackageInfo, BuildSystem, BuildSystemType, USMManifest, License, LicenseCategory, Resource, PathBase, FileType
+from autusm.usm_integration import USMIntegration
+
+
+def test_autoprovides_integration():
+    """Test that autoprovides correctly replaces template provides."""
+    print("Testing autoprovides integration...")
+    
+    # Create test package info
+    package_info = PackageInfo(
+        name="test-package",
+        version="1.0.0",
+        summary="A test package",
+        licenses=[License(name="MIT", text="LICENSE", category=LicenseCategory.OPEN_SOURCE)]
+    )
+    
+    # Create test build system
+    build_system = BuildSystem(type=BuildSystemType.MAKE)
+    
+    # Create manifest generator
+    manifest_generator = ManifestGenerator()
+    
+    # Generate initial manifest
+    manifest = manifest_generator.generate(package_info, build_system)
+    
+    # Check that template provides are present
+    print(f"Initial provides count: {len(manifest.provides)}")
+    print("Initial provides:")
+    for key, value in manifest.provides.items():
+        print(f"  {key}: {value}")
+    
+    # Create mock autoprovides (simulating USM output)
+    mock_autoprovides = {
+        "bin:test-package": "as-expected",
+        "lib:libtest.so": Resource(path="usr/lib/libtest.so", path_base=PathBase.INSTALL, type=FileType.REG),
+        "inc:test.h": Resource(path="usr/include/test.h", path_base=PathBase.INSTALL, type=FileType.REG),
+        "res:doc/test-package": "as-expected"
+    }
+    
+    # Update manifest with autoprovides
+    updated_manifest = manifest_generator.update_with_autoprovides(manifest, mock_autoprovides)
+    
+    # Check that autoprovides replaced the template provides
+    print(f"\nUpdated provides count: {len(updated_manifest.provides)}")
+    print("Updated provides:")
+    for key, value in updated_manifest.provides.items():
+        print(f"  {key}: {value}")
+    
+    # Verify the autoprovides are now in the manifest
+    assert "bin:test-package" in updated_manifest.provides
+    assert "lib:libtest.so" in updated_manifest.provides
+    assert "inc:test.h" in updated_manifest.provides
+    assert "res:doc/test-package" in updated_manifest.provides
+    
+    # Verify the autoprovides values are correct
+    assert updated_manifest.provides["bin:test-package"] == "as-expected"
+    assert isinstance(updated_manifest.provides["lib:libtest.so"], Resource)
+    assert updated_manifest.provides["lib:libtest.so"].path == "usr/lib/libtest.so"
+    
+    print("\n✅ Autoprovides integration test passed!")
+    return True
+
+
+def test_usm_integration_parsing():
+    """Test that USM integration correctly parses autoprovides output."""
+    print("\nTesting USM integration parsing...")
+    
+    usm_integration = USMIntegration()
+    
+    # Test JSON format output (nested structure)
+    json_output = """{
+    "provides": {
+        "bin:test-package": "as-expected",
+        "lib:libtest.so": {
+            "path": "usr/lib/libtest.so",
+            "pathBase": "install",
+            "type": "reg"
+        }
+    }
+}"""
+    
+    parsed_json = usm_integration._parse_autoprovides(json_output)
+    print(f"Parsed JSON autoprovides: {parsed_json}")
+    assert "bin:test-package" in parsed_json
+    assert "lib:libtest.so" in parsed_json
+    assert isinstance(parsed_json["lib:libtest.so"], Resource)
+    
+    # Test line-by-line format output
+    line_output = """bin:test-package as-expected
+lib:libtest.so install:usr/lib/libtest.so
+inc:test.h source:include/test.h
+# This is a comment
+res:doc/test-package as-expected"""
+    
+    parsed_lines = usm_integration._parse_autoprovides(line_output)
+    print(f"Parsed line autoprovides: {parsed_lines}")
+    assert "bin:test-package" in parsed_lines
+    assert "lib:libtest.so" in parsed_lines
+    assert "inc:test.h" in parsed_lines
+    assert "res:doc/test-package" in parsed_lines
+    
+    print("✅ USM integration parsing test passed!")
+    return True
+
+
+def test_full_workflow():
+    """Test the full workflow with mocked USM integration."""
+    print("\nTesting full workflow...")
+    
+    # Create a temporary directory for testing
+    with tempfile.TemporaryDirectory() as temp_dir:
+        temp_path = Path(temp_dir)
+        
+        # Create test package info
+        package_info = PackageInfo(
+            name="full-test-package",
+            version="1.0.0",
+            summary="A full test package",
+            licenses=[License(name="MIT", text="LICENSE", category=LicenseCategory.OPEN_SOURCE)]
+        )
+        
+        # Create test build system
+        build_system = BuildSystem(type=BuildSystemType.MAKE)
+        
+        # Create manifest generator
+        manifest_generator = ManifestGenerator()
+        
+        # Generate initial manifest
+        manifest = manifest_generator.generate(package_info, build_system)
+        
+        # Write initial manifest to file
+        manifest_path = temp_path / "MANIFEST.usm"
+        with open(manifest_path, "w") as f:
+            f.write(manifest.to_json())
+        
+        print(f"Initial manifest written to: {manifest_path}")
+        
+        # Mock USM integration
+        with patch.object(USMIntegration, 'is_available', return_value=True), \
+             patch.object(USMIntegration, 'get_autoprovides') as mock_autoprovides:
+            
+            # Setup mock autoprovides
+            mock_autoprovides.return_value = {
+                "bin:full-test-package": "as-expected",
+                "lib:libfulltest.so": Resource(path="usr/lib/libfulltest.so", path_base=PathBase.INSTALL, type=FileType.REG)
+            }
+            
+            # Create USM integration instance
+            usm_integration = USMIntegration()
+            
+            # Simulate the workflow
+            if usm_integration.is_available():
+                autoprovides = usm_integration.get_autoprovides(temp_path)
+                if autoprovides:
+                    manifest = manifest_generator.update_with_autoprovides(manifest, autoprovides)
+                    
+                    # Rewrite the manifest file
+                    with open(manifest_path, "w") as f:
+                        f.write(manifest.to_json())
+            
+            # Verify the final manifest
+            with open(manifest_path, "r") as f:
+                final_manifest_data = json.load(f)
+            
+            print("Final manifest provides section:")
+            for key, value in final_manifest_data["provides"].items():
+                print(f"  {key}: {value}")
+            
+            # Verify autoprovides are in the final manifest
+            assert "bin:full-test-package" in final_manifest_data["provides"]
+            assert "lib:libfulltest.so" in final_manifest_data["provides"]
+            
+            print("✅ Full workflow test passed!")
+            return True
+
+
+if __name__ == "__main__":
+    print("Running autoprovides fix tests...\n")
+    
+    try:
+        test_autoprovides_integration()
+        test_usm_integration_parsing()
+        test_full_workflow()
+        
+        print("\n🎉 All tests passed! The autoprovides fix is working correctly.")
+        
+    except AssertionError as e:
+        print(f"\n❌ Test failed: {e}")
+        sys.exit(1)
+    except Exception as e:
+        print(f"\n❌ Unexpected error: {e}")
+        import traceback
+        traceback.print_exc()
+        sys.exit(1)

+ 157 - 0
test_extraction_consistency.py

@@ -0,0 +1,157 @@
+#!/usr/bin/env python3
+"""
+Test script to verify extraction consistency between autusm and USM acquire script.
+
+This script tests that both autusm's initial extraction and the acquire script
+produce the same directory structure (flattened to the output directory).
+"""
+
+import os
+import sys
+import tempfile
+import shutil
+from pathlib import Path
+
+# Add src to path to import autusm modules
+sys.path.insert(0, str(Path(__file__).parent / "src"))
+
+from autusm.extractor import ArchiveExtractor
+from autusm.generator import ScriptGenerator
+from autusm.models import PackageInfo, BuildSystem, BuildSystemType
+
+
def test_extraction_consistency():
    """Test that extraction behavior is consistent between autusm and acquire script.

    Downloads the GNU hello tarball once, extracts it via autusm's
    ArchiveExtractor and via a generated USM acquire script, then compares
    the two resulting file trees.  Returns True when both produce the same
    flattened layout, False otherwise.
    """

    # Test URL for hello-2.12.tar.gz
    test_url = "https://mirror.freedif.org/GNU/hello/hello-2.12.tar.gz"
    package_name = "hello"

    print("Testing extraction consistency...")
    print(f"Test URL: {test_url}")
    print(f"Package: {package_name}")
    print()

    def relative_files(root, skip_name=None):
        # Relative paths of every regular file below root, optionally
        # ignoring files with a given basename (the acquire script itself).
        found = set()
        for entry in root.rglob("*"):
            if entry.is_file() and entry.name != skip_name:
                found.add(str(entry.relative_to(root)))
        return found

    with tempfile.TemporaryDirectory() as temp_dir:
        work = Path(temp_dir)

        # Separate output trees for the two extraction paths under test.
        autusm_output = work / "autusm_output"
        usm_output = work / "usm_output"
        autusm_output.mkdir()
        usm_output.mkdir()

        # Fetch the archive once; both tests extract the same file.
        print("Downloading test archive...")
        from autusm.download import DownloadManager
        archive_path = DownloadManager().download(test_url, work)
        print(f"Downloaded to: {archive_path}")
        print()

        # --- Test 1: autusm's own extractor ------------------------------
        print("Test 1: autusm extraction")
        extracted_dir = ArchiveExtractor().extract(archive_path, autusm_output)
        print(f"autusm extracted to: {extracted_dir}")

        print("autusm directory structure:")
        for entry in autusm_output.rglob("*"):
            if entry.is_file():
                print(f"  {entry.relative_to(autusm_output)}")
        print()

        # --- Test 2: generated USM acquire script ------------------------
        print("Test 2: Simulating USM acquire script extraction")

        # Minimal metadata for the template; only name/version/url matter here.
        package_info = PackageInfo(
            name=package_name,
            version="2.12",
            summary="Test package",
            url=test_url,
        )
        build_system = BuildSystem(
            type=BuildSystemType.AUTOTOOLS,
            config_files=[],
            build_files=[],
            detected_commands=[],
            custom_args={},
        )

        # Render the acquire script and drop it into the USM output tree.
        acquire_script = ScriptGenerator()._common_acquire_template(
            package_info, build_system
        )
        acquire_script_path = usm_output / "acquire"
        with open(acquire_script_path, "w") as f:
            f.write(acquire_script)
        os.chmod(acquire_script_path, 0o755)

        # Execute the script in place, as USM itself would.
        import subprocess
        result = subprocess.run(
            ["./acquire"],
            cwd=usm_output,
            capture_output=True,
            text=True,
        )
        if result.returncode != 0:
            print(f"Error running acquire script: {result.stderr}")
            return False

        print("USM acquire script executed successfully")

        print("USM directory structure:")
        for entry in usm_output.rglob("*"):
            if entry.is_file() and entry.name != "acquire":
                print(f"  {entry.relative_to(usm_output)}")
        print()

        # --- Compare the two trees ---------------------------------------
        print("Comparing directory structures...")
        autusm_files = relative_files(autusm_output)
        usm_files = relative_files(usm_output, skip_name="acquire")

        if autusm_files == usm_files:
            print("✓ SUCCESS: Directory structures match!")
            print(f"Both contain {len(autusm_files)} files")
            return True

        print("✗ FAILURE: Directory structures differ!")

        print("Files only in autusm:")
        for name in sorted(autusm_files - usm_files):
            print(f"  {name}")

        print("Files only in USM:")
        for name in sorted(usm_files - autusm_files):
            print(f"  {name}")

        return False
+
+
if __name__ == "__main__":
    # Exit status mirrors the test result: 0 on match, 1 on mismatch/error.
    sys.exit(0 if test_extraction_consistency() else 1)

+ 126 - 0
test_fix.py

@@ -0,0 +1,126 @@
+#!/usr/bin/env python3
+"""
+Test script to verify the autoprovides fix works end-to-end.
+"""
+
+import sys
+import tempfile
+import json
+from pathlib import Path
+
+# Add the src directory to the path
+sys.path.insert(0, 'src')
+
+from autusm.download import DownloadManager
+from autusm.extractor import ArchiveExtractor
+from autusm.analyzer import SourceAnalyzer
+from autusm.metadata import MetadataExtractor
+from autusm.generator import ScriptGenerator
+from autusm.manifest import ManifestGenerator
+from autusm.usm_integration import USMIntegration
+from autusm.interaction import UserInteraction
+
def test_full_workflow():
    """Test the full workflow with a simple package.

    Drives the complete autusm pipeline (download → extract → analyze →
    metadata → scripts → manifest → USM autoprovides) against GNU hello.
    Returns True on success, False on any failure (with a traceback).
    """
    print("Testing full autusm workflow...")

    # All intermediate artifacts live in a throwaway directory.
    with tempfile.TemporaryDirectory() as temp_dir:
        workdir = Path(temp_dir)

        # URL to a simple package
        url = "https://mirror.freedif.org/GNU/hello/hello-2.12.tar.gz"

        try:
            # Step 1: Download
            print("Downloading source archive...")
            archive_path = DownloadManager().download(url, workdir)
            print(f"Downloaded to: {archive_path}")

            # Step 2: Extract
            print("Extracting archive...")
            source_dir = ArchiveExtractor().extract(archive_path, workdir)
            print(f"Extracted to: {source_dir}")

            # Step 3: Analyze
            print("Analyzing source code...")
            build_system = SourceAnalyzer().detect_build_system(source_dir)
            print(f"Detected build system: {build_system.type.value}")

            # Step 4: Extract metadata, then pin the fields we know for hello.
            print("Extracting metadata...")
            package_info = MetadataExtractor().extract(source_dir)
            package_info.name = "hello"
            package_info.version = "2.12"
            package_info.summary = "GNU Hello package, a classic example"
            package_info.url = url
            package_info.source_dir = str(source_dir)

            # Step 5: Generate scripts
            print("Generating scripts...")
            scripts_dir = source_dir / "scripts"
            scripts_dir.mkdir(parents=True, exist_ok=True)
            ScriptGenerator().generate_scripts(package_info, build_system, scripts_dir)

            # Step 6: Generate manifest
            print("Generating manifest...")
            manifest_generator = ManifestGenerator()
            manifest = manifest_generator.generate(package_info, build_system)

            manifest_path = source_dir / "MANIFEST.usm"
            manifest_path.write_text(manifest.to_json())
            print(f"Initial manifest written to: {manifest_path}")

            # Keep a copy outside the temp dir so it survives for inspection.
            test_output_path = Path("test_output") / "MANIFEST.usm"
            test_output_path.parent.mkdir(parents=True, exist_ok=True)
            test_output_path.write_text(manifest.to_json())
            print(f"Also saved to: {test_output_path}")

            # Step 7: Test USM integration
            print("Testing USM integration...")
            usm_integration = USMIntegration()

            if not usm_integration.is_available():
                print("USM is not available, skipping autoprovides")
                return True

            print("USM is available, testing autoprovides...")
            autoprovides = usm_integration.get_autoprovides(source_dir)
            print(f"Got autoprovides: {autoprovides}")

            if not autoprovides:
                print("No autoprovides returned from USM")
                return True

            print("Updating manifest with autoprovides...")
            updated_manifest = manifest_generator.update_with_autoprovides(
                manifest, autoprovides
            )

            # Rewrite the manifest with the merged provides section.
            manifest_path.write_text(updated_manifest.to_json())

            print("Final manifest:")
            print(updated_manifest.to_json())
            return True

        except Exception as e:
            print(f"Error: {e}")
            import traceback
            traceback.print_exc()
            return False
+
if __name__ == "__main__":
    # Report the outcome and exit non-zero on failure.
    if test_full_workflow():
        print("\n✅ Full workflow test completed successfully!")
    else:
        print("\n❌ Full workflow test failed!")
        sys.exit(1)