Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion dumpyara/dumpyara.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,22 @@
from dumpyara.steps.extract_images import extract_images
from dumpyara.steps.prepare_images import prepare_images

try:
import firmware_parsers # noqa: F401

_HAS_FIRMWARE_PARSERS = True
except ImportError:
_HAS_FIRMWARE_PARSERS = False

# Package name to package commands
REQUIRED_TOOLS = {
    "7-zip or p7zip": [SEVEN_ZIP_EXECUTABLE, P7ZIP_EXECUTABLE],
    "erofs-utils": ["fsck.erofs"],
}

# simg2img is only a fallback: it is listed as a requirement solely when the
# optional firmware_parsers module could not be imported. Keeping it in the
# dict literal above as well would make this guard a no-op.
if not _HAS_FIRMWARE_PARSERS:
    REQUIRED_TOOLS["android-sdk-libsparse-utils or platform-utils"] = ["simg2img"]


def dumpyara(file: Path, output_path: Path, debug: bool = False):
"""Dump an Android firmware."""
Expand Down
148 changes: 147 additions & 1 deletion dumpyara/steps/extract_archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,23 +8,102 @@
This step will extract the archive into a folder.
"""

import re
from pathlib import Path
from re import Pattern, compile
from shutil import unpack_archive
from sebaubuntu_libs.liblogging import LOGD, LOGI
from typing import Callable, Dict
from zipfile import ZipFile, is_zipfile

from dumpyara.utils.files import get_recursive_files_list

try:
import firmware_parsers

_HAS_FIRMWARE_PARSERS = True
except ImportError:
_HAS_FIRMWARE_PARSERS = False


def _strip_vendor_prefix(directory: Path):
"""Strip common vendor prefixes from extracted filenames.

Detects patterns like Nokia NB0 'LFC-X-YYYY-ZZZZ-name.ext' and renames to 'name.ext'.
Only acts when a clear majority of files share the same prefix pattern.
"""
files = [f for f in directory.iterdir() if f.is_file()]
if len(files) < 3:
return

# Match Nokia-style prefix: LFC-{seg}-{seg}-{seg}- or LFC-{seg}-{seg}-
prefix_pattern = re.compile(r"^[A-Z]{2,4}(?:-[A-Za-z0-9]{1,6}){2,4}-")
prefixed = {}
for f in files:
m = prefix_pattern.match(f.name)
if m:
new_name = f.name[m.end() :]
if new_name and not (directory / new_name).exists():
prefixed[f] = directory / new_name

# Only rename if most files have the prefix (avoid false positives)
if len(prefixed) >= len(files) * 0.6:
for old, new in prefixed.items():
LOGD(f"Stripping vendor prefix: {old.name} → {new.name}")
old.rename(new)


def _has_nested_partition_markers(archive_path: Path) -> bool:
    """Tell whether a nested zip holds at least one partition container marker.

    Every entry name of the zip is checked against each pattern in
    NESTED_ZIP_PARTITION_MARKERS. Files that are not zip archives, and zips
    that fail to open or list, are reported as having no markers (the reason
    is logged at debug level).

    Args:
        archive_path: Path of the candidate nested zip.

    Returns:
        True if any entry name matches a marker pattern, False otherwise.
    """
    if is_zipfile(archive_path):
        try:
            with ZipFile(archive_path, "r") as nested_zip:
                return any(
                    marker.search(entry_name)
                    for entry_name in nested_zip.namelist()
                    for marker in NESTED_ZIP_PARTITION_MARKERS
                )
        except Exception as error:
            # Best effort: an unreadable zip is simply not treated as a dump source
            LOGD(f"Failed to inspect nested zip {archive_path.name}: {error}")
            return False

    LOGD(f"Skipping nested zip scan for non-zip archive: {archive_path.name}")
    return False


def extract_archive(archive_path: Path, extracted_archive_path: Path, is_nested: bool = False):
"""
Extract the archive into a folder.
"""
LOGD(f"Extracting archive: {archive_path.name}")

# Try firmware_parsers detection first
if _HAS_FIRMWARE_PARSERS:
try:
fmt = firmware_parsers.detect(str(archive_path))
if fmt != "unknown":
extractor = getattr(firmware_parsers, fmt, None)
if extractor is not None:
LOGI(f"Detected firmware format: {fmt}")
extractor(str(archive_path), str(extracted_archive_path))
if is_nested:
archive_path.unlink()
return
except Exception as e:
LOGI(f"firmware_parsers failed ({e}), falling back to generic extraction")

# Extract the archive
unpack_archive(archive_path, extracted_archive_path)
try:
unpack_archive(archive_path, extracted_archive_path)
except Exception:
# Fallback: try as zip for non-standard extensions (.ozip, .ftf, etc.)
if is_zipfile(archive_path):
LOGD(f"Falling back to zipfile for {archive_path.name}")
with ZipFile(archive_path, "r") as zf:
zf.extractall(extracted_archive_path)
else:
raise
if is_nested:
LOGD("Archive is nested, unlinking")
archive_path.unlink()
Expand All @@ -36,6 +115,24 @@ def extract_archive(archive_path: Path, extracted_archive_path: Path, is_nested:

file.rename(extracted_archive_path / file.name)

# Re-detect firmware formats in extracted files
if _HAS_FIRMWARE_PARSERS:
for file in list(get_recursive_files_list(extracted_archive_path)):
try:
fmt = firmware_parsers.detect(str(file))
if fmt != "unknown":
extractor = getattr(firmware_parsers, fmt, None)
if extractor is not None:
LOGI(f"Detected nested firmware format: {fmt} in {file.name}")
extractor(str(file), str(extracted_archive_path))
file.unlink()
except Exception as e:
LOGD(f"firmware_parsers failed on {file.name}: {e}")

# Strip common vendor prefixes from filenames
# (e.g., Nokia NB0 "LFC-0-1060-00WW-boot.img" -> "boot.img")
_strip_vendor_prefix(extracted_archive_path)

# Check for nested archives
extracted_archive_tempdir_files_list = list(
get_recursive_files_list(extracted_archive_path, True)
Expand All @@ -60,9 +157,58 @@ def extract_archive(archive_path: Path, extracted_archive_path: Path, is_nested:

func(nested_archive, extracted_archive_path, True)

nested_archive_patterns = tuple(NESTED_ARCHIVES.keys())
for file in extracted_archive_tempdir_files_list:
if any(pattern.match(str(file)) for pattern in nested_archive_patterns):
continue

if not NESTED_ZIP_PATTERN.match(str(file)):
continue

nested_archive = extracted_archive_path / file
LOGI(f"Found nested zip candidate: {nested_archive.name}")

if not nested_archive.is_file():
LOGD(f"Nested zip {nested_archive.name} probably already handled, skipping")
continue

if not _has_nested_partition_markers(nested_archive):
LOGD(f"Skipping nested zip {nested_archive.name}: no partition markers")
continue

extract_archive(nested_archive, extracted_archive_path, True)

LOGD(f"Extracted archive: {archive_path.name}")


# Alternation of the partition names recognised inside a nested zip; shared by
# the ".new.dat.br" and ".transfer.list" marker patterns below.
_NESTED_ZIP_PARTITION_NAMES = (
    r"(?:boot|boot-debug|boot-verified|cust|dtbo|dtbo-verified|exaid|factory|india|"
    r"init_boot|mi_ext|modem|my_bigball|my_carrier|my_company|my_country|my_custom|"
    r"my_engineering|my_heytap|my_manifest|my_odm|my_operator|my_preload|my_product|"
    r"my_region|my_stock|my_version|NON-HLOS|odm|odm_dlkm|odm_ext|oem|oppo_product|"
    r"opproduct|preas|preavs|preload|preload_common|product|product_h|recovery|rescue|"
    r"reserve|special_preload|super|system|system_dlkm|system_ext|system_other|"
    r"systemex|tz|vendor|vendor_boot|vendor_boot-debug|vendor_dlkm|"
    r"vendor_kernel_boot|xrom)"
)

# Entry-name patterns searched for in a nested zip's file list; one match is
# enough to mark the zip as containing partition data.
NESTED_ZIP_PARTITION_MARKERS = (
    # <partition>[_a|_b].new.dat.br
    compile(r"(?:^|/)" + _NESTED_ZIP_PARTITION_NAMES + r"(?:_[ab])?\.new\.dat\.br$"),
    # <partition>[_a|_b].transfer.list
    compile(r"(?:^|/)" + _NESTED_ZIP_PARTITION_NAMES + r"(?:_[ab])?\.transfer\.list$"),
    # payload.bin
    compile(r"(?:^|/)payload\.bin$"),
    # super*.img, except names containing "_empty"
    compile(r"(?:^|/)super(?!.*(_empty)).*\.img$"),
    # *.tar.md5
    compile(r"(?:^|/)[^/]+\.tar\.md5$"),
)

# Any path ending in ".zip" is a nested zip candidate.
NESTED_ZIP_PATTERN = compile(r".*\.zip$")
NESTED_ARCHIVES: Dict[Pattern[str], Callable[[Path, Path, bool], None]] = {
compile(key): value
for key, value in {
Expand Down
16 changes: 13 additions & 3 deletions dumpyara/utils/multipartitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,13 @@

from dumpyara.lib.libpayload import extract_android_ota_payload

try:
import firmware_parsers

_HAS_FIRMWARE_PARSERS = True
except ImportError:
_HAS_FIRMWARE_PARSERS = False


def extract_payload(image: Path, output_dir: Path):
extract_android_ota_payload(image, output_dir)
Expand All @@ -22,9 +29,12 @@ def extract_super(image: Path, output_dir: Path):
unsparsed_super = output_dir / "super.unsparsed.img"

try:
check_output(
["simg2img", image, unsparsed_super], stderr=STDOUT
) # TODO: Rewrite libsparse...
if _HAS_FIRMWARE_PARSERS:
firmware_parsers.sparse_to_raw(str(image), str(unsparsed_super))
else:
check_output(
["simg2img", image, unsparsed_super], stderr=STDOUT
) # TODO: Rewrite libsparse...
except Exception:
LOGI(f"Failed to unsparse {image.name}")
else:
Expand Down
1 change: 1 addition & 0 deletions dumpyara/utils/partitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ def fix_aliases(images_path: Path):
if partition_path.exists():
LOGI(f"Ignoring {alt_name} ({name} already extracted)")
alt_path.unlink()
continue

LOGI(f"Fixing alias {alt_name} -> {name}")
move(alt_path, partition_path)
Expand Down
16 changes: 13 additions & 3 deletions dumpyara/utils/raw_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,13 @@
from shutil import copyfile, move
from subprocess import STDOUT, check_output

try:
import firmware_parsers

_HAS_FIRMWARE_PARSERS = True
except ImportError:
_HAS_FIRMWARE_PARSERS = False


def get_raw_image(partition: str, files_path: Path, output_image_path: Path):
"""
Expand Down Expand Up @@ -59,9 +66,12 @@ def get_raw_image(partition: str, files_path: Path, output_image_path: Path):
continue

try:
check_output(
["simg2img", image_path, unsparsed_image], stderr=STDOUT
) # TODO: Rewrite libsparse...
if _HAS_FIRMWARE_PARSERS:
firmware_parsers.sparse_to_raw(str(image_path), str(unsparsed_image))
else:
check_output(
["simg2img", image_path, unsparsed_image], stderr=STDOUT
) # TODO: Rewrite libsparse...
except Exception:
LOGD(f"Failed to unsparse {image_path.name}, should be a raw image")
pass
Expand Down
15 changes: 14 additions & 1 deletion dumpyara/utils/sparsed_images.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,13 @@

from dumpyara.utils.partitions import get_partition_names_with_alias

try:
import firmware_parsers

_HAS_FIRMWARE_PARSERS = True
except ImportError:
_HAS_FIRMWARE_PARSERS = False


def prepare_sparsed_images(files_path: Path):
"""
Expand All @@ -31,6 +38,12 @@ def prepare_sparsed_images(files_path: Path):
if sparsechunk_image_files:
LOGI(f"Preparing sparsechunk images for {partition}")
LOGI(f"Converting {sparsechunk_image_files[0]} to {output_image.name}")
check_output(["simg2img", *sparsechunk_image_files, output_image], stderr=STDOUT)
if _HAS_FIRMWARE_PARSERS:
firmware_parsers.sparse_chunks_to_raw(
[str(f) for f in sparsechunk_image_files],
str(output_image),
)
else:
check_output(["simg2img", *sparsechunk_image_files, output_image], stderr=STDOUT)
for sparsechunk_image_file in sparsechunk_image_files:
sparsechunk_image_file.unlink()
Loading