diff --git a/src/murfey/instrument_server/api.py b/src/murfey/instrument_server/api.py index f26ba7a2d..abf92a64c 100644 --- a/src/murfey/instrument_server/api.py +++ b/src/murfey/instrument_server/api.py @@ -476,17 +476,22 @@ class UpstreamFileDownloadInfo(BaseModel): download_dir: Path upstream_instrument: str upstream_visit_path: Path + search_strings: list[str] | None = None @router.post("/visits/{visit_name}/sessions/{session_id}/upstream_file_data_request") -def gather_upstream_files( +def run_upstream_file_download_request( visit_name: str, session_id: MurfeySessionID, upstream_file_download: UpstreamFileDownloadInfo, ): """ - Instrument server endpoint that will query the backend for files in the chosen - visit directory + Instrument server endpoint that will receive an order from the backend server + to trigger an upstream file download request. + + It will get the list of files matching the provided list of search strings, + then iteratively request for those files, saving them locally at the specified + download directory. """ # Check for forbidden characters if any(c in visit_name for c in ("/", "\\", ":", ";")): @@ -518,6 +523,7 @@ def gather_upstream_files( json={ "upstream_instrument": upstream_instrument, "upstream_visit_path": str(upstream_visit_path), + "search_strings": upstream_file_download.search_strings, }, ).json() diff --git a/src/murfey/server/api/instrument.py b/src/murfey/server/api/instrument.py index 66401938d..9e7be9f5f 100644 --- a/src/murfey/server/api/instrument.py +++ b/src/murfey/server/api/instrument.py @@ -437,7 +437,7 @@ async def request_upstream_tiff_data_download( @router.post("/visits/{visit_name}/sessions/{session_id}/upstream_file_data_request") -async def request_upstream_file_data_download( +async def request_upstream_file_download( visit_name: str, session_id: MurfeySessionID, upstream_file_request: UpstreamFileRequestInfo, @@ -475,7 +475,7 @@ async def request_upstream_file_data_download( async with aiohttp.ClientSession() as clientsession: url_path = url_path_for( "api.router", - "gather_upstream_files", + "run_upstream_file_download_request", visit_name=secure_filename(visit_name), session_id=session_id, ) @@ -488,6 +488,7 @@ async def request_upstream_file_data_download( "download_dir": download_dir, "upstream_instrument": upstream_file_request.upstream_instrument, "upstream_visit_path": str(upstream_file_request.upstream_visit_path), + "search_strings": upstream_file_request.search_strings, }, ) as resp: data = await resp.json() diff --git a/src/murfey/server/api/session_control.py b/src/murfey/server/api/session_control.py index 2faff232f..703f62b90 100644 --- a/src/murfey/server/api/session_control.py +++ b/src/murfey/server/api/session_control.py @@ -554,6 +554,7 @@ async def gather_upstream_files( session_id=session_id, upstream_instrument=upstream_file_request.upstream_instrument, upstream_visit_path=upstream_file_request.upstream_visit_path, + search_strings=upstream_file_request.search_strings, db=db, ) diff --git a/src/murfey/server/api/session_info.py b/src/murfey/server/api/session_info.py index cd52ee011..a28746e12 100644 --- a/src/murfey/server/api/session_info.py +++ b/src/murfey/server/api/session_info.py @@ -530,6 +530,7 @@ async def gather_upstream_files( session_id=session_id, upstream_instrument=upstream_file_request.upstream_instrument, upstream_visit_path=upstream_file_request.upstream_visit_path, + search_strings=upstream_file_request.search_strings, db=db, ) diff --git a/src/murfey/server/api/session_shared.py b/src/murfey/server/api/session_shared.py index 7e428752b..efa020a23 100644 --- a/src/murfey/server/api/session_shared.py +++ b/src/murfey/server/api/session_shared.py @@ -20,7 +20,7 @@ Session as MurfeySession, ) -logger = logging.getLogger("murfey.server.api.shared") +logger = logging.getLogger("murfey.server.api.session_shared") def remove_session_by_id(session_id: int, db): @@ -211,44 +211,52 @@ def gather_upstream_files( session_id: int, upstream_instrument: str, upstream_visit_path: Path, + search_strings: list[str] | None, db: SQLModelSession, ): """ Searches the specified upstream instrument for files based on the search strings set in the MachineConfig and returns them as a list of file paths. """ - # Load the current instrument's machine config - murfey_session = db.exec( - select(MurfeySession).where(MurfeySession.id == session_id) - ).one() - instrument_name = murfey_session.instrument_name - machine_config = get_machine_config(instrument_name=instrument_name)[ - instrument_name - ] - - # Search for files using the configured strings for that upstream instrument file_list: list[Path] = [] logger.info(f"Searching for files in {sanitise(str(upstream_visit_path))!r}") - if ( - machine_config.upstream_data_search_strings.get(upstream_instrument, None) - is not None - ): - for search_string in machine_config.upstream_data_search_strings[ - upstream_instrument - ]: - logger.info(f"Using search string {search_string}") - for file in upstream_visit_path.glob(search_string): - if file.is_file(): - file_list.append(file) - logger.info( - f"Found {len(file_list)} files for download " - f"from {sanitise(upstream_instrument)}" + + # If search strings weren't provided, read them from the machine config + if search_strings is None: + # Load the current instrument's machine config + murfey_session = db.exec( + select(MurfeySession).where(MurfeySession.id == session_id) + ).one() + instrument_name = murfey_session.instrument_name + machine_config = get_machine_config(instrument_name=instrument_name)[ + instrument_name + ] + search_strings = machine_config.upstream_data_search_strings.get( + upstream_instrument, None ) - else: + # Return empty list if no search strings for the instrument were found + if search_strings is None: + logger.warning( + "Upstream file searching has not been configured for " + f"{sanitise(upstream_instrument)} on {sanitise(instrument_name)}" + ) + return file_list + elif not search_strings: + # Return empty list if no search strings were provided to begin with logger.warning( - "Upstream file searching has not been configured for " - f"{sanitise(upstream_instrument)} on {sanitise(instrument_name)}" + "No search strings were included as part of the file download request" ) + return file_list + # Search for files matching the provided search strings + for search_string in search_strings: + logger.info(f"Using search string {sanitise(search_string)}") + for file in upstream_visit_path.glob(search_string): + if file.is_file(): + file_list.append(file) + logger.info( + f"Found {len(file_list)} files for download " + f"from {sanitise(upstream_instrument)}" + ) return file_list diff --git a/src/murfey/util/models.py b/src/murfey/util/models.py index c8b7af7ac..cdf6898c3 100644 --- a/src/murfey/util/models.py +++ b/src/murfey/util/models.py @@ -89,6 +89,7 @@ class UpstreamFileRequestInfo(BaseModel): # Used in backend server for cross-instrument file download requests upstream_instrument: str upstream_visit_path: Path + search_strings: list[str] | None = None """ diff --git a/src/murfey/util/route_manifest.yaml b/src/murfey/util/route_manifest.yaml index 4fd3b9985..2caed3c13 100644 --- a/src/murfey/util/route_manifest.yaml +++ b/src/murfey/util/route_manifest.yaml @@ -149,7 +149,7 @@ murfey.instrument_server.api.router: methods: - POST - path: /visits/{visit_name}/sessions/{session_id}/upstream_file_data_request - function: gather_upstream_files + function: run_upstream_file_download_request path_params: - name: visit_name type: str @@ -569,7 +569,7 @@ murfey.server.api.instrument.router: methods: - POST - path: /instrument_server/visits/{visit_name}/sessions/{session_id}/upstream_file_data_request - function: request_upstream_file_data_download + function: request_upstream_file_download path_params: - name: visit_name type: str diff --git a/tests/server/api/test_session_control.py b/tests/server/api/test_session_control.py index abf63285b..4cc791e21 100644 --- a/tests/server/api/test_session_control.py +++ b/tests/server/api/test_session_control.py @@ -1,7 +1,9 @@ from pathlib import Path +from typing import Any from unittest import mock from unittest.mock import MagicMock +import pytest from fastapi import FastAPI from fastapi.testclient import TestClient from pytest_mock import MockerFixture @@ -10,9 +12,10 @@ validate_instrument_server_session_access, validate_instrument_token, ) -from murfey.server.api.session_control import spa_router +from murfey.server.api.session_control import gather_upstream_files, spa_router from murfey.server.murfey_db import murfey_db_session from murfey.util.api import url_path_for +from murfey.util.models import UpstreamFileRequestInfo def test_make_atlas_jpg(mocker: MockerFixture, tmp_path: Path): @@ -74,3 +77,55 @@ def mock_get_db_session(): # Check that the expected calls were made mock_atlas_jpg.assert_called_once_with(instrument_name, visit_name, test_file) assert response.status_code == 200 + + +@pytest.mark.parametrize( + "search_strings", + ( + ["dummy"], + [], + None, + ), +) +@pytest.mark.asyncio +async def test_gather_upstream_files( + mocker: MockerFixture, + tmp_path: Path, + search_strings: list[str] | None, +): + # Construct dictionary to pass to Pydantic model + session_id = 1 + upstream_instrument = "dummy" + upstream_visit_path = str(tmp_path / "dummy") + params_dict: dict[str, Any] = { + "upstream_instrument": upstream_instrument, + "upstream_visit_path": upstream_visit_path, + } + if search_strings is not None: + params_dict["search_strings"] = search_strings + + # Validate the incoming message + params = UpstreamFileRequestInfo(**params_dict) + + # Patch the actual 'gather_upstream_files' function + mock_gather = mocker.patch( + "murfey.server.api.session_control._gather_upstream_files" + ) + + # Create a mock database session + mock_db = MagicMock() + + # Run the function and check that the expected calls were made: + await gather_upstream_files( + visit_name="dummy", + session_id=session_id, + upstream_file_request=params, + db=mock_db, + ) + mock_gather.assert_called_with( + session_id=session_id, + upstream_instrument=upstream_instrument, + upstream_visit_path=Path(upstream_visit_path), + search_strings=search_strings, + db=mock_db, + ) diff --git a/tests/server/api/test_session_info.py b/tests/server/api/test_session_info.py new file mode 100644 index 000000000..1e3cf8e51 --- /dev/null +++ b/tests/server/api/test_session_info.py @@ -0,0 +1,59 @@ +from pathlib import Path +from typing import Any +from unittest.mock import MagicMock + +import pytest +from pytest_mock import MockerFixture + +from murfey.server.api.session_info import gather_upstream_files +from murfey.util.models import UpstreamFileRequestInfo + + +@pytest.mark.parametrize( + "search_strings", + ( + ["dummy"], + [], + None, + ), +) +@pytest.mark.asyncio +async def test_gather_upstream_files( + mocker: MockerFixture, + tmp_path: Path, + search_strings: list[str] | None, +): + # Construct dictionary to pass to Pydantic model + session_id = 1 + upstream_instrument = "dummy" + upstream_visit_path = str(tmp_path / "dummy") + params_dict: dict[str, Any] = { + "upstream_instrument": upstream_instrument, + "upstream_visit_path": upstream_visit_path, + } + if search_strings is not None: + params_dict["search_strings"] = search_strings + + # Validate the incoming message + params = UpstreamFileRequestInfo(**params_dict) + + # Patch the actual 'gather_upstream_files' function + mock_gather = mocker.patch("murfey.server.api.session_info._gather_upstream_files") + + # Create a mock database session + mock_db = MagicMock() + + # Run the function and check that the expected calls were made: + await gather_upstream_files( + visit_name="dummy", + session_id=session_id, + upstream_file_request=params, + db=mock_db, + ) + mock_gather.assert_called_with( + session_id=session_id, + upstream_instrument=upstream_instrument, + upstream_visit_path=Path(upstream_visit_path), + search_strings=search_strings, + db=mock_db, + ) diff --git a/tests/server/api/test_session_shared.py b/tests/server/api/test_session_shared.py index 6e9f56aca..b97a32b5d 100644 --- a/tests/server/api/test_session_shared.py +++ b/tests/server/api/test_session_shared.py @@ -129,73 +129,77 @@ def test_find_upstream_visits_permission_error( assert result == upstream_visits -gather_upstream_files_test_matrix: tuple[ - tuple[tuple[str, list[str], list[str]], ...], ... -] = ( - # CLEM - ( - # Search strings, files to match, and files to avoid - ( - "processed/**/composite*.tiff", - [ - file - for sublist in [ - [ - f"processed/grid1/TileScan1/Position_{n}/composite_BF_FL.tiff" - for n in range(5) - ], - ] - for file in sublist - ], - [ - file - for sublist in [ - [ - f"processed/grid1/TileScan1/Position_{n}/{color}.tiff" - for n in range(5) - for color in ("gray", "green", "red") - ], - ] - for file in sublist - ], - ), - ( - "screenshots/**/*", - [ - file - for sublist in [ - [f"screenshots/overview_{n}.png" for n in range(10)], - [f"screenshots/annotated_{n}.png" for n in range(10)], - ] - for file in sublist - ], - [], - ), - ), - # FIB +# File search strings configured, and the files they will be associated with +clem_upstream_file_dict = { + "processed/**/composite*.tiff": [ + f"processed/grid1/TileScan1/Position_{n}/composite_BF_FL.tiff" for n in range(5) + ], + "processed/**/gray.tiff": [ + f"processed/grid1/TileScan1/Position_{n}/gray.tiff" for n in range(5) + ], + "processed/**/red.tiff": [ + f"processed/grid1/TileScan1/Position_{n}/red.tiff" for n in range(5) + ], + "processed/**/green.tiff": [ + f"processed/grid1/TileScan1/Position_{n}/green.tiff" for n in range(5) + ], + "processed/**/blue.tiff": [ + f"processed/grid1/TileScan1/Position_{n}/blue.tiff" for n in range(5) + ], + "processed/**/cyan.tiff": [ + f"processed/grid1/TileScan1/Position_{n}/cyan.tiff" for n in range(5) + ], + "processed/**/magenta.tiff": [ + f"processed/grid1/TileScan1/Position_{n}/magenta.tiff" for n in range(5) + ], + "processed/**/yellow.tiff": [ + f"processed/grid1/TileScan1/Position_{n}/yellow.tiff" for n in range(5) + ], + "screenshots/**/*": [ + *[f"screenshots/overview_{n}.png" for n in range(10)], + *[f"screenshots/annotated_{n}.png" for n in range(10)], + ], +} +fib_upstream_file_dict = { + "maps/**/*": [ + *[f"maps/data_{n}.txt" for n in range(5)], + *[f"maps/map/image_{n}.tiff" for n in range(5)], + ], +} + + +@pytest.mark.parametrize( + "test_params", ( - # Search strings, files to match, and files to avoid + # Workflow to test | Search strings to use + (clem_upstream_file_dict, ["processed/**/composite*.tiff"]), + (clem_upstream_file_dict, ["processed/**/gray.tiff"]), + (clem_upstream_file_dict, ["processed/**/red.tiff"]), + (clem_upstream_file_dict, ["processed/**/green.tiff"]), + (clem_upstream_file_dict, ["processed/**/blue.tiff"]), + (clem_upstream_file_dict, ["processed/**/cyan.tiff"]), + (clem_upstream_file_dict, ["processed/**/magenta.tiff"]), + (clem_upstream_file_dict, ["processed/**/yellow.tiff"]), + (clem_upstream_file_dict, ["screenshots/**/*"]), ( - "maps/**/*", + clem_upstream_file_dict, [ - file - for sublist in [ - [f"maps/data_{n}.txt" for n in range(5)], - [f"maps/map/image_{n}.tiff" for n in range(5)], - ] - for file in sublist + "processed/**/composite*.tiff", + "processed/**/gray.tiff", + "screenshots/**/*", ], - [], ), + (clem_upstream_file_dict, []), + (clem_upstream_file_dict, None), + (fib_upstream_file_dict, ["maps/**/*"]), + (fib_upstream_file_dict, []), + (fib_upstream_file_dict, None), ), ) - - -@pytest.mark.parametrize("test_params", gather_upstream_files_test_matrix) def test_gather_upstream_files( mocker: MockerFixture, tmp_path: Path, - test_params: tuple[tuple[str, list[str], list[str]], ...], + test_params: tuple[dict[str, list[str]], list[str] | None], ): # Get the visit, instrument name, and session ID visit_name_root = f"{ExampleVisit.proposal_code}{ExampleVisit.proposal_number}" @@ -203,30 +207,42 @@ def test_gather_upstream_files( instrument_name = ExampleVisit.instrument_name session_id = ExampleVisit.murfey_session_id - # Unpack the test params - search_strings = [item[0] for item in test_params] - upstream_relative_paths = [file for item in test_params for file in item[1]] - other_relative_paths = [file for item in test_params for file in item[2]] - # Set the upstream instrument and upstream visit to access upstream_instrument = f"{instrument_name}01" upstream_visit = f"{visit_name_root}-5" upstream_visit_path = tmp_path / f"{upstream_instrument}/data/2020/{upstream_visit}" - # Construct the files and directories - upstream_files = [ - upstream_visit_path / relative_path for relative_path in upstream_relative_paths - ] - other_files = [ - upstream_visit_path / relative_path for relative_path in other_relative_paths - ] + # Unpack the test params + upstream_file_dict, search_strings = test_params + + # Sort files into expected ones and skipped ones + if search_strings is None: + expected_files = [ + upstream_visit_path / file + for file_list in upstream_file_dict.values() + for file in file_list + ] + skipped_files = [] + else: + expected_files = [ + upstream_visit_path / file + for search_string in search_strings + for file in upstream_file_dict[search_string] + ] + skipped_files = [ + upstream_visit_path / file + for search_string, file_list in upstream_file_dict.items() + for file in file_list + if search_string not in search_strings + ] - for file in upstream_files: + # Make files + for file in expected_files: if not file.parent.exists(): file.parent.mkdir(parents=True) file.touch(exist_ok=True) assert file.is_file() - for file in other_files: + for file in skipped_files: if not file.parent.exists(): file.parent.mkdir(parents=True) file.touch(exist_ok=True) @@ -242,7 +258,7 @@ def test_gather_upstream_files( # Mock the MachineConfig for this instrument mock_machine_config = MagicMock(spec=MachineConfig) mock_machine_config.upstream_data_search_strings = { - upstream_instrument: search_strings + upstream_instrument: list(upstream_file_dict.keys()), } mock_get_machine_config = mocker.patch( "murfey.server.api.session_shared.get_machine_config", @@ -254,6 +270,7 @@ def test_gather_upstream_files( session_id=session_id, upstream_instrument=upstream_instrument, upstream_visit_path=upstream_visit_path, + search_strings=search_strings, db=mock_murfey_db, ) - ) == sorted(upstream_files) + ) == sorted(expected_files)